/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2022 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
#ifdef ASSERT
// On RISC, there's no benefit to verifying instruction boundaries,
// so the platform-dependent check always answers "no".
bool AbstractAssembler::pd_check_instruction_mark() {
  return false;
}
#endif // ASSERT
// Emit a load of the doubleword at `a + si31` into `d`.
//   si31             non-negative offset that must fit into 31 bits (asserted)
//   emit_filler_nop  if non-zero, the short (single ld) form is padded with a nop
// When the offset does not fit into 16 bits, `d` is used to hold the
// intermediate `a + hi` value. This variant does not check that d and a differ.
void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
  assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");

  if (!Assembler::is_simm(si31, 16)) {
    // Large offset: add the high part to the base, then load with the low part.
    const int upper = MacroAssembler::largeoffset_si16_si16_hi(si31);
    const int lower = MacroAssembler::largeoffset_si16_si16_lo(si31);
    addis(d, a, upper);
    ld(d, lower, d);
    return;
  }

  // Small offset: a single ld suffices.
  ld(d, si31, a);
  if (emit_filler_nop) {
    nop();
  }
}
// Checked front end for ld_largeoffset_unchecked(): asserts that the
// destination and the base register are distinct before emitting the load.
void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) {
  // The large-offset form writes d before the final load reads it as base.
  assert_different_registers(d, a);

  ld_largeoffset_unchecked(d, si31, a, emit_filler_nop);
}
// Issue instructions that calculate given TOC from global TOC. void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16, bool add_relocation, bool emit_dummy_addr) { int offset = -1; if (emit_dummy_addr) {
offset = -128; // dummy address
} elseif (addr != (address)(intptr_t)-1) {
offset = MacroAssembler::offset_to_global_toc(addr);
}
if (hi16) {
addis(dst, R29_TOC, MacroAssembler::largeoffset_si16_si16_hi(offset));
} if (lo16) { if (add_relocation) { // Relocate at the addi to avoid confusion with a load from the method's TOC.
relocate(internal_word_Relocation::spec(addr));
}
addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset));
}
}
// The relocation points to the second instruction, the addi, // and the addi reads and writes the same register dst. constint dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding addis which writes to dst. int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord; while (inst1_addr >= bound) {
inst1 = *(int *) inst1_addr; if (is_addis(inst1) && inv_rt_field(inst1) == dst) { // Stop, found the addis which writes dst. break;
}
inst1_addr -= BytesPerInstWord;
}
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset)); return inst1_addr;
}
// The relocation points to the second instruction, the addi, // and the addi reads and writes the same register dst. constint dst = inv_rt_field(inst2);
assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
// Now, find the preceding addis which writes to dst. int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord; while (inst1_addr >= bound) {
inst1 = *(int *) inst1_addr; if (is_addis(inst1) && inv_rt_field(inst1) == dst) { // stop, found the addis which writes dst break;
}
inst1_addr -= BytesPerInstWord;
}
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0); // -1 is a special case if (offset == -1) { return (address)(intptr_t)-1;
} else { return global_toc() + offset;
}
}
#ifdef _LP64 // Patch compressed oops or klass constants. // Assembler sequence is // 1) compressed oops: // lis rx = const.hi // ori rx = rx | const.lo // 2) compressed klass: // lis rx = const.hi // clrldi rx = rx & 0xFFFFffff // clearMS32b, optional // ori rx = rx | const.lo // Clrldi will be passed by.
// Patch the 32-bit compressed oop constant materialized by a lis/ori pair.
//   a      address of the relocated instruction (the ori)
//   bound  lowest address the backward search for the lis may inspect
//   data   new compressed oop value
// Returns the address of the patched lis.
address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
  assert(UseCompressedOops, "Should only patch compressed oops");

  // The relocation points to the second instruction, the ori.
  const address inst2_addr = a;
  const int inst2 = *(int *)inst2_addr;

  // The ori reads and writes the same register dst.
  const int dst = inv_rta_field(inst2);
  assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst");

  // Now, find the preceding lis which writes to dst.
  int inst1 = 0;
  address inst1_addr = inst2_addr - BytesPerInstWord;
  bool inst1_found = false;
  while (inst1_addr >= bound) {
    inst1 = *(int *)inst1_addr;
    if (is_lis(inst1) && inv_rs_field(inst1) == dst) {
      inst1_found = true;
      break;
    }
    inst1_addr -= BytesPerInstWord;
  }
  assert(inst1_found, "inst is not lis");

  // Split the constant into the halves carried by the two immediates.
  uint32_t data_value = CompressedOops::narrow_oop_value(data);
  int xc = (data_value >> 16) & 0xffff;
  int xd = (data_value >>  0) & 0xffff;

  set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
  set_imm((int *)inst2_addr,        (xd)); // unsigned int
  return inst1_addr;
}
// Get compressed oop constant.
narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
assert(UseCompressedOops, "Should only patch compressed oops");
// The relocation points to the second instruction, the ori, // and the ori reads and writes the same register dst. constint dst = inv_rta_field(inst2);
assert(is_ori(inst2) && inv_rs_field(inst2) == dst, "must be ori reading and writing dst"); // Now, find the preceding lis which writes to dst. int inst1 = 0;
address inst1_addr = inst2_addr - BytesPerInstWord; bool inst1_found = false;
while (inst1_addr >= bound) {
inst1 = *(int *) inst1_addr; if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;}
inst1_addr -= BytesPerInstWord;
}
assert(inst1_found, "inst is not lis");
// Returns true if successful. bool MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size) { int toc_offset = 0; // Use RelocationHolder::none for the constant pool entry, otherwise // we will end up with a failing NativeCall::verify(x) where x is // the address of the constant pool entry. // FIXME: We should insert relocation information for oops at the constant // pool entries instead of inserting it at the loads; patching of a constant // pool entry should be less expensive.
address const_address = address_constant((address)a.value(), RelocationHolder::none); if (const_address == NULL) { returnfalse; } // allocation failure // Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(const_address - code()->consts()->start());
ld_largeoffset_unchecked(dst, toc_offset, toc, fixed_size); returntrue;
}
// The relocation points to the ld or the addis. return (is_ld(inst1)) ||
(is_addis(inst1) && inv_ra_field(inst1) != 0);
}
int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) {
assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc");
// Conditional far branch for destinations encodable in 24+2 bits. void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {
// If requested by flag optimize, relocate the bc_far as a // runtime_call and prepare for optimizing it when the code gets // relocated. if (optimize == bc_far_optimize_on_relocate) {
relocate(relocInfo::runtime_call_type);
}
// We emit two branches. // First, a conditional branch which jumps around the far branch. const address not_taken_pc = pc() + 2 * BytesPerInstWord; const address bc_pc = pc();
bc(opposite_boint, biint, not_taken_pc);
// Second, an unconditional far branch which jumps to dest. // Note: target(dest) remembers the current pc (see CodeSection::target) // and returns the current pc if the label is not bound yet; when // the label gets bound, the unconditional far branch will be patched. const address target_pc = target(dest); const address b_pc = pc();
b(target_pc);
if (is_bc_far_variant3_at(instruction_addr)) { // variant 3, far cond branch to the next instruction, already patched to nops: // // nop // endgroup // SKIP/DEST: // return;
}
// first, extract boint and biint from the current branch int boint = 0; int biint = 0;
ResourceMark rm; constint code_size = 2 * BytesPerInstWord;
CodeBuffer buf(instruction_addr, code_size);
MacroAssembler masm(&buf); if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) { // Far branch to next instruction: Optimize it by patching nops (produce variant 3).
masm.nop();
masm.endgroup();
} else { if (is_bc_far_variant1_at(instruction_addr)) { // variant 1, the 1st instruction contains the destination address: // // bcxx DEST // nop // constint instruction_1 = *(int*)(instruction_addr);
boint = inv_bo_field(instruction_1);
biint = inv_bi_field(instruction_1);
} elseif (is_bc_far_variant2_at(instruction_addr)) { // variant 2, the 2nd instruction contains the destination address: // // b!cxx SKIP // bxx DEST // SKIP: // constint instruction_1 = *(int*)(instruction_addr);
boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))),
opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1))));
biint = inv_bi_field(instruction_1);
} else { // variant 4???
ShouldNotReachHere();
}
// second, set the new branch destination and optimize the code if (dest != instruction_addr + 4 && // the bc_far is still unbound!
masm.is_within_range_of_bcxx(dest, instruction_addr)) { // variant 1: // // bcxx DEST // nop //
masm.bc(boint, biint, dest);
masm.nop();
} else { // variant 2: // // b!cxx SKIP // bxx DEST // SKIP: // constint opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
opposite_bcond(inv_boint_bcond(boint))); const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
masm.bc(opposite_boint, biint, not_taken_pc);
masm.b(dest);
}
}
ICache::ppc64_flush_icache_bytes(instruction_addr, code_size);
}
// Emit a NOT mt-safe patchable 64 bit absolute call/jump. void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) { // get current pc
uint64_t start_pc = (uint64_t) pc();
const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last const address pc_of_b = (address) (start_pc + (0*BytesPerInstWord)); // b is first
// relocate here if (rt != relocInfo::none) {
relocate(rt);
}
if ( ReoptimizeCallSequences &&
(( link && is_within_range_of_b(dest, pc_of_bl)) ||
(!link && is_within_range_of_b(dest, pc_of_b)))) { // variant 2: // Emit an optimized, pc-relative call/jump.
if (link) { // some padding
nop();
nop();
nop();
nop();
nop();
nop();
// do the call
assert(pc() == pc_of_bl, "just checking");
bl(dest, relocInfo::none);
} else { // do the jump
assert(pc() == pc_of_b, "just checking");
b(dest, relocInfo::none);
// some padding
nop();
nop();
nop();
nop();
nop();
nop();
}
// Assert that we can identify the emitted call/jump.
assert(is_bxx64_patchable_variant2_at((address)start_pc, link), "can't identify emitted call");
} else { // variant 1:
mr(R0, R11); // spill R11 -> R0.
// Load the destination address into CTR, // calculate destination relative to global toc.
calculate_address_from_global_toc(R11, dest, true, true, false);
// do the call/jump if (link) {
bctrl();
} else{
bctr();
} // Assert that we can identify the emitted call/jump.
assert(is_bxx64_patchable_variant1b_at((address)start_pc, link), "can't identify emitted call");
}
// Assert that we can identify the emitted call/jump.
assert(is_bxx64_patchable_at((address)start_pc, link), "can't identify emitted call");
assert(get_dest_of_bxx64_patchable_at((address)start_pc, link) == dest, "wrong encoding of dest address");
}
// Does the call64_patchable instruction use a pc-relative encoding of // the call destination? bool MacroAssembler::is_bxx64_patchable_pcrelative_at(address instruction_addr, bool link) { // variant 2 is pc-relative return is_bxx64_patchable_variant2_at(instruction_addr, link);
}
// Preserve stack pointer register (R1_SP) and system thread id register (R13); // although they're technically volatile for (int i = 2; i < 13; i++) { Register reg = as_Register(i); if (reg == excluded_register) { continue;
}
// Store the condition register and the link register into the _abi0(cr)
// and _abi0(lr) slots relative to R1_SP, using tmp as transfer register.
void MacroAssembler::save_LR_CR(Register tmp) {
  // CR first ...
  mfcr(tmp);
  std(tmp, _abi0(cr), R1_SP);

  // ... LR last.
  mflr(tmp);
  std(tmp, _abi0(lr), R1_SP);
  // Tmp must contain lr on exit! (see return_addr and prolog in ppc64.ad)
}
// Push a frame of size `bytes'. void MacroAssembler::push_frame(unsignedint bytes, Register tmp) { long offset = align_addr(bytes, frame::alignment_in_bytes); if (is_simm(-offset, 16)) {
stdu(R1_SP, -offset, R1_SP);
} else {
load_const_optimized(tmp, -offset);
stdux(R1_SP, R1_SP, tmp);
}
}
// Push a frame of size `bytes' plus abi_reg_args on top. void MacroAssembler::push_frame_reg_args(unsignedint bytes, Register tmp) {
push_frame(bytes + frame::abi_reg_args_size, tmp);
}
// Setup up a new C frame with a spill area for non-volatile GPRs and // additional space for local variables. void MacroAssembler::push_frame_reg_args_nonvolatiles(unsignedint bytes, Register tmp) {
push_frame(bytes + frame::abi_reg_args_size + frame::spill_nonvolatiles_size, tmp);
}
// Pop current C frame. void MacroAssembler::pop_frame() {
ld(R1_SP, _abi0(callers_sp), R1_SP);
}
#if defined(ABI_ELFv2)
// Branch (and_link == false) or call (and_link == true) to the function
// whose entry address is in r_function_entry. The address is copied to R12
// if it is not already there, then moved to CTR.
// Updates and returns _last_calls_return_pc.
address MacroAssembler::branch_to(Register r_function_entry, bool and_link) {
  // TODO(asmundak): make sure the caller uses R12 as function descriptor
  // most of the times.
  if (r_function_entry != R12) {
    mr(R12, r_function_entry);
  }
  mtctr(R12);

  // Do a call or a branch.
  if (and_link) {
    bctrl();
  } else {
    bctr();
  }

  const address return_pc = pc();
  _last_calls_return_pc = return_pc;
  return return_pc;
}
// Call a C function via a function descriptor and use full C
// calling conventions. Updates and returns _last_calls_return_pc.
address MacroAssembler::call_c(Register r_function_entry) {
  const bool and_link = true;
  return branch_to(r_function_entry, and_link);
}
// For tail calls: only branch, don't link, so callee returns to caller of this function.
address MacroAssembler::call_c_and_return_to_caller(Register r_function_entry) {
  const bool and_link = false;
  return branch_to(r_function_entry, and_link);
}
#else // Generic version of a call to C function via a function descriptor // with variable support for C calling conventions (TOC, ENV, etc.). // Updates and returns _last_calls_return_pc.
// Branch or call through a function descriptor.
//   function_descriptor  register holding the descriptor address; must not be R0
//   and_link             true: bctrl (call), false: bctr (jump)
//   load_toc_of_callee   load R2_TOC from the descriptor
//   load_env_of_callee   load R11 from the descriptor; if false while the TOC
//                        is loaded, R11 is cleared instead
// save_toc_before_call and restore_toc_after_call are not consulted here.
address MacroAssembler::branch_to(Register function_descriptor,
                                  bool and_link,
                                  bool save_toc_before_call,
                                  bool restore_toc_after_call,
                                  bool load_toc_of_callee,
                                  bool load_env_of_callee) {
  // we emit standard ptrgl glue code here
  assert((function_descriptor != R0), "function_descriptor cannot be R0");

  // retrieve necessary entries from the function descriptor
  ld(R0, in_bytes(FunctionDescriptor::entry_offset()), function_descriptor);
  mtctr(R0);

  if (load_toc_of_callee) {
    ld(R2_TOC, in_bytes(FunctionDescriptor::toc_offset()), function_descriptor);
  }

  if (load_env_of_callee) {
    ld(R11, in_bytes(FunctionDescriptor::env_offset()), function_descriptor);
  } else if (load_toc_of_callee) {
    li(R11, 0);
  }

  // do a call or a branch
  if (and_link) {
    bctrl();
  } else {
    bctr();
  }

  const address return_pc = pc();
  _last_calls_return_pc = return_pc;
  return return_pc;
}
// Call a C function via a function descriptor and use full C calling
// conventions.
// We don't use the TOC in generated code, so there is no need to save
// and restore its value.
address MacroAssembler::call_c(Register fd) {
  return branch_to(fd,
                   /*and_link=*/true,
                   /*save toc=*/false,
                   /*restore toc=*/false,
                   /*load toc=*/true,
                   /*load env=*/true);
}
// Call the C function described by fd.
//   rt != none: the call has to stay relocatable; both forms emitted in this
//               case are kept at the same code size.
//   rt == none: no relocation is needed, so a plain bl is used when the
//               target is in range.
// "Friend functions" (FunctionDescriptor::is_friend_function()) are called
// directly at their entry point without loading TOC and env.
// Updates and returns _last_calls_return_pc.
address MacroAssembler::call_c(const FunctionDescriptor* fd, relocInfo::relocType rt) {
  if (rt == relocInfo::none) {
    // This call does not need to be relocatable, do more aggressive
    // optimizations.
    if (ReoptimizeCallSequences && fd->is_friend_function()) {
      // it's a friend function, load the entry point and don't care about
      // toc and env.
      address dest = fd->entry();
      if (is_within_range_of_b(dest, pc())) {
        bl(dest);
      } else {
        bl64_patchable(dest, rt);
      }
      _last_calls_return_pc = pc();
      return _last_calls_return_pc;
    }

    // It's not a friend function as defined by class FunctionDescriptor,
    // so do a full call-c here.
    load_const(R11, (address)fd, R0);
    return branch_to(R11, /*and_link=*/true,
                          /*save toc=*/false,
                          /*restore toc=*/false,
                          /*load toc=*/true,
                          /*load env=*/true);
  }

  // this call needs to be relocatable
  const bool needs_full_call =
      !ReoptimizeCallSequences
      || (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
      || fd == NULL   // support code-size estimation
      || !fd->is_friend_function()
      || fd->entry() == NULL;

  if (needs_full_call) {
    // it's not a friend function as defined by class FunctionDescriptor,
    // so do a full call-c here.
    load_const(R11, (address)fd, R0);

    bool has_env = (fd != NULL && fd->env() != NULL);
    return branch_to(R11, /*and_link=*/true,
                          /*save toc=*/false,
                          /*restore toc=*/false,
                          /*load toc=*/true,
                          /*load env=*/has_env);
  }

  // It's a friend function. Load the entry point and don't care about
  // toc and env. Use an optimizable call instruction, but ensure the
  // same code-size as in the case of a non-friend function.
  nop();
  nop();
  nop();
  bl64_patchable(fd->entry(), rt);
  _last_calls_return_pc = pc();
  return _last_calls_return_pc;
}
// Call a C function. All constants needed reside in TOC. // // Read the address to call from the TOC. // Read env from TOC, if fd specifies an env. // Read new TOC from TOC.
address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
relocInfo::relocType rt, Register toc) { if (!ReoptimizeCallSequences
|| (rt != relocInfo::runtime_call_type && rt != relocInfo::none)
|| !fd->is_friend_function()) { // It's not a friend function as defined by class FunctionDescriptor, // so do a full call-c here.
assert(fd->entry() != NULL, "function must be linked");
AddressLiteral fd_entry(fd->entry()); bool success = load_const_from_method_toc(R11, fd_entry, toc, /*fixed_size*/ true);
mtctr(R11); if (fd->env() == NULL) {
li(R11, 0);
nop();
} else {
AddressLiteral fd_env(fd->env());
success = success && load_const_from_method_toc(R11, fd_env, toc, /*fixed_size*/ true);
}
AddressLiteral fd_toc(fd->toc()); // Set R2_TOC (load from toc)
success = success && load_const_from_method_toc(R2_TOC, fd_toc, toc, /*fixed_size*/ true);
bctrl();
_last_calls_return_pc = pc(); if (!success) { return NULL; }
} else { // It's a friend function, load the entry point and don't care about // toc and env. Use an optimizable call instruction, but ensure the // same code-size as in the case of a non-friend function.
nop();
bl64_patchable(fd->entry(), rt);
_last_calls_return_pc = pc();
} return _last_calls_return_pc;
} #endif// ABI_ELFv2
void MacroAssembler::post_call_nop() { // Make inline again when loom is always enabled. if (!Continuations::enabled()) { return;
}
nop();
}
// Check whether instruction is a read access to the polling page // which was emitted by load_from_polling_page(..). bool MacroAssembler::is_load_from_polling_page(int instruction, void* ucontext,
address* polling_address_ptr) { if (!is_ld(instruction)) returnfalse; // It's not a ld. Fail.
int rt = inv_rt_field(instruction); int ra = inv_ra_field(instruction); int ds = inv_ds_field(instruction); if (!(ds == 0 && ra != 0 && rt == 0)) { returnfalse; // It's not a ld(r0, X, ra). Fail.
}
if (!ucontext) { // Set polling address. if (polling_address_ptr != NULL) {
*polling_address_ptr = NULL;
} returntrue; // No ucontext given. Can't check value of ra. Assume true.
}
#ifdef LINUX // Ucontext given. Check that register ra contains the address of // the safepoing polling page.
ucontext_t* uc = (ucontext_t*) ucontext; // Set polling address.
address addr = (address)uc->uc_mcontext.regs->gpr[ra] + (ssize_t)ds; if (polling_address_ptr != NULL) {
*polling_address_ptr = addr;
} return SafepointMechanism::is_poll_address(addr); #else // Not on Linux, ucontext must be NULL.
ShouldNotReachHere(); returnfalse; #endif
}
void MacroAssembler::bang_stack_with_offset(int offset) { // When increasing the stack, the old stack pointer will be written // to the new top of stack according to the PPC64 abi. // Therefore, stack banging is not necessary when increasing // the stack by <= os::vm_page_size() bytes. // When increasing the stack by a larger amount, this method is // called repeatedly to bang the intermediate pages.
// Stack grows down, caller passes positive offset.
assert(offset > 0, "must bang with positive offset");
long stdoffset = -offset;
if (is_simm(stdoffset, 16)) { // Signed 16 bit offset, a simple std is ok. if (UseLoadInstructionsForStackBangingPPC64) {
ld(R0, (int)(signedshort)stdoffset, R1_SP);
} else {
std(R0,(int)(signedshort)stdoffset, R1_SP);
}
} elseif (is_simm(stdoffset, 31)) { constint hi = MacroAssembler::largeoffset_si16_si16_hi(stdoffset); constint lo = MacroAssembler::largeoffset_si16_si16_lo(stdoffset);
// Temps and addr_base are killed if size < 4 and processor does not support respective instructions. // Only signed types are supported with size < 4. // Atomic add always kills tmp1. void MacroAssembler::atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value, Register addr_base, Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint, bool is_add, int size) { // Sub-word instructions are available since Power 8. // For older processors, instruction_type != size holds, and we // emulate the sub-word instructions by constructing a 4-byte value // that leaves the other bytes unchanged. constint instruction_type = VM_Version::has_lqarx() ? size : 4;
switch (instruction_type) { case 4: lwarx(val32, addr_base, cmpxchgx_hint); break; case 2: lharx(val32, addr_base, cmpxchgx_hint); break; case 1: lbarx(val32, addr_base, cmpxchgx_hint); break; default: ShouldNotReachHere();
}
if (instruction_type != size) {
srw(dest_current_value, val32, shift_amount);
}
if (is_add) { add(modval, dest_current_value, exchange_value); }
if (instruction_type != size) { // Transform exchange value such that the replacement can be done by one xor instruction.
xorr(modval, dest_current_value, is_add ? modval : exchange_value);
clrldi(modval, modval, (size == 1) ? 56 : 48);
slw(modval, modval, shift_amount);
xorr(modval, val32, modval);
}
switch (instruction_type) { case 4: stwcx_(modval, addr_base); break; case 2: sthcx_(modval, addr_base); break; case 1: stbcx_(modval, addr_base); break; default: ShouldNotReachHere();
}
// Temps, addr_base and exchange_value are killed if size < 4 and processor does not support respective instructions. // Only signed types are supported with size < 4. void MacroAssembler::cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, Register tmp1, Register tmp2,
Label &retry, Label &failed, bool cmpxchgx_hint, int size) { // Sub-word instructions are available since Power 8. // For older processors, instruction_type != size holds, and we // emulate the sub-word instructions by constructing a 4-byte value // that leaves the other bytes unchanged. constint instruction_type = VM_Version::has_lqarx() ? size : 4;
// Save one branch if result is returned via register and // result register is different from the other ones. bool use_result_reg = (int_flag_success != noreg); bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
int_flag_success != exchange_value && int_flag_success != addr_base &&
int_flag_success != tmp1 && int_flag_success != tmp2);
assert(!weak || flag == CCR0, "weak only supported with CCR0");
assert(size == 1 || size == 2 || size == 4, "unsupported");
if (use_result_reg && preset_result_reg) {
li(int_flag_success, 0); // preset (assume cas failed)
}
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). if (contention_hint) { // Don't try to reserve if cmp fails. switch (size) { case 1: lbz(dest_current_value, 0, addr_base); extsb(dest_current_value, dest_current_value); break; case 2: lha(dest_current_value, 0, addr_base); break; case 4: lwz(dest_current_value, 0, addr_base); break; default: ShouldNotReachHere();
}
cmpw(flag, dest_current_value, compare_value);
bne(flag, failed);
}
// release/fence semantics if (semantics & MemBarRel) {
release();
}
// Result in register (must do this at the end because int_flag_success can be the // same register as one above). if (use_result_reg) {
li(int_flag_success, 1);
}
// Performs atomic compare exchange: // if (compare_value == *addr_base) // *addr_base = exchange_value // int_flag_success = 1; // else // int_flag_success = 0; // // ConditionRegister flag = cmp(compare_value, *addr_base) // Register dest_current_value = *addr_base // Register compare_value Used to compare with value in memory // Register exchange_value Written to memory if compare_value == *addr_base // Register addr_base The memory location to compareXChange // Register int_flag_success Set to 1 if exchange_value was written to *addr_base // // To avoid the costly compare exchange the value is tested beforehand. // Several special cases exist to avoid that unnecessary information is generated. // void MacroAssembler::cmpxchgd(ConditionRegister flag, Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, Register addr_base, int semantics, bool cmpxchgx_hint, Register int_flag_success, Label* failed_ext, bool contention_hint, bool weak) {
Label retry;
Label failed_int;
Label& failed = (failed_ext != NULL) ? *failed_ext : failed_int;
Label done;
// Save one branch if result is returned via register and result register is different from the other ones. bool use_result_reg = (int_flag_success!=noreg); bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
int_flag_success!=exchange_value && int_flag_success!=addr_base);
assert(!weak || flag == CCR0, "weak only supported with CCR0");
assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
if (use_result_reg && preset_result_reg) {
li(int_flag_success, 0); // preset (assume cas failed)
}
// Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). if (contention_hint) { // Don't try to reserve if cmp fails.
ld(dest_current_value, 0, addr_base);
cmpd(flag, compare_value, dest_current_value);
bne(flag, failed);
}
// release/fence semantics if (semantics & MemBarRel) {
release();
}
// result in register (must do this at the end because int_flag_success can be the same register as one above) if (use_result_reg) {
li(int_flag_success, 1);
}
// Look up the method for a megamorphic invokeinterface call. // The target method is determined by <intf_klass, itable_index>. // The receiver klass is in recv_klass. // On success, the result will be in method_result, and execution falls through. // On failure, execution transfers to the given label. void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass,
RegisterOrConstant itable_index, Register method_result, Register scan_temp, Register temp2,
Label& L_no_such_interface, bool return_method) {
assert_different_registers(recv_klass, intf_klass, method_result, scan_temp);
// Compute start of first itableOffsetEntry (which is at the end of the vtable). int vtable_base = in_bytes(Klass::vtable_start_offset()); int itentry_off = itableMethodEntry::method_offset_in_bytes(); int logMEsize = exact_log2(itableMethodEntry::size() * wordSize); int scan_step = itableOffsetEntry::size() * wordSize; int log_vte_size= exact_log2(vtableEntry::size_in_bytes());
lwz(scan_temp, in_bytes(Klass::vtable_length_offset()), recv_klass); // %%% We should store the aligned, prescaled offset in the klassoop. // Then the next several instructions would fold away.
for (int peel = 1; peel >= 0; peel--) { // %%%% Could load both offset and interface in one ldx, if they were // in the opposite order. This would save a load.
ld(temp2, itableOffsetEntry::interface_offset_in_bytes(), scan_temp);
// Check that this entry is non-null. A null entry means that // the receiver class doesn't implement the interface, and wasn't the // same as when the caller was compiled.
cmpd(CCR0, temp2, intf_klass);
if (peel) {
beq(CCR0, found_method);
} else {
bne(CCR0, search); // (invert the test to fall through to found_method...)
}
Label L_fallthrough; int label_nulls = 0; if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1 ||
(L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), "at most one NULL in the batch, usually");
// If the pointers are equal, we are done (e.g., String[] elements). // This self-check enables sharing of secondary supertype arrays among // non-primary types such as array-of-interface. Otherwise, each such // type would need its own customized SSA. // We move this check to the front of the fast path because many // type checks are in fact trivially successful in this manner, // so we get a nicely predicted branch right at the start of the check.
cmpd(CCR0, sub_klass, super_klass);
beq(CCR0, *L_success);
// Check the supertype display: if (must_load_sco) { // The super check offset is always positive...
lwz(check_cache_offset, sco_offset, super_klass);
super_check_offset = RegisterOrConstant(check_cache_offset); // super_check_offset is register.
assert_different_registers(sub_klass, super_klass, cached_super, super_check_offset.as_register());
} // The loaded value is the offset from KlassOopDesc.
// This check has worked decisively for primary supers. // Secondary supers are sought in the super_cache ('super_cache_addr'). // (Secondary supers are interfaces and very deeply nested subtypes.) // This works in the same check above because of a tricky aliasing // between the super_cache and the primary super display elements. // (The 'super_check_addr' can address either, as the case requires.) // Note that the cache is updated below if it does not help us find // what we need immediately. // So if it was a primary super, we can just fail immediately. // Otherwise, it's the slow path for us (no success at this point).
#define FINAL_JUMP(label) if (&(label) != &L_fallthrough) { b(label); }
if (super_check_offset.is_register()) {
beq(CCR0, *L_success);
cmpwi(CCR0, super_check_offset.as_register(), sc_offset); if (L_failure == &L_fallthrough) {
beq(CCR0, *L_slow_path);
} else {
bne(CCR0, *L_failure);
FINAL_JUMP(*L_slow_path);
}
} else { if (super_check_offset.as_constant() == sc_offset) { // Need a slow path; fast failure is impossible. if (L_slow_path == &L_fallthrough) {
beq(CCR0, *L_success);
} else {
bne(CCR0, *L_slow_path);
FINAL_JUMP(*L_success);
}
} else { // No slow path; it's a fast decision. if (L_failure == &L_fallthrough) {
beq(CCR0, *L_success);
} else {
bne(CCR0, *L_failure);
FINAL_JUMP(*L_success);
}
}
}
bind(L_fallthrough); #undef FINAL_JUMP
}
// Slow path of the subtype check: walks an array of entries in a loop
// (ld / cmpd / addi / bdnz) and compares each entry with super_klass.
//   temp1_reg   used as array_ptr, the cursor into the array
//   temp2_reg   used as temp, holding the entry just loaded
//   L_success   if non-NULL, branched to on a hit
//   result_reg  if not noreg, receives 0 on a hit and 1 on a miss
// On a hit, super_klass is stored at target_offset in sub_klass. If neither
// L_success nor result_reg is supplied, a hit returns via blr.
// NOTE(review): the labels (loop, failure, hit, fallthru), base_offset,
// target_offset and the code that loads array_ptr and the loop count are not
// declared in the text visible here — confirm against the complete function.
// NOTE(review): this span has collapsed line breaks and fused tokens
// ("constRegister", "elseif"); statements that follow a "//" on the same
// line are currently commented out and need to be restored.
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, Register super_klass, Register temp1_reg, Register temp2_reg,
Label* L_success, Register result_reg) { constRegister array_ptr = temp1_reg; // current value from cache array constRegister temp = temp2_reg;
bind(loop); // Oops in table are NO MORE compressed.
ld(temp, base_offset, array_ptr);
cmpd(CCR0, temp, super_klass);
beq(CCR0, hit);
addi(array_ptr, array_ptr, BytesPerWord);
bdnz(loop);
bind(failure); if (result_reg!=noreg) li(result_reg, 1); // load non-zero result (indicates a miss)
b(fallthru);
bind(hit);
std(super_klass, target_offset, sub_klass); // save result to cache if (result_reg != noreg) { li(result_reg, 0); } // load zero result (indicates a hit) if (L_success != NULL) { b(*L_success); } elseif (result_reg == noreg) { blr(); } // return with CR0.eq if neither label nor result reg provided
bind(fallthru);
}
// Try fast path, then go to slow one if not successful void MacroAssembler::check_klass_subtype(Register sub_klass, Register super_klass, Register temp1_reg, Register temp2_reg,
Label& L_success) {
Label L_failure;
check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success, &L_failure);
check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success);
bind(L_failure); // Fallthru if not successful.
}
// Class initialization barrier.
//
// Emits code that continues at L_fast_path when 'klass' is fully initialized
// or when 'thread' is the thread recorded as its initializer, and at
// L_slow_path otherwise.
//
//   klass       - register holding the InstanceKlass to check
//   thread      - register holding the current thread
//   L_fast_path - target for the "no barrier needed" case, or NULL
//   L_slow_path - target for the "barrier needed" case, or NULL
//
// At least one label must be supplied; the missing one means "fall through
// behind the emitted code". Supplying both is not implemented.
// R0 and CCR0 are clobbered.
void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");

  // Substitute a local label for the one the caller did not provide, so that
  // the branches below never dereference a NULL label.
  Label L_fallthrough;
  if (L_fast_path == NULL) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == NULL) {
    L_slow_path = &L_fallthrough;
  }

  // Fast path check: class is fully initialized
  lbz(R0, in_bytes(InstanceKlass::init_state_offset()), klass);
  cmpwi(CCR0, R0, InstanceKlass::fully_initialized);
  beq(CCR0, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ld(R0, in_bytes(InstanceKlass::init_thread_offset()), klass);
  cmpd(CCR0, thread, R0);
  if (L_slow_path == &L_fallthrough) {
    beq(CCR0, *L_fast_path);
  } else if (L_fast_path == &L_fallthrough) {
    bne(CCR0, *L_slow_path);
  } else {
    Unimplemented();
  }

  bind(L_fallthrough);
}
// Computes the byte offset of an argument slot, scaled by the interpreter's
// stack element size and biased by extra_slot_offset (given in slots).
//
// For a constant arg_slot nothing is emitted and the offset is returned as a
// constant. For a register arg_slot, code is emitted that computes the offset
// into temp_reg, which is then returned.
// cf. TemplateTable::prepare_invoke(), if (load_receiver).
RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
                                                   Register temp_reg,
                                                   int extra_slot_offset) {
  const int slot_size = Interpreter::stackElementSize;
  int byte_offset = extra_slot_offset * slot_size;

  if (!arg_slot.is_constant()) {
    // Slot number only known at run time: scale it, then add the bias.
    assert(temp_reg != noreg, "must specify");
    sldi(temp_reg, arg_slot.as_register(), exact_log2(slot_size));
    if (byte_offset != 0) {
      addi(temp_reg, temp_reg, byte_offset);
    }
    return temp_reg;
  }

  byte_offset += arg_slot.as_constant() * slot_size;
  return byte_offset;
}
void MacroAssembler::tlab_allocate( Register obj, // result: pointer to object after successful allocation Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time Register t1, // temp register
Label& slow_case // continuation point if fast allocation fails
) { // make sure arguments make sense
assert_different_registers(obj, var_size_in_bytes, t1);
assert(0 <= con_size_in_bytes && is_simm16(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
constRegister new_top = t1; //verify_tlab(); not implemented
#ifdef ASSERT // make sure new free pointer is properly aligned
{
Label L;
andi_(R0, new_top, MinObjAlignmentInBytesMask);
beq(CCR0, L);
stop("updated TLAB free is not properly aligned");
bind(L);
} #endif// ASSERT
// update the tlab top pointer
std(new_top, in_bytes(JavaThread::tlab_top_offset()), R16_thread); //verify_tlab(); not implemented
} void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) {
unimplemented("incr_allocated_bytes");
}
address MacroAssembler::emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc) { // Start the stub.
address stub = start_a_stub(64); if (stub == NULL) { return NULL; } // CodeCache full: bail out
// Create a trampoline stub relocation which relates this trampoline stub // with the call instruction at insts_call_instruction_offset in the // instructions code-section.
relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset)); constint stub_start_offset = offset();
// For java_to_interp stubs we use R11_scratch1 as scratch register // and in call trampoline stubs we use R12_scratch2. This way we // can distinguish them (see is_NativeCallTrampolineStub_at()). Register reg_scratch = R12_scratch2;
// Now, create the trampoline stub's code: // - load the TOC // - load the call target from the constant pool // - call if (Rtoc == noreg) {
calculate_address_from_global_toc(reg_scratch, method_toc());
Rtoc = reg_scratch;
}
// Assert that the encoded destination_toc_offset can be identified and that it is correct.
assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(), "encoded offset into the constant pool must match"); // Trampoline_stub_size should be good.
assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
// Update rtm_counters based on abort status // input: abort_status // rtm_counters_Reg (RTMLockingCounters*) void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) { // Mapping to keep PreciseRTMLockingStatistics similar to x86. // x86 ppc (! means inverted, ? means not the same) // 0 31 Set if abort caused by XABORT instruction. // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set. // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted. // 3 10 Set if an internal buffer overflowed. // 4 ?12 Set if a debug breakpoint was hit. // 5 ?32 Set if an abort occurred during execution of a nested transaction. constint failure_bit[] = {tm_tabort, // Signal handler will set this too.
tm_failure_persistent,
tm_non_trans_cf,
tm_trans_cf,
tm_footprint_of,
tm_failure_code,
tm_transaction_level};
constint bit2counter_map[][num_counters] = // 0 = no map; 1 = mapped, no inverted logic; -1 = mapped, inverted logic // Inverted logic means that if a bit is set don't count it, or vice-versa. // Care must be taken when mapping bits to counters as bits for a given // counter must be mutually exclusive. Otherwise, the counter will be // incremented more than once. // counters: // 0 1 2 3 4 5 // abort , persist, conflict, overflow, debug , nested bits:
{{ 1 , 0 , 0 , 0 , 0 , 0 }, // abort
{ 0 , -1 , 0 , 0 , 0 , 0 }, // failure_persistent
{ 0 , 0 , 1 , 0 , 0 , 0 }, // non_trans_cf
{ 0 , 0 , 1 , 0 , 0 , 0 }, // trans_cf
{ 0 , 0 , 0 , 1 , 0 , 0 }, // footprint_of
{ 0 , 0 , 0 , 0 , -1 , 0 }, // failure_code = 0xD4
{ 0 , 0 , 0 , 0 , 0 , 1 }}; // transaction_level > 1 // ...
// Move abort_status value to R0 and use abort_status register as a // temporary register because R0 as third operand in ld/std is treated // as base address zero (value). Likewise, R0 as second operand in addi // is problematic because it amounts to li. constRegister temp_Reg = abort_status; constRegister abort_status_R0 = R0;
mr(abort_status_R0, abort_status);
// Increment total abort counter. int counters_offs = RTMLockingCounters::abort_count_offset();
ld(temp_Reg, counters_offs, rtm_counters_Reg);
addi(temp_Reg, temp_Reg, 1);
std(temp_Reg, counters_offs, rtm_counters_Reg);
// Increment specific abort counters. if (PrintPreciseRTMLockingStatistics) {
// #0 counter offset. int abortX_offs = RTMLockingCounters::abortX_count_offset();
for (int nbit = 0; nbit < num_failure_bits; nbit++) { for (int ncounter = 0; ncounter < num_counters; ncounter++) { if (bit2counter_map[nbit][ncounter] != 0) {
Label check_abort; int abort_counter_offs = abortX_offs + (ncounter << 3);
if (failure_bit[nbit] == tm_transaction_level) { // Don't check outer transaction, TL = 1 (bit 63). Hence only // 11 bits in the TL field are checked to find out if failure // occurred in a nested transaction. This check also matches // the case when nesting_of = 1 (nesting overflow).
rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 10);
} elseif (failure_bit[nbit] == tm_failure_code) { // Check failure code for trap or illegal caught in TM. // Bits 0:7 are tested as bit 7 (persistent) is copied from // tabort or treclaim source operand. // On Linux: trap or illegal is TM_CAUSE_SIGNAL (0xD4).
rldicl(temp_Reg, abort_status_R0, 8, 56);
cmpdi(CCR0, temp_Reg, 0xD4);
} else {
rldicr_(temp_Reg, abort_status_R0, failure_bit[nbit], 0);
}
assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); // Update rtm counters based on state at abort. // Reads abort_status_Reg, updates flags.
assert_different_registers(abort_status_Reg, temp_Reg);
load_const_optimized(temp_Reg, (address)rtm_counters, R0);
rtm_counters_update(abort_status_Reg, temp_Reg); if (profile_rtm) {
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
}
}
// Retry on abort if abort's status indicates non-persistent failure. // inputs: retry_count_Reg // : abort_status_Reg // output: retry_count_Reg decremented by 1 void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
Label& retryLabel, Label* checkRetry) {
Label doneRetry;
// Don't retry if failure is persistent. // The persistent bit is set when a (A) Disallowed operation is performed in // transactional state, like for instance trying to write the TFHAR after a // transaction is started; or when there is (B) a Nesting Overflow (too many // nested transactions); or when (C) the Footprint overflows (too many // addresses touched in TM state so there is no more space in the footprint // area to track them); or in case of (D) a Self-Induced Conflict, i.e. a // store is performed to a given address in TM state, then once in suspended // state the same address is accessed. Failure (A) is very unlikely to occur // in the JVM. Failure (D) will never occur because Suspended state is never // used in the JVM. Thus mostly (B) a Nesting Overflow or (C) a Footprint // Overflow will set the persistent bit.
rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
bne(CCR0, doneRetry);
// Don't retry if transaction was deliberately aborted, i.e. caused by a // tabort instruction.
rldicr_(R0, abort_status_Reg, tm_tabort, 0);
bne(CCR0, doneRetry);
// Retry if transaction aborted due to a conflict with another thread. if (checkRetry) { bind(*checkRetry); }
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
b(retryLabel);
bind(doneRetry);
}
// Spin and retry if lock is busy. // inputs: owner_addr_Reg (monitor address) // : retry_count_Reg // output: retry_count_Reg decremented by 1 // CTR is killed void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
Label SpinLoop, doneRetry, doRetry;
addic_(retry_count_Reg, retry_count_Reg, -1);
blt(CCR0, doneRetry);
if (RTMSpinLoopCount > 1) {
li(R0, RTMSpinLoopCount);
mtctr(R0);
}
// restore thread priority to default in userspace #ifdef LINUX
smt_prio_medium_low(); #else
smt_prio_medium(); #endif
b(retryLabel);
bind(doneRetry);
}
// Use RTM for normal stack locks. // Input: objReg (object to lock) void MacroAssembler::rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp, Register retry_on_abort_count_Reg,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL, Label& IsInflated) {
assert(UseRTMForStackLocks, "why call this otherwise?");
Label L_rtm_retry, L_decrement_retry, L_on_abort;
if (RTMRetryCount > 0) {
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
bind(L_rtm_retry);
}
andi_(R0, mark_word, markWord::monitor_value); // inflated vs stack-locked|neutral
bne(CCR0, IsInflated);
if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement; if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(tmp, RTMTotalCountIncrRate, L_noincrement);
}
assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0); //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
ldx(mark_word, tmp);
addi(mark_word, mark_word, 1);
stdx(mark_word, tmp);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort);
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked.
andi(R0, mark_word, markWord::lock_mask_in_place); // look at 2 lock bits
cmpwi(flag, R0, markWord::unlocked_value); // bits = 01 unlocked
beq(flag, DONE_LABEL); // all done if unlocked
if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort); constRegister abort_status_Reg = tmp;
mftexasr(abort_status_Reg); if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
}
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload if (RTMRetryCount > 0) { // Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
} else {
bind(L_decrement_retry);
}
}
// Use RTM for inflating locks // inputs: obj (object to lock) // mark_word (current header - KILLED) // boxReg (on-stack box address (displaced header location) - KILLED) void MacroAssembler::rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register boxReg, Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
RTMLockingCounters* rtm_counters,
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL) {
assert(UseRTMLocking, "why call this otherwise?");
Label L_rtm_retry, L_decrement_retry, L_on_abort; // Clean monitor_value bit to get valid pointer. int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markWord::monitor_value;
// Store non-null, using boxReg instead of (intptr_t)markWord::unused_mark().
std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg); constRegister tmpReg = boxReg; constRegister owner_addr_Reg = mark_word;
addi(owner_addr_Reg, mark_word, owner_offset);
if (RTMRetryCount > 0) {
load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy.
load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
bind(L_rtm_retry);
} if (PrintPreciseRTMLockingStatistics || profile_rtm) {
Label L_noincrement; if (RTMTotalCountIncrRate > 1) {
branch_on_random_using_tb(R0, RTMTotalCountIncrRate, L_noincrement);
}
assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg); //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
ldx(tmpReg, R0);
addi(tmpReg, tmpReg, 1);
stdx(tmpReg, R0);
bind(L_noincrement);
}
tbegin_();
beq(CCR0, L_on_abort); // We don't reload mark word. Will only be reset at safepoint.
ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
cmpdi(flag, R0, 0);
beq(flag, DONE_LABEL);
if (UseRTMXendForLockBusy) {
tend_();
b(L_decrement_retry);
} else {
tabort_();
}
bind(L_on_abort); constRegister abort_status_Reg = tmpReg;
mftexasr(abort_status_Reg); if (PrintPreciseRTMLockingStatistics || profile_rtm) {
rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm); // Restore owner_addr_Reg
ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); #ifdef ASSERT
andi_(R0, mark_word, markWord::monitor_value);
asm_assert_ne("must be inflated"); // Deflating only allowed at safepoint. #endif
addi(owner_addr_Reg, mark_word, owner_offset);
} if (RTMRetryCount > 0) { // Retry on lock abort if abort status is not permanent.
rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
}
// Appears unlocked - try to swing _owner from null to non-null.
cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
if (RTMRetryCount > 0) { // success done else retry
b(DONE_LABEL);
bind(L_decrement_retry); // Spin and retry if lock is busy.
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
} else {
bind(L_decrement_retry);
}
}
#endif// INCLUDE_RTM_OPT
// "The box" is the space on the stack where we copy the object mark. void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, Register temp, Register displaced_header, Register current_header,
RTMLockingCounters* rtm_counters,
RTMLockingCounters* stack_rtm_counters,
Metadata* method_data, bool use_rtm, bool profile_rtm) {
assert_different_registers(oop, box, temp, displaced_header, current_header);
assert(flag != CCR0, "bad condition register");
Label cont;
Label object_has_monitor;
Label cas_failed;
Label success, failure;
// Load markWord from object into displaced_header.
ld(displaced_header, oopDesc::mark_offset_in_bytes(), oop);
// Handle existing monitor. // The object has an existing monitor iff (mark & monitor_value) != 0.
andi_(temp, displaced_header, markWord::monitor_value);
bne(CCR0, object_has_monitor);
if (!UseHeavyMonitors) { // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
ori(displaced_header, displaced_header, markWord::unlocked_value);
// Load Compare Value application register.
// Initialize the box. (Must happen before we update the object mark!)
std(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
// Must fence, otherwise, preceding store(s) may float below cmpxchg. // Compare object markWord with mark and if equal exchange scratch1 with object markWord.
cmpxchgd(/*flag=*/flag, /*current_value=*/current_header, /*compare_value=*/displaced_header, /*exchange_value=*/box, /*where=*/oop,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(),
noreg,
&cas_failed, /*check without membar and ldarx first*/true);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // If the compare-and-exchange succeeded, then we found an unlocked // object and we have now locked it.
b(success);
} else { // Set NE to indicate 'failure' -> take slow-path.
crandc(flag, Assembler::equal, flag, Assembler::equal);
b(failure);
}
bind(cas_failed); // We did not see an unlocked object so try the fast recursive case.
// Check if the owner is self by comparing the value in the markWord of object // (current_header) with the stack pointer.
sub(current_header, current_header, R1_SP);
load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
and_(R0/*==0?*/, current_header, temp); // If condition is true we are cont and hence we can store 0 as the // displaced header in the box, which indicates that it is a recursive lock.
mcrf(flag,CCR0);
std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
b(cont);
// Handle existing monitor.
bind(object_has_monitor); // The object's monitor m is unlocked iff m->owner == NULL, // otherwise m->owner may contain a thread or a stack address.
#if INCLUDE_RTM_OPT // Use the same RTM locking code in 32- and 64-bit VM. if (use_rtm) {
rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
rtm_counters, method_data, profile_rtm, cont);
} else { #endif// INCLUDE_RTM_OPT
// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value);
cmpxchgd(/*flag=*/flag, /*current_value=*/current_header, /*compare_value=*/(intptr_t)0, /*exchange_value=*/R16_thread, /*where=*/temp,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock());
// Store a non-null value into the box.
std(box, BasicLock::displaced_header_offset_in_bytes(), box);
beq(flag, success);
// Check for recursive locking.
cmpd(flag, current_header, R16_thread);
bne(flag, failure);
// Current thread already owns the lock. Just increment recursions. Register recursions = displaced_header;
ld(recursions, ObjectMonitor::recursions_offset_in_bytes()-ObjectMonitor::owner_offset_in_bytes(), temp);
addi(recursions, recursions, 1);
std(recursions, ObjectMonitor::recursions_offset_in_bytes()-ObjectMonitor::owner_offset_in_bytes(), temp);
#if INCLUDE_RTM_OPT
} // use_rtm() #endif
bind(cont); // flag == EQ indicates success, increment held monitor count // flag == NE indicates failure
bne(flag, failure);
bind(success);
inc_held_monitor_count(temp);
bind(failure);
}
if (!UseHeavyMonitors) { // Find the lock address and load the displaced header from the stack.
ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
// If the displaced header is 0, we have a recursive unlock.
cmpdi(flag, displaced_header, 0);
beq(flag, success);
}
// Handle existing monitor. // The object has an existing monitor iff (mark & monitor_value) != 0.
RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
andi_(R0, current_header, markWord::monitor_value);
bne(CCR0, object_has_monitor);
if (!UseHeavyMonitors) { // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markWord of the object. // Cmpxchg sets flag to cmpd(current_header, box).
cmpxchgd(/*flag=*/flag, /*current_value=*/current_header, /*compare_value=*/box, /*exchange_value=*/displaced_header, /*where=*/oop,
MacroAssembler::MemBarRel,
MacroAssembler::cmpxchgx_hint_release_lock(),
noreg,
&failure);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
b(success);
} else { // Set NE to indicate 'failure' -> take slow-path.
crandc(flag, Assembler::equal, flag, Assembler::equal);
b(failure);
}
addic_(displaced_header, displaced_header, -1);
blt(CCR0, notRecursive); // Not recursive if negative after decrement.
std(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
b(success); // flag is already EQ here.
bind(notRecursive);
ld(temp, ObjectMonitor::EntryList_offset_in_bytes(), current_header);
ld(displaced_header, ObjectMonitor::cxq_offset_in_bytes(), current_header);
orr(temp, temp, displaced_header); // Will be 0 if both are 0.
cmpdi(flag, temp, 0);
bne(flag, failure);
release();
std(temp, ObjectMonitor::owner_offset_in_bytes(), current_header);
// flag == EQ indicates success, decrement held monitor count // flag == NE indicates failure
bind(success);
dec_held_monitor_count(temp);
bind(failure);
}
if (at_return) { if (in_nmethod) { if (UseSIGTRAP) { // Use Signal Handler.
relocate(relocInfo::poll_return_type);
td(traptoGreaterThanUnsigned, R1_SP, temp);
} else {
cmpld(CCR0, R1_SP, temp); // Stub may be out of range for short conditional branch.
bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_path);
}
} else { // Not in nmethod. // Frame still on stack, need to get fp. Register fp = R0;
ld(fp, _abi0(callers_sp), R1_SP);
cmpld(CCR0, fp, temp);
bgt(CCR0, slow_path);
}
} else { // Normal safepoint poll. Not at return.
assert(!in_nmethod, "should use load_from_polling_page");
andi_(temp, temp, SafepointMechanism::poll_bit());
bne(CCR0, slow_path);
}
}
// Values for last_Java_pc, and last_Java_sp must comply to the rules // in frame_ppc.hpp. void MacroAssembler::set_last_Java_frame(Register last_Java_sp, Register last_Java_pc) { // Always set last_Java_pc and flags first because once last_Java_sp // is visible has_last_Java_frame is true and users will look at the // rest of the fields. (Note: flags should always be zero before we // get here so doesn't need to be set.)
// Verify that last_Java_pc was zeroed on return to Java
asm_assert_mem8_is_zero(in_bytes(JavaThread::last_Java_pc_offset()), R16_thread, "last_Java_pc not zeroed before leaving Java");
// When returning from calling out from Java mode the frame anchor's // last_Java_pc will always be set to NULL. It is set here so that // if we are doing a call to native (not VM) that we capture the // known pc and don't have to rely on the native call having a // standard frame linkage where we can find the pc. if (last_Java_pc != noreg)
std(last_Java_pc, in_bytes(JavaThread::last_Java_pc_offset()), R16_thread);
// Set last_Java_sp last.
std(last_Java_sp, in_bytes(JavaThread::last_Java_sp_offset()), R16_thread);
}
void MacroAssembler::reset_last_Java_frame(void) {
asm_assert_mem8_isnot_zero(in_bytes(JavaThread::last_Java_sp_offset()),
R16_thread, "SP was not set, still zero");
// sp points to a TOP_IJAVA_FRAME, retrieve frame's PC via // TOP_IJAVA_FRAME_ABI. // FIXME: assert that we really have a TOP_IJAVA_FRAME here!
address entry = pc();
load_const_optimized(tmp1, entry);
// Emits code that compresses a klass pointer: subtracts the compressed-klass
// base (if non-zero) and shifts right by the compressed-klass shift (if
// non-zero). The klass is taken from src, or from dst when src is noreg.
// Returns the register that holds the result: dst if any instruction was
// emitted, otherwise the unmodified input register. R0 is passed to
// sub_const_optimized as its temp.
Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
  // Klass is in dst if no src provided.
  Register result = (src == noreg) ? dst : src;

  const bool needs_rebase = CompressedKlassPointers::base() != 0;
  const bool needs_shift  = CompressedKlassPointers::shift() != 0;

  if (needs_rebase) {
    // Use dst as temp if it is free.
    sub_const_optimized(dst, result, CompressedKlassPointers::base(), R0);
    result = dst;
  }

  if (needs_shift) {
    srdi(dst, result, CompressedKlassPointers::shift());
    result = dst;
  }

  return result;
}
// Stores a 32-bit value into the klass gap of the object at dst_oop.
// Emits nothing unless compressed class pointers are in use. When val is
// noreg, R0 is loaded with zero and stored instead.
void MacroAssembler::store_klass_gap(Register dst_oop, Register val) {
  if (!UseCompressedClassPointers) {
    return;
  }

  Register gap_value = val;
  if (gap_value == noreg) {
    gap_value = R0;
    li(gap_value, 0);
  }

  // klass gap if compressed
  stw(gap_value, oopDesc::klass_gap_offset_in_bytes(), dst_oop);
}
// Returns the size in bytes of the code emitted by decode_klass_not_null().
// The value is computed on the first call and cached in a function-local
// static; it is 0 when compressed class pointers are not in use.
int MacroAssembler::instr_size_for_decode_klass_not_null() {
  static int computed_size = -1;

  // Already computed?
  if (computed_size != -1) {
    return computed_size;
  }

  if (UseCompressedClassPointers) {
    // Determine by scratch emit.
    ResourceMark rm;
    int code_size = 8 * BytesPerInstWord;
    CodeBuffer cb("decode_klass_not_null scratch buffer", code_size, 0);
    MacroAssembler* a = new MacroAssembler(&cb);
    a->decode_klass_not_null(R11_scratch1);
    computed_size = a->offset();
  } else {
    computed_size = 0;
  }

  return computed_size;
}
// Emits code that decompresses a klass pointer into dst: shifts left by the
// compressed-klass shift and adds the compressed-klass base (if non-zero).
// The compressed value is taken from src, or from dst when src is noreg.
// dst must not be R0, which is passed to add_const_optimized as its temp.
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  assert(dst != R0, "Dst reg may not be R0, as R0 is used here.");

  if (src == noreg) {
    src = dst;
  }

  // The shift instruction is also emitted with a zero shift amount when no
  // base has to be added and the value must still get from src into dst.
  const bool has_shift      = CompressedKlassPointers::shift() != 0;
  const bool move_without_base = (CompressedKlassPointers::base() == 0) && (src != dst);

  Register shifted_src = src;
  if (has_shift || move_without_base) {  // Move required.
    shifted_src = dst;
    sldi(shifted_src, src, CompressedKlassPointers::shift());
  }

  if (CompressedKlassPointers::base() != 0) {
    add_const_optimized(dst, shifted_src, CompressedKlassPointers::base(), R0);
  }
}
// Clear Array // For very short arrays. tmp == R0 is allowed. void MacroAssembler::clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp, int offset) { if (cnt_dwords > 0) { li(tmp, 0); } for (int i = 0; i < cnt_dwords; ++i) { std(tmp, offset + i * 8, base_ptr); }
}
// Version for constant short array length. Kills base_ptr. tmp == R0 is allowed. void MacroAssembler::clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp) { if (cnt_dwords < 8) {
clear_memory_unrolled(base_ptr, cnt_dwords, tmp); return;
}
// Helpers for Intrinsic Emitters // // Revert the byte order of a 32bit value in a register // src: 0x44556677 // dst: 0x77665544 // Three steps to obtain the result: // 1) Rotate src (as doubleword) left 5 bytes. That puts the leftmost byte of the src word // into the rightmost byte position. Afterwards, everything left of the rightmost byte is cleared. // This value initializes dst. // 2) Rotate src (as word) left 3 bytes. That puts the rightmost byte of the src word into the leftmost // byte position. Furthermore, byte 5 is rotated into byte 6 position where it is supposed to go. // This value is mask inserted into dst with a [0..23] mask of 1s. // 3) Rotate src (as word) left 1 byte. That puts byte 6 into byte 5 position. // This value is mask inserted into dst with a [8..15] mask of 1s. void MacroAssembler::load_reverse_32(Register dst, Register src) {
assert_different_registers(dst, src);
rldicl(dst, src, (4+1)*8, 56); // Rotate byte 4 into position 7 (rightmost), clear all to the left.
rlwimi(dst, src, 3*8, 0, 23); // Insert byte 5 into position 6, 7 into 4, leave pos 7 alone.
rlwimi(dst, src, 1*8, 8, 15); // Insert byte 6 into position 5, leave the rest alone.
}
// Calculate the column addresses of the crc32 lookup table into distinct registers. // This loop-invariant calculation is moved out of the loop body, reducing the loop // body size from 20 to 16 instructions. // Returns the offset that was used to calculate the address of column tc3. // Due to register shortage, setting tc3 may overwrite table. With the return offset // at hand, the original table address can be easily reconstructed. int MacroAssembler::crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3) {
assert(!VM_Version::has_vpmsumb(), "Vector version should be used instead!");
if (crc == val) { // Must rotate first to use the unmodified value.
rlwinm(tmp, val, 2, 24-2, 31-2); // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest. // As we use a word (4-byte) instruction, we have to adapt the mask bit positions.
srwi(crc, crc, 8); // Unsigned shift, clear leftmost 8 bits.
} else {
srwi(crc, crc, 8); // Unsigned shift, clear leftmost 8 bits.
rlwinm(tmp, val, 2, 24-2, 31-2); // Insert (rightmost) byte 7 of val, shifted left by 2, into byte 6..7 of tmp, clear the rest.
}
lwzx(tmp, table, tmp);
xorr(crc, crc, tmp);
}
/**
 * Emits code to update CRC-32 with a byte value according to constants in table.
 *
 * @param [in,out]crc   Register containing the crc.
 * @param [in]val       Register containing the byte to fold into the CRC.
 *                      Overwritten: it is xor-ed with crc and then passed to
 *                      fold_byte_crc32 both as the value and as its temp.
 * @param [in]table     Register containing the table of crc constants.
 *
 * uint32_t crc;
 * val = crc_table[(val ^ crc) & 0xFF];
 * crc = val ^ (crc >> 8);
 */
void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) {
  BLOCK_COMMENT("update_byte_crc32:");

  // val doubles as the scratch register of the fold step.
  const Register scratch = val;

  xorr(val, val, crc);
  fold_byte_crc32(crc, val, table, scratch);
}
/** * Emits code to update CRC-32 with a 4-byte value according to constants in table * Implementation according to jdk/src/share/native/java/util/zip/zlib-1.2.8/crc32.c
*/ // A note on the lookup table address(es): // The implementation uses 4 table columns (byte-reversed versions for Big Endian). // To save the effort of adding the column offset to the table address each time // a table element is looked up, it is possible to pass the pre-calculated // column addresses. // Uses R9..R12 as work register. Must be saved/restored by caller, if necessary. void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register table, intbufDisp, int bufInc, Register t0, Register t1, Register t2, Register t3, Register tc0, Register tc1, Register tc2, Register tc3) {
assert_different_registers(crc, t3);
// XOR crc with next four bytes of buffer.
lwz(t3, bufDisp, buf); if (bufInc != 0) {
addi(buf, buf, bufInc);
}
xorr(t3, t3, crc);
// Don't test for len <= 0 here. This pathological case should not occur anyway. // Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles // for all well-behaved cases. The situation itself is detected and handled correctly // within update_byteLoop_crc32.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_1word {");
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold);
blt(CCR0, L_tail);
// Pre-mainLoop alignment did show a slight (1%) positive effect on performance. // We leave the code in for reference. Maybe we need alignment when we exploit vector instructions.
{ // Align buf addr to mainLoop_stepping boundary.
neg(tmp2, buf); // Calculate # preLoop iterations for alignment.
rldicl(tmp2, tmp2, 0, 64-log_stepping); // Rotate tmp2 0 bits, insert into tmp2, anding with mask with 1s from 62..63.
if (complexThreshold > mainLoop_stepping) {
sub(len, len, tmp2); // Remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
} else {
sub(tmp, len, tmp2); // Remaining bytes for main loop.
cmpdi(CCR0, tmp, mainLoop_stepping);
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
}
update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
}
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
andi(len, len, mainLoop_stepping-1); // remaining bytes for tailLoop
mtctr(tmp2);
#ifdef VM_LITTLE_ENDIAN Register crc_rv = crc; #else Register crc_rv = tmp; // Load_reverse needs separate registers to work on. // Occupies tmp, but frees up crc.
load_reverse_32(crc_rv, crc); // Revert byte order because we are dealing with big-endian data.
tmp = crc; #endif
int reconstructTableOffset = crc32_table_columns(table, tc0, tc1, tc2, tc3);
#ifndef VM_LITTLE_ENDIAN
load_reverse_32(crc, crc_rv); // Revert byte order because we are dealing with big-endian data.
tmp = crc_rv; // Tmp uses it's original register again. #endif
// Restore original table address for tailLoop. if (reconstructTableOffset != 0) {
addi(table, table, -reconstructTableOffset);
}
// Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_1word");
}
// Calculate from first aligned address as far as possible.
addi(constants, constants, CRC32_TABLE_SIZE); // Point to vector constants.
kernel_crc32_vpmsum_aligned(crc, buf, len, constants, t0, t1, t2, t3, t4, t5, t6);
addi(constants, constants, -CRC32_TABLE_SIZE); // Point to table again.
// Implementation uses an inner loop which uses between 256 and 16 * unroll_factor // bytes per iteration. The basic scheme is: // lvx: load vector (Big Endian needs reversal) // vpmsumw: carry-less 32 bit multiplications with constant representing a large CRC shift // vxor: xor partial results together to get unroll_factor2 vectors
// Outer loop performs the CRC shifts needed to combine the unroll_factor2 vectors.
// Using 16 * unroll_factor / unroll_factor_2 bytes for constants. constint unroll_factor = CRC32_UNROLL_FACTOR,
unroll_factor2 = CRC32_UNROLL_FACTOR2;
// Tail of last iteration (no loads). for (int i = 0; i < unroll_factor2 / 2; ++i) {
BE_swap_bytes(data1[i + unroll_factor2 / 2]);
vxor(data0[i], data0[i], data1[i]);
vpmsumw(data1[i + unroll_factor2 / 2], data1[i + unroll_factor2 / 2], consts1[1]);
} for (int i = 0; i < unroll_factor2 / 2; ++i) {
vpmsumw(data0[i], data0[i], consts0[unroll_factor2 - 2 - i]); // First half of fixup shifts.
vxor(data0[i + unroll_factor2 / 2], data0[i + unroll_factor2 / 2], data1[i + unroll_factor2 / 2]);
}
// Last data register is ok, other ones need fixup shift. for (int i = unroll_factor2 / 2; i < unroll_factor2 - 1; ++i) {
vpmsumw(data0[i], data0[i], consts0[unroll_factor2 - 2 - i]);
}
// Combine to 128 bit result vector VCRC = data0[0]. for (int i = 1; i < unroll_factor2; i<<=1) { for (int j = 0; j <= unroll_factor2 - 2*i; j+=2*i) {
vxor(data0[j], data0[j], data0[j+i]);
}
}
cmpd(CCR0, len, num_bytes);
bge(CCR0, L_outer_loop);
// Last chance with lower num_bytes.
bind(L_last);
srdi(loop_count, len, exact_log2(16 * 2 * unroll_factor2)); // Use double-iterations. // Point behind last const for inner loop.
add_const_optimized(cur_const, constants, outer_consts_size + inner_consts_size);
sldi(R0, loop_count, exact_log2(16 * 2)); // Bytes of constants to be used.
clrrdi(num_bytes, len, exact_log2(16 * 2 * unroll_factor2));
subf(cur_const, R0, cur_const); // Point to constant to be used first.
addic_(loop_count, loop_count, -1); // One double-iteration peeled off.
bgt(CCR0, L_outer_loop); // ********** Main loop end **********
// Point to const (same as last const for inner loop).
add_const_optimized(cur_const, constants, outer_consts_size + inner_consts_size - 16);
mtctr(t0);
lvx(Vtmp2, cur_const);
align(32);
bind(L_loop);
lvx(Vtmp, buf);
addi(buf, buf, 16);
vpermxor(VCRC, VCRC, VCRC, Vc); // xor both halves to 64 bit result.
BE_swap_bytes(Vtmp);
vxor(VCRC, VCRC, Vtmp);
vpmsumw(VCRC, VCRC, Vtmp2);
bdnz(L_loop);
bind(L_done);
} // ********** Simple loop end ********** #undef BE_swap_bytes
// Point to Barrett constants
add_const_optimized(cur_const, constants, outer_consts_size + inner_consts_size);
vspltisb(zeroes, 0);
// Combine to 64 bit result.
vpermxor(VCRC, VCRC, VCRC, Vc); // xor both halves to 64 bit result.
// Reduce to 32 bit CRC: Remainder by multiply-high.
lvx(Vtmp, cur_const);
vsldoi(Vtmp2, zeroes, VCRC, 12); // Extract high 32 bit.
vpmsumd(Vtmp2, Vtmp2, Vtmp); // Multiply by inverse long poly.
vsldoi(Vtmp2, zeroes, Vtmp2, 12); // Extract high 32 bit.
vsldoi(Vtmp, zeroes, Vtmp, 8);
vpmsumd(Vtmp2, Vtmp2, Vtmp); // Multiply quotient by long poly.
vxor(VCRC, VCRC, Vtmp2); // Remainder fits into 32 bit.
// Move result. len is already updated.
vsldoi(VCRC, VCRC, zeroes, 8);
mfvrd(crc, VCRC);
if (msg_present) {
type |= stop_msg_present;
}
tdi_unchecked(traptoUnconditional, 0/*reg 0*/, type); if (msg_present) {
emit_int64((uintptr_t)msg);
}
block_comment("} stop;");
}
#ifndef PRODUCT // Write pattern 0x0101010101010101 in memory region [low-before, high+after]. // Val, addr are temp registers. // If low == addr, addr is killed. // High is preserved. void MacroAssembler::zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) { if (!ZapMemory) return;
assert_different_registers(low, val);
BLOCK_COMMENT("zap memory region {");
load_const_optimized(val, 0x0101010101010101); int size = before + after; if (low == high && size < 5 && size > 0) { int offset = -before*BytesPerWord; for (int i = 0; i < size; ++i) {
std(val, offset, low);
offset += (1*BytesPerWord);
}
} else {
addi(addr, low, -before*BytesPerWord);
assert_different_registers(high, val); if (after) addi(high, high, after * BytesPerWord);
Label loop;
bind(loop);
std(val, 0, addr);
addi(addr, addr, 8);
cmpd(CCR6, addr, high);
ble(CCR6, loop); if (after) addi(high, high, -after * BytesPerWord); // Correct back to old value.
}
BLOCK_COMMENT("} zap memory region");
}
// Emit a write-back of the data cache line addressed by 'line'.
// Only a plain base-register address is accepted: no index register and a
// displacement of zero.
void MacroAssembler::cache_wb(Address line) {
  assert(line.index() == noreg, "index should be noreg");
  assert(line.disp() == 0, "displacement should be 0");
  assert(VM_Version::supports_data_cache_line_flush(), "CPU or OS does not support flush to persistent memory");

  // dcbst is a Data Cache Store rather than a true flush: the cache line is
  // copied out to persistent memory but is not invalidated, so it acts like
  // a sync between the cache line and persistent memory.
  const Register line_base = line.base();
  dcbst(line_base);
}
// Emit the memory barrier that accompanies cache line write-backs.
//   is_presync - true if the barrier is requested before the cache line
//                flush/store instructions, false if requested after them.
void MacroAssembler::cache_wbsync(bool is_presync) {
  assert(VM_Version::supports_data_cache_line_flush(), "CPU or OS does not support sync related to persistent memory");

  // A barrier ahead of the flush/store instructions ("pre") is not needed;
  // only the one following them ("post") is emitted.
  if (is_presync) {
    return;
  }
  fence();
}
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.71Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.