/* * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// expression stack // (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store // data beyond the rsp which is potentially unsafe in an MT environment; // an interrupt may overwrite that data.) staticinline Address at_rsp () { return Address(rsp, 0);
}
// At top of Java expression stack which may be different than esp(). It // isn't for category 1 objects. staticinline Address at_tos () { return Address(rsp, Interpreter::expr_offset_in_bytes(0));
}
void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, Register temp_reg, bool load_bc_into_bc_reg/*=true*/, int byte_no) { if (!RewriteBytecodes) return;
Label L_patch_done;
switch (bc) { case Bytecodes::_fast_aputfield: case Bytecodes::_fast_bputfield: case Bytecodes::_fast_zputfield: case Bytecodes::_fast_cputfield: case Bytecodes::_fast_dputfield: case Bytecodes::_fast_fputfield: case Bytecodes::_fast_iputfield: case Bytecodes::_fast_lputfield: case Bytecodes::_fast_sputfield:
{ // We skip bytecode quickening for putfield instructions when // the put_code written to the constant pool cache is zero. // This is required so that every execution of this instruction // calls out to InterpreterRuntime::resolve_get_put to do // additional, required work.
assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
assert(load_bc_into_bc_reg, "we use bc_reg as temp");
__ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
__ movl(bc_reg, bc);
__ cmpl(temp_reg, (int) 0);
__ jcc(Assembler::zero, L_patch_done); // don't patch
} break; default:
assert(byte_no == -1, "sanity"); // the pair bytecodes have already done the load. if (load_bc_into_bc_reg) {
__ movl(bc_reg, bc);
}
}
if (JvmtiExport::can_post_breakpoint()) {
Label L_fast_patch; // if a breakpoint is present we can't rewrite the stream directly
__ movzbl(temp_reg, at_bcp(0));
__ cmpl(temp_reg, Bytecodes::_breakpoint);
__ jcc(Assembler::notEqual, L_fast_patch);
__ get_method(temp_reg); // Let breakpoint table handling rewrite to quicker bytecode
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg); #ifndef ASSERT
__ jmpb(L_patch_done); #else
__ jmp(L_patch_done); #endif
__ bind(L_fast_patch);
}
// get type
__ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
// unresolved class - get the resolved class
__ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
__ jccb(Assembler::equal, call_ldc);
// unresolved class in error state - call into runtime to throw the error // from the first resolution attempt
__ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
__ jccb(Assembler::equal, call_ldc);
// resolved class - need to call vm to get java mirror of the class
__ cmpl(rdx, JVM_CONSTANT_Class);
__ jcc(Assembler::notEqual, notClass);
// first time invocation - must resolve first
__ movl(rarg, (int)bytecode());
__ call_VM(result, entry, rarg);
__ bind(resolved);
{ // Check for the null sentinel. // If we just called the VM, it already did the mapping for us, // but it's harmless to retry.
Label notNull;
ExternalAddress null_sentinel((address)Universe::the_null_sentinel_addr());
__ movptr(tmp, null_sentinel);
__ resolve_oop_handle(tmp, rscratch2);
__ cmpoop(tmp, result);
__ jccb(Assembler::notEqual, notNull);
__ xorptr(result, result); // NULL object reference
__ bind(notNull);
}
// get next byte
__ load_unsigned_byte(rbx,
at_bcp(Bytecodes::length_for(Bytecodes::_iload))); // if _iload, wait to rewrite to iload2. We only want to rewrite the // last two iloads in a pair. Comparing against fast_iload means that // the next bytecode is neither an iload or a caload, and therefore // an iload pair.
__ cmpl(rbx, Bytecodes::_iload);
__ jcc(Assembler::equal, done);
// iload followed by caload frequent pair void TemplateTable::fast_icaload() {
transition(vtos, itos); // load index out of locals
locals_index(rbx);
__ movl(rax, iaddress(rbx));
void TemplateTable::aload_0_internal(RewriteControl rc) {
transition(vtos, atos); // According to bytecode histograms, the pairs: // // _aload_0, _fast_igetfield // _aload_0, _fast_agetfield // _aload_0, _fast_fgetfield // // occur frequently. If RewriteFrequentPairs is set, the (slow) // _aload_0 bytecode checks if the next bytecode is either // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then // rewrites the current bytecode into a pair bytecode; otherwise it // rewrites the current bytecode into _fast_aload_0 that doesn't do // the pair check anymore. // // Note: If the next bytecode is _getfield, the rewrite must be // delayed, otherwise we may miss an opportunity for a pair. // // Also rewrite frequent pairs // aload_0, aload_1 // aload_0, iload_1 // These bytecodes with a small amount of code are most profitable // to rewrite if (RewriteFrequentPairs && rc == may_rewrite) {
Label rewrite, done;
constRegister bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
LP64_ONLY(assert(rbx != bc, "register damaged"));
// get next byte
__ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
// if _getfield then wait with rewrite
__ cmpl(rbx, Bytecodes::_getfield);
__ jcc(Assembler::equal, done);
// if _igetfield then rewrite to _fast_iaccess_0
assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
__ cmpl(rbx, Bytecodes::_fast_igetfield);
__ movl(bc, Bytecodes::_fast_iaccess_0);
__ jccb(Assembler::equal, rewrite);
// if _agetfield then rewrite to _fast_aaccess_0
assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
__ cmpl(rbx, Bytecodes::_fast_agetfield);
__ movl(bc, Bytecodes::_fast_aaccess_0);
__ jccb(Assembler::equal, rewrite);
// if _fgetfield then rewrite to _fast_faccess_0
assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
__ cmpl(rbx, Bytecodes::_fast_fgetfield);
__ movl(bc, Bytecodes::_fast_faccess_0);
__ jccb(Assembler::equal, rewrite);
// Generate subtype check. Blows rcx, rdi // Superklass in rax. Subklass in rbx.
__ gen_subtype_check(rbx, ok_is_subtype);
// Come here on failure // object is at TOS
__ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
// Come here on success
__ bind(ok_is_subtype);
// Get the value we will store
__ movptr(rax, at_tos());
__ movl(rcx, at_tos_p1()); // index // Now store using the appropriate barrier
do_oop_store(_masm, element_address, rax, IS_ARRAY);
__ jmp(done);
// Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx]
__ bind(is_null);
__ profile_null_seen(rbx);
// Store a NULL
do_oop_store(_masm, element_address, noreg, IS_ARRAY);
void TemplateTable::bastore() {
transition(itos, vtos);
__ pop_i(rbx); // rax: value // rbx: index // rdx: array
index_check(rdx, rbx); // prefer index in rbx // Need to check whether array is boolean or byte // since both types share the bastore bytecode.
__ load_klass(rcx, rdx, rscratch1);
__ movl(rcx, Address(rcx, Klass::layout_helper_offset())); int diffbit = Klass::layout_helper_boolean_diffbit();
__ testl(rcx, diffbit);
Label L_skip;
__ jccb(Assembler::zero, L_skip);
__ andl(rax, 1); // if it is a T_BOOLEAN array, mask the stored value to 0/1
__ bind(L_skip);
__ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY,
Address(rdx, rbx,Address::times_1,
arrayOopDesc::base_offset_in_bytes(T_BYTE)),
rax, noreg, noreg, noreg);
}
void TemplateTable::castore() {
transition(itos, vtos);
__ pop_i(rbx); // rax: value // rbx: index // rdx: array
index_check(rdx, rbx); // prefer index in rbx
__ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY,
Address(rdx, rbx, Address::times_2,
arrayOopDesc::base_offset_in_bytes(T_CHAR)),
rax, noreg, noreg, noreg);
}
// Emits the template for dup (vtos -> vtos): reads expression-stack slot 0
// into rax and pushes it again.
void TemplateTable::dup() {
  transition(vtos, vtos);
  __ load_ptr(0, rax);
  __ push_ptr(rax);
  // stack: ..., a, a
}
// Emits the template for dup_x1 (vtos -> vtos): swaps the two top slots in
// place, then pushes the original top value, using rax and rcx as scratch.
void TemplateTable::dup_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr( 0, rax);  // load b
  __ load_ptr( 1, rcx);  // load a
  __ store_ptr(1, rax);  // store b
  __ store_ptr(0, rcx);  // store a
  __ push_ptr(rax);      // push b
  // stack: ..., b, a, b
}
// Emits the template for dup_x2 (vtos -> vtos): inserts a copy of the top
// slot below the two slots beneath it, using rax and rcx as scratch.
// Slot indices passed to load_ptr/store_ptr after the push are relative to
// the new, one-slot-deeper stack.
void TemplateTable::dup_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr( 0, rax);  // load c
  __ load_ptr( 2, rcx);  // load a
  __ store_ptr(2, rax);  // store c in a
  __ push_ptr(rax);      // push c
  // stack: ..., c, b, c, c
  __ load_ptr( 2, rax);  // load b
  __ store_ptr(2, rcx);  // store a in b
  // stack: ..., c, a, c, c
  __ store_ptr(1, rax);  // store b in c
  // stack: ..., c, a, b, c
}
// Emits the template for dup2 (vtos -> vtos): duplicates the top two slots.
// Both loads use slot index 1 because the first push shifts every slot
// down by one, so index 1 then refers to b.
void TemplateTable::dup2() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr(1, rax);  // load a
  __ push_ptr(rax);     // push a
  __ load_ptr(1, rax);  // load b
  __ push_ptr(rax);     // push b
  // stack: ..., a, b, a, b
}
// Emits the template for dup2_x1 (vtos -> vtos): copies the top two slots
// and inserts the copy below the third slot, using rax and rcx as scratch.
// After the two pushes, slot indices are relative to the five-slot layout.
void TemplateTable::dup2_x1() {
  transition(vtos, vtos);
  // stack: ..., a, b, c
  __ load_ptr( 0, rcx);  // load c
  __ load_ptr( 1, rax);  // load b
  __ push_ptr(rax);      // push b
  __ push_ptr(rcx);      // push c
  // stack: ..., a, b, c, b, c
  __ store_ptr(3, rcx);  // store c in b
  // stack: ..., a, c, c, b, c
  __ load_ptr( 4, rcx);  // load a
  __ store_ptr(2, rcx);  // store a in 2nd c
  // stack: ..., a, c, a, b, c
  __ store_ptr(4, rax);  // store b in a
  // stack: ..., b, c, a, b, c
}
// Emits the template for dup2_x2 (vtos -> vtos): copies the top two slots
// and inserts the copy below the next two slots, using rax and rcx as
// scratch. After the two pushes, slot indices are relative to the six-slot
// layout.
void TemplateTable::dup2_x2() {
  transition(vtos, vtos);
  // stack: ..., a, b, c, d
  __ load_ptr( 0, rcx);  // load d
  __ load_ptr( 1, rax);  // load c
  __ push_ptr(rax);      // push c
  __ push_ptr(rcx);      // push d
  // stack: ..., a, b, c, d, c, d
  __ load_ptr( 4, rax);  // load b
  __ store_ptr(2, rax);  // store b in d
  __ store_ptr(4, rcx);  // store d in b
  // stack: ..., a, d, c, b, c, d
  __ load_ptr( 5, rcx);  // load a
  __ load_ptr( 3, rax);  // load c
  __ store_ptr(3, rcx);  // store a in c
  __ store_ptr(5, rax);  // store c in a
  // stack: ..., c, d, a, b, c, d
}
// Emits the template for swap (vtos -> vtos): exchanges the two top
// expression-stack slots in place via rax and rcx.
void TemplateTable::swap() {
  transition(vtos, vtos);
  // stack: ..., a, b
  __ load_ptr( 1, rcx);  // load a
  __ load_ptr( 0, rax);  // load b
  __ store_ptr(0, rcx);  // store a in b
  __ store_ptr(1, rax);  // store b in a
  // stack: ..., b, a
}
// Emits the template for idiv (itos -> itos).
// The incoming rax value is moved to rcx and the other operand is popped
// into rax before corrected_idivl(rcx) is emitted.
// NOTE(review): the min_int / -1 correction is presumably done inside
// corrected_idivl (not visible here) - confirm against MacroAssembler.
void TemplateTable::idiv() {
  transition(itos, itos);
  __ movl(rcx, rax);
  __ pop_i(rax);
  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivl(rcx);
}
// Emits the template for irem (itos -> itos).
// Same operand setup and division as idiv; afterwards rdx is copied into
// rax so that the value left in rdx becomes the result.
// NOTE(review): rdx presumably holds the remainder after corrected_idivl -
// confirm against MacroAssembler.
void TemplateTable::irem() {
  transition(itos, itos);
  __ movl(rcx, rax);
  __ pop_i(rax);
  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivl(rcx);
  __ movl(rax, rdx);
}
// Emits the template for ldiv (ltos -> ltos).
//
// 64-bit: the incoming rax value is moved to rcx, the other operand is
// popped into rax, a zero value in rcx jumps to the ArithmeticException
// entry, and corrected_idivq(rcx) is emitted.
//
// 32-bit: both 64-bit operands are pushed as four words, a zero check on
// rax|rdx jumps to the ArithmeticException entry, and the division is
// delegated to SharedRuntime::ldiv; the four temporaries are then removed.
void TemplateTable::ldiv() {
  transition(ltos, ltos);
#ifdef _LP64
  __ mov(rcx, rax);
  __ pop_l(rax);
  // generate explicit div0 check
  __ testq(rcx, rcx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivq(rcx); // kills rbx
#else
  __ pop_l(rbx, rcx);
  __ push(rcx); __ push(rbx);
  __ push(rdx); __ push(rax);
  // check if y = 0
  __ orl(rax, rdx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
  __ addptr(rsp, 4 * wordSize);  // take off temporaries
#endif
}
// Emits the template for lrem (ltos -> ltos).
//
// 64-bit: same operand setup, zero check and corrected_idivq(rcx) as ldiv;
// afterwards rdx is copied into rax as the result.
//
// 32-bit: both 64-bit operands are pushed as four words, a zero check on
// rax|rdx jumps to the ArithmeticException entry, and the operation is
// delegated to SharedRuntime::lrem; the four temporaries are then removed.
void TemplateTable::lrem() {
  transition(ltos, ltos);
#ifdef _LP64
  __ mov(rcx, rax);
  __ pop_l(rax);
  // explicit div0 check
  __ testq(rcx, rcx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
  //       they are not equal, one could do a normal division (no correction
  //       needed), which may speed up this implementation for the common case.
  //       (see also JVM spec., p.243 & p.271)
  __ corrected_idivq(rcx); // kills rbx
  __ mov(rax, rdx);
#else
  __ pop_l(rbx, rcx);
  __ push(rcx); __ push(rbx);
  __ push(rdx); __ push(rax);
  // check if y = 0
  __ orl(rax, rdx);
  __ jump_cc(Assembler::zero,
             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
  __ addptr(rsp, 4 * wordSize);  // take off temporaries
#endif
}
// Emits the template for lshl (itos -> ltos): the shift count arrives in
// rax and is moved to rcx; the long value to shift is popped from the stack
// (rax on 64-bit, rax/rdx on 32-bit) and shifted left.
void TemplateTable::lshl() {
  transition(itos, ltos);
  __ movl(rcx, rax);                             // get shift count
#ifdef _LP64
  __ pop_l(rax);                                 // get shift value
  __ shlq(rax);
#else
  __ pop_l(rax, rdx);                            // get shift value
  __ lshl(rdx, rax);
#endif
}
// Emits the template for lshr (itos -> ltos): the shift count arrives in
// rax and is moved to rcx; the long value to shift is popped from the stack
// (rax on 64-bit, rax/rdx on 32-bit) and shifted right arithmetically
// (sarq on 64-bit; MacroAssembler::lshr with its third argument set to true
// on 32-bit).
void TemplateTable::lshr() {
  // The transition is the same for both word sizes, so it is stated once.
  transition(itos, ltos);
#ifdef _LP64
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax);                                 // get shift value
  __ sarq(rax);
#else
  __ mov(rcx, rax);                              // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshr(rdx, rax, true);
#endif
}
// Emits the template for lushr (itos -> ltos): the shift count arrives in
// rax and is moved to rcx; the long value to shift is popped from the stack
// (rax on 64-bit, rax/rdx on 32-bit) and shifted right (shrq on 64-bit;
// MacroAssembler::lshr without the extra flag on 32-bit).
void TemplateTable::lushr() {
  transition(itos, ltos);
#ifdef _LP64
  __ movl(rcx, rax);                             // get shift count
  __ pop_l(rax);                                 // get shift value
  __ shrq(rax);
#else
  __ mov(rcx, rax);                              // get shift count
  __ pop_l(rax, rdx);                            // get shift value
  __ lshr(rdx, rax);
#endif
}
if (UseSSE >= 1) { switch (op) { case add:
__ addss(xmm0, at_rsp());
__ addptr(rsp, Interpreter::stackElementSize); break; case sub:
__ movflt(xmm1, xmm0);
__ pop_f(xmm0);
__ subss(xmm0, xmm1); break; case mul:
__ mulss(xmm0, at_rsp());
__ addptr(rsp, Interpreter::stackElementSize); break; case div:
__ movflt(xmm1, xmm0);
__ pop_f(xmm0);
__ divss(xmm0, xmm1); break; case rem: // On x86_64 platforms the SharedRuntime::frem method is called to perform the // modulo operation. The frem method calls the function // double fmod(double x, double y) in math.h. The documentation of fmod states: // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN // (signalling or quiet) is returned. // // On x86_32 platforms the FPU is used to perform the modulo operation. The // reason is that on 32-bit Windows the sign of modulo operations diverges from // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f (and not -0.0f). // The fprem instruction used on x86_32 is functionally equivalent to // SharedRuntime::frem in that it returns a NaN. #ifdef _LP64
__ movflt(xmm1, xmm0);
__ pop_f(xmm0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); #else// !_LP64
__ push_f(xmm0);
__ pop_f();
__ fld_s(at_rsp());
__ fremr(rax);
__ f2ieee();
__ pop(rax); // pop second operand off the stack
__ push_f();
__ pop_f(xmm0); #endif// _LP64 break; default:
ShouldNotReachHere(); break;
}
} else { #ifdef _LP64
ShouldNotReachHere(); #else// !_LP64 switch (op) { case add: __ fadd_s (at_rsp()); break; case sub: __ fsubr_s(at_rsp()); break; case mul: __ fmul_s (at_rsp()); break; case div: __ fdivr_s(at_rsp()); break; case rem: __ fld_s (at_rsp()); __ fremr(rax); break; default : ShouldNotReachHere();
}
__ f2ieee();
__ pop(rax); // pop second operand off the stack #endif// _LP64
}
}
// Emits the template for the binary double operations (dtos -> dtos).
//
// op selects add, sub, mul, div or rem. With UseSSE >= 2 the operation is
// emitted with SSE2 scalar-double instructions on xmm0 and the value at
// rsp; rem instead calls SharedRuntime::drem (64-bit) or goes through the
// x87 FPU (32-bit). Without SSE2 the x87 FPU is used, which is only valid
// on 32-bit builds.
void TemplateTable::dop2(Operation op) {
  transition(dtos, dtos);
  if (UseSSE >= 2) {
    switch (op) {
    case add:
      __ addsd(xmm0, at_rsp());
      // drop the two-slot stack operand that was consumed in place
      __ addptr(rsp, 2 * Interpreter::stackElementSize);
      break;
    case sub:
      // not commutative: save the incoming xmm0 value, pop the other
      // operand into xmm0, then subtract the saved value from it
      __ movdbl(xmm1, xmm0);
      __ pop_d(xmm0);
      __ subsd(xmm0, xmm1);
      break;
    case mul:
      __ mulsd(xmm0, at_rsp());
      __ addptr(rsp, 2 * Interpreter::stackElementSize);
      break;
    case div:
      __ movdbl(xmm1, xmm0);
      __ pop_d(xmm0);
      __ divsd(xmm0, xmm1);
      break;
    case rem:
      // Similar to fop2(), the modulo operation is performed using the
      // SharedRuntime::drem method (on x86_64 platforms) or using the
      // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
#ifdef _LP64
      __ movdbl(xmm1, xmm0);
      __ pop_d(xmm0);
      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
#else // !_LP64
      __ push_d(xmm0);
      __ pop_d();
      __ fld_d(at_rsp());
      __ fremr(rax);
      __ d2ieee();
      __ pop(rax);
      __ pop(rdx);
      __ push_d();
      __ pop_d(xmm0);
#endif // _LP64
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  } else {
#ifdef _LP64
    ShouldNotReachHere();
#else // !_LP64
    switch (op) {
    case add: __ fadd_d (at_rsp());                break;
    case sub: __ fsubr_d(at_rsp());                break;
    case mul: {
      // strict semantics
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1()));
      __ fmulp();
      __ fmul_d (at_rsp());
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2()));
      __ fmulp();
      break;
    }
    case div: {
      // strict semantics
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1()));
      __ fmul_d (at_rsp());
      __ fdivrp();
      __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2()));
      __ fmulp();
      break;
    }
    case rem: __ fld_d (at_rsp()); __ fremr(rax);  break;
    default : ShouldNotReachHere();
    }
    __ d2ieee();
    // Pop double precision number from rsp.
    __ pop(rax);
    __ pop(rdx);
#endif // _LP64
  }
}
// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Writes the 128-bit value (lo, hi) at the 16-byte-aligned address obtained
// by clearing the low four bits of adr, and returns that aligned address.
//
// adr  pointer into a buffer; the store lands at or up to 15 bytes below
//      it, so the buffer needs slack before adr.
//      NOTE(review): callers presumably pass an interior element of one of
//      the signflip pools so that the rounded-down address stays inside the
//      pool - confirm at the call sites.
// lo   value stored in the first 64-bit word of the operand
// hi   value stored in the second 64-bit word of the operand
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}
// Buffer for 128-bits masks used by SSE instructions. static jlong float_signflip_pool[2*2]; static jlong double_signflip_pool[2*2];
// Emits the template for the wide form of iinc (vtos -> vtos): adds a
// signed 16-bit constant taken from the bytecode stream to the local
// variable whose index locals_index_wide leaves in rbx.
void TemplateTable::wide_iinc() {
  transition(vtos, vtos);
  // Load 32 bits starting at bcp + 4; only the first two bytes are kept by
  // the bswap/sar sequence below.
  __ movl(rdx, at_bcp(4)); // get constant
  locals_index_wide(rbx);
  __ bswapl(rdx); // swap bytes & sign-extend constant
  __ sarl(rdx, 16);
  __ addl(iaddress(rbx), rdx);
  // Note: should probably use only one movl to get both
  //       the index and the constant -> fix this
}
void TemplateTable::convert() { #ifdef _LP64 // Checking #ifdef ASSERT
{
TosState tos_in = ilgl;
TosState tos_out = ilgl; switch (bytecode()) { case Bytecodes::_i2l: // fall through case Bytecodes::_i2f: // fall through case Bytecodes::_i2d: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_in = itos; break; case Bytecodes::_l2i: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_l2d: tos_in = ltos; break; case Bytecodes::_f2i: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_f2d: tos_in = ftos; break; case Bytecodes::_d2i: // fall through case Bytecodes::_d2l: // fall through case Bytecodes::_d2f: tos_in = dtos; break; default : ShouldNotReachHere();
} switch (bytecode()) { case Bytecodes::_l2i: // fall through case Bytecodes::_f2i: // fall through case Bytecodes::_d2i: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_out = itos; break; case Bytecodes::_i2l: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_d2l: tos_out = ltos; break; case Bytecodes::_i2f: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_d2f: tos_out = ftos; break; case Bytecodes::_i2d: // fall through case Bytecodes::_l2d: // fall through case Bytecodes::_f2d: tos_out = dtos; break; default : ShouldNotReachHere();
}
transition(tos_in, tos_out);
} #endif// ASSERT
staticconst int64_t is_nan = 0x8000000000000000L;
// Conversion switch (bytecode()) { case Bytecodes::_i2l:
__ movslq(rax, rax); break; case Bytecodes::_i2f:
__ cvtsi2ssl(xmm0, rax); break; case Bytecodes::_i2d:
__ cvtsi2sdl(xmm0, rax); break; case Bytecodes::_i2b:
__ movsbl(rax, rax); break; case Bytecodes::_i2c:
__ movzwl(rax, rax); break; case Bytecodes::_i2s:
__ movswl(rax, rax); break; case Bytecodes::_l2i:
__ movl(rax, rax); break; case Bytecodes::_l2f:
__ cvtsi2ssq(xmm0, rax); break; case Bytecodes::_l2d:
__ cvtsi2sdq(xmm0, rax); break; case Bytecodes::_f2i:
{
Label L;
__ cvttss2sil(rax, xmm0);
__ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
__ jcc(Assembler::notEqual, L);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
__ bind(L);
} break; case Bytecodes::_f2l:
{
Label L;
__ cvttss2siq(rax, xmm0); // NaN or overflow/underflow?
__ cmp64(rax, ExternalAddress((address) &is_nan), rscratch1);
__ jcc(Assembler::notEqual, L);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
__ bind(L);
} break; case Bytecodes::_f2d:
__ cvtss2sd(xmm0, xmm0); break; case Bytecodes::_d2i:
{
Label L;
__ cvttsd2sil(rax, xmm0);
__ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
__ jcc(Assembler::notEqual, L);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
__ bind(L);
} break; case Bytecodes::_d2l:
{
Label L;
__ cvttsd2siq(rax, xmm0); // NaN or overflow/underflow?
__ cmp64(rax, ExternalAddress((address) &is_nan), rscratch1);
__ jcc(Assembler::notEqual, L);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
__ bind(L);
} break; case Bytecodes::_d2f:
__ cvtsd2ss(xmm0, xmm0); break; default:
ShouldNotReachHere();
} #else// !_LP64 // Checking #ifdef ASSERT
{ TosState tos_in = ilgl;
TosState tos_out = ilgl; switch (bytecode()) { case Bytecodes::_i2l: // fall through case Bytecodes::_i2f: // fall through case Bytecodes::_i2d: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_in = itos; break; case Bytecodes::_l2i: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_l2d: tos_in = ltos; break; case Bytecodes::_f2i: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_f2d: tos_in = ftos; break; case Bytecodes::_d2i: // fall through case Bytecodes::_d2l: // fall through case Bytecodes::_d2f: tos_in = dtos; break; default : ShouldNotReachHere();
} switch (bytecode()) { case Bytecodes::_l2i: // fall through case Bytecodes::_f2i: // fall through case Bytecodes::_d2i: // fall through case Bytecodes::_i2b: // fall through case Bytecodes::_i2c: // fall through case Bytecodes::_i2s: tos_out = itos; break; case Bytecodes::_i2l: // fall through case Bytecodes::_f2l: // fall through case Bytecodes::_d2l: tos_out = ltos; break; case Bytecodes::_i2f: // fall through case Bytecodes::_l2f: // fall through case Bytecodes::_d2f: tos_out = ftos; break; case Bytecodes::_i2d: // fall through case Bytecodes::_l2d: // fall through case Bytecodes::_f2d: tos_out = dtos; break; default : ShouldNotReachHere();
}
transition(tos_in, tos_out);
} #endif// ASSERT
// Conversion // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation) switch (bytecode()) { case Bytecodes::_i2l:
__ extend_sign(rdx, rax); break; case Bytecodes::_i2f: if (UseSSE >= 1) {
__ cvtsi2ssl(xmm0, rax);
} else {
__ push(rax); // store int on tos
__ fild_s(at_rsp()); // load int to ST0
__ f2ieee(); // truncate to float size
__ pop(rcx); // adjust rsp
} break; case Bytecodes::_i2d: if (UseSSE >= 2) {
__ cvtsi2sdl(xmm0, rax);
} else {
__ push(rax); // add one slot for d2ieee()
__ push(rax); // store int on tos
__ fild_s(at_rsp()); // load int to ST0
__ d2ieee(); // truncate to double size
__ pop(rcx); // adjust rsp
__ pop(rcx);
} break; case Bytecodes::_i2b:
__ shll(rax, 24); // truncate upper 24 bits
__ sarl(rax, 24); // and sign-extend byte
LP64_ONLY(__ movsbl(rax, rax)); break; case Bytecodes::_i2c:
__ andl(rax, 0xFFFF); // truncate upper 16 bits
LP64_ONLY(__ movzwl(rax, rax)); break; case Bytecodes::_i2s:
__ shll(rax, 16); // truncate upper 16 bits
__ sarl(rax, 16); // and sign-extend short
LP64_ONLY(__ movswl(rax, rax)); break; case Bytecodes::_l2i: /* nothing to do */ break; case Bytecodes::_l2f: // On 64-bit platforms, the cvtsi2ssq instruction is used to convert // 64-bit long values to floats. On 32-bit platforms it is not possible // to use that instruction with 64-bit operands, therefore the FPU is // used to perform the conversion.
__ push(rdx); // store long on tos
__ push(rax);
__ fild_d(at_rsp()); // load long to ST0
__ f2ieee(); // truncate to float size
__ pop(rcx); // adjust rsp
__ pop(rcx); if (UseSSE >= 1) {
__ push_f();
__ pop_f(xmm0);
} break; case Bytecodes::_l2d: // On 32-bit platforms the FPU is used for conversion because on // 32-bit platforms it is not not possible to use the cvtsi2sdq // instruction with 64-bit operands.
__ push(rdx); // store long on tos
__ push(rax);
__ fild_d(at_rsp()); // load long to ST0
__ d2ieee(); // truncate to double size
__ pop(rcx); // adjust rsp
__ pop(rcx); if (UseSSE >= 2) {
__ push_d();
__ pop_d(xmm0);
} break; case Bytecodes::_f2i: // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs // as it returns 0 for any NaN. if (UseSSE >= 1) {
__ push_f(xmm0);
} else {
__ push(rcx); // reserve space for argument
__ fstp_s(at_rsp()); // pass float argument on stack
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); break; case Bytecodes::_f2l: // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs // as it returns 0 for any NaN. if (UseSSE >= 1) {
__ push_f(xmm0);
} else {
__ push(rcx); // reserve space for argument
__ fstp_s(at_rsp()); // pass float argument on stack
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); break; case Bytecodes::_f2d: if (UseSSE < 1) { /* nothing to do */
} elseif (UseSSE == 1) {
__ push_f(xmm0);
__ pop_f();
} else { // UseSSE >= 2
__ cvtss2sd(xmm0, xmm0);
} break; case Bytecodes::_d2i: if (UseSSE >= 2) {
__ push_d(xmm0);
} else {
__ push(rcx); // reserve space for argument
__ push(rcx);
__ fstp_d(at_rsp()); // pass double argument on stack
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2); break; case Bytecodes::_d2l: if (UseSSE >= 2) {
__ push_d(xmm0);
} else {
__ push(rcx); // reserve space for argument
__ push(rcx);
__ fstp_d(at_rsp()); // pass double argument on stack
}
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2); break; case Bytecodes::_d2f: if (UseSSE <= 1) {
__ push(rcx); // reserve space for f2ieee()
__ f2ieee(); // truncate to float size
__ pop(rcx); // adjust rsp if (UseSSE == 1) { // The cvtsd2ss instruction is not available if UseSSE==1, therefore // the conversion is performed using the FPU in this case.
__ push_f();
__ pop_f(xmm0);
}
} else { // UseSSE >= 2
__ cvtsd2ss(xmm0, xmm0);
} break; default :
ShouldNotReachHere();
} #endif// _LP64
}
// Load up edx with the branch displacement if (is_wide) {
__ movl(rdx, at_bcp(1));
} else {
__ load_signed_short(rdx, at_bcp(1));
}
__ bswapl(rdx);
if (!is_wide) {
__ sarl(rdx, 16);
}
LP64_ONLY(__ movl2ptr(rdx, rdx));
// Handle all the JSR stuff here, then exit. // It's much shorter and cleaner than intermingling with the non-JSR // normal-branch stuff occurring below. if (is_jsr) { // Pre-load the next target bytecode into rbx
__ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1, 0));
// compute return address as bci in rax
__ lea(rax, at_bcp((is_wide ? 5 : 3) -
in_bytes(ConstMethod::codes_offset())));
__ subptr(rax, Address(rcx, Method::const_offset())); // Adjust the bcp in r13 by the displacement in rdx
__ addptr(rbcp, rdx); // jsr returns atos that is not an oop
__ push_i(rax);
__ dispatch_only(vtos, true); return;
}
// Normal (non-jsr) branch handling
// Adjust the bcp in r13 by the displacement in rdx
__ addptr(rbcp, rdx);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.