Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/Sources/formale Sprachen/Java/Openjdk/src/hotspot/cpu/x86/ (Sun/Oracle ^©) Datei vom 13.11.2022 mit Größe 379 kB

Quelle x86_64.ad Sprache: unbekannt

Spracherkennung für: .ad vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

//
// Copyright (c) 2003, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// AMD64 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// R8-R15 must be encoded with REX.  (RSP, RBP, RSI, RDI need REX when
// used as byte registers)

// Previously set RBX, RSI, and RDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on RSI and RDI as SOE registers.

reg_def RAX  (SOC, SOC, Op_RegI,  0, rax->as_VMReg());
reg_def RAX_H(SOC, SOC, Op_RegI,  0, rax->as_VMReg()->next());

reg_def RCX  (SOC, SOC, Op_RegI,  1, rcx->as_VMReg());
reg_def RCX_H(SOC, SOC, Op_RegI,  1, rcx->as_VMReg()->next());

reg_def RDX  (SOC, SOC, Op_RegI,  2, rdx->as_VMReg());
reg_def RDX_H(SOC, SOC, Op_RegI,  2, rdx->as_VMReg()->next());

reg_def RBX  (SOC, SOE, Op_RegI,  3, rbx->as_VMReg());
reg_def RBX_H(SOC, SOE, Op_RegI,  3, rbx->as_VMReg()->next());

reg_def RSP  (NS,  NS,  Op_RegI,  4, rsp->as_VMReg());
reg_def RSP_H(NS,  NS,  Op_RegI,  4, rsp->as_VMReg()->next());

// now that adapter frames are gone RBP is always saved and restored by the prolog/epilog code
reg_def RBP  (NS, SOE, Op_RegI,  5, rbp->as_VMReg());
reg_def RBP_H(NS, SOE, Op_RegI,  5, rbp->as_VMReg()->next());

#ifdef _WIN64

reg_def RSI  (SOC, SOE, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOE, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOE, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOE, Op_RegI,  7, rdi->as_VMReg()->next());

#else

reg_def RSI  (SOC, SOC, Op_RegI,  6, rsi->as_VMReg());
reg_def RSI_H(SOC, SOC, Op_RegI,  6, rsi->as_VMReg()->next());

reg_def RDI  (SOC, SOC, Op_RegI,  7, rdi->as_VMReg());
reg_def RDI_H(SOC, SOC, Op_RegI,  7, rdi->as_VMReg()->next());

#endif

reg_def R8   (SOC, SOC, Op_RegI,  8, r8->as_VMReg());
reg_def R8_H (SOC, SOC, Op_RegI,  8, r8->as_VMReg()->next());

reg_def R9   (SOC, SOC, Op_RegI,  9, r9->as_VMReg());
reg_def R9_H (SOC, SOC, Op_RegI,  9, r9->as_VMReg()->next());

reg_def R10  (SOC, SOC, Op_RegI, 10, r10->as_VMReg());
reg_def R10_H(SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());

reg_def R11  (SOC, SOC, Op_RegI, 11, r11->as_VMReg());
reg_def R11_H(SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());

reg_def R12  (SOC, SOE, Op_RegI, 12, r12->as_VMReg());
reg_def R12_H(SOC, SOE, Op_RegI, 12, r12->as_VMReg()->next());

reg_def R13  (SOC, SOE, Op_RegI, 13, r13->as_VMReg());
reg_def R13_H(SOC, SOE, Op_RegI, 13, r13->as_VMReg()->next());

reg_def R14  (SOC, SOE, Op_RegI, 14, r14->as_VMReg());
reg_def R14_H(SOC, SOE, Op_RegI, 14, r14->as_VMReg()->next());

reg_def R15  (SOC, SOE, Op_RegI, 15, r15->as_VMReg());
reg_def R15_H(SOC, SOE, Op_RegI, 15, r15->as_VMReg()->next());

// Floating Point Registers

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX on I486, and choose no-save registers
// before save-on-call, & save-on-call before save-on-entry.  Registers
// which participate in fixed calling sequences should come last.
// Registers which are used as pairs must fall on an even boundary.

alloc_class chunk0(R10,         R10_H,
                   R11,         R11_H,
                   R8,          R8_H,
                   R9,          R9_H,
                   R12,         R12_H,
                   RCX,         RCX_H,
                   RBX,         RBX_H,
                   RDI,         RDI_H,
                   RDX,         RDX_H,
                   RSI,         RSI_H,
                   RAX,         RAX_H,
                   RBP,         RBP_H,
                   R13,         R13_H,
                   R14,         R14_H,
                   R15,         R15_H,
                   RSP,         RSP_H);

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// Empty register class.
reg_class no_reg();

// Class for all pointer/long registers
reg_class all_reg(RAX, RAX_H,
                  RDX, RDX_H,
                  RBP, RBP_H,
                  RDI, RDI_H,
                  RSI, RSI_H,
                  RCX, RCX_H,
                  RBX, RBX_H,
                  RSP, RSP_H,
                  R8,  R8_H,
                  R9,  R9_H,
                  R10, R10_H,
                  R11, R11_H,
                  R12, R12_H,
                  R13, R13_H,
                  R14, R14_H,
                  R15, R15_H);

// Class for all int registers
reg_class all_int_reg(RAX
                      RDX,
                      RBP,
                      RDI,
                      RSI,
                      RCX,
                      RBX,
                      R8,
                      R9,
                      R10,
                      R11,
                      R12,
                      R13,
                      R14);

// Class for all pointer registers
reg_class any_reg %{
  return _ANY_REG_mask;
%}

// Class for all pointer registers (excluding RSP)
reg_class ptr_reg %{
  return _PTR_REG_mask;
%}

// Class for all pointer registers (excluding RSP and RBP)
reg_class ptr_reg_no_rbp %{
  return _PTR_REG_NO_RBP_mask;
%}

// Class for all pointer registers (excluding RAX and RSP)
reg_class ptr_no_rax_reg %{
  return _PTR_NO_RAX_REG_mask;
%}

// Class for all pointer registers (excluding RAX, RBX, and RSP)
reg_class ptr_no_rax_rbx_reg %{
  return _PTR_NO_RAX_RBX_REG_mask;
%}

// Class for all long registers (excluding RSP)
reg_class long_reg %{
  return _LONG_REG_mask;
%}

// Class for all long registers (excluding RAX, RDX and RSP)
reg_class long_no_rax_rdx_reg %{
  return _LONG_NO_RAX_RDX_REG_mask;
%}

// Class for all long registers (excluding RCX and RSP)
reg_class long_no_rcx_reg %{
  return _LONG_NO_RCX_REG_mask;
%}

// Class for all long registers (excluding RBP and R13)
reg_class long_no_rbp_r13_reg %{
  return _LONG_NO_RBP_R13_REG_mask;
%}

// Class for all int registers (excluding RSP)
reg_class int_reg %{
  return _INT_REG_mask;
%}

// Class for all int registers (excluding RAX, RDX, and RSP)
reg_class int_no_rax_rdx_reg %{
  return _INT_NO_RAX_RDX_REG_mask;
%}

// Class for all int registers (excluding RCX and RSP)
reg_class int_no_rcx_reg %{
  return _INT_NO_RCX_REG_mask;
%}

// Class for all int registers (excluding RBP and R13)
reg_class int_no_rbp_r13_reg %{
  return _INT_NO_RBP_R13_REG_mask;
%}

// Singleton class for RAX pointer register
reg_class ptr_rax_reg(RAX, RAX_H);

// Singleton class for RBX pointer register
reg_class ptr_rbx_reg(RBX, RBX_H);

// Singleton class for RSI pointer register
reg_class ptr_rsi_reg(RSI, RSI_H);

// Singleton class for RBP pointer register
reg_class ptr_rbp_reg(RBP, RBP_H);

// Singleton class for RDI pointer register
reg_class ptr_rdi_reg(RDI, RDI_H);

// Singleton class for stack pointer
reg_class ptr_rsp_reg(RSP, RSP_H);

// Singleton class for TLS pointer
reg_class ptr_r15_reg(R15, R15_H);

// Singleton class for RAX long register
reg_class long_rax_reg(RAX, RAX_H);

// Singleton class for RCX long register
reg_class long_rcx_reg(RCX, RCX_H);

// Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H);

// Singleton class for RAX int register
reg_class int_rax_reg(RAX);

// Singleton class for RBX int register
reg_class int_rbx_reg(RBX);

// Singleton class for RCX int register
reg_class int_rcx_reg(RCX);

// Singleton class for RCX int register
reg_class int_rdx_reg(RDX);

// Singleton class for RCX int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "peephole_x86_64.hpp"

%}

// Register masks
source_hpp %{

extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

inline const RegMask& STACK_OR_PTR_REG_mask()  { return _STACK_OR_PTR_REG_mask;  }
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask()  { return _STACK_OR_INT_REG_mask;  }

%}

source %{
#define   RELOC_IMM64    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

RegMask _ANY_REG_mask;
RegMask _PTR_REG_mask;
RegMask _PTR_REG_NO_RBP_mask;
RegMask _PTR_NO_RAX_REG_mask;
RegMask _PTR_NO_RAX_RBX_REG_mask;
RegMask _LONG_REG_mask;
RegMask _LONG_NO_RAX_RDX_REG_mask;
RegMask _LONG_NO_RCX_REG_mask;
RegMask _LONG_NO_RBP_R13_REG_mask;
RegMask _INT_REG_mask;
RegMask _INT_NO_RAX_RDX_REG_mask;
RegMask _INT_NO_RCX_REG_mask;
RegMask _INT_NO_RBP_R13_REG_mask;
RegMask _FLOAT_REG_mask;
RegMask _STACK_OR_PTR_REG_mask;
RegMask _STACK_OR_LONG_REG_mask;
RegMask _STACK_OR_INT_REG_mask;

static bool need_r12_heapbase() {
  return UseCompressedOops;
}

void reg_mask_init() {
  // _ALL_REG_mask is generated by adlc from the all_reg register class below.
  // We derive a number of subsets from it.
  _ANY_REG_mask = _ALL_REG_mask;

  if (PreserveFramePointer) {
    _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
    _ANY_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  }
  if (need_r12_heapbase()) {
    _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
    _ANY_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()->next()));
  }

  _PTR_REG_mask = _ANY_REG_mask;
  _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()));
  _PTR_REG_mask.Remove(OptoReg::as_OptoReg(rsp->as_VMReg()->next()));
  _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()));
  _PTR_REG_mask.Remove(OptoReg::as_OptoReg(r15->as_VMReg()->next()));

  _STACK_OR_PTR_REG_mask = _PTR_REG_mask;
  _STACK_OR_PTR_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());

  _PTR_REG_NO_RBP_mask = _PTR_REG_mask;
  _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _PTR_REG_NO_RBP_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));

  _PTR_NO_RAX_REG_mask = _PTR_REG_mask;
  _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _PTR_NO_RAX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));

  _PTR_NO_RAX_RBX_REG_mask = _PTR_NO_RAX_REG_mask;
  _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()));
  _PTR_NO_RAX_RBX_REG_mask.Remove(OptoReg::as_OptoReg(rbx->as_VMReg()->next()));

  _LONG_REG_mask = _PTR_REG_mask;
  _STACK_OR_LONG_REG_mask = _LONG_REG_mask;
  _STACK_OR_LONG_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());

  _LONG_NO_RAX_RDX_REG_mask = _LONG_REG_mask;
  _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()->next()));
  _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));
  _LONG_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()->next()));

  _LONG_NO_RCX_REG_mask = _LONG_REG_mask;
  _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
  _LONG_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()->next()));

  _LONG_NO_RBP_R13_REG_mask = _LONG_REG_mask;
  _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()->next()));
  _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));
  _LONG_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()->next()));

  _INT_REG_mask = _ALL_INT_REG_mask;
  if (PreserveFramePointer) {
    _INT_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  }
  if (need_r12_heapbase()) {
    _INT_REG_mask.Remove(OptoReg::as_OptoReg(r12->as_VMReg()));
  }

  _STACK_OR_INT_REG_mask = _INT_REG_mask;
  _STACK_OR_INT_REG_mask.OR(STACK_OR_STACK_SLOTS_mask());

  _INT_NO_RAX_RDX_REG_mask = _INT_REG_mask;
  _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rax->as_VMReg()));
  _INT_NO_RAX_RDX_REG_mask.Remove(OptoReg::as_OptoReg(rdx->as_VMReg()));

  _INT_NO_RCX_REG_mask = _INT_REG_mask;
  _INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));

  _INT_NO_RBP_R13_REG_mask = _INT_REG_mask;
  _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(rbp->as_VMReg()));
  _INT_NO_RBP_R13_REG_mask.Remove(OptoReg::as_OptoReg(r13->as_VMReg()));

  // _FLOAT_REG_LEGACY_mask/_FLOAT_REG_EVEX_mask is generated by adlc
  // from the float_reg_legacy/float_reg_evex register class.
  _FLOAT_REG_mask = VM_Version::supports_evex() ? _FLOAT_REG_EVEX_mask : _FLOAT_REG_LEGACY_mask;
}

static bool generate_vzeroupper(Compile* C) {
  return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false;  // Generate vzeroupper
}

static int clear_avx_size() {
  return generate_vzeroupper(Compile::current()) ? 3: 0;  // vzeroupper
}

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset()
{
  int offset = 5; // 5 bytes from start of call to where return address points
  offset += clear_avx_size();
  return offset;
}

int MachCallDynamicJavaNode::ret_addr_offset()
{
  int offset = 15; // 15 bytes from start of call to where return address points
  offset += clear_avx_size();
  return offset;
}

int MachCallRuntimeNode::ret_addr_offset() {
  int offset = 13; // movq r10,#addr; callq (r10)
  if (this->ideal_Opcode() != Op_CallLeafVector) {
    offset += clear_avx_size();
  }
  return offset;
}
//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 1; // skip call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
{
  current_offset += clear_avx_size(); // skip vzeroupper
  current_offset += 11; // skip movq instruction + call opcode byte
  return align_up(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char) ((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char) (f1 | f2);
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf,
                 int code, relocInfo::relocType reloc, int offset, int format)
{
  cbuf.relocate(cbuf.insts_mark() + offset, reloc, format);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// EMIT_D64()
void emit_d64(CodeBuffer &cbuf, int64_t d64) {
  cbuf.insts()->emit_int64(d64);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer& cbuf,
                    int d32,
                    relocInfo::relocType reloc,
                    int format)
{
  assert(reloc != relocInfo::external_word_type, "use 2-arg emit_d32_reloc");
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer& cbuf, int d32, RelocationHolder const& rspec, int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type &&
      d32 != 0 && d32 != (intptr_t) Universe::non_oop_word()) {
    assert(Universe::heap()->is_in((address)(intptr_t)d32), "should be real oop");
    assert(oopDesc::is_oop(cast_to_oop((intptr_t)d32)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

void emit_d32_reloc(CodeBuffer& cbuf, address addr) {
  address next_ip = cbuf.insts_end() + 4;
  emit_d32_reloc(cbuf, (int) (addr - next_ip),
                 external_word_Relocation::spec(addr),
                 RELOC_DISP32);
}

// emit 64 bit value and construct relocation entry from relocInfo::relocType
void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, relocInfo::relocType reloc, int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int64(d64);
}

// emit 64 bit value and construct relocation entry from RelocationHolder
void emit_d64_reloc(CodeBuffer& cbuf, int64_t d64, RelocationHolder const& rspec, int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type &&
      d64 != 0 && d64 != (int64_t) Universe::non_oop_word()) {
    assert(Universe::heap()->is_in((address)d64), "should be real oop");
    assert(oopDesc::is_oop(cast_to_oop(d64)), "cannot embed broken oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int64(d64);
}

// Access stack slot for load or store
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp)
{
  emit_opcode(cbuf, opcode);                  // (e.g., FILD   [RSP+src])
  if (-0x80 <= disp && disp < 0x80) {
    emit_rm(cbuf, 0x01, rm_field, RSP_enc);   // R/M byte
    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
    emit_d8(cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm(cbuf, 0x02, rm_field, RSP_enc);   // R/M byte
    emit_rm(cbuf, 0x00, RSP_enc, RSP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement // R/M byte
  }
}

   // rRegI ereg, memory mem) %{    // emit_reg_mem
void encode_RegMem(CodeBuffer &cbuf,
                   int reg,
                   int base, int index, int scale, int disp, relocInfo::relocType disp_reloc)
{
  assert(disp_reloc == relocInfo::none, "cannot have disp");
  int regenc = reg & 7;
  int baseenc = base & 7;
  int indexenc = index & 7;

  // There is no index & no scale, use form without SIB byte
  if (index == 0x4 && scale == 0 && base != RSP_enc && base != R12_enc) {
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      emit_rm(cbuf, 0x0, regenc, baseenc); // *
    } else if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
      // If 8-bit displacement, mode 0x1
      emit_rm(cbuf, 0x1, regenc, baseenc); // *
      emit_d8(cbuf, disp);
    } else {
      // If 32-bit displacement
      if (base == -1) { // Special flag for absolute address
        emit_rm(cbuf, 0x0, regenc, 0x5); // *
        if (disp_reloc != relocInfo::none) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      } else {
        // Normal base + offset
        emit_rm(cbuf, 0x2, regenc, baseenc); // *
        if (disp_reloc != relocInfo::none) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  } else {
    // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [RBP] or [R13]
    if (disp == 0 && base != RBP_enc && base != R13_enc) {
      // If no displacement
      emit_rm(cbuf, 0x0, regenc, 0x4); // *
      emit_rm(cbuf, scale, indexenc, baseenc);
    } else {
      if (-0x80 <= disp && disp < 0x80 && disp_reloc == relocInfo::none) {
        // If 8-bit displacement, mode 0x1
        emit_rm(cbuf, 0x1, regenc, 0x4); // *
        emit_rm(cbuf, scale, indexenc, baseenc);
        emit_d8(cbuf, disp);
      } else {
        // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, 0x04); // XXX is this valid???
        } else {
          emit_rm(cbuf, 0x2, regenc, 0x4);
          emit_rm(cbuf, scale, indexenc, baseenc); // *
        }
        if (disp_reloc != relocInfo::none) {
          emit_d32_reloc(cbuf, disp, relocInfo::oop_type, RELOC_DISP32);
        } else {
          emit_d32(cbuf, disp);
        }
      }
    }
  }
}

// This could be in MacroAssembler but it's fairly C2 specific
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andq(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}

// Math.min()    # Math.max()
// --------------------------
// ucomis[s/d]   #
// ja   -> b     # a
// jp   -> NaN   # NaN
// jb   -> a     # b
// je            #
// |-jz -> a | b # a & b
// |    -> a     #
void emit_fp_min_max(MacroAssembler& _masm, XMMRegister dst,
                     XMMRegister a, XMMRegister b,
                     XMMRegister xmmt, Register rt,
                     bool min, bool single) {

  Label nan, zero, below, above, done;

  if (single)
    __ ucomiss(a, b);
  else
    __ ucomisd(a, b);

  if (dst->encoding() != (min ? b : a)->encoding())
    __ jccb(Assembler::above, above); // CF=0 & ZF=0
  else
    __ jccb(Assembler::above, done);

  __ jccb(Assembler::parity, nan);  // PF=1
  __ jccb(Assembler::below, below); // CF=1

  // equal
  __ vpxor(xmmt, xmmt, xmmt, Assembler::AVX_128bit);
  if (single) {
    __ ucomiss(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movflt(dst, a);
    __ jmp(done);
  }
  else {
    __ ucomisd(a, xmmt);
    __ jccb(Assembler::equal, zero);

    __ movdbl(dst, a);
    __ jmp(done);
  }

  __ bind(zero);
  if (min)
    __ vpor(dst, a, b, Assembler::AVX_128bit);
  else
    __ vpand(dst, a, b, Assembler::AVX_128bit);

  __ jmp(done);

  __ bind(above);
  if (single)
    __ movflt(dst, min ? b : a);
  else
    __ movdbl(dst, min ? b : a);

  __ jmp(done);

  __ bind(nan);
  if (single) {
    __ movl(rt, 0x7fc00000); // Float.NaN
    __ movdl(dst, rt);
  }
  else {
    __ mov64(rt, 0x7ff8000000000000L); // Double.NaN
    __ movdq(dst, rt);
  }
  __ jmp(done);

  __ bind(below);
  if (single)
    __ movflt(dst, min ? a : b);
  else
    __ movdbl(dst, min ? a : b);

  __ bind(done);
}

//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif

//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->output()->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("pushq   rbp\t# Save rbp");
    if (PreserveFramePointer) {
        st->print("\n\t");
        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("subq    rsp, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("subq    rsp, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("addq    rbp, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("movq    [rsp + #%d], 0xbadb100d\t# Majik cookie for stack depth check",framesize);
#ifdef ASSERT
    st->print("\n\t");
    st->print("# stack alignment check");
#endif
  }
  if (C->stub_function() != NULL && BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
    st->print("\n\t");
    st->print("cmpl    [r15_thread + #disarmed_offset], #disarmed_value\t");
    st->print("\n\t");
    st->print("je      fast_entry\t");
    st->print("\n\t");
    st->print("call    #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif

void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  C2_MacroAssembler _masm(&cbuf);

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);

  C->output()->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachPrologNode::reloc() const
{
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  Compile* C = ra_->C;
  if (generate_vzeroupper(C)) {
    st->print("vzeroupper");
    st->cr(); st->print("\t");
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  if (framesize) {
    st->print_cr("addq    rsp, %d\t# Destroy frame", framesize);
    st->print("\t");
  }

  st->print_cr("popq    rbp");
  if (do_polling() && C->is_method_compilation()) {
    st->print("\t");
    st->print_cr("cmpq    rsp, poll_offset[r15_thread] \n\t"
                 "ja      #safepoint_stub\t"
                 "# Safepoint: poll for GC");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (generate_vzeroupper(C)) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    __ vzeroupper();
  }

  int framesize = C->output()->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove word for return adr already pushed
  // and RBP
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize) {
    emit_opcode(cbuf, Assembler::REX_W);
    if (framesize < 0x80) {
      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
      emit_d8(cbuf, framesize);
    } else {
      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
      emit_d32(cbuf, framesize);
    }
  }

  // popq rbp
  emit_opcode(cbuf, 0x58 | RBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    MacroAssembler _masm(&cbuf);
    Label dummy_label;
    Label* code_stub = &dummy_label;
    if (!C->output()->in_scratch_emit_size()) {
      code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
    }
    __ relocate(relocInfo::poll_return_type);
    __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
  }
}

uint MachEpilogNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

int MachEpilogNode::reloc() const
{
  return 2; // a large enough number
}

const Pipeline* MachEpilogNode::pipeline() const
{
  return MachNode::pipeline_class();
}

//=============================================================================

enum RC {
  rc_bad,
  rc_int,
  rc_kreg,
  rc_float,
  rc_stack
};

static enum RC rc_class(OptoReg::Name reg)
{
  if( !OptoReg::is_valid(reg)  ) return rc_bad;

  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);

  if (r->is_Register()) return rc_int;

  if (r->is_KRegister()) return rc_kreg;

  assert(r->is_XMMRegister(), "must be");
  return rc_float;
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                     int stack_offset, int reg, uint ireg, outputStream* st);

static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
                                      int dst_offset, uint ireg, outputStream* st) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    switch (ireg) {
    case Op_VecS:
      __ movq(Address(rsp, -8), rax);
      __ movl(rax, Address(rsp, src_offset));
      __ movl(Address(rsp, dst_offset), rax);
      __ movq(rax, Address(rsp, -8));
      break;
    case Op_VecD:
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      break;
    case Op_VecX:
      __ pushq(Address(rsp, src_offset));
      __ popq (Address(rsp, dst_offset));
      __ pushq(Address(rsp, src_offset+8));
      __ popq (Address(rsp, dst_offset+8));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdquq(Address(rsp, -64), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdquq(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
    case Op_VecS:
      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                "movl    rax, [rsp + #%d]\n\t"
                "movl    [rsp + #%d], rax\n\t"
                "movq    rax, [rsp - #8]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset);
      break;
     case Op_VecX:
      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushq   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+8, dst_offset+8);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
}

uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                       PhaseRegAlloc* ra_,
                                       bool do_size,
                                       outputStream* st) const {
  assert(cbuf != NULL || st  != NULL, "sanity");
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this);
  OptoReg::Name dst_first = ra_->get_reg_first(this);

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first),
         "must move at least 1 register" );

  if (src_first == dst_first && src_second == dst_second) {
    // Self copy, no move
    return 0;
  }
  if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
      vec_mov_helper(cbuf, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      vec_spill_helper(cbuf, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
      int stack_offset = ra_->reg2offset(src_first);
      vec_spill_helper(cbuf, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
    return 0;
  }
  if (src_first_rc == rc_stack) {
    // mem ->
    if (dst_first_rc == rc_stack) {
      // mem -> mem
      assert(src_second != dst_first, "overlap");
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ pushq(Address(rsp, src_offset));
          __ popq (Address(rsp, dst_offset));
#ifndef PRODUCT
        } else {
          st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                    "popq    [rsp + #%d]",
                     src_offset, dst_offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        // No pushl/popl, so:
        int src_offset = ra_->reg2offset(src_first);
        int dst_offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, -8), rax);
          __ movl(rax, Address(rsp, src_offset));
          __ movl(Address(rsp, dst_offset), rax);
          __ movq(rax, Address(rsp, -8));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
                    "movl    rax, [rsp + #%d]\n\t"
                    "movl    [rsp + #%d], rax\n\t"
                    "movq    rax, [rsp - #8]",
                     src_offset, dst_offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // mem -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // mem-> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, [rsp + #%d]\t# spill",
                     UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("movss   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      // mem -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(src_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
        } else {
          st->print("kmovq   %s, [rsp + #%d]\t# spill",
                     Matcher::regName[dst_first],
                     offset);
#endif
        }
      }
      return 0;
    }
  } else if (src_first_rc == rc_int) {
    // gpr ->
    if (dst_first_rc == rc_stack) {
      // gpr -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // gpr -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movq(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movq    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movl(as_Register(Matcher::_regEncode[dst_first]),
                  as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movl    %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
        return 0;
      }
    } else if (dst_first_rc == rc_float) {
      // gpr -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
  #ifndef PRODUCT
        } else {
           st->print("kmovq   %s, %s\t# spill",
                       Matcher::regName[dst_first],
                       Matcher::regName[src_first]);
  #endif
        }
      }
      Unimplemented();
      return 0;
    }
  } else if (src_first_rc == rc_float) {
    // xmm ->
    if (dst_first_rc == rc_stack) {
      // xmm -> mem
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movsd   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movss   [rsp + #%d], %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      // xmm -> gpr
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("movdl   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      // xmm -> xmm
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      } else {
        // 32-bit
        assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
        assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("%s  %s, %s\t# spill",
                     UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      assert(false, "Illegal spilling");
      return 0;
    }
  } else if (src_first_rc == rc_kreg) {
    if (dst_first_rc == rc_stack) {
      // mem -> kreg
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        int offset = ra_->reg2offset(dst_first);
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
          st->print("kmovq   [rsp + #%d] , %s\t# spill",
                     offset,
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_int) {
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      Unimplemented();
      return 0;
    } else if (dst_first_rc == rc_kreg) {
      if ((src_first & 1) == 0 && src_first + 1 == src_second &&
          (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
        // 64-bit
        if (cbuf) {
          MacroAssembler _masm(cbuf);
          __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
        } else {
         st->print("kmovq   %s, %s\t# spill",
                     Matcher::regName[dst_first],
                     Matcher::regName[src_first]);
#endif
        }
      }
      return 0;
    } else if (dst_first_rc == rc_float) {
      assert(false, "Illegal spill");
      return 0;
    }
  }

  assert(0," foo ");
  Unimplemented();
  return 0;
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation(NULL, ra_, false, st);
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("leaq    %s, [rsp + #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
#endif

void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if (offset >= 0x80) {
    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg & 7, 0x04);
    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
    emit_d32(cbuf, offset);
  } else {
    emit_opcode(cbuf, reg < 8 ? Assembler::REX_W : Assembler::REX_WR);
    emit_opcode(cbuf, 0x8D); // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg & 7, 0x04);
    emit_rm(cbuf, 0x0, 0x04, RSP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const
{
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  return (offset < 0x80) ? 5 : 8; // REX
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  if (UseCompressedClassPointers) {
    st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
  } else {
    st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
                 "# Inline cache check");
  }
  st->print_cr("\tjne     SharedRuntime::_ic_miss_stub");
  st->print_cr("\tnop\t# nops to align entry point");
}
#endif

void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  MacroAssembler masm(&cbuf);
  uint insts_size = cbuf.insts_size();
  if (UseCompressedClassPointers) {
    masm.load_klass(rscratch1, j_rarg0, rscratch2);
    masm.cmpptr(rax, rscratch1);
  } else {
    masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
  }

  masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

  /* WARNING these NOPs are critical so that verified entry point is properly
     4 bytes aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 4 - ((cbuf.insts_size() - insts_size) & 0x3);
  if (OptoBreakpoint) {
    // Leave space for int3
    nops_cnt -= 1;
  }
  nops_cnt &= 0x3; // Do not add nops if code is aligned.
  if (nops_cnt > 0)
    masm.nop(nops_cnt);
}

uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}

//=============================================================================

const bool Matcher::supports_vector_calling_convention(void) {
  if (EnableVectorSupport && UseVectorStubs) {
    return true;
  }
  return false;
}

OptoRegPair Matcher::vector_return_value(uint ideal_reg) {
  assert(EnableVectorSupport && UseVectorStubs, "sanity");
  int lo = XMM0_num;
  int hi = XMM0b_num;
  if (ideal_reg == Op_VecX) hi = XMM0d_num;
  else if (ideal_reg == Op_VecY) hi = XMM0h_num;
  else if (ideal_reg == Op_VecZ) hi = XMM0p_num;
  return OptoRegPair(hi, lo);
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  return
    reg ==  RDI_num || reg == RDI_H_num ||
    reg ==  RSI_num || reg == RSI_H_num ||
    reg ==  RDX_num || reg == RDX_H_num ||
    reg ==  RCX_num || reg == RCX_H_num ||
    reg ==   R8_num || reg ==  R8_H_num ||
    reg ==   R9_num || reg ==  R9_H_num ||
    reg ==  R12_num || reg == R12_H_num ||
    reg == XMM0_num || reg == XMM0b_num ||
    reg == XMM1_num || reg == XMM1b_num ||
    reg == XMM2_num || reg == XMM2b_num ||
    reg == XMM3_num || reg == XMM3b_num ||
    reg == XMM4_num || reg == XMM4b_num ||
    reg == XMM5_num || reg == XMM5b_num ||
    reg == XMM6_num || reg == XMM6b_num ||
    reg == XMM7_num || reg == XMM7b_num;
}

bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

uint Matcher::int_pressure_limit()
{
  return (INTPRESSURE == -1) ? _INT_REG_mask.Size() : INTPRESSURE;
}

uint Matcher::float_pressure_limit()
{
  // After experiment around with different values, the following default threshold
  // works best for LCM's register pressure scheduling on x64.
  uint dec_count  = VM_Version::supports_evex() ? 4 : 2;
  uint default_float_pressure_threshold = _FLOAT_REG_mask.Size() - dec_count;
  return (FLOATPRESSURE == -1) ? default_float_pressure_threshold : FLOATPRESSURE;
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // In 64 bit mode a code which use multiply when
  // devisor is constant is faster than hardware
  // DIV instruction (it uses MulHiL).
  return false;
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return INT_RAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return INT_RDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  return LONG_RAX_REG_mask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  return LONG_RDX_REG_mask();
}

// Register for saving SP into on method handle invokes. Not used on x86_64.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
    return NO_REG_mask();
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to
// output byte streams.  Encoding classes are parameterized macros
// used by Machine Instruction Nodes in order to generate the bit
// encoding of the instruction.  Operands specify their base encoding
// interface with the interface keyword.  There are currently
// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
// COND_INTER.  REG_INTER causes an operand to generate a function
// which returns its register number when queried.  CONST_INTER causes
// an operand to generate a function which returns the value of the
// constant when queried.  MEMORY_INTER causes an operand to generate
// four functions which return the Base Register, the Index Register,
// the Scale Value, and the Offset Value of the operand when queried.
// COND_INTER causes an operand to generate six functions which return
// the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional
// instruction.
//
// Instructions specify two basic values for encoding.  Again, a
// function is available to check if the constant displacement is an
// oop. They use the ins_encode keyword to specify their encoding
// classes (which must be a sequence of enc_class names, and their
// parameters, specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular
// instruction needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the
  // intel encoding scheme (opcode, rm, sib, immediate), and call them
  // from C++ code in the enc_class source block.  Emit functions will
  // live in the main source block for now.  In future, we can
  // generalize this by adding a syntax that specifies the sizes of
  // fields in an order, so that the adlc can build the emit functions
  // automagically

  // Emit primary opcode
  enc_class OpcP
  %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS
  %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit tertiary opcode
  enc_class OpcT
  %{
    emit_opcode(cbuf, $tertiary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8)
  %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit size prefix
  enc_class SizePrefix
  %{
    emit_opcode(cbuf, 0x66);
  %}

  enc_class reg(rRegI reg)
  %{
    emit_rm(cbuf, 0x3, 0, $reg$$reg & 7);
  %}

  enc_class reg_reg(rRegI dst, rRegI src)
  %{
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}

  enc_class opc_reg_reg(immI opcode, rRegI dst, rRegI src)
  %{
    emit_opcode(cbuf, $opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg & 7, $src$$reg & 7);
  %}

  enc_class cdql_enc(no_rax_rdx_RegI div)
  %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_int
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequnce:
    //
    //    0:   3d 00 00 00 80          cmp    $0x80000000,%eax
    //    5:   75 07/08                jne    e <normal>
    //    7:   33 d2                   xor    %edx,%edx
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    9:   83 f9 ff                cmp    $0xffffffffffffffff,$div
    //    c:   74 03/04                je     11 <done>
    // 000000000000000e <normal>:
    //    e:   99                      cltd
    //  [div >= 8 -> offset + 1]
    //  [REX_B]
    //    f:   f7 f9                   idiv   $div
    // 0000000000000011 <done>:
    MacroAssembler _masm(&cbuf);
    Label normal;
    Label done;

    // cmp    $0x80000000,%eax
    __ cmpl(as_Register(RAX_enc), 0x80000000);

    // jne    e <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,%ecx
    __ cmpl($div$$Register, -1);

    // je     11 <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cltd
    __ bind(normal);
    __ cdql();

    // idivl
    // <done>
    __ idivl($div$$Register);
    __ bind(done);
  %}

  enc_class cdqq_enc(no_rax_rdx_RegL div)
  %{
    // Full implementation of Java ldiv and lrem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax: dividend                         min_long
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)       min_long
    //         rdx: remainder (= rax irem reg)       0
    //
    //  Code sequnce:
    //
    //    0:   48 ba 00 00 00 00 00    mov    $0x8000000000000000,%rdx
    //    7:   00 00 80
    //    a:   48 39 d0                cmp    %rdx,%rax
    //    d:   75 08                   jne    17 <normal>
    //    f:   33 d2                   xor    %edx,%edx
    //   11:   48 83 f9 ff             cmp    $0xffffffffffffffff,$div
    //   15:   74 05                   je     1c <done>
    // 0000000000000017 <normal>:
    //   17:   48 99                   cqto
    //   19:   48 f7 f9                idiv   $div
    // 000000000000001c <done>:
    MacroAssembler _masm(&cbuf);
    Label normal;
    Label done;

    // mov    $0x8000000000000000,%rdx
    __ mov64(as_Register(RDX_enc), 0x8000000000000000);

    // cmp    %rdx,%rax
    __ cmpq(as_Register(RAX_enc), as_Register(RDX_enc));

    // jne    17 <normal>
    __ jccb(Assembler::notEqual, normal);

    // xor    %edx,%edx
    __ xorl(as_Register(RDX_enc), as_Register(RDX_enc));

    // cmp    $0xffffffffffffffff,$div
    __ cmpq($div$$Register, -1);

    // je     1e <done>
    __ jccb(Assembler::equal, done);

    // <normal>
    // cqto
    __ bind(normal);
    __ cdqq();

    // idivq (note: must be emitted by the user of this rule)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}

  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE(immI imm)
  %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
      emit_opcode(cbuf, $primary | 0x02);
    } else {
      // 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm(rRegI dst, immI imm)
  %{
    // OpcSEr/m
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
      emit_opcode(cbuf, $primary | 0x02);
    } else {
      // 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, dstenc);
  %}

  enc_class OpcSErm_wide(rRegL dst, immI imm)
  %{
    // OpcSEr/m
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
      emit_opcode(cbuf, $primary | 0x02);
    } else {
      // 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, dstenc);
  %}

  enc_class Con8or32(immI imm)
  %{
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (-0x80 <= $imm$$constant && $imm$$constant < 0x80) {
      $$$emit8$imm$$constant;
    } else {
      // 32-bit immediate
      $$$emit32$imm$$constant;
    }
  %}

  enc_class opc2_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg);
  %}

  enc_class opc3_reg(rRegI dst)
  %{
    // BSWAP
    emit_cc(cbuf, $tertiary, $dst$$reg);
  %}

  enc_class reg_opc(rRegI div)
  %{
    // INC, DEC, IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
  %}

  enc_class enc_cmov(cmpOp cop)
  %{
    // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  enc_class enc_PartialSubtypeCheck()
  %{
    Register Rrdi = as_Register(RDI_enc); // result register
    Register Rrax = as_Register(RAX_enc); // super class
    Register Rrcx = as_Register(RCX_enc); // killed
    Register Rrsi = as_Register(RSI_enc); // sub class
    Label miss;
    const bool set_cond_codes = true;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Rrdi, Rrdi);
    }
    __ bind(miss);
  %}

  enc_class clear_avx %{
    debug_only(int off0 = cbuf.insts_size());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers to avoid AVX <-> SSE transition penalty
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}

  enc_class Java_To_Runtime(method meth) %{
    // No relocation needed
    MacroAssembler _masm(&cbuf);
    __ mov64(r10, (int64_t) $meth$$method);
    __ call(r10);
    __ post_call_nop();
  %}

  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    MacroAssembler _masm(&cbuf);
    cbuf.set_insts_mark();

    if (!_method) {
      $$$emit8$primary;
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_DISP32);
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      $$$emit8$primary;
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, (int) ($meth$$method - ((intptr_t) cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      address mark = cbuf.insts_mark();
      if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) {
        // Calls of the same statically bound method can share
        // a stub to the interpreter.
        cbuf.shared_stub_to_interp_for(_method, cbuf.insts()->mark_off());
      } else {
        // Emit stubs for static call.
        address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark);
        if (stub == NULL) {
          ciEnv::current()->record_failure("CodeCache is full");
          return;
        }
      }
    }
    _masm.clear_inst_mark();
    __ post_call_nop();
  %}

  enc_class Java_Dynamic_Call(method meth) %{
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
    __ post_call_nop();
  %}

  enc_class reg_opc_imm(rRegI dst, immI8 shift)
  %{
    // SAL, SAR, SHR
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, dstenc);
    $$$emit8$shift$$constant;
  %}

  enc_class reg_opc_imm_wide(rRegL dst, immI8 shift)
  %{
    // SAL, SAR, SHR
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, dstenc);
    $$$emit8$shift$$constant;
  %}

  enc_class load_immI(rRegI dst, immI src)
  %{
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    $$$emit32$src$$constant;
  %}

  enc_class load_immL(rRegL dst, immL src)
  %{
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    emit_d64(cbuf, $src$$constant);
  %}

  enc_class load_immUL32(rRegL dst, immUL32 src)
  %{
    // same as load_immI, but this time we care about zeroes in the high word
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    $$$emit32$src$$constant;
  %}

  enc_class load_immL32(rRegL dst, immL32 src)
  %{
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xC7);
    emit_rm(cbuf, 0x03, 0x00, dstenc);
    $$$emit32$src$$constant;
  %}

  enc_class load_immP31(rRegP dst, immP32 src)
  %{
    // same as load_immI, but this time we care about zeroes in the high word
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    $$$emit32$src$$constant;
  %}

  enc_class load_immP(rRegP dst, immP src)
  %{
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    emit_opcode(cbuf, 0xB8 | dstenc);
    // This next line should be generated from ADLC
    if ($src->constant_reloc() != relocInfo::none) {
      emit_d64_reloc(cbuf, $src$$constant, $src->constant_reloc(), RELOC_IMM64);
    } else {
      emit_d64(cbuf, $src$$constant);
    }
  %}

  enc_class Con32(immI src)
  %{
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32F_as_bits(immF src)
  %{
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    jint jf_as_bits = jint_cast(jf);
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16(immI src)
  %{
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // How is this different from Con32??? XXX
  enc_class Con_d32(immI src)
  %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (rRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix()
  %{
    emit_opcode(cbuf, 0xF0); // lock
  %}

  enc_class REX_mem(memory mem)
  %{
    if ($mem$$base >= 8) {
      if ($mem$$index < 8) {
        emit_opcode(cbuf, Assembler::REX_B);
      } else {
        emit_opcode(cbuf, Assembler::REX_XB);
      }
    } else {
      if ($mem$$index >= 8) {
        emit_opcode(cbuf, Assembler::REX_X);
      }
    }
  %}

  enc_class REX_mem_wide(memory mem)
  %{
    if ($mem$$base >= 8) {
      if ($mem$$index < 8) {
        emit_opcode(cbuf, Assembler::REX_WB);
      } else {
        emit_opcode(cbuf, Assembler::REX_WXB);
      }
    } else {
      if ($mem$$index < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WX);
      }
    }
  %}

  // for byte regs
  enc_class REX_breg(rRegI reg)
  %{
    if ($reg$$reg >= 4) {
      emit_opcode(cbuf, $reg$$reg < 8 ? Assembler::REX : Assembler::REX_B);
    }
  %}

  // for byte regs
  enc_class REX_reg_breg(rRegI dst, rRegI src)
  %{
    if ($dst$$reg < 8) {
      if ($src$$reg >= 4) {
        emit_opcode(cbuf, $src$$reg < 8 ? Assembler::REX : Assembler::REX_B);
      }
    } else {
      if ($src$$reg < 8) {
        emit_opcode(cbuf, Assembler::REX_R);
      } else {
        emit_opcode(cbuf, Assembler::REX_RB);
      }
    }
  %}

  // for byte regs
  enc_class REX_breg_mem(rRegI reg, memory mem)
  %{
    if ($reg$$reg < 8) {
      if ($mem$$base < 8) {
        if ($mem$$index >= 8) {
          emit_opcode(cbuf, Assembler::REX_X);
        } else if ($reg$$reg >= 4) {
          emit_opcode(cbuf, Assembler::REX);
        }
      } else {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_B);
        } else {
          emit_opcode(cbuf, Assembler::REX_XB);
        }
      }
    } else {
      if ($mem$$base < 8) {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_R);
        } else {
          emit_opcode(cbuf, Assembler::REX_RX);
        }
      } else {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_RB);
        } else {
          emit_opcode(cbuf, Assembler::REX_RXB);
        }
      }
    }
  %}

  enc_class REX_reg(rRegI reg)
  %{
    if ($reg$$reg >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
  %}

  enc_class REX_reg_wide(rRegI reg)
  %{
    if ($reg$$reg < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
    }
  %}

  enc_class REX_reg_reg(rRegI dst, rRegI src)
  %{
    if ($dst$$reg < 8) {
      if ($src$$reg >= 8) {
        emit_opcode(cbuf, Assembler::REX_B);
      }
    } else {
      if ($src$$reg < 8) {
        emit_opcode(cbuf, Assembler::REX_R);
      } else {
        emit_opcode(cbuf, Assembler::REX_RB);
      }
    }
  %}

  enc_class REX_reg_reg_wide(rRegI dst, rRegI src)
  %{
    if ($dst$$reg < 8) {
      if ($src$$reg < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WB);
      }
    } else {
      if ($src$$reg < 8) {
        emit_opcode(cbuf, Assembler::REX_WR);
      } else {
        emit_opcode(cbuf, Assembler::REX_WRB);
      }
    }
  %}

  enc_class REX_reg_mem(rRegI reg, memory mem)
  %{
    if ($reg$$reg < 8) {
      if ($mem$$base < 8) {
        if ($mem$$index >= 8) {
          emit_opcode(cbuf, Assembler::REX_X);
        }
      } else {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_B);
        } else {
          emit_opcode(cbuf, Assembler::REX_XB);
        }
      }
    } else {
      if ($mem$$base < 8) {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_R);
        } else {
          emit_opcode(cbuf, Assembler::REX_RX);
        }
      } else {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_RB);
        } else {
          emit_opcode(cbuf, Assembler::REX_RXB);
        }
      }
    }
  %}

  enc_class REX_reg_mem_wide(rRegL reg, memory mem)
  %{
    if ($reg$$reg < 8) {
      if ($mem$$base < 8) {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_W);
        } else {
          emit_opcode(cbuf, Assembler::REX_WX);
        }
      } else {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_WB);
        } else {
          emit_opcode(cbuf, Assembler::REX_WXB);
        }
      }
    } else {
      if ($mem$$base < 8) {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_WR);
        } else {
          emit_opcode(cbuf, Assembler::REX_WRX);
        }
      } else {
        if ($mem$$index < 8) {
          emit_opcode(cbuf, Assembler::REX_WRB);
        } else {
          emit_opcode(cbuf, Assembler::REX_WRXB);
        }
      }
    }
  %}

  enc_class reg_mem(rRegI ereg, memory mem)
  %{
    // High registers handle in encode_RegMem
    int reg = $ereg$$reg;
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();

    encode_RegMem(cbuf, reg, base, index, scale, disp, disp_reloc);
  %}

  enc_class RM_opc_mem(immI rm_opcode, memory mem)
  %{
    int rm_byte_opcode = $rm_opcode$$constant;

    // High registers handle in encode_RegMem
    int base = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;

    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when
                                            // working with static
                                            // globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace,
                  disp_reloc);
  %}

  enc_class reg_lea(rRegI dst, rRegI src0, immI src1)
  %{
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace,
                  disp_reloc);
  %}

  enc_class neg_reg(rRegI dst)
  %{
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    }
    // NEG $dst
    emit_opcode(cbuf, 0xF7);
    emit_rm(cbuf, 0x3, 0x03, dstenc);
  %}

  enc_class neg_reg_wide(rRegI dst)
  %{
    int dstenc = $dst$$reg;
    if (dstenc < 8) {
      emit_opcode(cbuf, Assembler::REX_W);
    } else {
      emit_opcode(cbuf, Assembler::REX_WB);
      dstenc -= 8;
    }
    // NEG $dst
    emit_opcode(cbuf, 0xF7);
    emit_rm(cbuf, 0x3, 0x03, dstenc);
  %}

  enc_class setLT_reg(rRegI dst)
  %{
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    } else if (dstenc >= 4) {
      emit_opcode(cbuf, Assembler::REX);
    }
    // SETLT $dst
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x9C);
    emit_rm(cbuf, 0x3, 0x0, dstenc);
  %}

  enc_class setNZ_reg(rRegI dst)
  %{
    int dstenc = $dst$$reg;
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
      dstenc -= 8;
    } else if (dstenc >= 4) {
      emit_opcode(cbuf, Assembler::REX);
    }
    // SETNZ $dst
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_rm(cbuf, 0x3, 0x0, dstenc);
  %}

  // Compare the lonogs and set -1, 0, or 1 into dst
  enc_class cmpl3_flag(rRegL src1, rRegL src2, rRegI dst)
  %{
    int src1enc = $src1$$reg;
    int src2enc = $src2$$reg;
    int dstenc = $dst$$reg;

    // cmpq $src1, $src2
    if (src1enc < 8) {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_W);
      } else {
        emit_opcode(cbuf, Assembler::REX_WB);
      }
    } else {
      if (src2enc < 8) {
        emit_opcode(cbuf, Assembler::REX_WR);
      } else {
        emit_opcode(cbuf, Assembler::REX_WRB);
      }
    }
    emit_opcode(cbuf, 0x3B);
    emit_rm(cbuf, 0x3, src1enc & 7, src2enc & 7);

    // movl $dst, -1
    if (dstenc >= 8) {
      emit_opcode(cbuf, Assembler::REX_B);
    }
    emit_opcode(cbuf, 0xB8 | (dstenc & 7));
    emit_d32(cbuf, -1);

    // jl,s done
    emit_opcode(cbuf, 0x7C);
    emit_d8(cbuf, dstenc < 4 ? 0x06 : 0x08);

    // setne $dst
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_B);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x95);
    emit_opcode(cbuf, 0xC0 | (dstenc & 7));

    // movzbl $dst, $dst
    if (dstenc >= 4) {
      emit_opcode(cbuf, dstenc < 8 ? Assembler::REX : Assembler::REX_RB);
    }
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0xB6);
    emit_rm(cbuf, 0x3, dstenc & 7, dstenc & 7);
  %}

  enc_class Push_ResultXD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  enc_class Push_SrcXD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class enc_rethrow()
  %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9); // jmp entry
    emit_d32_reloc(cbuf,
                   (int) (OptoRuntime::rethrow_stub() - cbuf.insts_end() - 4),
                   runtime_call_Relocation::spec(),
                   RELOC_DISP32);
  %}

%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.
// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
//         alignment.  Region 11, pad1, may be dynamically extended so that
//         SP meets the minimum alignment.

frame
%{
  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(RAX);                // Inline Cache Register

  // Optional: name the operand used by cisc-spilling to access
  // [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(RSP);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(RBP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    static const int lo[Op_RegL + 1] = {
      0,
      0,
      RAX_num,  // Op_RegN
      RAX_num,  // Op_RegI
      RAX_num,  // Op_RegP
      XMM0_num, // Op_RegF
      XMM0_num, // Op_RegD
      RAX_num   // Op_RegL
    };
    static const int hi[Op_RegL + 1] = {
      0,
      0,
      OptoReg::Bad, // Op_RegN
      OptoReg::Bad, // Op_RegI
      RAX_H_num,    // Op_RegP
      OptoReg::Bad, // Op_RegF
      XMM0b_num,    // Op_RegD
      RAX_H_num     // Op_RegL
    };
    // Excluded flags and vector registers.
    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI()
%{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI_0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2()
%{
  predicate(n->get_int() == 2);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_4()
%{
  predicate(n->get_int() == 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immU7()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI8()
%{
  predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immU8()
%{
  predicate((0 <= n->get_int()) && (n->get_int() <= 255));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI16()
%{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32()
%{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_64()
%{
  predicate( n->get_int() == 64 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP()
%{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immN() %{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

operand immNKlass() %{
  match(ConNKlass);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immN0() %{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immP31()
%{
  predicate(n->as_Type()->type()->reloc() == relocInfo::none
            && (n->get_ptr() >> 31) == 0);
  match(ConP);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL()
%{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 8-bit
operand immL8()
%{
  predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L);
  match(ConL);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit unsigned
operand immUL32()
%{
  predicate(n->get_long() == (unsigned int) (n->get_long()));
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate 32-bit signed
operand immL32()
%{
  predicate(n->get_long() == (int) (n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL_Pow2()
%{
  predicate(is_power_of_2((julong)n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

operand immL_NotPow2()
%{
  predicate(is_power_of_2((julong)~n->get_long()));
  match(ConL);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0()
%{
  predicate(n->get_long() == 0L);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immL1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value 10
operand immL10()
%{
  predicate(n->get_long() == 10);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127()
%{
  predicate(0 <= n->get_long() && n->get_long() < 0x80);
  match(ConL);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: 2^n-1, positive
operand immI_Pow2M1()
%{
  predicate((n->get_int() > 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF()
%{
  match(ConF);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immD()
%{
  match(ConD);

  op_cost(15);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K1()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K1));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K2()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K2));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K3()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K3));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K4()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K4));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K5()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K5));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

operand kReg_K6()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K6));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Special Registers
operand kReg_K7()
%{
  constraint(ALLOC_IN_RC(vectmask_reg_K7));
  match(RegVectMask);
  format %{%}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand rax_RegI()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegI);
  match(rRegI);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand rbx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rbx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RBX" %}
  interface(REG_INTER);
%}

operand rcx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rcx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RCX" %}
  interface(REG_INTER);
%}

operand rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdx_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDX" %}
  interface(REG_INTER);
%}

operand rdi_RegI()
%{
  constraint(ALLOC_IN_RC(int_rdi_reg));
  match(RegI);
  match(rRegI);

  format %{ "RDI" %}
  interface(REG_INTER);
%}

operand no_rax_rdx_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rax_rdx_reg));
  match(RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

operand no_rbp_r13_RegI()
%{
  constraint(ALLOC_IN_RC(int_no_rbp_r13_reg));
  match(RegI);
  match(rRegI);
  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register
operand any_RegP()
%{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);
  match(r15_RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  match(rax_RegP);
  match(rbx_RegP);
  match(rdi_RegP);
  match(rsi_RegP);
  match(rbp_RegP);  // See Q&A below about
  match(r15_RegP);  // r15_RegP and rbp_RegP.

  format %{ %}
  interface(REG_INTER);
%}

operand rRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

// Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
// Answer: Operand match rules govern the DFA as it processes instruction inputs.
// It's fine for an instruction input that expects rRegP to match a r15_RegP.
// The output of an instruction is controlled by the allocator, which respects
// register class masks, not match rules.  Unless an instruction mentions
// r15_RegP or any_RegP explicitly as its output, r15 will not be considered
// by the allocator as an input.
// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
// the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
// result, RBP is not included in the output of the instruction either.

operand no_rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_reg));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}

// This operand is not allowed to use RBP even if
// RBP is not used to hold the frame pointer.
operand no_rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
  match(RegP);
  match(rbx_RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}

operand no_rax_rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_no_rax_rbx_reg));
  match(RegP);
  match(rsi_RegP);
  match(rdi_RegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand rax_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rax_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a compressed pointer value
operand rax_RegN()
%{
  constraint(ALLOC_IN_RC(int_rax_reg));
  match(RegN);
  match(rRegN);

  format %{ %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand rbx_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbx_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rsi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rsi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rbp_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rbp_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Used in rep stosq
operand rdi_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_rdi_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand r15_RegP()
%{
  constraint(ALLOC_IN_RC(ptr_r15_reg));
  match(RegP);
  match(rRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand rRegL()
%{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(rax_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand no_rax_rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rax_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand rax_RegL()
%{
  constraint(ALLOC_IN_RC(long_rax_reg));
  match(RegL);
  match(rRegL);

  format %{ "RAX" %}
  interface(REG_INTER);
%}

operand rcx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rcx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand rdx_RegL()
%{
  constraint(ALLOC_IN_RC(long_rdx_reg));
  match(RegL);
  match(rRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand no_rbp_r13_RegL()
%{
  constraint(ALLOC_IN_RC(long_no_rbp_r13_reg));
  match(RegL);
  match(rRegL);
  match(rax_RegL);
  match(rcx_RegL);
  match(rdx_RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "RFLAGS_U" %}
  interface(REG_INTER);
%}

operand rFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "RFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Float register operands
operand regF() %{
   constraint(ALLOC_IN_RC(float_reg));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// Float register operands
operand legRegF() %{
   constraint(ALLOC_IN_RC(float_reg_legacy));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// Float register operands
operand vlRegF() %{
   constraint(ALLOC_IN_RC(float_reg_vl));
   match(RegF);

   format %{ %}
   interface(REG_INTER);
%}

// Double register operands
operand regD() %{
   constraint(ALLOC_IN_RC(double_reg));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

// Double register operands
operand legRegD() %{
   constraint(ALLOC_IN_RC(double_reg_legacy));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

// Double register operands
operand vlRegD() %{
   constraint(ALLOC_IN_RC(double_reg_vl));
   match(RegD);

   format %{ %}
   interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
// operand direct(immP addr)
// %{
//   match(addr);

//   format %{ "[$addr]" %}
//   interface(MEMORY_INTER) %{
//     base(0xFFFFFFFF);
//     index(0x4);
//     scale(0x0);
//     disp($addr);
//   %}
// %}

// Indirect Memory Operand
operand indirect(any_RegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(any_RegP reg, immL8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(any_RegP reg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(any_RegP reg, rRegL lreg, immL32 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(any_RegP reg, rRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP reg (LShiftL (ConvI2L idx) scale));

  op_cost(10);
  format %{"[$reg + pos $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP reg (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffset(any_RegP reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP reg (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Narrow Oop Plus Offset Operand
// Note: x86 architecture doesn't support "scale * index + offset" without a base
// we can't free r12 even with CompressedOops::base() == NULL.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
  predicate(UseCompressedOops && (CompressedOops::shift() == Address::times_8));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  op_cost(10);
  format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
  interface(MEMORY_INTER) %{
    base(0xc); // R12
    index($reg);
    scale(0x3);
    disp($off);
  %}
%}

// Indirect Memory Operand
operand indirectNarrow(rRegN reg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8Narrow(rRegN reg, immL8 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (8-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32Narrow(rRegN reg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);

  format %{ "[$reg + $off (32-bit)]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) lreg) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexNarrow(rRegN reg, rRegL lreg)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);

  op_cost(10);
  format %{"[$reg + $lreg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));

  op_cost(10);
  format %{"[$reg + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
%{
  predicate(CompressedOops::shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $lreg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp($off);
  %}
%}

// Indirect Memory Times Plus Positive Index Register Plus Offset Operand
operand indPosIndexOffsetNarrow(rRegN reg, immL32 off, rRegI idx)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(CompressedOops::shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $idx << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($idx);
    scale($scale);
    disp($off);
  %}
%}

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // RSP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case,
// If both inputs of the comparison are the same, ZF is always set so we
// don't need to use cmpOpUCF2 for eq/ne
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt ||
            n->in(1)->in(1) == n->in(1)->in(2));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0xb, "np");
    not_equal(0xa, "p");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate((n->as_Bool()->_test._test == BoolTest::ne ||
             n->as_Bool()->_test._test == BoolTest::eq) &&
            n->in(1)->in(1) != n->in(1)->in(2));
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "ae");
    less_equal(0x6, "be");
    greater(0x7, "a");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
               indIndexScale, indPosIndexScale, indIndexScaleOffset, indPosIndexOffset, indPosIndexScaleOffset,
               indCompressedOopOffset,
               indirectNarrow, indOffset8Narrow, indOffset32Narrow,
               indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
               indIndexScaleOffsetNarrow, indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow);

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Fixed size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 3 ALU op, only ALU0 handles mul instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MS2, MEM = MS0 | MS1 | MS2,
           BR, FPU,
           ALU0, ALU1, ALU2, ALU = ALU0 | ALU1 | ALU2);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(rRegL dst)
%{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst)
%{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src)
%{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// // Long Store to Memory
// pipe_class ialu_mem_long_reg(memory mem, rRegL src)
// %{
//     instruction_count(2);
//     mem    : S3(read);
//     src    : S5(read);
//     D0     : S0(2);          // big decoder only; twice
//     ALU    : S4(2);     // any 2 alus
//     MEM    : S3(2);  // Both mems
// %}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem)
%{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(rFlagsReg cr, rRegI src1, rRegI src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(rFlagsReg cr, rRegI src1)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(rFlagsReg cr, rRegI src1, memory src2)
%{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y)
%{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, rFlagsReg cr)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( rFlagsReg cr, rRegI dst, memory src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( rFlagsReg cr, rRegL dst, rRegL src)
%{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// XXX
// // Conditional move double reg-reg
// pipe_class pipe_cmovD_reg( rFlagsReg cr, regDPR1 dst, regD src)
// %{
//     single_instruction;
//     dst    : S4(write);
//     src    : S3(read);
//     cr     : S3(read);
//     DECODE : S0;     // any decoder
// %}

// Float reg-reg operation
pipe_class fpu_reg(regD dst)
%{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src)
%{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2)
%{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3)
%{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem)
%{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem)
%{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regD src)
%{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1)
%{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2)
%{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regD src1)
%{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regD dst)
%{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regD dst, regD src)
%{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp(label labl)
%{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc(cmpOp cmp, rFlagsReg cr, label labl)
%{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg(rRegP dst, rRegP heap_ptr)
%{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow()
%{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty()
%{
    instruction_count(0);
%}

// Define the class for the Nop node
define
%{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               rrspectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
// Load Float
instruct MoveF2VL(vlRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveF2LEG(legRegF dst, regF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveVL2F(regF dst, vlRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t! load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Float
instruct MoveLEG2F(regF dst, legRegF src) %{
  match(Set dst src);
  format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2VL(vlRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveD2LEG(legRegD dst, regD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveVL2D(regD dst, vlRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// Load Double
instruct MoveLEG2D(regD dst, legRegD src) %{
  match(Set dst src);
  format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------

// Load Byte (8 bit signed)
instruct loadB(rRegI dst, memory mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8 bit signed) into Long Register
instruct loadB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(125);
  format %{ "movsbq  $dst, $mem\t# byte -> long" %}

  ins_encode %{
    __ movsbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned)
instruct loadUB(rRegI dst, memory mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# ubyte" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(125);
  format %{ "movzbq  $dst, $mem\t# ubyte -> long" %}

  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register
instruct loadUB2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "movzbq  $dst, $mem\t# ubyte & 32-bit mask -> long\n\t"
            "andl    $dst, right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbq(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed)
instruct loadS(rRegI dst, memory mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "movswl $dst, $mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) into Long Register
instruct loadS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(125);
  format %{ "movswq $dst, $mem\t# short -> long" %}

  ins_encode %{
    __ movswq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned)
instruct loadUS(rRegI dst, memory mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# ushort/char" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(125);
  format %{ "movzwq  $dst, $mem\t# ushort/char -> long" %}

  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));

  format %{ "movzbq  $dst, $mem\t# ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register
instruct loadUS2L_immI(rRegL dst, memory mem, immI mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "movzwq  $dst, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "andl    $dst, right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwq(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "movsbl  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzbl  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "movswl  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "movzwl  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(rRegL dst, memory mem)
%{
  match(Set dst (ConvI2L (LoadI mem)));

  ins_cost(125);
  format %{ "movslq  $dst, $mem\t# int -> long" %}

  ins_encode %{
    __ movslq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzbq  $dst, $mem\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));

  format %{ "movzwq  $dst, $mem\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with a 31-bit mask into Long Register
instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask, rFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "movl    $dst, $mem\t# int & 31-bit mask -> long\n\t"
            "andl    $dst, $mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(125);
  format %{ "movl    $dst, $mem\t# uint -> long" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long
instruct loadL(rRegL dst, memory mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(125);
  format %{ "movq    $dst, $mem\t# long" %}

  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem); // XXX
%}

// Load Range
instruct loadRange(rRegI dst, memory mem)
%{
  match(Set dst (LoadRange mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# range" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Pointer
instruct loadP(rRegP dst, memory mem)
%{
  match(Set dst (LoadP mem));
  predicate(n->as_Load()->barrier_data() == 0);

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# ptr" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}

// Load Compressed Pointer
instruct loadN(rRegN dst, memory mem)
%{
   match(Set dst (LoadN mem));

   ins_cost(125); // XXX
   format %{ "movl    $dst, $mem\t# compressed ptr" %}
   ins_encode %{
     __ movl($dst$$Register, $mem$$Address);
   %}
   ins_pipe(ialu_reg_mem); // XXX
%}

// Load Klass Pointer
instruct loadKlass(rRegP dst, memory mem)
%{
  match(Set dst (LoadKlass mem));

  ins_cost(125); // XXX
  format %{ "movq    $dst, $mem\t# class" %}
  ins_encode %{
    __ movq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}

// Load narrow Klass Pointer
instruct loadNKlass(rRegN dst, memory mem)
%{
  match(Set dst (LoadNKlass mem));

  ins_cost(125); // XXX
  format %{ "movl    $dst, $mem\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem); // XXX
%}

// Load Float
instruct loadF(regF dst, memory mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(145); // XXX
  format %{ "movss   $dst, $mem\t# float" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Load Double
instruct loadD_partial(regD dst, memory mem)
%{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movlpd  $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd   $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Following pseudo code describes the algorithm for max[FD]:
// Min algorithm is on similar lines
//  btmp = (b < +0.0) ? a : b
//  atmp = (b < +0.0) ? b : a
//  Tmp  = Max_Float(atmp , btmp)
//  Res  = (atmp == NaN) ? atmp : Tmp

// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !n->is_reduction());
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "vblendvps        $btmp,$b,$a,$b           \n\t"
     "vblendvps        $atmp,$a,$b,$b           \n\t"
     "vmaxss           $tmp,$atmp,$btmp         \n\t"
     "vcmpps.unordered $btmp,$atmp,$atmp        \n\t"
     "vblendvps        $dst,$tmp,$atmp,$btmp    \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
  ins_pipe( pipe_slow );
%}

instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && n->is_reduction());
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

  format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
  ins_encode %{
    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                    false /*min*/, true /*single*/);
  %}
  ins_pipe( pipe_slow );
%}

// max = java.lang.Math.max(double a, double b)
instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(UseAVX > 0 && !n->is_reduction());
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
  format %{
     "vblendvpd        $btmp,$b,$a,$b            \n\t"
     "vblendvpd        $atmp,$a,$b,$b            \n\t"
     "vmaxsd           $tmp,$atmp,$btmp          \n\t"
     "vcmppd.unordered $btmp,$atmp,$atmp         \n\t"
     "vblendvpd        $dst,$tmp,$atmp,$btmp     \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
    __ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct maxD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && n->is_reduction());
  match(Set dst (MaxD a b));
  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

  format %{ "$dst = max($a, $b)\t# intrinsic (double)" %}
  ins_encode %{
    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                    false /*min*/, false /*single*/);
  %}
  ins_pipe( pipe_slow );
%}

// min = java.lang.Math.min(float a, float b)
instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !n->is_reduction());
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "vblendvps        $atmp,$a,$b,$a             \n\t"
     "vblendvps        $btmp,$b,$a,$a             \n\t"
     "vminss           $tmp,$atmp,$btmp           \n\t"
     "vcmpps.unordered $btmp,$atmp,$atmp          \n\t"
     "vblendvps        $dst,$tmp,$atmp,$btmp      \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct minF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && n->is_reduction());
  match(Set dst (MinF a b));
  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

  format %{ "$dst = min($a, $b)\t# intrinsic (float)" %}
  ins_encode %{
    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                    true /*min*/, true /*single*/);
  %}
  ins_pipe( pipe_slow );
%}

// min = java.lang.Math.min(double a, double b)
instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp, legRegD btmp) %{
  predicate(UseAVX > 0 && !n->is_reduction());
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{
     "vblendvpd        $atmp,$a,$b,$a           \n\t"
     "vblendvpd        $btmp,$b,$a,$a           \n\t"
     "vminsd           $tmp,$atmp,$btmp         \n\t"
     "vcmppd.unordered $btmp,$atmp,$atmp        \n\t"
     "vblendvpd        $dst,$tmp,$atmp,$btmp    \n\t"
  %}
  ins_encode %{
    int vector_len = Assembler::AVX_128bit;
    __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
    __ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
    __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
    __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct minD_reduction_reg(legRegD dst, legRegD a, legRegD b, legRegD xmmt, rRegL tmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && n->is_reduction());
  match(Set dst (MinD a b));
  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

  format %{ "$dst = min($a, $b)\t# intrinsic (double)" %}
  ins_encode %{
    emit_fp_min_max(_masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,
                    true /*min*/, false /*single*/);
  %}
  ins_pipe( pipe_slow );
%}

// Load Effective Address
instruct leaP8(rRegP dst, indOffset8 mem)
%{
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq    $dst, $mem\t# ptr 8" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32(rRegP dst, indOffset32 mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr 32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOff(rRegP dst, indIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScale(rRegP dst, indIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscale" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOff(rRegP dst, indPosIndexOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoff" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

// Load Effective Address which uses Narrow (32-bits) oop
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
  predicate(UseCompressedOops && (CompressedOops::shift() != 0));
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr compressedoopoff32" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110); // XXX
  format %{ "leaq    $dst, $mem\t# ptr off8narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr off32narrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscalenarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr idxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst mem);

  ins_cost(110);
  format %{ "leaq    $dst, $mem\t# ptr posidxscaleoffnarrow" %}
  ins_encode %{
    __ leaq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_reg_fat);
%}

instruct loadConI(rRegI dst, immI src)
%{
  match(Set dst src);

  format %{ "movl    $dst, $src\t# int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConL(rRegL dst, immL src)
%{
  match(Set dst src);

  ins_cost(150);
  format %{ "movq    $dst, $src\t# long" %}
  ins_encode %{
    __ mov64($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConL0(rRegL dst, immL0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# long" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg); // XXX
%}

instruct loadConUL32(rRegL dst, immUL32 src)
%{
  match(Set dst src);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# long (unsigned 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConL32(rRegL dst, immL32 src)
%{
  match(Set dst src);

  ins_cost(70);
  format %{ "movq    $dst, $src\t# long (32-bit)" %}
  ins_encode %{
    __ movq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConP(rRegP dst, immP con) %{
  match(Set dst con);

  format %{ "movq    $dst, $con\t# ptr" %}
  ins_encode %{
    __ mov64($dst$$Register, $con$$constant, $con->constant_reloc(), RELOC_IMM64);
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

instruct loadConP0(rRegP dst, immP0 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "xorl    $dst, $dst\t# ptr" %}
  ins_encode %{
    __ xorl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConP31(rRegP dst, immP31 src, rFlagsReg cr)
%{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(60);
  format %{ "movl    $dst, $src\t# ptr (positive 32-bit)" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movss   $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  format %{ "xorq    $dst, $src\t# compressed NULL ptr" %}
  ins_encode %{
    __ xorq($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct loadConN(rRegN dst, immN src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

instruct loadConNKlass(rRegN dst, immNKlass src) %{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# compressed klass ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant);
    }
  %}
  ins_pipe(ialu_reg_fat); // XXX
%}

instruct loadConF0(regF dst, immF0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorps   $dst, $dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Use the same format since predicate() can not be used here.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "movsd   $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct loadConD0(regD dst, immD0 src)
%{
  match(Set dst src);
  ins_cost(100);

  format %{ "xorpd   $dst, $dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct loadSSI(rRegI dst, stackSlotI src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSL(rRegL dst, stackSlotL src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSP(rRegP dst, stackSlotP src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x8B);
  ins_encode(REX_reg_mem_wide(dst, src), OpcP, reg_mem(dst, src));
  ins_pipe(ialu_reg_mem);
%}

instruct loadSSF(regF dst, stackSlotF src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Use the same format since predicate() can not be used here.
instruct loadSSD(regD dst, stackSlotD src)
%{
  match(Set dst src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode  %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(125);

  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, rRegI src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(125); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Char/Short
instruct storeC(memory mem, rRegI src)
%{
  match(Set mem (StoreC mem src));

  ins_cost(125); // XXX
  format %{ "movw    $mem, $src\t# char/short" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Integer
instruct storeI(memory mem, rRegI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store Long
instruct storeL(memory mem, rRegL src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg); // XXX
%}

// Store Pointer
instruct storeP(memory mem, any_RegP src)
%{
  match(Set mem (StoreP mem src));

  ins_cost(125); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmP0(memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreP mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Store NULL Pointer, mark word, or other simple pointer constant.
instruct storeImmP(memory mem, immP31 src)
%{
  match(Set mem (StoreP mem src));

  ins_cost(150); // XXX
  format %{ "movq    $mem, $src\t# ptr" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Compressed Pointer
instruct storeN(memory mem, rRegN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeNKlass(memory mem, rRegN src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(125); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmN0(memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set mem (StoreN mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmN(memory mem, immN src)
%{
  match(Set mem (StoreN mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    address con = (address)$src$$constant;
    if (con == NULL) {
      __ movl($mem$$Address, 0);
    } else {
      __ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
    }
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct storeImmNKlass(memory mem, immNKlass src)
%{
  match(Set mem (StoreNKlass mem src));

  ins_cost(150); // XXX
  format %{ "movl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Integer Immediate
instruct storeImmI0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreI mem zero));

  ins_cost(125); // XXX
  format %{ "movl    $mem, R12\t# int (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI(memory mem, immI src)
%{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "movl    $mem, $src\t# int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Long Immediate
instruct storeImmL0(memory mem, immL0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreL mem zero));

  ins_cost(125); // XXX
  format %{ "movq    $mem, R12\t# long (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmL(memory mem, immL32 src)
%{
  match(Set mem (StoreL mem src));

  ins_cost(150);
  format %{ "movq    $mem, $src\t# long" %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Short/Char Immediate
instruct storeImmC0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreC mem zero));

  ins_cost(125); // XXX
  format %{ "movw    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movw($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmI16(memory mem, immI16 src)
%{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "movw    $mem, $src\t# short/char" %}
  ins_encode %{
    __ movw($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Byte Immediate
instruct storeImmB0(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreB mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# short/char (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmB(memory mem, immI8 src)
%{
  match(Set mem (StoreB mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# byte" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store CMS card-mark Immediate
instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreCM mem zero));

  ins_cost(125); // XXX
  format %{ "movb    $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
  ins_encode %{
    __ movb($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeImmCM0(memory mem, immI_0 src)
%{
  match(Set mem (StoreCM mem src));

  ins_cost(150); // XXX
  format %{ "movb    $mem, $src\t# CMS card-mark byte 0" %}
  ins_encode %{
    __ movb($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Float
instruct storeF(memory mem, regF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(95); // XXX
  format %{ "movss   $mem, $src\t# float" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate Float value (it is faster than store from XMM register)
instruct storeF0(memory mem, immF0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreF mem zero));

  ins_cost(25); // XXX
  format %{ "movl    $mem, R12\t# float 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movl($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeF_imm(memory mem, immF src)
%{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "movl    $mem, $src\t# float" %}
  ins_encode %{
    __ movl($mem$$Address, jint_cast($src$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Store Double
instruct storeD(memory mem, regD src)
%{
  match(Set mem (StoreD mem src));

  ins_cost(95); // XXX
  format %{ "movsd   $mem, $src\t# double" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Store immediate double 0.0 (it is faster than store from XMM register)
instruct storeD0_imm(memory mem, immD0 src)
%{
  predicate(!UseCompressedOops || (CompressedOops::base() != NULL));
  match(Set mem (StoreD mem src));

  ins_cost(50);
  format %{ "movq    $mem, $src\t# double 0." %}
  ins_encode %{
    __ movq($mem$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct storeD0(memory mem, immD0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL));
  match(Set mem (StoreD mem zero));

  ins_cost(25); // XXX
  format %{ "movq    $mem, R12\t# double 0. (R12_heapbase==0)" %}
  ins_encode %{
    __ movq($mem$$Address, r12);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSI(stackSlotI dst, rRegI src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# int stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe( ialu_mem_reg );
%}

instruct storeSSL(stackSlotL dst, rRegL src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# long stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSP(stackSlotP dst, rRegP src)
%{
  match(Set dst src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# ptr stk" %}
  opcode(0x89);
  ins_encode(REX_reg_mem_wide(src, dst), OpcP, reg_mem(src, dst));
  ins_pipe(ialu_mem_reg);
%}

instruct storeSSF(stackSlotF dst, regF src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# float stk" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct storeSSD(stackSlotD dst, regD src)
%{
  match(Set dst src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# double stk" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct cacheWB(indirect addr)
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWB addr);

  ins_cost(100);
  format %{"cache wb $addr" %}
  ins_encode %{
    assert($addr->index_position() < 0, "should be");
    assert($addr$$disp == 0, "should be");
    __ cache_wb(Address($addr$$base$$Register, 0));
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct cacheWBPreSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPreSync);

  ins_cost(100);
  format %{"cache wb presync" %}
  ins_encode %{
    __ cache_wbsync(true);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct cacheWBPostSync()
%{
  predicate(VM_Version::supports_data_cache_line_flush());
  match(CacheWBPostSync);

  ins_cost(100);
  format %{"cache wb postsync" %}
  ins_encode %{
    __ cache_wbsync(false);
  %}
  ins_pipe(pipe_slow); // XXX
%}

//----------BSWAP Instructions-------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "bswapl  $dst" %}
  ins_encode %{
    __ bswapl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(rRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "bswapq  $dst" %}
  ins_encode %{
    __ bswapq($dst$$Register);
  %}
  ins_pipe( ialu_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "shrl    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, rFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "bswapl  $dst\n\t"
            "sar     $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntl  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "bsrl    $dst, $src\t# count leading zeros (int)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "lzcntq  $dst, $src\t# count leading zeros (long)" %}
  ins_encode %{
    __ lzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct countLeadingZerosL_bsr(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(KILL cr);

  format %{ "bsrq    $dst, $src\t# count leading zeros (long)\n\t"
            "jnz     skip\n\t"
            "movl    $dst, -1\n"
      "skip:\n\t"
            "negl    $dst\n\t"
            "addl    $dst, 63" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrq(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI (LoadI src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntl    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "bsfl    $dst, $src\t# count trailing zeros (int)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_mem(rRegI dst, memory src, rFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL (LoadL src)));
  effect(KILL cr);
  ins_cost(175);
  format %{ "tzcntq    $dst, $src\t# count trailing zeros (long)" %}
  ins_encode %{
    __ tzcntq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct countTrailingZerosL_bsf(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(KILL cr);

  format %{ "bsfq    $dst, $src\t# count trailing zeros (long)\n\t"
            "jnz     done\n\t"
            "movl    $dst, 64\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfq(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerLong);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

//--------------- Reverse Operation Instructions ----------------
instruct bytes_reversebit_int(rRegI dst, rRegI src, rRegI rtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_int_gfni(rRegI dst, rRegI src, regF xtmp1, regF xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseI src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_int $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseI($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_long(rRegL dst, rRegL src, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, xnoreg, xnoreg, $rtmp1$$Register, $rtmp2$$Register);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reversebit_long_gfni(rRegL dst, rRegL src, regD xtmp1, regD xtmp2, rRegL rtmp, rFlagsReg cr) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp, KILL cr);
  format %{ "reverse_long $dst $src\t! using $rtmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    __ reverseL($dst$$Register, $src$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $rtmp$$Register, noreg);
  %}
  ins_pipe( ialu_reg );
%}

//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, rRegL src, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr);

  format %{ "popcnt  $dst, $src" %}
  ins_encode %{
    __ popcntq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr);

  format %{ "popcnt  $dst, $mem" %}
  ins_encode %{
    __ popcntq($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire()
%{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock()
%{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release()
%{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_release_lock()
%{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_volatile(rFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile"
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile()
%{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  match(StoreStoreFence);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------

instruct castX2P(rRegP dst, rRegL src)
%{
  match(Set dst (CastX2P src));

  format %{ "movq    $dst, $src\t# long->ptr" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

instruct castP2X(rRegL dst, rRegP src)
%{
  match(Set dst (CastP2X src));

  format %{ "movq    $dst, $src\t# ptr -> long" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movptr($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop into int for vectors alignment masking
instruct convP2I(rRegI dst, rRegP src)
%{
  match(Set dst (ConvL2I (CastP2X src)));

  format %{ "movl    $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
instruct convN2I(rRegI dst, rRegN src)
%{
  predicate(CompressedOops::shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg); // XXX
%}

// Convert oop pointer into compressed form
instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ encode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  format %{ "encode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeHeapOop(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ movq(d, s);
    }
    __ decode_heap_oop(d);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeHeapOop_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  effect(KILL cr);
  format %{ "decode_heap_oop_not_null $dst,$src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    if (s != d) {
      __ decode_heap_oop_not_null(d, s);
    } else {
      __ decode_heap_oop_not_null(d);
    }
  %}
  ins_pipe(ialu_reg_long);
%}

instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
  match(Set dst (EncodePKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "encode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ encode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
  match(Set dst (DecodeNKlass src));
  effect(TEMP dst, KILL cr);
  format %{ "decode_and_move_klass_not_null $dst,$src" %}
  ins_encode %{
    __ decode_and_move_klass_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_long);
%}

//----------Conditional Move---------------------------------------------------
// Jump
// dummy instruction for generating temp registers
instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{
  match(Jump (LShiftL switch_val shift));
  ins_cost(350);
  predicate(false);
  effect(TEMP dest);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
  match(Jump (AddL (LShiftL switch_val shift) offset));
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val << $shift + $offset]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
  match(Jump switch_val);
  ins_cost(350);
  effect(TEMP dest);

  format %{ "leaq    $dest, [$constantaddress]\n\t"
            "jmp     [$dest + $switch_val]\n\t" %}
  ins_encode %{
    // We could use jump(ArrayAddress) except that the macro assembler needs to use r10
    // to do that and the compiler is using that register as one it can allocate.
    // So we build it all by hand.
    // Address index(noreg, switch_reg, Address::times_1);
    // ArrayAddress dispatch(table, index);
    Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1);
    __ lea($dest$$Register, $constantaddress);
    __ jmp(dispatch);
  %}
  ins_pipe(pipe_jmp);
%}

// Conditional move
instruct cmovI_imm_01(rRegI dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_reg(rRegI dst, rRegI src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01U(rRegI dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regU(cmpOpU cop, rFlagsRegU cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovI_imm_01UCF(rRegI dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_int() == 0);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, int" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

instruct cmovI_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovI_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveI (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, rFlagsReg cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, rFlagsRegU cr, rRegI dst, memory src)
%{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));

  ins_cost(250); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, int" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);
%}

instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovN_reg(rRegN dst, rRegN src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# signed, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovN_regU(cmpOpU cop, rFlagsRegU cr, rRegN dst, rRegN src)
%{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovl$cop $dst, $src\t# unsigned, compressed ptr" %}
  ins_encode %{
    __ cmovl((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovN_regU(cop, cr, dst, src);
  %}
%}

instruct cmovN_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveN (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovN_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveN (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpl  $dst, $src\n\t"
            "cmovnel $dst, $src" %}
  ins_encode %{
    __ cmovl(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovl(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Conditional move
instruct cmovP_reg(rRegP dst, rRegP src, rFlagsReg cr, cmpOp cop)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);  // XXX
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, rFlagsRegU cr, rRegP dst, rRegP src)
%{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, ptr" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

instruct cmovP_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovP_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveP (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, rFlagsReg cr, rRegP dst, memory src)
//%{
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, rFlagsRegU cr, rRegP dst, memory src)
//%{
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), reg_mem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

instruct cmovL_imm_01(rRegL dst, immI_1 src, rFlagsReg cr, cmpOp cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# signed, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_reg(cmpOp cop, rFlagsReg cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);  // XXX
%}

instruct cmovL_mem(cmpOp cop, rFlagsReg cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# signed, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem);  // XXX
%}

instruct cmovL_imm_01U(rRegL dst, immI_1 src, rFlagsRegU cr, cmpOpU cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_regU(cmpOpU cop, rFlagsRegU cr, rRegL dst, rRegL src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg); // XXX
%}

instruct cmovL_imm_01UCF(rRegL dst, immI_1 src, rFlagsRegUCF cr, cmpOpUCF cop)
%{
  predicate(n->in(2)->in(2)->is_Con() && n->in(2)->in(2)->get_long() == 0);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(100); // XXX
  format %{ "setbn$cop $dst\t# unsigned, long" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)($cop$$cmpcode);
    __ setb(MacroAssembler::negate_condition(cond), $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_regUCF2_ne(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne);
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

// Since (x == y) == !(x != y), we can flip the sense of the test by flipping the
// inputs of the CMove
instruct cmovL_regUCF2_eq(cmpOpUCF2 cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq);
  match(Set dst (CMoveL (Binary cop cr) (Binary src dst)));

  ins_cost(200); // XXX
  format %{ "cmovpq  $dst, $src\n\t"
            "cmovneq $dst, $src" %}
  ins_encode %{
    __ cmovq(Assembler::parity, $dst$$Register, $src$$Register);
    __ cmovq(Assembler::notEqual, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct cmovL_memU(cmpOpU cop, rFlagsRegU cr, rRegL dst, memory src)
%{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));

  ins_cost(200); // XXX
  format %{ "cmovq$cop $dst, $src\t# unsigned, long" %}
  ins_encode %{
    __ cmovq((Assembler::Condition)($cop$$cmpcode), $dst$$Register, $src$$Address);
  %}
  ins_pipe(pipe_cmov_mem); // XXX
%}

instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{
  match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src))));
  ins_cost(200);
  expand %{
    cmovL_memU(cop, cr, dst, src);
  %}
%}

instruct cmovF_reg(cmpOp cop, rFlagsReg cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// instruct cmovF_mem(cmpOp cop, rFlagsReg cr, regF dst, memory src)
// %{
//   match(Set dst (CMoveF (Binary cop cr) (Binary dst (LoadL src))));

//   ins_cost(200); // XXX
//   format %{ "jn$cop    skip\t# signed cmove float\n\t"
//             "movss     $dst, $src\n"
//     "skip:" %}
//   ins_encode(enc_cmovf_mem_branch(cop, dst, src));
//   ins_pipe(pipe_slow);
// %}

instruct cmovF_regU(cmpOpU cop, rFlagsRegU cr, regF dst, regF src)
%{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove float\n\t"
            "movss     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovF_regU(cop, cr, dst, src);
  %}
%}

instruct cmovD_reg(cmpOp cop, rFlagsReg cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# signed cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovD_regU(cmpOpU cop, rFlagsRegU cr, regD dst, regD src)
%{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));

  ins_cost(200); // XXX
  format %{ "jn$cop    skip\t# unsigned cmove double\n\t"
            "movsd     $dst, $src\n"
    "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovD_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

instruct addI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

instruct addI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addl    $dst, $src\t# int" %}
  ins_encode %{
    __ addl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "incl    $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incl    $dst\t# int" %}
  ins_encode %{
    __ incrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

// XXX why does that use AddI
instruct decI_rReg(rRegI dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "decl    $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddI
instruct decI_mem(memory dst, immI_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decl    $dst\t# int" %}
  ins_encode %{
    __ decrementl($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct leaI_rReg_immI2_immI(rRegI dst, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI (LShiftI index scale) disp));

  format %{ "leal $dst, [$index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI(rRegI dst, rRegI base, rRegI index, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base index) disp));

  format %{ "leal $dst, [$base + $index + $disp]\t# int" %}
  ins_encode %{
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI2(rRegI dst, no_rbp_r13_RegI base, rRegI index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddI base (LShiftI index scale)));

  format %{ "leal $dst, [$base + $index << $scale]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_rReg_immI2_immI(rRegI dst, rRegI base, rRegI index, immI2 scale, immI disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddI (AddI base (LShiftI index scale)) disp));

  format %{ "leal $dst, [$base + $index << $scale + $disp]\t# int" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leal($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

instruct addL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AddL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct addL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct addL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "addq    $dst, $src\t# long" %}
  ins_encode %{
    __ addq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct incL_rReg(rRegI dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct incL_mem(memory dst, immL1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "incq    $dst\t# long" %}
  ins_encode %{
    __ incrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

// XXX why does that use AddL
instruct decL_rReg(rRegL dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (AddL dst src));
  effect(KILL cr);

  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// XXX why does that use AddL
instruct decL_mem(memory dst, immL_M1 src, rFlagsReg cr)
%{
  predicate(UseIncDec);
  match(Set dst (StoreL dst (AddL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125); // XXX
  format %{ "decq    $dst\t# long" %}
  ins_encode %{
    __ decrementq($dst$$Address);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct leaL_rReg_immI2_immL32(rRegL dst, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL (LShiftL index scale) disp));

  format %{ "leaq $dst, [$index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address(noreg, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_immL32(rRegL dst, rRegL base, rRegL index, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base index) disp));

  format %{ "leaq $dst, [$base + $index + $disp]\t# long" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, Address::times_1, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_immI2(rRegL dst, no_rbp_r13_RegL base, rRegL index, immI2 scale)
%{
  predicate(VM_Version::supports_fast_2op_lea());
  match(Set dst (AddL base (LShiftL index scale)));

  format %{ "leaq $dst, [$base + $index << $scale]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_immI2_immL32(rRegL dst, rRegL base, rRegL index, immI2 scale, immL32 disp)
%{
  predicate(VM_Version::supports_fast_3op_lea());
  match(Set dst (AddL (AddL base (LShiftL index scale)) disp));

  format %{ "leaq $dst, [$base + $index << $scale + $disp]\t# long" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($scale$$constant);
    __ leaq($dst$$Register, Address($base$$Register, $index$$Register, scale, $disp$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addP_rReg(rRegP dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct addP_rReg_imm(rRegP dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "addq    $dst, $src\t# ptr" %}
  ins_encode %{
    __ addq($dst$$Register, $src$$constant);
  %}
  ins_pipe( ialu_reg );
%}

// XXX addP mem ops ????

instruct checkCastPP(rRegP dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}

instruct castPP(rRegP dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(empty);
%}

instruct castII(rRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castLL(rRegL dst)
%{
  match(Set dst (CastLL dst));

  size(0);
  format %{ "# castLL of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castFF(regF dst)
%{
  match(Set dst (CastFF dst));

  size(0);
  format %{ "# castFF of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castDD(regD dst)
%{
  match(Set dst (CastDD dst));

  size(0);
  format %{ "# castDD of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
instruct compareAndSwapP(rRegI res,
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ sete($res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapL(rRegI res,
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
    __ sete($res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ sete($res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapB(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
    __ sete($res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapS(rRegI res,
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
    __ sete($res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapN(rRegI res,
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"
            "sete    $res\n\t"
            "movzbl  $res, $res" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
    __ sete($res$$Register);
    __ movzbl($res$$Register, $res$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeB(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgb $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgb($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeS(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgw $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgw($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI(
                         memory mem_ptr,
                         rax_RegI oldval, rRegI newval,
                         rFlagsReg cr)
%{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeL(
                         memory mem_ptr,
                         rax_RegL oldval, rRegL newval,
                         rFlagsReg cr)
%{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeN(
                          memory mem_ptr,
                          rax_RegN oldval, rRegN newval,
                          rFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgl $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgl($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP(
                         memory mem_ptr,
                         rax_RegP oldval, rRegP newval,
                         rFlagsReg cr)
%{
  predicate(VM_Version::supports_cx8() && n->as_LoadStore()->barrier_data() == 0);
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);

  format %{ "cmpxchgq $mem_ptr,$newval\t# "
            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
  ins_encode %{
    __ lock();
    __ cmpxchgq($newval$$Register, $mem_ptr$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddB mem add));
  effect(KILL cr);
  format %{ "ADDB  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addb($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddB mem newval));
  effect(KILL cr);
  format %{ "XADDB  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddb($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddS mem add));
  effect(KILL cr);
  format %{ "ADDW  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addw($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddS mem newval));
  effect(KILL cr);
  format %{ "XADDW  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddw($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem add));
  effect(KILL cr);
  format %{ "ADDQ  [$mem],$add" %}
  ins_encode %{
    __ lock();
    __ addq($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{
  match(Set newval (GetAndAddL mem newval));
  effect(KILL cr);
  format %{ "XADDQ  [$mem],$newval" %}
  ins_encode %{
    __ lock();
    __ xaddq($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgB( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetB mem newval));
  format %{ "XCHGB  $newval,[$mem]" %}
  ins_encode %{
    __ xchgb($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgS( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetS mem newval));
  format %{ "XCHGW  $newval,[$mem]" %}
  ins_encode %{
    __ xchgw($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgL( memory mem, rRegL newval) %{
  match(Set newval (GetAndSetL mem newval));
  format %{ "XCHGL  $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, rRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  predicate(n->as_LoadStore()->barrier_data() == 0);
  format %{ "XCHGQ  $newval,[$mem]" %}
  ins_encode %{
    __ xchgq($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgN( memory mem, rRegN newval) %{
  match(Set newval (GetAndSetN mem newval));
  format %{ "XCHGL  $newval,$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Abs Instructions-------------------------------------------

// Integer Absolute Instructions
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
          %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ sarl($tmp$$Register, 31);
    __ movl($dst$$Register, $src$$Register);
    __ xorl($dst$$Register, $tmp$$Register);
    __ subl($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

// Long Absolute Instructions
instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movq $tmp, $src\n\t"
            "sarq $tmp, 63\n\t"
            "movq $dst, $src\n\t"
            "xorq $dst, $tmp\n\t"
            "subq $dst, $tmp\n"
          %}
  ins_encode %{
    __ movq($tmp$$Register, $src$$Register);
    __ sarq($tmp$$Register, 63);
    __ movq($dst$$Register, $src$$Register);
    __ xorq($dst$$Register, $tmp$$Register);
    __ subq($dst$$Register, $tmp$$Register);
  %}

  ins_pipe(ialu_reg_reg);
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct subI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subl    $dst, $src\t# int" %}
  ins_encode %{
    __ subl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct subL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (SubL dst src));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct subL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (SubL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct subL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "subq    $dst, $src\t# long" %}
  ins_encode %{
    __ subq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Subtract from a pointer
// XXX hmpf???
instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  format %{ "subq    $dst, $src\t# ptr - int" %}
  opcode(0x2B);
  ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst, src));
  ins_pipe(ialu_reg_reg);
%}

instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
%{
  match(Set dst (NegI dst));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (SubI zero (LoadI dst))));
  effect(KILL cr);

  format %{ "negl    $dst\t# int" %}
  ins_encode %{
    __ negl($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}

instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (SubL zero dst));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
%{
  match(Set dst (NegL dst));
  effect(KILL cr);

  format %{ "negq    $dst\t# int" %}
  ins_encode %{
    __ negq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (SubL zero (LoadL dst))));
  effect(KILL cr);

  format %{ "negq    $dst\t# long" %}
  ins_encode %{
    __ negq($dst$$Address);
  %}
  ins_pipe(ialu_reg);
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register

instruct mulI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulI_rReg_imm(rRegI dst, rRegI src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulI_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imull   $dst, $src\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

instruct mulI_mem_imm(rRegI dst, memory src, immI imm, rFlagsReg cr)
%{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imull   $dst, $src, $imm\t# int" %}
  ins_encode %{
    __ imull($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, rFlagsReg cr)
%{
  match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
  effect(KILL cr, KILL src2);

  expand %{ mulI_rReg(dst, src1, cr);
           mulI_rReg(src2, src3, cr);
           addI_rReg(dst, src2, cr); %}
%}

instruct mulL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (MulL dst src));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulL_rReg_imm(rRegL dst, rRegL src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Register, $imm$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct mulL_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (MulL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "imulq   $dst, $src\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm, rFlagsReg cr)
%{
  match(Set dst (MulL (LoadL src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "imulq   $dst, $src, $imm\t# long" %}
  ins_encode %{
    __ imulq($dst$$Register, $src$$Address, $imm$$constant);
  %}
  ins_pipe(ialu_reg_mem_alu0);
%}

instruct mulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (MulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "imulq   RDX:RAX, RAX, $src\t# mulhi" %}
  ins_encode %{
    __ imulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct umulHiL_rReg(rdx_RegL dst, rRegL src, rax_RegL rax, rFlagsReg cr)
%{
  match(Set dst (UMulHiL src rax));
  effect(USE_KILL rax, KILL cr);

  ins_cost(300);
  format %{ "mulq   RDX:RAX, RAX, $src\t# umulhi" %}
  ins_encode %{
    __ mulq($src$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct divI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct divL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rax (DivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct udivI_rReg(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rax (UDivI rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# UDivI\n" %}
  ins_encode %{
    __ udivI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct udivL_rReg(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rax (UDivL rax div));
  effect(KILL rdx, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# UDivL\n" %}
  ins_encode %{
     __ udivL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_rReg_divmod(rax_RegI rax, rdx_RegI rdx, no_rax_rdx_RegI div,
                             rFlagsReg cr)
%{
  match(DivModI rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "cmpl    rax, 0x80000000\t# idiv\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(pipe_slow);
%}

// Long DIVMOD with Register, both quotient and mod results
instruct divModL_rReg_divmod(rax_RegL rax, rdx_RegL rdx, no_rax_rdx_RegL div,
                             rFlagsReg cr)
%{
  match(DivModL rax div);
  effect(KILL cr);

  ins_cost(30*100+10*100); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# ldiv\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(pipe_slow);
%}

// Unsigned integer DIVMOD with Register, both quotient and mod results
instruct udivModI_rReg_divmod(rax_RegI rax, no_rax_rdx_RegI tmp, rdx_RegI rdx,
                              no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(UDivModI rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivl $rax,$rax,$div\t# begin UDivModI\n\t"
            "umodl $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModI\n"
          %}
  ins_encode %{
    __ udivmodI($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long DIVMOD with Register, both quotient and mod results
instruct udivModL_rReg_divmod(rax_RegL rax, no_rax_rdx_RegL tmp, rdx_RegL rdx,
                              no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(UDivModL rax div);
  effect(TEMP tmp, KILL cr);

  ins_cost(300);
  format %{ "udivq $rax,$rax,$div\t# begin UDivModL\n\t"
            "umodq $rdx,$rax,$div\t! using $tmp as TEMP # end UDivModL\n"
          %}
  ins_encode %{
    __ udivmodL($rax$$Register, $div$$Register, $rdx$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct modI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "cmpl    rax, 0x80000000\t# irem\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpl    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdql\n\t"
            "idivl   $div\n"
    "done:"        %}
  ins_encode(cdql_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
                   rFlagsReg cr)
%{
  match(Set rdx (ModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300); // XXX
  format %{ "movq    rdx, 0x8000000000000000\t# lrem\n\t"
            "cmpq    rax, rdx\n\t"
            "jne,s   normal\n\t"
            "xorl    rdx, rdx\n\t"
            "cmpq    $div, -1\n\t"
            "je,s    done\n"
    "normal: cdqq\n\t"
            "idivq   $div\n"
    "done:"        %}
  ins_encode(cdqq_enc(div));
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct umodI_rReg(rdx_RegI rdx, rax_RegI rax, no_rax_rdx_RegI div, rFlagsReg cr)
%{
  match(Set rdx (UModI rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodl $rdx,$rax,$div\t# UModI\n" %}
  ins_encode %{
    __ umodI($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct umodL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div, rFlagsReg cr)
%{
  match(Set rdx (UModL rax div));
  effect(KILL rax, KILL cr);

  ins_cost(300);
  format %{ "umodq $rdx,$rax,$div\t# UModL\n" %}
  ins_encode %{
    __ umodL($rax$$Register, $div$$Register, $rdx$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Integer Shift Instructions
// Shift Left by one, two, three
instruct salI_rReg_immI2(rRegI dst, immI2 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Shift Left by variable
instruct salI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable
instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sall    $dst, $shift" %}
  ins_encode %{
    __ sall($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct salI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI src shift));

  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct salI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shlxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable
instruct sarI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);
  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "sarl    $dst, $shift" %}
  ins_encode %{
    __ sarl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct sarI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI src shift));

  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "sarxl   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_rReg_imm(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable
instruct shrI_rReg_CL(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "shrl    $dst, $shift" %}
  ins_encode %{
    __ shrl($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct shrI_rReg_rReg(rRegI dst, rRegI src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI src shift));

  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrI_mem_rReg(rRegI dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftI (LoadI src) shift));
  ins_cost(175);
  format %{ "shrxl   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxl($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Long Shift Instructions
// Shift Left by one, two, three
instruct salL_rReg_immI2(rRegL dst, immI2 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Shift Left by 8-bit immediate
instruct salL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Shift Left by variable
instruct salL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by variable
instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "salq    $dst, $shift" %}
  ins_encode %{
    __ salq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct salL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL src shift));

  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct salL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (LShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shlxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shlxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarL_rReg_imm(rRegL dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarL_mem_imm(memory dst, immI shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address, (unsigned char)($shift$$constant & 0x3F));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Arithmetic Shift Right by variable
instruct sarL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic Shift Right by variable
instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "sarq    $dst, $shift" %}
  ins_encode %{
    __ sarq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct sarL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL src shift));

  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct sarL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (RShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "sarxq   $dst, $src, $shift" %}
  ins_encode %{
    __ sarxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Logical Shift Right by 8-bit immediate
instruct shrL_rReg_imm(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Logical Shift Right by 8-bit immediate
instruct shrL_mem_imm(memory dst, immI8 shift, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Logical Shift Right by variable
instruct shrL_rReg_CL(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by variable
instruct shrL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2());
  match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
  effect(KILL cr);

  format %{ "shrq    $dst, $shift" %}
  ins_encode %{
    __ shrq($dst$$Address);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct shrL_rReg_rReg(rRegL dst, rRegL src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL src shift));

  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Register, $shift$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct shrL_mem_rReg(rRegL dst, memory src, rRegI shift)
%{
  predicate(VM_Version::supports_bmi2());
  match(Set dst (URShiftL (LoadL src) shift));
  ins_cost(175);
  format %{ "shrxq   $dst, $src, $shift" %}
  ins_encode %{
    __ shrxq($dst$$Register, $src$$Address, $shift$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour)
%{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  format %{ "movsbl  $dst, $src\t# i2b" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler the i2s bytecode.
instruct i2s(rRegI dst, rRegI src, immI_16 sixteen)
%{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  format %{ "movswl  $dst, $src\t# i2s" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// ROL/ROR instructions

// Rotate left by constant.
instruct rolI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct rolI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rolI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft (LoadI src) shift));
  ins_cost(175);
  format %{ "rolxl   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 32 - ($shift$$constant & 31);
    __ rorxl($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
instruct rolI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "roll    $dst, $shift" %}
  ins_encode %{
    __ roll($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by constant.
instruct rorI_immI8_legacy(rRegI dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Right by constant.
instruct rorI_immI8(rRegI dst, rRegI src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight src shift));
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rorI_mem_immI8(rRegI dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight (LoadI src) shift));
  ins_cost(175);
  format %{ "rorxl   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxl($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
instruct rorI_rReg_Var(rRegI dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_INT);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorl    $dst, $shift" %}
  ins_encode %{
    __ rorl($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Left by constant.
instruct rolL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

instruct rolL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft src shift));
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Register, shift);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rolL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft (LoadL src) shift));
  ins_cost(175);
  format %{ "rolxq   $dst, $src, $shift" %}
  ins_encode %{
    int shift = 64 - ($shift$$constant & 63);
    __ rorxq($dst$$Register, $src$$Address, shift);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Left by variable
instruct rolL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateLeft dst shift));
  effect(KILL cr);
  format %{ "rolq    $dst, $shift" %}
  ins_encode %{
    __ rolq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Rotate Right by constant.
instruct rorL_immI8_legacy(rRegL dst, immI8 shift, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Rotate Right by constant
instruct rorL_immI8(rRegL dst, rRegL src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight src shift));
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Register, $shift$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct rorL_mem_immI8(rRegL dst, memory src, immI8 shift)
%{
  predicate(VM_Version::supports_bmi2() && n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight (LoadL src) shift));
  ins_cost(175);
  format %{ "rorxq   $dst, $src, $shift" %}
  ins_encode %{
    __ rorxq($dst$$Register, $src$$Address, $shift$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Rotate Right by variable
instruct rorL_rReg_Var(rRegL dst, rcx_RegI shift, rFlagsReg cr)
%{
  predicate(n->bottom_type()->basic_type() == T_LONG);
  match(Set dst (RotateRight dst shift));
  effect(KILL cr);
  format %{ "rorq    $dst, $shift" %}
  ins_encode %{
    __ rorq($dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src mask));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_reg(rRegL dst, rRegL src, rRegL mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct compressBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (CompressBits src (LoadL mask)));
  format %{ "pextq  $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsL_mem(rRegL dst, rRegL src, memory mask) %{
  predicate(n->bottom_type()->isa_long());
  match(Set dst (ExpandBits src (LoadL mask)));
  format %{ "pdepq  $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepq($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Logical Instructions

// Integer Logical Instructions

// And Instructions
// And Register with Register
instruct andI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Immediate 255
instruct andI_rReg_imm255(rRegI dst, rRegI src, immI_255 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzbl  $dst, $src\t# int & 0xFF" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 255 and promote to long
instruct andI2L_rReg_imm255(rRegL dst, rRegI src, immI_255 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzbl  $dst, $src\t# int & 0xFF -> long" %}
  ins_encode %{
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
instruct andI_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (AndI src mask));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535 and promote to long
instruct andI2L_rReg_imm65535(rRegL dst, rRegI src, immI_65535 mask)
%{
  match(Set dst (ConvI2L (AndI src mask)));

  format %{ "movzwl  $dst, $src\t# int & 0xFFFF -> long" %}
  ins_encode %{
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Can skip int2long conversions after AND with small bitmask
instruct convI2LAndI_reg_immIbitmask(rRegL dst, rRegI src,  immI_Pow2M1 mask, rRegI tmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_bmi2());
  ins_cost(125);
  effect(TEMP tmp, KILL cr);
  match(Set dst (ConvI2L (AndI src mask)));
  format %{ "bzhiq $dst, $src, $mask \t# using $tmp as TEMP, int &  immI_Pow2M1 -> long" %}
  ins_encode %{
    __ movl($tmp$$Register, exact_log2($mask$$constant + 1));
    __ bzhiq($dst$$Register, $src$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Immediate
instruct andI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Memory
instruct andI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// And Memory with Register
instruct andB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (AndI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andb    $dst, $src\t# byte" %}
  ins_encode %{
    __ andb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct andI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andl    $dst, $src\t# int" %}
  ins_encode %{
    __ andl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// BMI1 instructions
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnl  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsil  $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskl $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg_mem);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrl  $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg);
%}

// Or Instructions
// Or Register with Register
instruct orI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Immediate
instruct orI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Or Register with Memory
instruct orI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Memory with Register
instruct orB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (OrI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orb    $dst, $src\t# byte" %}
  ins_encode %{
    __ orb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct orI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orl     $dst, $src\t# int" %}
  ins_encode %{
    __ orl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_rReg(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Immediate -1
instruct xorI_rReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  format %{ "not    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Immediate
instruct xorI_rReg_imm(rRegI dst, immI src, rFlagsReg cr)
%{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Memory
instruct xorI_rReg_mem(rRegI dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Xor Memory with Register
instruct xorB_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreB dst (XorI (LoadB dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorb    $dst, $src\t# byte" %}
  ins_encode %{
    __ xorb($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct xorI_mem_rReg(memory dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, rFlagsReg cr)
%{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorl    $dst, $src\t# int" %}
  ins_encode %{
    __ xorl($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Long Logical Instructions

// And Instructions
// And Register with Register
instruct andL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// And Register with Immediate 255
instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
  ins_encode %{
    // movzbl zeroes out the upper 32-bit and does not need REX.W
    __ movzbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate 65535
instruct andL_rReg_imm65535(rRegL dst, rRegL src, immL_65535 mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movzwl  $dst, $src\t# long & 0xFFFF" %}
  ins_encode %{
    // movzwl zeroes out the upper 32-bit and does not need REX.W
    __ movzwl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Immediate
instruct andL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (AndL dst src));
  effect(KILL cr);

  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// And Register with Memory
instruct andL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (AndL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// And Memory with Register
instruct andL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// And Memory with Immediate
instruct andL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (AndL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "andq    $dst, $src\t# long" %}
  ins_encode %{
    __ andq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit immediate given that not(con) is a power of 2
  // because AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);

  match(Set dst (StoreL dst (AndL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btrq    $dst, log2(not($con))\t# long" %}
  ins_encode %{
    __ btrq($dst$$Address, log2i_exact((julong)~$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}

// BMI1 instructions
instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2)));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
    __ andnq($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1, rFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "andnq  $dst, $src1, $src2" %}

  ins_encode %{
  __ andnq($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero, rFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsiq  $dst, $src" %}

  ins_encode %{
    __ blsiq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsmskq $dst, $src" %}

  ins_encode %{
    __ blsmskq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1, rFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "blsrq  $dst, $src" %}

  ins_encode %{
    __ blsrq($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg);
%}

// Or Instructions
// Or Register with Register
instruct orL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Use any_RegP to match R15 (TLS register) without spilling.
instruct orL_rReg_castP2X(rRegL dst, any_RegP src, rFlagsReg cr) %{
  match(Set dst (OrL dst (CastP2X src)));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Or Register with Immediate
instruct orL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (OrL dst src));
  effect(KILL cr);

  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Or Register with Memory
instruct orL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (OrL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Or Memory with Register
instruct orL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Or Memory with Immediate
instruct orL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (OrL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "orq     $dst, $src\t# long" %}
  ins_encode %{
    __ orq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
%{
  // con should be a pure 64-bit power of 2 immediate
  // because AND/OR works well enough for 8/32-bit values.
  predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);

  match(Set dst (StoreL dst (OrL (LoadL dst) con)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "btsq    $dst, log2($con)\t# long" %}
  ins_encode %{
    __ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
  %}
  ins_pipe(ialu_mem_imm);
%}

// Xor Instructions
// Xor Register with Register
instruct xorL_rReg(rRegL dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Xor Register with Immediate -1
instruct xorL_rReg_im1(rRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));

  format %{ "notq   $dst" %}
  ins_encode %{
     __ notq($dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Immediate
instruct xorL_rReg_imm(rRegL dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (XorL dst src));
  effect(KILL cr);

  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$constant);
  %}
  ins_pipe(ialu_reg);
%}

// Xor Register with Memory
instruct xorL_rReg_mem(rRegL dst, memory src, rFlagsReg cr)
%{
  match(Set dst (XorL dst (LoadL src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Xor Memory with Register
instruct xorL_mem_rReg(memory dst, rRegL src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Xor Memory with Immediate
instruct xorL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
%{
  match(Set dst (StoreL dst (XorL (LoadL dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "xorq    $dst, $src\t# long" %}
  ins_encode %{
    __ xorq($dst$$Address, $src$$constant);
  %}
  ins_pipe(ialu_mem_imm);
%}

// Convert Int to Boolean
instruct convI2B(rRegI dst, rRegI src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testl   $src, $src\t# ci2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
    __ set_byte_if_not_zero($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Convert Pointer to Boolean
instruct convP2B(rRegI dst, rRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  effect(KILL cr);

  format %{ "testq   $src, $src\t# cp2b\n\t"
            "setnz   $dst\n\t"
            "movzbl  $dst, $dst" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
    __ set_byte_if_not_zero($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(400);
  format %{ "cmpl    $p, $q\t# cmpLTMask\n\t"
            "setlt   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
            "negl    $dst" %}
  ins_encode %{
    __ cmpl($p$$Register, $q$$Register);
    __ setl($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ negl($dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask dst zero));
  effect(KILL cr);

  ins_cost(100);
  format %{ "sarl    $dst, #31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* Better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(300);
  format %{ "subl    $p,$q\t# cadd_cmpLTMask\n\t"
            "jge     done\n\t"
            "addl    $p,$y\n"
            "done:   " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}

/* Better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, rFlagsReg cr)
%{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "cmpl    $p, $q\t# and_cmpLTMask\n\t"
            "jlt     done\n\t"
            "xorl    $y, $y\n"
            "done:   " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}
  ins_pipe(pipe_cmplt);
%}

//---------- FP Instructions------------------------------------------------

// Really expensive, avoid
instruct cmpF_cc_reg(rFlagsRegU cr, regF src1, regF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(500);
  format %{ "ucomiss $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_reg_CF(rFlagsRegUCF cr, regF src1, regF src2) %{
  match(Set cr (CmpF src1 src2));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_memCF(rFlagsRegUCF cr, regF src1, memory src2) %{
  match(Set cr (CmpF src1 (LoadF src2)));

  ins_cost(100);
  format %{ "ucomiss $src1, $src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpF_cc_immCF(rFlagsRegUCF cr, regF src, immF con) %{
  match(Set cr (CmpF src con));
  ins_cost(100);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Really expensive, avoid
instruct cmpD_cc_reg(rFlagsRegU cr, regD src1, regD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(500);
  format %{ "ucomisd $src1, $src2\n\t"
            "jnp,s   exit\n\t"
            "pushfq\t# saw NaN, set CF\n\t"
            "andq    [rsp], #0xffffff2b\n\t"
            "popfq\n"
    "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_reg_CF(rFlagsRegUCF cr, regD src1, regD src2) %{
  match(Set cr (CmpD src1 src2));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2 test" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_memCF(rFlagsRegUCF cr, regD src1, memory src2) %{
  match(Set cr (CmpD src1 (LoadD src2)));

  ins_cost(100);
  format %{ "ucomisd $src1, $src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpD_cc_immCF(rFlagsRegUCF cr, regD src, immD con) %{
  match(Set cr (CmpD src con));
  ins_cost(100);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpF_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpF_mem(rRegI dst, regF src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpF_imm(rRegI dst, regF src, immF con, rFlagsReg cr) %{
  match(Set dst (CmpF3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomiss $src, [$constantaddress]\t# load from constant table: float=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomiss($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpD_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpD_mem(rRegI dst, regD src1, memory src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src1, $src2\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Compare into -1,0,1
instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
  match(Set dst (CmpD3 src con));
  effect(KILL cr);

  ins_cost(275);
  format %{ "ucomisd $src, [$constantaddress]\t# load from constant table: double=$con\n\t"
            "movl    $dst, #-1\n\t"
            "jp,s    done\n\t"
            "jb,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n"
    "done:" %}
  ins_encode %{
    __ ucomisd($src$$XMMRegister, $constantaddress($con));
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe(pipe_slow);
%}

//----------Arithmetic Conversion Instructions---------------------------------

instruct convF2D_reg_reg(regD dst, regF src)
%{
  match(Set dst (ConvF2D src));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convF2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvF2D (LoadF src)));

  format %{ "cvtss2sd $dst, $src" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convD2F_reg_reg(regF dst, regD src)
%{
  match(Set dst (ConvD2F src));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convD2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvD2F (LoadD src)));

  format %{ "cvtsd2ss $dst, $src" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// XXX do mem variants
instruct convF2I_reg_reg(rRegI dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2I src));
  effect(KILL cr);
  format %{ "convert_f2i $dst,$src" %}
  ins_encode %{
    __ convert_f2i($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2L_reg_reg(rRegL dst, regF src, rFlagsReg cr)
%{
  match(Set dst (ConvF2L src));
  effect(KILL cr);
  format %{ "convert_f2l $dst,$src"%}
  ins_encode %{
    __ convert_f2l($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2I_reg_reg(rRegI dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2I src));
  effect(KILL cr);
  format %{ "convert_d2i $dst,$src"%}
  ins_encode %{
    __ convert_d2i($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convD2L_reg_reg(rRegL dst, regD src, rFlagsReg cr)
%{
  match(Set dst (ConvD2L src));
  effect(KILL cr);
  format %{ "convert_d2l $dst,$src"%}
  ins_encode %{
    __ convert_d2l($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct round_double_reg(rRegL dst, regD src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundD src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_double $dst,$src \t! using $rtmp and $rcx as TEMP"%}
  ins_encode %{
    __ round_double($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct round_float_reg(rRegI dst, regF src, rRegL rtmp, rcx_RegL rcx, rFlagsReg cr)
%{
  match(Set dst (RoundF src));
  effect(TEMP dst, TEMP rtmp, TEMP rcx, KILL cr);
  format %{ "round_float $dst,$src" %}
  ins_encode %{
    __ round_float($dst$$Register, $src$$XMMRegister, $rtmp$$Register, $rcx$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct convI2F_reg_reg(regF dst, rRegI src)
%{
  predicate(!UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvI2F (LoadI src)));

  format %{ "cvtsi2ssl $dst, $src\t# i2f" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2D_reg_reg(regD dst, rRegI src)
%{
  predicate(!UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvI2D (LoadI src)));

  format %{ "cvtsi2sdl $dst, $src\t# i2d" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate(UseXmmI2F);
  match(Set dst (ConvI2F src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2psl $dst, $dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate(UseXmmI2D);
  match(Set dst (ConvI2D src));

  format %{ "movdl $dst, $src\n\t"
            "cvtdq2pdl $dst, $dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2F_reg_reg(regF dst, rRegL src)
%{
  match(Set dst (ConvL2F src));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2F_reg_mem(regF dst, memory src)
%{
  match(Set dst (ConvL2F (LoadL src)));

  format %{ "cvtsi2ssq $dst, $src\t# l2f" %}
  ins_encode %{
    __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2D_reg_reg(regD dst, rRegL src)
%{
  match(Set dst (ConvL2D src));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convL2D_reg_mem(regD dst, memory src)
%{
  match(Set dst (ConvL2D (LoadL src)));

  format %{ "cvtsi2sdq $dst, $src\t# l2d" %}
  ins_encode %{
    __ cvtsi2sdq ($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2L_reg_reg(rRegL dst, rRegI src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(125);
  format %{ "movslq  $dst, $src\t# i2l" %}
  ins_encode %{
    __ movslq($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src)
// %{
//   match(Set dst (ConvI2L src));
// //   predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 &&
// //             _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0);
//   predicate(((const TypeNode*) n)->type()->is_long()->_hi ==
//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi &&
//             ((const TypeNode*) n)->type()->is_long()->_lo ==
//             (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo);

//   format %{ "movl    $dst, $src\t# unsigned i2l" %}
//   ins_encode(enc_copy(dst, src));
// //   opcode(0x63); // needs REX.W
// //   ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src));
//   ins_pipe(ialu_reg_reg);
// %}

// Zero-extend convert int to long
instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ movl($dst$$Register, $src$$Register);
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

// Zero-extend convert int to long
instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI src)) mask));

  format %{ "movl    $dst, $src\t# i2l zero-extend\n\t" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask)
%{
  match(Set dst (AndL src mask));

  format %{ "movl    $dst, $src\t# zero-extend long" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct convL2I_reg_reg(rRegI dst, rRegL src)
%{
  match(Set dst (ConvL2I src));

  format %{ "movl    $dst, $src\t# l2i" %}
  ins_encode %{
    __ movl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movl    $dst, $src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movss   $dst, $src\t# MoveI2F_stack_reg" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movq    $dst, $src\t# MoveD2L_stack_reg" %}
  ins_encode %{
    __ movq($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{
  predicate(!UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movlpd  $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "movsd   $dst, $src\t# MoveL2D_stack_reg" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movss   $dst, $src\t# MoveF2I_reg_stack" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movl    $dst, $src\t# MoveI2F_reg_stack" %}
  ins_encode %{
    __ movl(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe( ialu_mem_reg );
%}

instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(95); // XXX
  format %{ "movsd   $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(100);
  format %{ "movq    $dst, $src\t# MoveL2D_reg_stack" %}
  ins_encode %{
    __ movq(Address(rsp, $dst$$disp), $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

instruct MoveF2I_reg_reg(rRegI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveF2I" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveD2L_reg_reg(rRegL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(85);
  format %{ "movd    $dst,$src\t# MoveD2L" %}
  ins_encode %{
    __ movdq($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveI2F" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(100);
  format %{ "movd    $dst,$src\t# MoveL2D" %}
  ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                  Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// Small ClearArray AVX512 non-constant length.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                       Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2));
  match(Set dummy (ClearArray cnt base));
  ins_cost(125);
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
    $$emit$$"jg      LARGE\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"js      DONE\t# Zero length\n\t"
    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
    $$emit$$"dec     rcx\n\t"
    $$emit$$"jge     LOOP\n\t"
    $$emit$$"jmp     DONE\n\t"
    $$emit$$"# LARGE:\n\t"
    if (UseFastStosb) {
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
    }
    $$emit$$"# DONE"
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, false, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Large ClearArray non-AVX512.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
                        Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX <=2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, knoreg);
  %}
  ins_pipe(pipe_slow);
%}

// Large ClearArray AVX512.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, legRegD tmp, kReg ktmp, rax_RegI zero,
                             Universe dummy, rFlagsReg cr)
%{
  predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large());
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);

  format %{ $$template
    if (UseFastStosb) {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
    } else if (UseXMMForObjInit) {
       $$emit$$"mov     rdi,rax\t# ClearArray:\n\t"
       $$emit$$"vpxor   ymm0,ymm0,ymm0\n\t"
       $$emit$$"jmpq    L_zero_64_bytes\n\t"
       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
       $$emit$$"add     0x40,rax\n\t"
       $$emit$$"# L_zero_64_bytes:\n\t"
       $$emit$$"sub     0x8,rcx\n\t"
       $$emit$$"jge     L_loop\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jl      L_tail\n\t"
       $$emit$$"vmovdqu ymm0,(rax)\n\t"
       $$emit$$"add     0x20,rax\n\t"
       $$emit$$"sub     0x4,rcx\n\t"
       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
       $$emit$$"add     0x4,rcx\n\t"
       $$emit$$"jle     L_end\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
       $$emit$$"vmovq   xmm0,(rax)\n\t"
       $$emit$$"add     0x8,rax\n\t"
       $$emit$$"dec     rcx\n\t"
       $$emit$$"jge     L_sloop\n\t"
       $$emit$$"# L_end:\n\t"
    } else {
       $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
    }
  %}
  ins_encode %{
    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
                 $tmp$$XMMRegister, true, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
  predicate(!((ClearArrayNode*)n)->is_large() &&
              ((UseAVX > 2) && VM_Version::supports_avx512vlbw()));
  match(Set dummy (ClearArray cnt base));
  ins_cost(100);
  effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
  format %{ "clear_mem_imm $base , $cnt  \n\t" %}
  ins_encode %{
   __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                         rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                              rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                          rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 16) {
      // IndexOf for constant substrings with size >= 16 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                             rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
    }
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
                              rbx_RegI result, legRegD tmp_vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $tmp_vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                         rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
                          rbx_RegI result, legRegD tmp_vec, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp_vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $tmp_vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
                              rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
  predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
  format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
  ins_encode %{
    __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
                           $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                           legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw());
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                       legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct count_positives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                         legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
%{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct count_positives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                              legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr,)
%{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (CountPositives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "countPositives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ count_positives($ary1$$Register, $len$$Register,
                       $result$$Register, $tmp3$$Register,
                       $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast char[] to byte[] compression
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                         legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           knoreg, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                        legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, knoreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2());
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                          legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                          rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(!((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode iso array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ASCII
instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                            legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
                            rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode ascii array $src,$dst,$len -> $result    // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true);
  %}
  ins_pipe( pipe_slow );
%}

//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addl    $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2)
%{
  match(Set cr (OverflowAddL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "addq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ addq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpl    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmpq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negl    $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2)
%{
  match(Set cr (OverflowSubL zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "negq    $op2\t# overflow check long" %}
  ins_encode %{
    __ negq($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imull    $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imull    $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "imulq    $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp)
%{
  match(Set cr (OverflowMulL op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "imulq    $tmp, $op1, $op2\t# overflow check long" %}
  ins_encode %{
    __ imulq($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// XXX more variants!!
instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpI op1 op2));
  effect(DEF cr, USE op1, USE op2);

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpI op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpI src zero));

  format %{ "testl   $src, $src" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "testl   $src, $con" %}
  ins_encode %{
    __ testl($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "testl   $src, $mem" %}
  ins_encode %{
    __ testl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an rFlagsRegU instead of rFlagsReg.
instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
%{
  match(Set cr (CmpU op1 (LoadI op2)));

  ins_cost(500); // XXX
  format %{ "cmpl    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// // // Cisc-spilled version of cmpU_rReg
// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2)
// //%{
// //  match(Set cr (CmpU (LoadI op1) op2));
// //
// //  format %{ "CMPu   $op1,$op2" %}
// //  ins_cost(500);
// //  opcode(0x39);  /* Opcode 39 /r */
// //  ins_encode( OpcP, reg_mem( op1, op2) );
// //%}

instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
  match(Set cr (CmpU src zero));

  format %{ "testl   $src, $src\t# unsigned" %}
  ins_encode %{
    __ testl($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2)
%{
  match(Set cr (CmpP op1 (LoadP op2)));
  predicate(n->in(2)->as_Load()->barrier_data() == 0);

  ins_cost(500); // XXX
  format %{ "cmpq    $op1, $op2\t# ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// // // Cisc-spilled version of cmpP_rReg
// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2)
// //%{
// //  match(Set cr (CmpP (LoadP op1) op2));
// //
// //  format %{ "CMPu   $op1,$op2" %}
// //  ins_cost(500);
// //  opcode(0x39);  /* Opcode 39 /r */
// //  ins_encode( OpcP, reg_mem( op1, op2) );
// //%}

// XXX this is generalized by compP_rReg_mem???
// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2)
%{
  predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none &&
            n->in(2)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "cmpq    $op1, $op2\t# raw ptr" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
%{
  match(Set cr (CmpP src zero));

  format %{ "testq   $src, $src\t# ptr" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

// This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq.
instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{
  predicate((!UseCompressedOops || (CompressedOops::base() != NULL)) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP op) zero));

  ins_cost(500); // XXX
  format %{ "testq   $op, 0xffffffffffffffff\t# ptr" %}
  ins_encode %{
    __ testq($op$$Address, 0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
  predicate(UseCompressedOops && (CompressedOops::base() == NULL) &&
            n->in(1)->as_Load()->barrier_data() == 0);
  match(Set cr (CmpP (LoadP mem) zero));

  format %{ "cmpq    R12, $mem\t# ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpq(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $src, $mem\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
  match(Set cr (CmpN src (LoadN mem)));

  format %{ "cmpl    $mem, $src\t# compressed ptr" %}
  ins_encode %{
    __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{
  match(Set cr (CmpN op1 op2));

  format %{ "cmpl    $op1, $op2\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src)
%{
  match(Set cr (CmpN src (LoadNKlass mem)));

  format %{ "cmpl    $mem, $src\t# compressed klass ptr" %}
  ins_encode %{
    __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
  match(Set cr (CmpN src zero));

  format %{ "testl   $src, $src\t# compressed ptr" %}
  ins_encode %{ __ testl($src$$Register, $src$$Register); %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() != NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  ins_cost(500); // XXX
  format %{ "testl   $mem, 0xffffffff\t# compressed ptr" %}
  ins_encode %{
    __ cmpl($mem$$Address, (int)0xFFFFFFFF);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
  predicate(CompressedOops::base() == NULL);
  match(Set cr (CmpN (LoadN mem) zero));

  format %{ "cmpl    R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
  ins_encode %{
    __ cmpl(r12, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpL op1 op2));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpL src zero));

  format %{ "testq   $src, $src" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero)
%{
  match(Set cr (CmpL (AndL src con) zero));

  format %{ "testq   $src, $con\t# long" %}
  ins_encode %{
    __ testq($src$$Register, $con$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL src (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero)
%{
  match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero));

  format %{ "testq   $src, $mem" %}
  ins_encode %{
    __ testq($src$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

// Manifest a CmpU result in an integer register.  Very painful.
// This is the test to avoid.
instruct cmpU3_reg_reg(rRegI dst, rRegI src1, rRegI src2, rFlagsReg flags)
%{
  match(Set dst (CmpU3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpl    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpl($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jl,s    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::less, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Manifest a CmpUL result in an integer register.  Very painful.
// This is the test to avoid.
instruct cmpUL3_reg_reg(rRegI dst, rRegL src1, rRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpUL3 src1 src2));
  effect(KILL flags);

  ins_cost(275); // XXX
  format %{ "cmpq    $src1, $src2\t# CmpL3\n\t"
            "movl    $dst, -1\n\t"
            "jb,u    done\n\t"
            "setne   $dst\n\t"
            "movzbl  $dst, $dst\n\t"
    "done:" %}
  ins_encode %{
    Label done;
    __ cmpq($src1$$Register, $src2$$Register);
    __ movl($dst$$Register, -1);
    __ jccb(Assembler::below, done);
    __ setne($dst$$Register);
    __ movzbl($dst$$Register, $dst$$Register);
    __ bind(done);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned long compare Instructions; really, same as signed long except they
// produce an rFlagsRegU instead of rFlagsReg.
instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_cr_reg_reg);
%}

instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2)
%{
  match(Set cr (CmpUL op1 op2));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2)
%{
  match(Set cr (CmpUL op1 (LoadL op2)));

  format %{ "cmpq    $op1, $op2\t# unsigned" %}
  ins_encode %{
    __ cmpq($op1$$Register, $op2$$Address);
  %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero)
%{
  match(Set cr (CmpUL src zero));

  format %{ "testq   $src, $src\t# unsigned" %}
  ins_encode %{
    __ testq($src$$Register, $src$$Register);
  %}
  ins_pipe(ialu_cr_reg_imm);
%}

instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
%{
  match(Set cr (CmpI (LoadB mem) imm));

  ins_cost(125);
  format %{ "cmpb    $mem, $imm" %}
  ins_encode %{ __ cmpb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# ubyte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
  match(Set cr (CmpI (AndI (LoadB mem) imm) zero));

  ins_cost(125);
  format %{ "testb   $mem, $imm\t# byte" %}
  ins_encode %{ __ testb($mem$$Address, $imm$$constant); %}
  ins_pipe(ialu_cr_reg_mem);
%}

//----------Max and Min--------------------------------------------------------
// Min Instructions

instruct cmovI_reg_g(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovlgt $dst, $src\t# min" %}
  ins_encode %{
    __ cmovl(Assembler::greater, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct minI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MinI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_g(dst, src, cr);
  %}
%}

instruct cmovI_reg_l(rRegI dst, rRegI src, rFlagsReg cr)
%{
  effect(USE_DEF dst, USE src, USE cr);

  format %{ "cmovllt $dst, $src\t# max" %}
  ins_encode %{
    __ cmovl(Assembler::less, $dst$$Register, $src$$Register);
  %}
  ins_pipe(pipe_cmov_reg);
%}

instruct maxI_rReg(rRegI dst, rRegI src)
%{
  match(Set dst (MaxI dst src));

  ins_cost(200);
  expand %{
    rFlagsReg cr;
    compI_rReg(cr, dst, src);
    cmovI_reg_l(dst, src, cr);
  %}
%}

// ============================================================================
// Branch Instructions

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp     $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, rFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop     $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl\t# loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "j$cop,u   $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u    $labl\n\t"
      $$emit$$"j$cop,u   $labl"
    } else {
      $$emit$$"jp,u    done\n\t"
      $$emit$$"j$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary
// superklass array for an instance of the superklass.  Set a hidden
// internal cache on a hit (cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.

instruct partialSubtypeCheck(rdi_RegP result,
                             rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                             rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL rcx, KILL cr);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
            "jne,s   miss\t\t# Missed: rdi not-zero\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
            "xorq    $result, $result\t\t Hit: rdi zero\n\t"
    "miss:\t" %}

  opcode(0x1); // Force a XOR of RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}

instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
                                     rsi_RegP sub, rax_RegP super, rcx_RegI rcx,
                                     immP0 zero,
                                     rdi_RegP result)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL rcx, KILL result);

  ins_cost(1000);
  format %{ "movq    rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t"
            "movl    rcx, [rdi + Array<Klass*>::length_offset_in_bytes()]\t# length to scan\n\t"
            "addq    rdi, Array<Klass*>::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t"
            "repne   scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"
            "jne,s   miss\t\t# Missed: flags nz\n\t"
            "movq    [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t"
    "miss:\t" %}

  opcode(0x0); // No need to XOR RDI
  ins_encode(enc_PartialSubtypeCheck());
  ins_pipe(pipe_slow);
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset.  These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching.  Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "jmp,s   $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe(pipe_jmp);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,s   $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl\t# loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "j$cop,us  $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"jp,u,s  $labl\n\t"
      $$emit$$"j$cop,u,s  $labl"
    } else {
      $$emit$$"jp,u,s  done\n\t"
      $$emit$$"j$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
       ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// inlined locking and unlocking

instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr, rRegP cx1) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, USE_KILL box);
  ins_cost(300);
  format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, noreg, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "fastunlock $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================
// Safepoint Instructions
instruct safePoint_poll_tls(rFlagsReg cr, rRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr, USE poll);

  format %{ "testl   rax, [$poll]\t"
            "# Safepoint: poll for GC" %}
  ins_cost(125);
  size(4); /* setting an explicit size will cause debug builds to assert if size is incorrect */
  ins_encode %{
    __ relocate(relocInfo::poll_type);
    address pre_pc = __ pc();
    __ testl(rax, Address($poll$$Register, 0));
    assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit test %%eax [reg]");
  %}
  ins_pipe(ialu_reg_mem);
%}

instruct mask_all_evexL(kReg dst, rRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_GT32(kReg dst, rRegI src, rRegL tmp) %{
  predicate(Matcher::vector_length(n) > 32);
  match(Set dst (MaskAll src));
  effect(TEMP tmp);
  format %{ "mask_all_evexI_GT32 $dst, $src \t! using $tmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ movslq($tmp$$Register, $src$$Register);
    __ vector_maskall_operation($dst$$KRegister, $tmp$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(clear_avx, Java_Static_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "movq    rax, #Universe::non_oop_word()\n\t"
            "call,dynamic " %}
  ins_encode(clear_avx, Java_Dynamic_Call(meth), call_epilog);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);
  effect(USE meth);

  ins_cost(300);
  format %{ "call,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint and with vector arguments
instruct CallLeafDirectVector(method meth)
%{
  match(CallLeafVector);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf,vector " %}
  ins_encode(Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Call runtime without safepoint
instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "call_leaf_nofp,runtime " %}
  ins_encode(clear_avx, Java_To_Runtime(meth));
  ins_pipe(pipe_slow);
%}

// Return Instruction
// Remove the return address & jump to it.
// Notice: We always emit a nop after a ret to make sure there is room
// for safepoint patching
instruct Ret()
%{
  match(Return);

  format %{ "ret" %}
  ins_encode %{
    __ ret(0);
  %}
  ins_pipe(pipe_jmp);
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(no_rbp_RegP jump_target, rbx_RegP method_ptr)
%{
  match(TailCall jump_target method_ptr);

  ins_cost(300);
  format %{ "jmp     $jump_target\t# rbx holds method" %}
  ins_encode %{
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}

// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(no_rbp_RegP jump_target, rax_RegP ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(300);
  format %{ "popq    rdx\t# pop return address\n\t"
            "jmp     $jump_target" %}
  ins_encode %{
    __ popq(as_Register(RDX_enc));
    __ jmp($jump_target$$Register);
  %}
  ins_pipe(pipe_jmp);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException(rax_RegP ex_oop)
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in rax; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}

// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "jmp     rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(pipe_jmp);
%}

// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(r15_RegP dst) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst);

  size(0);
  format %{ "# TLS is in R15" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_reg_reg);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peeppredicate ( rule_predicate );
// // the predicate unless which the peephole rule will be ignored
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepprocedure ( procedure_name );
// // provide a procedure name to perform the optimization, the procedure should
// // reside in the architecture dependent peephole file, the method has the
// // signature of MachNode* (Block*, int, PhaseRegAlloc*, (MachNode*)(*)(), int...)
// // with the arguments being the basic block, the current node index inside the
// // block, the register allocator, the functions upon invoked return a new node
// // defined in peepreplace, and the rules of the nodes appearing in the
// // corresponding peepmatch, the function return true if successful, else
// // return false
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only transformations inside a basic block (do we need more for peephole)
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src)
// %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// instruct leaI_rReg_immI(rRegI dst, immI_1 src)
// %{
//   match(Set dst (AddI dst src));
// %}
//
// 1. Simple replacement
// - Only match adjacent instructions in same basic block
// - Only equality constraints
// - Only constraints between operands, not (0.dest_reg == RAX_enc)
// - Only one replacement instruction
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // increment preceded by register-register move
//   peepmatch ( incI_rReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// 2. Procedural replacement
// - More flexible finding relevent nodes
// - More flexible constraints
// - More flexible transformations
// - May utilise architecture-dependent API more effectively
// - Currently only one replacement instruction due to adlc parsing capabilities
//
// // Change (inc mov) to lea
// peephole %{
//   // lea should only be emitted when beneficial
//   peeppredicate( VM_Version::supports_fast_2op_lea() );
//   // the rule numbers of these nodes inside are passed into the function below
//   peepmatch ( incI_rReg movI );
//   // the method that takes the responsibility of transformation
//   peepprocedure ( inc_mov_to_lea );
//   // the replacement is a leaI_rReg_immI, a lambda upon invoked creating this
//   // node is passed into the function above
//   peepreplace ( leaI_rReg_immI() );
// %}

// These instructions is not matched by the matcher but used by the peephole
instruct leaI_rReg_rReg_peep(rRegI dst, rRegI src1, rRegI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leal(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leal(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI_peep(rRegI dst, rRegI src1, immI src2)
%{
  predicate(false);
  match(Set dst (AddI src1 src2));
  format %{ "leal    $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leal($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaI_rReg_immI2_peep(rRegI dst, rRegI src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftI src shift));
  format %{ "leal    $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leal($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leal($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_rReg_peep(rRegL dst, rRegL src1, rRegL src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq    $dst, [$src1 + $src2]" %}
  ins_encode %{
    Register dst = $dst$$Register;
    Register src1 = $src1$$Register;
    Register src2 = $src2$$Register;
    if (src1 != rbp && src1 != r13) {
      __ leaq(dst, Address(src1, src2, Address::times_1));
    } else {
      assert(src2 != rbp && src2 != r13, "");
      __ leaq(dst, Address(src2, src1, Address::times_1));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immL32_peep(rRegL dst, rRegL src1, immL32 src2)
%{
  predicate(false);
  match(Set dst (AddL src1 src2));
  format %{ "leaq    $dst, [$src1 + $src2]" %}
  ins_encode %{
    __ leaq($dst$$Register, Address($src1$$Register, $src2$$constant));
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct leaL_rReg_immI2_peep(rRegL dst, rRegL src, immI2 shift)
%{
  predicate(false);
  match(Set dst (LShiftL src shift));
  format %{ "leaq    $dst, [$src << $shift]" %}
  ins_encode %{
    Address::ScaleFactor scale = static_cast<Address::ScaleFactor>($shift$$constant);
    Register src = $src$$Register;
    if (scale == Address::times_2 && src != rbp && src != r13) {
      __ leaq($dst$$Register, Address(src, src, Address::times_1));
    } else {
      __ leaq($dst$$Register, Address(noreg, src, scale));
    }
  %}
  ins_pipe(ialu_reg_reg);
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaI_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addI_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (incI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (decI_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salI_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaI_rReg_immI2_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg);
  peepprocedure (lea_coalesce_reg);
  peepreplace (leaL_rReg_rReg_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (addL_rReg_imm);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (incL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (decL_rReg);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immL32_peep());
%}

peephole
%{
  peeppredicate(VM_Version::supports_fast_2op_lea());
  peepmatch (salL_rReg_immI2);
  peepprocedure (lea_coalesce_imm);
  peepreplace (leaL_rReg_immI2_peep());
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.