/* * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
/*
 * This file contains the implementation of continuation freezing (yield) and thawing (run).
 *
 * This code is very latency-critical and very hot. An ordinary and well-behaved server application
 * would likely call these operations many thousands of times per second, on every core.
 *
 * Freeze might be called every time the application performs any I/O operation, every time it
 * acquires a j.u.c. lock, every time it takes a message from a queue, and thaw can be called
 * multiple times in each of those cases, as it is called by the return barrier, which may be
 * invoked on method return.
 *
 * The amortized budget for each of those two operations is ~100-150ns. That is why, for
 * example, every effort is made to avoid Java-VM transitions as much as possible.
 *
 * On the fast path, all frames are known to be compiled, and the chunk requires no barriers,
 * and so frames are simply copied, and the bottom-most one is patched.
 * On the slow path, internal pointers in interpreted frames are de/relativized to/from offsets
 * and absolute pointers, and barriers are invoked.
 */
/************************************************

Thread-stack layout on freeze/thaw.
See corresponding stack-chunk layout in instanceStackChunkKlass.hpp

            +----------------------------+
            |      .                     |
            |      .                     |
            |      .                     |
            |   carrier frames           |
            |                            |
            |----------------------------|
            |                            |
            |    Continuation.run        |
            |                            |
            |============================|
            |    enterSpecial frame      |
            |  pc                        |
            |  rbp                       |
            |  -----                     |
        ^   |  int argsize               | = ContinuationEntry
        |   |  oopDesc* cont             |
        |   |  oopDesc* chunk            |
        |   |  ContinuationEntry* parent |
        |   |  ...                       |
        |   |============================| <------ JavaThread::_cont_entry = entry->sp()
        |   |  ? alignment word ?        |
        |   |----------------------------| <--\
        |   |                            |    |
        |   |  ? caller stack args ?     |    |   argsize (might not be 2-word aligned) words
Address |   |                            |    |   Caller is still in the chunk.
        |   |----------------------------|    |
        |   |  pc (? return barrier ?)   |    |  This pc contains the return barrier when the bottom-most frame
        |   |  rbp                       |    |  isn't the last one in the continuation.
        |   |                            |    |
        |   |    frame                   |    |
        |   |                            |    |
            +----------------------------|     \__ Continuation frames to be frozen/thawed
            |                            |     /
            |    frame                   |    |
            |                            |    |
            |----------------------------|    |
            |                            |    |
            |    frame                   |    |
            |                            |    |
            |----------------------------| <--/
            |                            |
            |    doYield/safepoint stub  | When preempting forcefully, we could have a safepoint stub
            |                            | instead of a doYield stub
            |============================| <- the sp passed to freeze
            |                            |
            |  Native freeze/thaw frames |
            |      .                     |
            |      .                     |
            |      .                     |
            +----------------------------+

************************************************/
// Testing switch: when true, forces thawing frames one-at-a-time.
static const bool TEST_THAW_ONE_CHUNK_FRAME = false; // force thawing frames one-at-a-time for testing
// CONT_JFR: emit low-level JFR events that count slow/fast path; for continuation
// performance debugging only.
// CONT_JFR_ONLY(code) expands to `code` when CONT_JFR is enabled and to nothing otherwise.
#define CONT_JFR false
#if CONT_JFR
  #define CONT_JFR_ONLY(code) code
#else
  #define CONT_JFR_ONLY(code)
#endif
// TODO: See AbstractAssembler::generate_stack_overflow_check, // Compile::bang_size_in_bytes(), m->as_SafePoint()->jvms()->interpreter_frame_size() // when we stack-bang, we need to update a thread field with the lowest (farthest) bang point.
// Data invariants are defined by Continuation::debug_verify_continuation and Continuation::debug_verify_stack_chunk
// Used just to annotate cold/hot branches; both macros expand to the bare condition.
#define LIKELY(condition)   (condition)
#define UNLIKELY(condition) (condition)
// debugging functions #ifdef ASSERT extern"C"bool dbg_is_safe(constvoid* p, intptr_t errvalue); // address p is readable and *(intptr_t*)p != errvalue
// Entry point to freeze. Transitions are handled manually // Called from gen_continuation_yield() in sharedRuntime_<cpu>.cpp through Continuation::freeze_entry(); template<typename ConfigT> static JRT_BLOCK_ENTRY(int, freeze(JavaThread* current, intptr_t* sp))
assert(sp == current->frame_anchor()->last_Java_sp(), "");
// Shared state for a single freeze operation.
// NOTE(review): only the members below are visible in this view; other FreezeBase members
// referenced by out-of-line definitions in this file are presumably declared in lines not shown.
class FreezeBase : public StackObj {
protected:
  JavaThread* const _thread;
  ContinuationWrapper& _cont;
  bool _barriers; // only set when we allocate a chunk
  const bool _preempt; // used only on the slow path
  const intptr_t * const _frame_sp; // Top frame sp for this freeze

  intptr_t* _bottom_address;

  int _freeze_size; // total size of all frames plus metadata in words.
  int _total_align_size;

#ifdef ASSERT
  // Debug-only emptiness check for a chunk that is in the middle of a freeze.
  bool is_empty(stackChunkOop chunk) {
    // during freeze, the chunk is in an intermediate state (after setting the chunk's argsize but before setting its
    // ultimate sp) so we use this instead of stackChunkOopDesc::is_empty
    return chunk->sp() >= chunk->stack_size() - chunk->argsize() - frame::metadata_words_at_top;
  }
#endif
};
template <typename ConfigT> class Freeze : public FreezeBase { private:
stackChunkOop allocate_chunk(size_t stack_size);
// properties of the continuation on the stack; all sizes are in words
_cont_stack_top = frame_sp + doYield_stub_frame_size; // we don't freeze the doYield stub frame
_cont_stack_bottom = _cont.entrySP() + (_cont.argsize() == 0 ? frame::metadata_words_at_top : 0)
- ContinuationHelper::frame_align_words(_cont.argsize()); // see alignment in thaw
// Called _after_ the last possible safepoint during the freeze operation (chunk allocation) void FreezeBase::unwind_frames() {
ContinuationEntry* entry = _cont.entry();
entry->flush_stack_processing(_thread);
set_anchor_to_entry(_thread, entry);
}
stackChunkOop chunk = allocate_chunk(cont_size() + frame::metadata_words); if (freeze_fast_new_chunk(chunk)) { return freeze_ok;
} if (_thread->has_pending_exception()) { return freeze_exception;
}
// TODO R REMOVE when deopt change is fixed
assert(!_thread->cont_fastpath() || _barriers, "");
log_develop_trace(continuations)("-- RETRYING SLOW --"); return freeze_slow();
}
// Returns size needed if the continuation fits, otherwise 0. int FreezeBase::size_if_fast_freeze_available() {
stackChunkOop chunk = _cont.tail(); if (chunk == nullptr || chunk->is_gc_mode() || chunk->requires_barriers() || chunk->has_mixed_frames()) {
log_develop_trace(continuations)("chunk available %s", chunk == nullptr ? "no chunk" : "chunk requires barriers"); return 0;
}
int total_size_needed = cont_size(); constint chunk_sp = chunk->sp();
// argsize can be nonzero if we have a caller, but the caller could be in a non-empty parent chunk, // so we subtract it only if we overlap with the caller, i.e. the current chunk isn't empty. // Consider leaving the chunk's argsize set when emptying it and removing the following branch, // although that would require changing stackChunkOopDesc::is_empty if (chunk_sp < chunk->stack_size()) {
total_size_needed -= _cont.argsize() + frame::metadata_words_at_top;
}
if (chunk->sp() < chunk->stack_size()) { // we are copying into a non-empty chunk
DEBUG_ONLY(_empty = false;)
assert(chunk->sp() < (chunk->stack_size() - chunk->argsize()), "");
assert(*(address*)(chunk->sp_address() - frame::sender_sp_ret_address_offset()) == chunk->pc(), "");
// the chunk's sp before the freeze, adjusted to point beyond the stack-passed arguments in the topmost frame // we overlap; we'll overwrite the chunk's top frame's callee arguments constint chunk_start_sp = chunk->sp() + _cont.argsize() + frame::metadata_words_at_top;
assert(chunk_start_sp <= chunk->stack_size(), "sp not pointing into stack");
// increase max_size by what we're freezing minus the overlap
chunk->set_max_thawing_size(chunk->max_thawing_size() + cont_size() - _cont.argsize() - frame::metadata_words_at_top);
intptr_t* const bottom_sp = _cont_stack_bottom - _cont.argsize() - frame::metadata_words_at_top;
assert(bottom_sp == _bottom_address, ""); // Because the chunk isn't empty, we know there's a caller in the chunk, therefore the bottom-most frame // should have a return barrier (installed back when we thawed it).
assert(*(address*)(bottom_sp-frame::sender_sp_ret_address_offset()) == StubRoutines::cont_returnBarrier(), "should be the continuation return barrier"); // We copy the fp from the chunk back to the stack because it contains some caller data, // including, possibly, an oop that might have gone stale since we thawed.
patch_stack_pd(bottom_sp, chunk->sp_address()); // we don't patch the return pc at this time, so as not to make the stack unwalkable for async walks
freeze_fast_copy(chunk, chunk_start_sp CONT_JFR_ONLY(COMMA false));
} else { // the chunk is empty
DEBUG_ONLY(_empty = true;) constint chunk_start_sp = chunk->sp();
// in a fresh chunk, we freeze *with* the bottom-most frame's stack arguments. // They'll then be stored twice: in the chunk and in the parent chunk's top frame constint chunk_start_sp = cont_size() + frame::metadata_words;
assert(chunk_start_sp == chunk->stack_size(), "");
// We unwind frames after the last safepoint so that the GC will have found the oops in the frames, but before // writing into the chunk. This is so that an asynchronous stack walk (not at a safepoint) that suspends us here // will either see no continuation on the stack, or a consistent chunk.
unwind_frames();
log_develop_trace(continuations)("freeze_fast start: chunk " INTPTR_FORMAT " size: %d orig sp: %d argsize: %d",
p2i((oopDesc*)chunk), chunk->stack_size(), chunk_start_sp, _cont.argsize());
assert(chunk_start_sp <= chunk->stack_size(), "");
assert(chunk_start_sp >= cont_size(), "no room in the chunk");
constint chunk_new_sp = chunk_start_sp - cont_size(); // the chunk's new sp, after freeze
assert(!(_fast_freeze_size > 0) || _orig_chunk_sp - (chunk->start_address() + chunk_new_sp) == _fast_freeze_size, "");
log_develop_trace(continuations)("freeze_fast start: " INTPTR_FORMAT " sp: %d chunk_top: " INTPTR_FORMAT,
p2i(chunk->start_address()), chunk_new_sp, p2i(chunk_top));
intptr_t* from = _cont_stack_top - frame::metadata_words_at_bottom;
intptr_t* to = chunk_top - frame::metadata_words_at_bottom;
copy_to_chunk(from, to, cont_size() + frame::metadata_words_at_bottom); // Because we're not patched yet, the chunk is now in a bad state
// patch return pc of the bottom-most frozen frame (now in the chunk) with the actual caller's return address
intptr_t* chunk_bottom_sp = chunk_top + cont_size() - _cont.argsize() - frame::metadata_words_at_top;
assert(_empty || *(address*)(chunk_bottom_sp-frame::sender_sp_ret_address_offset()) == StubRoutines::cont_returnBarrier(), "");
*(address*)(chunk_bottom_sp - frame::sender_sp_ret_address_offset()) = chunk->pc();
// We're always writing to a young chunk, so the GC can't see it until the next safepoint.
chunk->set_sp(chunk_new_sp); // set chunk->pc to the return address of the topmost frame in the chunk
chunk->set_pc(*(address*)(_cont_stack_top - frame::sender_sp_ret_address_offset()));
if (Interpreter::contains(ContinuationHelper::StubFrame::return_pc(f))) {
f = sender<ContinuationHelper::StubFrame>(f); // Safepoint stub in interpreter
}
}
assert(Continuation::is_frame_in_continuation(_thread->last_continuation(), f), ""); return f;
}
// The parameter callee_argsize includes metadata that has to be part of caller/callee overlap.
NOINLINE freeze_result FreezeBase::recurse_freeze(frame& f, frame& caller, intcallee_argsize, bool callee_interpreted, bool top) {
assert(f.unextended_sp() < _bottom_address, ""); // see recurse_freeze_java_frame
assert(f.is_interpreted_frame() || ((top && _preempt) == ContinuationHelper::Frame::is_stub(f.cb())), "");
if (stack_overflow()) { return freeze_exception;
}
if (f.is_compiled_frame()) { if (UNLIKELY(f.oop_map() == nullptr)) { // special native frame return freeze_pinned_native;
} return recurse_freeze_compiled_frame(f, caller, callee_argsize, callee_interpreted);
} elseif (f.is_interpreted_frame()) {
assert((_preempt && top) || !f.interpreter_frame_method()->is_native(), ""); if (_preempt && top && f.interpreter_frame_method()->is_native()) { // int native entry return freeze_pinned_native;
}
// The parameter callee_argsize includes metadata that has to be part of caller/callee overlap. // See also StackChunkFrameStream<frame_kind>::frame_size() template<typename FKind> inline freeze_result FreezeBase::recurse_freeze_java_frame(const frame& f, frame& caller, int fsize, int argsize) {
assert(FKind::is_instance(f), "");
// We don't use FKind::frame_bottom(f) == _bottom_address because on x64 there's sometimes an extra word between // enterSpecial and an interpreted frame if (FKind::frame_bottom(f) >= _bottom_address - 1) { return finalize_freeze(f, caller, argsize); // recursion end
} else {
frame senderf = sender<FKind>(f);
assert(FKind::interpreted || senderf.sp() == senderf.unextended_sp(), "");
freeze_result result = recurse_freeze(senderf, caller, argsize, FKind::interpreted, false); // recursive call return result;
}
}
_freeze_size += frame::metadata_words; // for top frame's metadata
int overlap = 0; // the args overlap the caller -- if there is one in this chunk and is of the same kind int unextended_sp = -1; if (chunk != nullptr) {
unextended_sp = chunk->sp(); if (!chunk->is_empty()) {
StackChunkFrameStream<ChunkFrames::Mixed> last(chunk);
unextended_sp = chunk->to_offset(StackChunkFrameStream<ChunkFrames::Mixed>(chunk).unextended_sp()); bool top_interpreted = Interpreter::contains(chunk->pc()); if (callee.is_interpreted_frame() == top_interpreted) {
overlap = argsize_md;
}
}
}
// _barriers can be set to true by an allocation in freeze_fast, in which case the chunk is available bool allocated_old_in_freeze_fast = _barriers;
assert(!allocated_old_in_freeze_fast || (unextended_sp >= _freeze_size && chunk->is_empty()), "Chunk allocated in freeze_fast is of insufficient size " "unextended_sp: %d size: %d is_empty: %d", unextended_sp, _freeze_size, chunk->is_empty());
assert(!allocated_old_in_freeze_fast || (!UseZGC && !UseG1GC), "Unexpected allocation");
DEBUG_ONLY(bool empty_chunk = true); if (unextended_sp < _freeze_size || chunk->is_gc_mode() || (!allocated_old_in_freeze_fast && chunk->requires_barriers())) { // ALLOCATE NEW CHUNK
// We unwind frames after the last safepoint so that the GC will have found the oops in the frames, but before // writing into the chunk. This is so that an asynchronous stack walk (not at a safepoint) that suspends us here // will either see no continuation or a consistent chunk.
unwind_frames();
if (lt.develop_is_enabled()) {
LogStream ls(lt);
ls.print_cr("top chunk:");
chunk->print_on(&ls);
}
// The topmost existing frame in the chunk; or an empty frame if the chunk is empty
caller = StackChunkFrameStream<ChunkFrames::Mixed>(chunk).to_frame();
// Patches the heap copy of a frozen frame.
//   f               - the frame as it is on the thread stack
//   hf              - its copy in the chunk (heap frame)
//   caller          - the caller of hf
//   is_bottom_frame - true if f is the bottom-most frame being frozen
void FreezeBase::patch(const frame& f, frame& hf, const frame& caller, bool is_bottom_frame) {
  if (is_bottom_frame) {
    // If we're the bottom frame, we need to replace the return barrier with the real
    // caller's pc.
    address last_pc = caller.pc();
    assert((last_pc == nullptr) == is_empty(_cont.tail()), "");
    ContinuationHelper::Frame::patch_pc(caller, last_pc);
  } else {
    assert(!caller.is_empty(), "");
  }

  // platform-dependent part of the patching
  patch_pd(hf, caller);

  if (f.is_interpreted_frame()) {
    assert(hf.is_heap_frame(), "should be");
    ContinuationHelper::InterpretedFrame::patch_sender_sp(hf, caller);
  }

#ifdef ASSERT
  if (hf.is_compiled_frame()) {
    if (f.is_deoptimized_frame()) { // TODO DEOPT: long term solution: unroll on freeze and patch pc
      log_develop_trace(continuations)("Freezing deoptimized frame");
      assert(f.cb()->as_compiled_method()->is_deopt_pc(f.raw_pc()), "");
      assert(f.cb()->as_compiled_method()->is_deopt_pc(ContinuationHelper::Frame::real_pc(f)), "");
    }
  }
#endif
}
// The parameter callee_argsize includes metadata that has to be part of caller/callee overlap.
// See also StackChunkFrameStream<frame_kind>::frame_size()
//
// Freezes one interpreted frame: computes its extent on the stack, recurses towards the bottom
// of the continuation, and on the way back copies the frame's locals and the rest of the frame
// into the chunk.
// Returns freeze_ok on success, or the failing result of the recursion.
NOINLINE freeze_result FreezeBase::recurse_freeze_interpreted_frame(frame& f, frame& caller,
                                                                    int callee_argsize /* incl. metadata */,
                                                                    bool callee_interpreted) {
  adjust_interpreted_frame_unextended_sp(f);

  // The frame's top never includes the stack arguments to the callee
  intptr_t* const stack_frame_top = ContinuationHelper::InterpretedFrame::frame_top(f, callee_argsize, callee_interpreted);
  intptr_t* const callers_sp = ContinuationHelper::InterpretedFrame::callers_sp(f);
  const int locals = f.interpreter_frame_method()->max_locals();
  const int fsize = callers_sp + frame::metadata_words_at_top + locals - stack_frame_top;

  intptr_t* const stack_frame_bottom = ContinuationHelper::InterpretedFrame::frame_bottom(f);
  assert(stack_frame_bottom - stack_frame_top >= fsize, ""); // == on x86

  DEBUG_ONLY(verify_frame_top(f, stack_frame_top));

  Method* frame_method = ContinuationHelper::Frame::frame_method(f);
  // including metadata between f and its args
  const int argsize = ContinuationHelper::InterpretedFrame::stack_argsize(f) + frame::metadata_words_at_top;

  log_develop_trace(continuations)("recurse_freeze_interpreted_frame %s _size: %d fsize: %d argsize: %d",
    frame_method->name_and_sig_as_C_string(), _freeze_size, fsize, argsize);
  // we'd rather not yield inside methods annotated with @JvmtiMountTransition
  assert(!ContinuationHelper::Frame::frame_method(f)->jvmti_mount_transition(), "");

  freeze_result result = recurse_freeze_java_frame<ContinuationHelper::InterpretedFrame>(f, caller, fsize, argsize);
  if (UNLIKELY(result > freeze_ok_bottom)) {
    return result;
  }

  bool is_bottom_frame = result == freeze_ok_bottom;
  assert(!caller.is_empty() || is_bottom_frame, "");

  // NOTE(review): heap_frame_top / heap_frame_bottom are not declared in the visible part of this
  // function; presumably they are derived from the heap copy of f in lines not shown -- confirm.

  // on AArch64 we add padding between the locals and the rest of the frame to keep the fp 16-byte-aligned
  copy_to_chunk(stack_frame_bottom - locals, heap_frame_bottom - locals, locals); // copy locals
  copy_to_chunk(stack_frame_top, heap_frame_top, fsize - locals); // copy rest
  assert(!is_bottom_frame || !caller.is_interpreted_frame() || (heap_frame_top + fsize) == (caller.unextended_sp() + argsize), "");

  // Mark frame_method's GC epoch for class redefinition on_stack calculation.
  frame_method->record_gc_epoch();

  return freeze_ok;
}
// The parameter callee_argsize includes metadata that has to be part of caller/callee overlap. // See also StackChunkFrameStream<frame_kind>::frame_size()
freeze_result FreezeBase::recurse_freeze_compiled_frame(frame& f, frame& caller, int callee_argsize /* incl. metadata */, bool callee_interpreted) { // The frame's top never includes the stack arguments to the callee
intptr_t* const stack_frame_top = ContinuationHelper::CompiledFrame::frame_top(f, callee_argsize, callee_interpreted);
intptr_t* const stack_frame_bottom = ContinuationHelper::CompiledFrame::frame_bottom(f); // including metadata between f and its stackargs constint argsize = ContinuationHelper::CompiledFrame::stack_argsize(f) + frame::metadata_words_at_top; constint fsize = stack_frame_bottom + argsize - stack_frame_top;
log_develop_trace(continuations)("recurse_freeze_compiled_frame %s _size: %d fsize: %d argsize: %d",
ContinuationHelper::Frame::frame_method(f) != nullptr ?
ContinuationHelper::Frame::frame_method(f)->name_and_sig_as_C_string() : "",
_freeze_size, fsize, argsize); // we'd rather not yield inside methods annotated with @JvmtiMountTransition
assert(!ContinuationHelper::Frame::frame_method(f)->jvmti_mount_transition(), "");
freeze_result result = recurse_freeze_java_frame<ContinuationHelper::CompiledFrame>(f, caller, fsize, argsize); if (UNLIKELY(result > freeze_ok_bottom)) { return result;
}
bool is_bottom_frame = result == freeze_ok_bottom;
assert(!caller.is_empty() || is_bottom_frame, "");
// recurse_freeze_java_frame and freeze inlined here because we need to use a full RegisterMap for lock ownership
NOT_PRODUCT(_frames++;)
_freeze_size += fsize;
RegisterMap map(_cont.thread(),
RegisterMap::UpdateMap::include,
RegisterMap::ProcessFrames::skip,
RegisterMap::WalkContinuation::skip);
map.set_include_argument_oops(false);
ContinuationHelper::update_register_map<ContinuationHelper::StubFrame>(f, &map);
f.oop_map()->update_register_map(&f, &map); // we have callee-save registers in this case
frame senderf = sender<ContinuationHelper::StubFrame>(f);
assert(senderf.unextended_sp() < _bottom_address - 1, "");
assert(senderf.is_compiled_frame(), "");
if (UNLIKELY(_barriers)) {
log_develop_trace(continuations)("do barriers on old chunk"); // Serial and Parallel GC can allocate objects directly into the old generation. // Then we want to relativize the derived pointers eagerly so that // old chunks are all in GC mode.
assert(!UseG1GC, "G1 can not deal with allocating outside of eden");
assert(!UseZGC, "ZGC can not deal with allocating chunks visible to marking"); if (UseShenandoahGC) {
_cont.tail()->relativize_derived_pointers_concurrently();
} else {
ContinuationGCSupport::transform_stack_chunk(_cont.tail());
} // For objects in the old generation we must maintain the remembered set
_cont.tail()->do_barriers<stackChunkOopDesc::BarrierType::Store>();
}
if (lt.develop_is_enabled()) {
LogStream ls(lt);
ls.print_cr("top hframe after (freeze):");
assert(_cont.last_frame().is_heap_frame(), "should be");
_cont.last_frame().print_on(&ls);
}
assert(_cont.chunk_invariant(), "");
}
inlinebool FreezeBase::stack_overflow() { // detect stack overflow in recursive native code
JavaThread* t = !_preempt ? _thread : JavaThread::current();
assert(t == JavaThread::current(), ""); if (os::current_stack_pointer() < t->stack_overflow_state()->shadow_zone_safe_limit()) { if (!_preempt) {
ContinuationWrapper::SafepointOp so(t, _cont); // could also call _cont.done() instead
Exceptions::_throw_msg(t, __FILE__, __LINE__, vmSymbols::java_lang_StackOverflowError(), "Stack overflow while freezing");
} returntrue;
} returnfalse;
}
class StackChunkAllocator : public MemAllocator { const size_t _stack_size;
ContinuationWrapper& _continuation_wrapper;
JvmtiSampledObjectAllocEventCollector* const _jvmti_event_collector; mutablebool _took_slow_path;
// Does the minimal amount of initialization needed for a TLAB allocation. // We don't need to do a full initialization, as such an allocation need not be immediately walkable. virtual oop initialize(HeapWord* mem) const override {
assert(_stack_size > 0, "");
assert(_stack_size <= max_jint, "");
assert(_word_size > _stack_size, "");
// zero out fields (but not the stack) const size_t hs = oopDesc::header_size();
Copy::fill_to_aligned_words(mem + hs, vmClasses::StackChunk_klass()->size_helper() - hs);
// Provides it's own, specialized allocation which skips instrumentation // if the memory can be allocated without going to a slow-path.
stackChunkOop allocate() const { // First try to allocate without any slow-paths or instrumentation.
stackChunkOop obj = allocate_fast(); if (obj != nullptr) { return obj;
}
// Now try full-blown allocation with all expensive operations, // including potentially safepoint operations.
_took_slow_path = true;
InstanceStackChunkKlass* klass = InstanceStackChunkKlass::cast(vmClasses::StackChunk_klass());
size_t size_in_words = klass->instance_size(stack_size);
if (CollectedHeap::stack_chunk_max_size() > 0 && size_in_words >= CollectedHeap::stack_chunk_max_size()) { if (!_preempt) {
throw_stack_overflow_on_humongous_chunk();
} return nullptr;
}
JavaThread* current = _preempt ? JavaThread::current() : _thread;
assert(current == JavaThread::current(), "should be current");
// Allocate the chunk. // // This might safepoint while allocating, but all safepointing due to // instrumentation have been deferred. This property is important for // some GCs, as this ensures that the allocated object is in the young // generation / newly allocated memory.
StackChunkAllocator allocator(klass, size_in_words, current, stack_size, _cont, _jvmti_event_collector);
stackChunkOop chunk = allocator.allocate();
// fields are uninitialized
chunk->set_parent_access<IS_DEST_UNINITIALIZED>(_cont.last_nonempty_chunk());
chunk->set_cont_access<IS_DEST_UNINITIALIZED>(_cont.continuation());
#if INCLUDE_ZGC if (UseZGC) {
assert(!chunk->requires_barriers(), "ZGC always allocates in the young generation");
_barriers = false;
} else #endif #if INCLUDE_SHENANDOAHGC if (UseShenandoahGC) {
_barriers = chunk->requires_barriers();
} else #endif
{ if (!allocator.took_slow_path()) { // Guaranteed to be in young gen / newly allocated memory
assert(!chunk->requires_barriers(), "Unfamiliar GC requires barriers on TLAB allocation");
_barriers = false;
} else { // Some GCs could put direct allocations in old gen for slow-path // allocations; need to explicitly check if that was the case.
_barriers = chunk->requires_barriers();
}
}
if (_barriers) {
log_develop_trace(continuations)("allocation requires barriers");
}
#ifdef ASSERT
// Debug-only: walks the frames of the thread's last continuation, starting at the thread's
// last frame, and returns true if any interpreted or compiled frame owns locks.
static bool monitors_on_stack(JavaThread* thread) {
  ContinuationEntry* ce = thread->last_continuation();
  RegisterMap map(thread,
                  RegisterMap::UpdateMap::include,
                  RegisterMap::ProcessFrames::include,
                  RegisterMap::WalkContinuation::skip);
  map.set_include_argument_oops(false);
  for (frame f = thread->last_frame(); Continuation::is_frame_in_continuation(ce, f); f = f.sender(&map)) {
    if ((f.is_interpreted_frame() && ContinuationHelper::InterpretedFrame::is_owning_locks(f)) ||
        (f.is_compiled_frame() && ContinuationHelper::CompiledFrame::is_owning_locks(map.thread(), &map, f))) {
      return true;
    }
  }
  return false;
}

// Debug-only: walks the continuation's frames starting at freeze_start_frame() and returns true
// if any of them is an interpreted, native or deoptimized frame.
bool FreezeBase::interpreted_native_or_deoptimized_on_stack() {
  ContinuationEntry* ce = _thread->last_continuation();
  RegisterMap map(_thread,
                  RegisterMap::UpdateMap::skip,
                  RegisterMap::ProcessFrames::skip,
                  RegisterMap::WalkContinuation::skip);
  map.set_include_argument_oops(false);
  for (frame f = freeze_start_frame(); Continuation::is_frame_in_continuation(ce, f); f = f.sender(&map)) {
    if (f.is_interpreted_frame() || f.is_native_frame() || f.is_deoptimized_frame()) {
      return true;
    }
  }
  return false;
}
#endif // ASSERT
staticinlineint freeze_epilog(JavaThread* thread, ContinuationWrapper& cont) {
verify_continuation(cont.continuation());
assert(!cont.is_empty(), ""); // This is done for the sake of the enterSpecial frame
StackWatermarkSet::after_unwind(thread);
log_develop_debug(continuations)("=== End of freeze cont ### #" INTPTR_FORMAT, cont.hash());
return 0;
}
staticint freeze_epilog(JavaThread* thread, ContinuationWrapper& cont, freeze_result res) { if (UNLIKELY(res != freeze_ok)) {
verify_continuation(cont.continuation());
log_develop_trace(continuations)("=== end of freeze (fail %d)", res); return res;
}
JVMTI_ONLY(jvmti_yield_cleanup(thread, cont)); // can safepoint return freeze_epilog(thread, cont);
}
if (entry->is_pinned() || current->held_monitor_count() > 0) {
log_develop_debug(continuations)("PINNED due to critical section/hold monitor");
verify_continuation(cont.continuation());
freeze_result res = entry->is_pinned() ? freeze_pinned_cs : freeze_pinned_monitor;
log_develop_trace(continuations)("=== end of freeze (fail %d)", res); return res;
}
Freeze<ConfigT> freeze(current, cont, sp);
// There are no interpreted frames if we're not called from the interpreter and we haven't encountered an i2c
// adapter or called Deoptimization::unpack_frames. Calls from native frames also go through the interpreter
// (see JavaCalls::call_helper).
assert(!current->cont_fastpath()
|| (current->cont_fastpath_thread_state() && !freeze.interpreted_native_or_deoptimized_on_stack()), ""); bool fast = UseContinuationFastPath && current->cont_fastpath(); if (fast && freeze.size_if_fast_freeze_available() > 0) {
freeze.freeze_fast_existing_chunk();
CONT_JFR_ONLY(freeze.jfr_info().post_jfr_event(&event, oopCont, current);)
freeze_epilog(current, cont); return 0;
}
log_develop_trace(continuations)("chunk unavailable; transitioning to VM");
assert(current == JavaThread::current(), "must be current thread except for preempt");
JRT_BLOCK // delays a possible JvmtiSampledObjectAllocEventCollector in alloc_chunk
JvmtiSampledObjectAllocEventCollector jsoaec(false);
freeze.set_jvmti_event_collector(&jsoaec);
freeze_result res = fast ? freeze.try_freeze_fast() : freeze.freeze_slow();
CONT_JFR_ONLY(freeze.jfr_info().post_jfr_event(&event, oopCont, current);)
freeze_epilog(current, cont, res);
cont.done(); // allow safepoint in the transition back to Java return res;
JRT_BLOCK_END
}
RegisterMap map(thread,
RegisterMap::UpdateMap::include,
RegisterMap::ProcessFrames::skip,
RegisterMap::WalkContinuation::skip);
map.set_include_argument_oops(false);
frame f = thread->last_frame();
if (!safepoint) {
f = f.sender(&map); // this is the yield frame
} else { // safepoint yield #if (defined(X86) || defined(AARCH64) || defined(RISCV64)) && !defined(ZERO)
f.set_fp(f.real_fp()); // Instead of this, maybe in ContinuationWrapper::set_last_frame always use the real_fp? #else
Unimplemented(); #endif if (!Interpreter::contains(f.pc())) {
assert(ContinuationHelper::Frame::is_stub(f.cb()), "must be");
assert(f.oop_map() != nullptr, "must be");
f.oop_map()->update_register_map(&f, &map); // we have callee-save registers in this case
}
}
while (true) { if ((f.is_interpreted_frame() && f.interpreter_frame_method()->is_native()) || f.is_native_frame()) { return freeze_pinned_native;
}
f = f.sender(&map); if (!Continuation::is_frame_in_continuation(entry, f)) {
oop scope = jdk_internal_vm_Continuation::scope(entry->cont_oop(thread)); if (scope == cont_scope) { break;
} int monitor_count = entry->parent_held_monitor_count();
entry = entry->parent(); if (entry == nullptr) { break;
} if (entry->is_pinned()) { return freeze_pinned_cs;
} elseif (monitor_count > 0) { return freeze_pinned_monitor;
}
}
} return freeze_ok;
}
/////////////// THAW ////
staticint thaw_size(stackChunkOop chunk) { int size = chunk->max_thawing_size();
size += frame::metadata_words; // For the top pc+fp in push_return_frame or top = stack_sp - frame::metadata_words in thaw_fast
size += 2*frame::align_wiggle; // in case of alignments at the top and bottom return size;
}
// make room on the stack for thaw // returns the size in bytes, or 0 on failure staticinlineint prepare_thaw_internal(JavaThread* thread, bool return_barrier) {
log_develop_trace(continuations)("~~~~ prepare_thaw return_barrier: %d", return_barrier);
// The tail can be empty because it might still be available for another freeze. // However, here we want to thaw, so we get rid of it (it will be GCed). if (UNLIKELY(chunk->is_empty())) {
chunk = chunk->parent();
assert(chunk != nullptr, "");
assert(!chunk->is_empty(), "");
jdk_internal_vm_Continuation::set_tail(continuation, chunk);
}
// Verification
chunk->verify();
assert(chunk->max_thawing_size() > 0, "chunk invariant violated; expected to not be empty");
// Only make space for the last chunk because we only thaw from the last chunk int size = thaw_size(chunk) << LogBytesPerWord;
const address bottom = (address)thread->last_continuation()->entry_sp(); // 300 is an estimate for stack size taken for this native code, in addition to StackShadowPages // for the Java frames in the check below. if (!stack_overflow_check(thread, size + 300, bottom)) { return 0;
}
void clear_chunk(stackChunkOop chunk); int remove_top_compiled_frame_from_chunk(stackChunkOop chunk, int &argsize); void copy_from_chunk(intptr_t* from, intptr_t* to, int size);
// fast path inlinevoid prefetch_chunk_pd(void* start, int size_words); void patch_return(intptr_t* sp, bool is_last);
// Describes the stack region, adjacent to the entry frame, that is being
// reconstructed. All sizes are counted in intptr_t-sized slots, since they
// are used directly in intptr_t* arithmetic.
//
// NOTE(review): top() calls sp(), which is not declared in the visible part
// of this class -- confirm that it is defined.
class ReconstructedStack : public StackObj {
  intptr_t* _base;  // _cont.entrySP(); // top of the entry frame
  int _thaw_size;   // thaw_size, minus metadata_words_at_top when argsize == 0
  int _argsize;     // argsize as passed to the constructor

public:
  ReconstructedStack(intptr_t* base, int thaw_size, int argsize)
  : _base(base),
    _thaw_size(thaw_size - (argsize == 0 ? frame::metadata_words_at_top : 0)),
    _argsize(argsize) {
    // The only possible source of misalignment is stack-passed arguments b/c compiled frames are 16-byte aligned.
    assert(argsize != 0 || (_base - _thaw_size) == ContinuationHelper::frame_align_pointer(_base - _thaw_size), "");
    // We're at most one alignment word away from entrySP
    assert(_base - 1 <= top() + total_size() + frame::metadata_words_at_bottom, "missed entry frame");
  }

  // several operations operate on the totality of the stack being reconstructed,
  // including the metadata words
  intptr_t* top() const { return sp() - frame::metadata_words_at_bottom; }
  int total_size() const { return _thaw_size + frame::metadata_words_at_bottom; }
};
if (empty) {
clear_chunk(chunk);
} else {
chunk->set_sp(chunk->sp() + frame_size);
chunk->set_max_thawing_size(chunk->max_thawing_size() - frame_size); // We set chunk->pc to the return pc into the next frame
chunk->set_pc(f.pc());
assert(f.pc() == *(address*)(chunk_sp + frame_size - frame::sender_sp_ret_address_offset()), "unexpected pc");
}
assert(empty == chunk->is_empty(), ""); // returns the size required to store the frame on stack, and because it is a // compiled frame, it must include a copy of the arguments passed by the caller return frame_size + argsize + frame::metadata_words_at_top;
}
void ThawBase::copy_from_chunk(intptr_t* from, intptr_t* to, int size) {
assert(to >= _top_stack_address, "overwrote past thawing space" " to: " INTPTR_FORMAT " top_address: " INTPTR_FORMAT, p2i(to), p2i(_top_stack_address));
assert(to + size <= _cont.entrySP(), "overwrote past thawing space");
_cont.tail()->copy_from_chunk_to_stack(from, to, size);
CONT_JFR_ONLY(_jfr_info.record_size_copied(size);)
}
frame heap_frame = _stream.to_frame(); if (lt.develop_is_enabled()) {
LogStream ls(lt);
ls.print_cr("top hframe before (thaw):");
assert(heap_frame.is_heap_frame(), "should have created a relative frame");
heap_frame.print_value_on(&ls, nullptr);
}
frame caller; // the thawed caller on the stack
recurse_thaw(heap_frame, caller, num_frames, true);
finish_thaw(caller); // caller is now the topmost thawed frame
_cont.write();
// we never leave a compiled caller of an interpreted frame as the top frame in the chunk // as it makes detecting that situation and adjusting unextended_sp tricky if (num_frames == 1 && !_stream.is_done() && FKind::interpreted && _stream.is_compiled()) {
log_develop_trace(continuations)("thawing extra compiled frame to not leave a compiled interpreted-caller at top");
num_frames++;
}
inlinevoid ThawBase::patch(frame& f, const frame& caller, bool bottom) {
assert(!bottom || caller.fp() == _cont.entryFP(), ""); if (bottom) {
ContinuationHelper::Frame::patch_pc(caller, _cont.is_empty() ? caller.pc()
: StubRoutines::cont_returnBarrier());
} else { // caller might have been deoptimized during thaw but we've overwritten the return address when copying f from the heap. // If the caller is not deoptimized, pc is unchanged.
ContinuationHelper::Frame::patch_pc(caller, caller.raw_pc());
}
patch_pd(f, caller);
if (f.is_interpreted_frame()) {
ContinuationHelper::InterpretedFrame::patch_sender_sp(f, caller);
}
void ThawBase::clear_bitmap_bits(intptr_t* start, int range) { // we need to clear the bits that correspond to arguments as they reside in the caller frame // or they will keep objects that are otherwise unreachable alive
log_develop_trace(continuations)("clearing bitmap for " INTPTR_FORMAT " - " INTPTR_FORMAT, p2i(start), p2i(start+range));
stackChunkOop chunk = _cont.tail();
chunk->bitmap().clear_range(chunk->bit_index_for(start),
chunk->bit_index_for(start+range));
}
// on AArch64/PPC64 we add padding between the locals and the rest of the frame to keep the fp 16-byte-aligned constint locals = hf.interpreter_frame_method()->max_locals();
assert(hf.is_heap_frame(), "should be");
assert(!f.is_heap_frame(), "should not be");
void ThawBase::recurse_thaw_compiled_frame(const frame& hf, frame& caller, intnum_frames, bool stub_caller) {
assert(!hf.is_interpreted_frame(), "");
assert(_cont.is_preempted() || !stub_caller, "stub caller not at preemption");
if (!stub_caller && UNLIKELY(seen_by_gc())) { // recurse_thaw_stub_frame already invoked our barriers with a full regmap
_cont.tail()->do_barriers<stackChunkOopDesc::BarrierType::Store>(_stream, SmallRegisterMap::instance);
}
if ((!is_bottom_frame && caller.is_interpreted_frame()) || (is_bottom_frame && Interpreter::contains(_cont.tail()->pc()))) {
_align_size += frame::align_wiggle; // we add one whether or not we've aligned because we add it in freeze_interpreted_frame
}
// new_stack_frame must construct the resulting frame using hf.pc() rather than hf.raw_pc() because the frame is not // yet laid out in the stack, and so the original_pc is not stored in it. // As a result, f.is_deoptimized_frame() is always false and we must test hf to know if the frame is deoptimized.
frame f = new_stack_frame<ContinuationHelper::CompiledFrame>(hf, caller, is_bottom_frame);
intptr_t* const stack_frame_top = f.sp();
intptr_t* const heap_frame_top = hf.unextended_sp();
intptr_t* from = heap_frame_top - frame::metadata_words_at_bottom;
intptr_t* to = stack_frame_top - frame::metadata_words_at_bottom; // copy metadata, except the metadata at the top of the (unextended) entry frame int sz = fsize + frame::metadata_words_at_bottom + (is_bottom_frame && added_argsize == 0 ? 0 : frame::metadata_words_at_top);
// If we're the bottom-most thawed frame, we're writing to within one word from entrySP // (we might have one padding word for alignment)
assert(!is_bottom_frame || (_cont.entrySP() - 1 <= to + sz && to + sz <= _cont.entrySP()), "");
assert(!is_bottom_frame || hf.compiled_frame_stack_argsize() != 0 || (to + sz && to + sz == _cont.entrySP()), "");
copy_from_chunk(from, to, sz); // copying good oops because we invoked barriers above
patch(f, caller, is_bottom_frame);
// f.is_deoptimized_frame() is always false and we must test hf.is_deoptimized_frame() (see comment above)
assert(!f.is_deoptimized_frame(), ""); if (hf.is_deoptimized_frame()) {
maybe_set_fastpath(f.sp());
} elseif (_thread->is_interp_only_mode()
|| (_cont.is_preempted() && f.cb()->as_compiled_method()->is_marked_for_deoptimization())) { // The caller of the safepoint stub when the continuation is preempted is not at a call instruction, and so // cannot rely on nmethod patching for deopt.
assert(_thread->is_interp_only_mode() || stub_caller, "expected a stub-caller");
f.deoptimize(nullptr); // the null thread simply avoids the assertion in deoptimize which we're not set up for
assert(f.is_deoptimized_frame(), "");
assert(ContinuationHelper::Frame::is_deopt_return(f.raw_pc(), f), "");
maybe_set_fastpath(f.sp());
}
if (!is_bottom_frame) { // can only fix caller once this frame is thawed (due to callee saved regs); this happens on the stack
_cont.tail()->fix_thawed_frame(caller, SmallRegisterMap::instance);
} elseif (_cont.tail()->has_bitmap() && added_argsize > 0) {
clear_bitmap_bits(heap_frame_top + ContinuationHelper::CompiledFrame::size(hf) + frame::metadata_words_at_top, added_argsize);
}
if (chunk->is_empty()) { // Only remove chunk from list if it can't be reused for another freeze if (seen_by_gc()) {
_cont.set_tail(chunk->parent());
} else {
chunk->set_has_mixed_frames(false);
}
chunk->set_max_thawing_size(0);
assert(chunk->argsize() == 0, "");
} else {
chunk->set_max_thawing_size(chunk->max_thawing_size() - _align_size);
}
assert(chunk->is_empty() == (chunk->max_thawing_size() == 0), "");
if (!is_aligned(f.sp(), frame::frame_alignment)) {
assert(f.is_interpreted_frame(), "");
f.set_sp(align_down(f.sp(), frame::frame_alignment));
}
push_return_frame(f);
chunk->fix_thawed_frame(f, SmallRegisterMap::instance); // can only fix caller after push_return_frame (due to callee saved regs)
assert(f.sp() - frame::metadata_words_at_bottom >= _top_stack_address, "overwrote past thawing space" " to: " INTPTR_FORMAT " top_address: " INTPTR_FORMAT, p2i(f.sp() - frame::metadata_words), p2i(_top_stack_address));
ContinuationHelper::Frame::patch_pc(f, f.raw_pc()); // in case we want to deopt the frame in a full transition, this is checked.
ContinuationHelper::push_pd(f);
// returns new top sp // called after preparations (stack overflow check and making room) template<typename ConfigT> staticinline intptr_t* thaw_internal(JavaThread* thread, const Continuation::thaw_kind kind) {
assert(thread == JavaThread::current(), "Must be current thread");
if (false) { for (frame f = thread->last_frame(); !f.is_entry_frame(); f = f.sender(&map)) {
f.print_on(&ls);
}
} else {
map.set_skip_missing(true);
ResetNoHandleMark rnhm;
ResourceMark rm;
HandleMark hm(Thread::current());
FrameValues values;
int i = 0; int post_entry = -1; for (frame f = thread->last_frame(); !f.is_entry_frame(); f = f.sender(&map)) {
f.describe(values, i++, &map); if (post_entry >= 0 || Continuation::is_continuation_enterSpecial(f))
post_entry++; if (post_entry >= show_entry_callers) break;
}
values.print_on(thread, &ls);
}
ls.print_cr("======= end frames =========");
} #endif// ASSERT
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.