diff --git a/make/RunTests.gmk b/make/RunTests.gmk index 72dc41c2374..0b69c042f56 100644 --- a/make/RunTests.gmk +++ b/make/RunTests.gmk @@ -812,6 +812,11 @@ define SetupRunJtregTestBody $1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$($1_JTREG_PROBLEM_LIST)) endif + # Add more Lilliput-specific ProblemLists when UCOH is enabled + ifneq ($$(findstring -XX:+UseCompactObjectHeaders, $$(TEST_OPTS)), ) + JTREG_EXTRA_PROBLEM_LISTS += $(TOPDIR)/test/hotspot/jtreg/ProblemList-lilliput.txt + endif + ifneq ($$(JTREG_EXTRA_PROBLEM_LISTS), ) # Accept both absolute paths as well as relative to the current test root. $1_JTREG_BASIC_OPTIONS += $$(addprefix $$(JTREG_PROBLEM_LIST_PREFIX), $$(wildcard \ diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 339f12a99b7..56e202efbbc 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -1986,7 +1986,9 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { Label dummy_label; Label* code_stub = &dummy_label; if (!C->output()->in_scratch_emit_size()) { - code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset()); + C->output()->add_stub(stub); + code_stub = &stub->entry(); } __ relocate(relocInfo::poll_return_type); __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); @@ -3799,166 +3801,6 @@ encode %{ __ br(target_reg); %} - enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ - C2_MacroAssembler _masm(&cbuf); - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); - Register disp_hdr = as_Register($tmp$$reg); - Register tmp = as_Register($tmp2$$reg); - Label cont; - Label object_has_monitor; - Label cas_failed; - - assert_different_registers(oop, box, tmp, disp_hdr); - - // Load markWord from object into displaced_header. - __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes())); - - if (DiagnoseSyncOnValueBasedClasses != 0) { - __ load_klass(tmp, oop); - __ ldrw(tmp, Address(tmp, Klass::access_flags_offset())); - __ tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS); - __ br(Assembler::NE, cont); - } - - if (UseBiasedLocking && !UseOptoBiasInlining) { - __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont); - } - - // Check for existing monitor - __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor); - - // Set tmp to be (markWord of object | UNLOCK_VALUE). - __ orr(tmp, disp_hdr, markWord::unlocked_value); - - // Initialize the box. (Must happen before we update the object mark!) - __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - // Compare object markWord with an unlocked value (tmp) and if - // equal exchange the stack address of our box with object markWord. - // On failure disp_hdr contains the possibly locked markWord. - __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true, - /*release*/ true, /*weak*/ false, disp_hdr); - __ br(Assembler::EQ, cont); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // If the compare-and-exchange succeeded, then we found an unlocked - // object, will have now locked it will continue at label cont - - __ bind(cas_failed); - // We did not see an unlocked object so try the fast recursive case. 
- - // Check if the owner is self by comparing the value in the - // markWord of object (disp_hdr) with the stack pointer. - __ mov(rscratch1, sp); - __ sub(disp_hdr, disp_hdr, rscratch1); - __ mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place)); - // If condition is true we are cont and hence we can store 0 as the - // displaced header in the box, which indicates that it is a recursive lock. - __ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result - __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - __ b(cont); - - // Handle existing monitor. - __ bind(object_has_monitor); - - // The object's monitor m is unlocked iff m->owner == NULL, - // otherwise m->owner may contain a thread or a stack address. - // - // Try to CAS m->owner from NULL to current thread. - __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value)); - __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true, - /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result - - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. The fast-path monitor unlock code checks for - // markWord::monitor_value so use markWord::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::enter. - __ mov(tmp, (address)markWord::unused_mark().value()); - __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - __ br(Assembler::EQ, cont); // CAS success means locking succeeded - - __ cmp(rscratch1, rthread); - __ br(Assembler::NE, cont); // Check for recursive locking - - // Recursive lock case - __ increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1); - // flag == EQ still from the cmp above, checking if this is a reentrant lock - - __ bind(cont); - // flag == EQ indicates success - // flag == NE indicates failure - %} - - enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{ - C2_MacroAssembler _masm(&cbuf); - Register oop = as_Register($object$$reg); - Register box = as_Register($box$$reg); - Register disp_hdr = as_Register($tmp$$reg); - Register tmp = as_Register($tmp2$$reg); - Label cont; - Label object_has_monitor; - - assert_different_registers(oop, box, tmp, disp_hdr); - - if (UseBiasedLocking && !UseOptoBiasInlining) { - __ biased_locking_exit(oop, tmp, cont); - } - - // Find the lock address and load the displaced header from the stack. - __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); - - // If the displaced header is 0, we have a recursive unlock. - __ cmp(disp_hdr, zr); - __ br(Assembler::EQ, cont); - - // Handle existing monitor. - __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes())); - __ tbnz(tmp, exact_log2(markWord::monitor_value), object_has_monitor); - - // Check if it is still a light weight lock, this is is true if we - // see the stack address of the basicLock in the markWord of the - // object. - - __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false, - /*release*/ true, /*weak*/ false, tmp); - __ b(cont); - - assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); - - // Handle existing monitor. 
- __ bind(object_has_monitor); - STATIC_ASSERT(markWord::monitor_value <= INT_MAX); - __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor - __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); - - Label notRecursive; - __ cbz(disp_hdr, notRecursive); - - // Recursive lock - __ sub(disp_hdr, disp_hdr, 1u); - __ str(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); - __ cmp(disp_hdr, disp_hdr); // Sets flags for result - __ b(cont); - - __ bind(notRecursive); - __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes())); - __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes())); - __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0. - __ cmp(rscratch1, zr); // Sets flags for result - __ cbnz(rscratch1, cont); - // need a release store here - __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); - __ stlr(zr, tmp); // set unowned - - __ bind(cont); - // flag == EQ indicates success - // flag == NE indicates failure - %} - %} //----------FRAME-------------------------------------------------------------- @@ -7439,7 +7281,7 @@ instruct loadKlass(iRegPNoSp dst, memory8 mem) instruct loadNKlass(iRegNNoSp dst, memory4 mem) %{ match(Set dst (LoadNKlass mem)); - predicate(!needs_acquiring_load(n)); + predicate(!needs_acquiring_load(n) && !UseCompactObjectHeaders); ins_cost(4 * INSN_COST); format %{ "ldrw $dst, $mem\t# compressed class ptr" %} @@ -7449,6 +7291,20 @@ instruct loadNKlass(iRegNNoSp dst, memory4 mem) ins_pipe(iload_reg_mem); %} +instruct loadNKlassCompactHeaders(iRegNNoSp dst, memory4 mem, rFlagsReg cr) +%{ + match(Set dst (LoadNKlass mem)); + effect(KILL cr); + predicate(!needs_acquiring_load(n) && UseCompactObjectHeaders); + + ins_cost(4 * INSN_COST); + format %{ "ldrw $dst, $mem\t# compressed class ptr" %} + ins_encode %{ + __ load_nklass_compact($dst$$Register, $mem$$base$$Register, $mem$$index$$Register, $mem$$scale, $mem$$disp); + %} + ins_pipe(pipe_slow); +%} + // Load Float instruct loadF(vRegF dst, memory4 mem) %{ @@ -16409,34 +16265,69 @@ instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl) // ============================================================================ // inlined locking and unlocking -instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2, iRegPNoSp tmp3) %{ + predicate(LockingMode != LM_LIGHTWEIGHT); match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP tmp2); + effect(TEMP tmp, TEMP tmp2, TEMP tmp3); - // TODO - // identify correct cost ins_cost(5 * INSN_COST); - format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %} + format %{ "fastlock $object,$box\t! kills $tmp,$tmp2,$tmp3" %} - ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2)); + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register, $tmp3$$Register); + %} ins_pipe(pipe_serial); %} instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) %{ + predicate(LockingMode != LM_LIGHTWEIGHT); match(Set cr (FastUnlock object box)); effect(TEMP tmp, TEMP tmp2); ins_cost(5 * INSN_COST); format %{ "fastunlock $object,$box\t! 
kills $tmp, $tmp2" %} - ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2)); + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register); + %} ins_pipe(pipe_serial); %} +instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +%{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP tmp2); + + ins_cost(5 * INSN_COST); + format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %} + + ins_encode %{ + __ fast_lock_lightweight($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register); + %} + + ins_pipe(pipe_serial); +%} + +instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2) +%{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, TEMP tmp2); + + ins_cost(5 * INSN_COST); + format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %} + + ins_encode %{ + __ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp$$Register, $tmp2$$Register); + %} + + ins_pipe(pipe_serial); +%} // ============================================================================ // Safepoint Instructions diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp index 63f03db2dc5..81640ac0a22 100644 --- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp @@ -32,6 +32,7 @@ #include "c1/c1_Runtime1.hpp" #include "classfile/javaClasses.hpp" #include "nativeInst_aarch64.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "vmreg_aarch64.inline.hpp" @@ -253,6 +254,13 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); } +void LoadKlassStub::emit_code(LIR_Assembler* ce) { + assert(UseCompactObjectHeaders, "Only use with compact object headers"); + __ bind(_entry); + Register d = _result->as_register(); + __ ldr(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header))); + __ b(_continuation); +} // Implementation of patching: // - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index 5ce3ecf9e7a..28c85bb860f 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -437,7 +437,7 @@ int LIR_Assembler::emit_unwind_handler() { if (method()->is_synchronized()) { monitor_address(0, FrameMap::r0_opr); stub = new MonitorExitStub(FrameMap::r0_opr, true, 0); - __ unlock_object(r5, r4, r0, *stub->entry()); + __ unlock_object(r5, r4, r0, r6, *stub->entry()); __ bind(*stub->continuation()); } @@ -984,14 +984,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch __ ldr(dest->as_register(), as_Address(from_addr)); break; case T_ADDRESS: - // FIXME: OMG this is a horrible kludge. Any offset from an - // address that matches klass_offset_in_bytes() will be loaded - // as a word, not a long. 
- if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { - __ ldrw(dest->as_register(), as_Address(from_addr)); - } else { - __ ldr(dest->as_register(), as_Address(from_addr)); - } + __ ldr(dest->as_register(), as_Address(from_addr)); break; case T_INT: __ ldrw(dest->as_register(), as_Address(from_addr)); @@ -1030,10 +1023,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch // Load barrier has not yet been applied, so ZGC can't verify the oop here __ verify_oop(dest->as_register()); } - } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { - if (UseCompressedClassPointers) { - __ decode_klass_not_null(dest->as_register()); - } } } @@ -1249,7 +1238,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { len, tmp1, tmp2, - arrayOopDesc::header_size(op->type()), + arrayOopDesc::base_offset_in_bytes(op->type()), array_element_size(op->type()), op->klass()->as_register(), *op->stub()->entry()); @@ -2366,12 +2355,12 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { if (basic_type != T_OBJECT) { // Simple test for basic type arrays if (UseCompressedClassPointers) { - __ ldrw(tmp, src_klass_addr); - __ ldrw(rscratch1, dst_klass_addr); + __ load_nklass(tmp, src); + __ load_nklass(rscratch1, dst); __ cmpw(tmp, rscratch1); } else { - __ ldr(tmp, src_klass_addr); - __ ldr(rscratch1, dst_klass_addr); + __ ldr(tmp, Address(src, oopDesc::klass_offset_in_bytes())); + __ ldr(rscratch1, Address(dst, oopDesc::klass_offset_in_bytes())); __ cmp(tmp, rscratch1); } __ br(Assembler::NE, *stub->entry()); @@ -2495,36 +2484,14 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // but not necessarily exactly of type default_type. Label known_ok, halt; __ mov_metadata(tmp, default_type->constant_encoding()); - if (UseCompressedClassPointers) { - __ encode_klass_not_null(tmp); - } if (basic_type != T_OBJECT) { - - if (UseCompressedClassPointers) { - __ ldrw(rscratch1, dst_klass_addr); - __ cmpw(tmp, rscratch1); - } else { - __ ldr(rscratch1, dst_klass_addr); - __ cmp(tmp, rscratch1); - } + __ cmp_klass(dst, tmp, rscratch1); __ br(Assembler::NE, halt); - if (UseCompressedClassPointers) { - __ ldrw(rscratch1, src_klass_addr); - __ cmpw(tmp, rscratch1); - } else { - __ ldr(rscratch1, src_klass_addr); - __ cmp(tmp, rscratch1); - } + __ cmp_klass(src, tmp, rscratch1); __ br(Assembler::EQ, known_ok); } else { - if (UseCompressedClassPointers) { - __ ldrw(rscratch1, dst_klass_addr); - __ cmpw(tmp, rscratch1); - } else { - __ ldr(rscratch1, dst_klass_addr); - __ cmp(tmp, rscratch1); - } + __ cmp_klass(dst, tmp, rscratch1); __ br(Assembler::EQ, known_ok); __ cmp(src, dst); __ br(Assembler::EQ, known_ok); @@ -2572,29 +2539,57 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { Register obj = op->obj_opr()->as_register(); // may not be an oop Register hdr = op->hdr_opr()->as_register(); Register lock = op->lock_opr()->as_register(); - if (!UseFastLocking) { + Register temp = op->scratch_opr()->as_register(); + if (LockingMode == LM_MONITOR) { + if (op->info() != NULL) { + add_debug_info_for_null_check_here(op->info()); + __ null_check(obj, -1); + } __ b(*op->stub()->entry()); } else if (op->code() == lir_lock) { - Register scratch = noreg; - if (UseBiasedLocking) { - scratch = op->scratch_opr()->as_register(); - } assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); // add debug info for NullPointerException only if one is possible - int 
null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + int null_check_offset = __ lock_object(hdr, obj, lock, temp, *op->stub()->entry()); if (op->info() != NULL) { add_debug_info_for_null_check(null_check_offset, op->info()); } // done } else if (op->code() == lir_unlock) { assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); - __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + __ unlock_object(hdr, obj, lock, temp, *op->stub()->entry()); } else { Unimplemented(); } __ bind(*op->stub()->continuation()); } +void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { + Register obj = op->obj()->as_pointer_register(); + Register result = op->result_opr()->as_pointer_register(); + + CodeEmitInfo* info = op->info(); + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + if (UseCompressedClassPointers) { + if (UseCompactObjectHeaders) { + // Check if we can take the (common) fast path, if obj is unlocked. + __ ldr(result, Address(obj, oopDesc::mark_offset_in_bytes())); + __ tst(result, markWord::monitor_value); + __ br(Assembler::NE, *op->stub()->entry()); + __ bind(*op->stub()->continuation()); + + // Shift to get proper narrow Klass*. + __ lsr(result, result, markWord::klass_shift); + } else { + __ ldrw(result, Address (obj, oopDesc::klass_offset_in_bytes())); + } + __ decode_klass_not_null(result); + } else { + __ ldr(result, Address (obj, oopDesc::klass_offset_in_bytes())); + } +} void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index d2cbdbdba26..9769f7e3562 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -314,11 +314,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // "lock" stores the address of the monitor stack slot, so this is not an oop LIR_Opr lock = new_register(T_INT); - // Need a scratch register for biased locking - LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { - scratch = new_register(T_INT); - } + LIR_Opr scratch = new_register(T_INT); CodeEmitInfo* info_for_exception = NULL; if (x->needs_null_check()) { @@ -340,8 +336,9 @@ void LIRGenerator::do_MonitorExit(MonitorExit* x) { LIR_Opr lock = new_register(T_INT); LIR_Opr obj_temp = new_register(T_INT); + LIR_Opr scratch = new_register(T_INT); set_no_result(x); - monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); + monitor_exit(obj_temp, lock, syncTempOpr(), scratch, x->monitor_no()); } diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp index 3b17370304b..c7718e3ebfd 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -61,10 +61,10 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, } } -int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register temp, Label& slow_case) { const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + assert_different_registers(hdr, obj, disp_hdr, temp, rscratch2); Label done; int null_check_offset = -1; @@ -83,45 +83,49 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr } if (UseBiasedLocking) { - assert(scratch != noreg, "should have scratch register at this point"); - biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + assert(temp != noreg, "should have temp register at this point"); + biased_locking_enter(disp_hdr, obj, hdr, temp, false, done, &slow_case); } - // Load object header - ldr(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked - orr(hdr, hdr, markWord::unlocked_value); - // save unlocked object header into the displaced header location on the stack - str(hdr, Address(disp_hdr, 0)); - // test if object header is still the same (i.e. unlocked), and if so, store the - // displaced header address in the object header - if it is not the same, get the - // object header instead - lea(rscratch2, Address(obj, hdr_offset)); - cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); - // if the object header was the same, we're done - // if the object header was not the same, it is now in the hdr register - // => test if it is a stack pointer into the same stack (recursive locking), i.e.: - // - // 1) (hdr & aligned_mask) == 0 - // 2) sp <= hdr - // 3) hdr <= sp + page_size - // - // these 3 tests can be done by evaluating the following expression: - // - // (hdr - sp) & (aligned_mask - page_size) - // - // assuming both the stack pointer and page_size have their least - // significant 2 bits cleared and page_size is a power of 2 - mov(rscratch1, sp); - sub(hdr, hdr, rscratch1); - ands(hdr, hdr, aligned_mask - os::vm_page_size()); - // for recursive locking, the result is zero => save it in the displaced header - // location (NULL in the displaced hdr location indicates recursive locking) - str(hdr, Address(disp_hdr, 0)); - // otherwise we don't care about the result and handle locking via runtime call - cbnz(hdr, slow_case); - // done - bind(done); + if (LockingMode == LM_LIGHTWEIGHT) { + lightweight_lock(obj, hdr, temp, rscratch2, slow_case); + } else { + // Load object header + ldr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + orr(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + str(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(rscratch2, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + mov(rscratch1, sp); + sub(hdr, hdr, rscratch1); + ands(hdr, hdr, aligned_mask - os::vm_page_size()); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + str(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + cbnz(hdr, slow_case); + // done + bind(done); + } if (PrintBiasedLockingStatistics) { lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); addmw(Address(rscratch2, 0), 1, rscratch1); @@ -130,10 +134,10 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr } -void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Register temp, Label& slow_case) { const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + assert_different_registers(hdr, obj, disp_hdr, temp, rscratch2); Label done; if (UseBiasedLocking) { @@ -142,29 +146,37 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ biased_locking_exit(obj, hdr, done); } - // load displaced header - ldr(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - // if we had recursive locking, we are done - cbz(hdr, done); + if (LockingMode != LM_LIGHTWEIGHT) { + // load displaced header + ldr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is null we had recursive locking + // if we had recursive locking, we are done + cbz(hdr, done); + } + if (!UseBiasedLocking) { // load object ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); } verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to - // the displaced header, get the object header instead - // if the object header was not pointing to the displaced header, - // we do unlocking via runtime call - if (hdr_offset) { - lea(rscratch1, Address(obj, hdr_offset)); - cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + + if (LockingMode == LM_LIGHTWEIGHT) { + lightweight_unlock(obj, hdr, temp, rscratch2, slow_case); } else { - cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get 
the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(rscratch1, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + } + // done + bind(done); } - // done - bind(done); } @@ -179,7 +191,7 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len); - if (UseBiasedLocking && !len->is_valid()) { + if (UseCompactObjectHeaders || (UseBiasedLocking && !len->is_valid())) { assert_different_registers(obj, klass, len, t1, t2); ldr(t1, Address(klass, Klass::prototype_header_offset())); } else { @@ -188,16 +200,18 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register } str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); - if (UseCompressedClassPointers) { // Take care not to kill klass - encode_klass_not_null(t1, klass); - strw(t1, Address(obj, oopDesc::klass_offset_in_bytes())); - } else { - str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + if (!UseCompactObjectHeaders) { + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(t1, klass); + strw(t1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } } if (len->is_valid()) { strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); - } else if (UseCompressedClassPointers) { + } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) { store_klass_gap(obj, zr); } } @@ -216,6 +230,12 @@ void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes); br(Assembler::EQ, done); + // Zero first 4 bytes, if start offset is not word aligned. 
+ if (!is_aligned(hdr_size_in_bytes, BytesPerWord)) { + strw(zr, Address(obj, hdr_size_in_bytes)); + hdr_size_in_bytes += BytesPerInt; + } + // zero_words() takes ptr in r10 and count in words in r11 mov(rscratch1, len_in_bytes); lea(t1, Address(obj, hdr_size_in_bytes)); @@ -265,7 +285,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register verify_oop(obj); } -void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) { +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case) { assert_different_registers(obj, len, t1, t2, klass); // determine alignment mask @@ -278,7 +298,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, const Register arr_size = t2; // okay to be the same // align object end - mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + mov(arr_size, (int32_t)base_offset_in_bytes + MinObjAlignmentInBytesMask); add(arr_size, arr_size, len, ext::uxtw, f); andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask); @@ -287,7 +307,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, initialize_header(obj, klass, len, t1, t2); // clear rest of allocated space - initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); + initialize_body(obj, arr_size, base_offset_in_bytes, t1, t2); membar(StoreStore); @@ -304,8 +324,11 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { verify_oop(receiver); // explicit NULL check not needed since load from [klass_offset] causes a trap // check against inline cache - assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); - + if (UseCompactObjectHeaders) { + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::mark_offset_in_bytes()), "must add explicit null check"); + } else { + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); + } cmp_klass(receiver, iCache, rscratch1); } diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp index e10968240db..20bfc827741 100644 --- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp @@ -58,15 +58,16 @@ using MacroAssembler::null_check; // hdr : must be r0, contents destroyed // obj : must point to the object to lock, contents preserved // disp_hdr: must point to the displaced header location, contents preserved - // scratch : scratch register, contents destroyed + // temp : temporary register, must not be rscratch1 or rscratch2 // returns code offset at which to add null check debug information - int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + int lock_object (Register swap, Register obj, Register disp_hdr, Register temp, Label& slow_case); // unlocking // hdr : contents destroyed // obj : must point to the object to lock, contents preserved // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed - void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + // temp : temporary register, must not be rscratch1 or rscratch2 + void unlock_object(Register swap, Register obj, Register lock, 
Register temp, Label& slow_case); void initialize_object( Register obj, // result: pointer to object after successful allocation diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp new file mode 100644 index 00000000000..a595d951fb6 --- /dev/null +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2020, 2022 Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ masm. + +int C2SafepointPollStub::max_size() const { + return 20; +} + +void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + RuntimeAddress callback_addr(stub); + + __ bind(entry()); + InternalAddress safepoint_pc(masm.pc() - masm.offset() + _safepoint_offset); + __ adr(rscratch1, safepoint_pc); + __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ far_jump(callback_addr); +} + +int C2HandleAnonOMOwnerStub::max_size() const { + // Max size of stub has been determined by testing with 0, in which case + // C2CodeStubList::emit() will throw an assertion and report the actual size that + // is needed. + return 24; +} + +void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register mon = monitor(); + Register t = tmp(); + assert(t != noreg, "need tmp register"); + + // Fix owner to be the current thread. + __ str(rthread, Address(mon, ObjectMonitor::owner_offset_in_bytes())); + + // Pop owner object from lock-stack. 
+  __ ldrw(t, Address(rthread, JavaThread::lock_stack_top_offset()));
+  __ subw(t, t, oopSize);
+#ifdef ASSERT
+  __ str(zr, Address(rthread, t));
+#endif
+  __ strw(t, Address(rthread, JavaThread::lock_stack_top_offset()));
+
+  __ b(continuation());
+}
+
+int C2LoadNKlassStub::max_size() const {
+  return 8;
+}
+
+void C2LoadNKlassStub::emit(C2_MacroAssembler& masm) {
+  __ bind(entry());
+  Register d = dst();
+  __ ldr(d, Address(d, OM_OFFSET_NO_MONITOR_VALUE_TAG(header)));
+  __ b(continuation());
+}
+
+
+#undef __
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 9da93c99680..17d67778aad 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -25,10 +25,14 @@
 #include "precompiled.hpp"
 #include "asm/assembler.hpp"
 #include "asm/assembler.inline.hpp"
+#include "opto/c2_CodeStubs.hpp"
 #include "opto/c2_MacroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/output.hpp"
 #include "opto/intrinsicnode.hpp"
 #include "opto/subnode.hpp"
 #include "runtime/stubRoutines.hpp"
+#include "utilities/globalDefinitions.hpp"
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -42,6 +46,434 @@
 typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr);
 
+void C2_MacroAssembler::fast_lock(Register objectReg, Register boxReg, Register tmpReg,
+                                  Register tmp2Reg, Register tmp3Reg) {
+  Register oop = objectReg;
+  Register box = boxReg;
+  Register disp_hdr = tmpReg;
+  Register tmp = tmp2Reg;
+  Label cont;
+  Label object_has_monitor;
+  Label cas_failed;
+
+  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight");
+  assert_different_registers(oop, box, tmp, disp_hdr);
+
+  // Load markWord from object into displaced_header.
+  ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
+
+  if (DiagnoseSyncOnValueBasedClasses != 0) {
+    load_klass(tmp, oop);
+    ldrw(tmp, Address(tmp, Klass::access_flags_offset()));
+    tstw(tmp, JVM_ACC_IS_VALUE_BASED_CLASS);
+    br(Assembler::NE, cont);
+  }
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
+  }
+
+  // Check for existing monitor
+  tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
+
+  if (LockingMode == LM_MONITOR) {
+    tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
+    b(cont);
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
+    // Set tmp to be (markWord of object | UNLOCK_VALUE).
+    orr(tmp, disp_hdr, markWord::unlocked_value);
+
+    // Initialize the box. (Must happen before we update the object mark!)
+    str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // Compare object markWord with an unlocked value (tmp) and if
+    // equal exchange the stack address of our box with object markWord.
+    // On failure disp_hdr contains the possibly locked markWord.
+    cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
+            /*release*/ true, /*weak*/ false, disp_hdr);
+    br(Assembler::EQ, cont);
+
+    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+    // If the compare-and-exchange succeeded, then we found an unlocked
+    // object, have now locked it, and will continue at label cont.
+
+    bind(cas_failed);
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if the owner is self by comparing the value in the
+    // markWord of object (disp_hdr) with the stack pointer.
+    mov(rscratch1, sp);
+    sub(disp_hdr, disp_hdr, rscratch1);
+    mov(tmp, (address) (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
+    // If the condition is true we can store 0 as the
+    // displaced header in the box, which indicates that it is a recursive lock.
+    ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
+    str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+    b(cont);
+  }
+
+  // Handle existing monitor.
+  bind(object_has_monitor);
+
+  // The object's monitor m is unlocked iff m->owner == NULL,
+  // otherwise m->owner may contain a thread or a stack address.
+  //
+  // Try to CAS m->owner from NULL to current thread.
+  add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markWord::monitor_value));
+  cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
+          /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result
+
+  // Store a non-null value into the box to avoid looking like a re-entrant
+  // lock. The fast-path monitor unlock code checks for
+  // markWord::monitor_value so use markWord::unused_mark which has the
+  // relevant bit set, and also matches ObjectSynchronizer::enter.
+  mov(tmp, (address)markWord::unused_mark().value());
+  str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+  br(Assembler::EQ, cont); // CAS success means locking succeeded
+
+  cmp(rscratch1, rthread);
+  br(Assembler::NE, cont); // Check for recursive locking
+
+  // Recursive lock case
+  increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1);
+  // flag == EQ still from the cmp above, checking if this is a reentrant lock
+
+  bind(cont);
+  // flag == EQ indicates success
+  // flag == NE indicates failure
+}
+
+void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg, Register tmpReg,
+                                    Register tmp2Reg) {
+  Register oop = objectReg;
+  Register box = boxReg;
+  Register disp_hdr = tmpReg;
+  Register tmp = tmp2Reg;
+  Label cont;
+  Label object_has_monitor;
+
+  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
+  assert_different_registers(oop, box, tmp, disp_hdr);
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_exit(oop, tmp, cont);
+  }
+
+  if (LockingMode == LM_LEGACY) {
+    // Find the lock address and load the displaced header from the stack.
+    ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+
+    // If the displaced header is 0, we have a recursive unlock.
+    cmp(disp_hdr, zr);
+    br(Assembler::EQ, cont);
+  }
+
+  // Handle existing monitor.
+  ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
+  tbnz(tmp, exact_log2(markWord::monitor_value), object_has_monitor);
+
+  if (LockingMode == LM_MONITOR) {
+    tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
+    b(cont);
+  } else {
+    assert(LockingMode == LM_LEGACY, "must be");
+    // Check if it is still a lightweight lock; this is true if we
+    // see the stack address of the basicLock in the markWord of the
+    // object.
+
+    cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
+            /*release*/ true, /*weak*/ false, tmp);
+    b(cont);
+  }
+
+  assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+
+  // Handle existing monitor.
+  bind(object_has_monitor);
+  STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
+  add(tmp, tmp, -(int)markWord::monitor_value); // monitor
+
+  ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+
+  Label notRecursive;
+  cbz(disp_hdr, notRecursive);
+
+  // Recursive lock
+  sub(disp_hdr, disp_hdr, 1u);
+  str(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
+  cmp(disp_hdr, disp_hdr); // Sets flags for result
+  b(cont);
+
+  bind(notRecursive);
+  ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
+  ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
+  orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
+  cmp(rscratch1, zr); // Sets flags for result
+  cbnz(rscratch1, cont);
+  // need a release store here
+  lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+  stlr(zr, tmp); // set unowned
+
+  bind(cont);
+  // flag == EQ indicates success
+  // flag == NE indicates failure
+}
+
+void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register t1,
+                                              Register t2, Register t3) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+  assert_different_registers(obj, t1, t2, t3);
+
+  // Handle inflated monitor.
+  Label inflated;
+  // Finish fast lock successfully. MUST branch to with flag == EQ
+  Label locked;
+  // Finish fast lock unsuccessfully. MUST branch to with flag == NE
+  Label slow_path;
+
+  if (DiagnoseSyncOnValueBasedClasses != 0) {
+    load_klass(t1, obj);
+    ldrw(t1, Address(t1, Klass::access_flags_offset()));
+    tstw(t1, JVM_ACC_IS_VALUE_BASED_CLASS);
+    br(Assembler::NE, slow_path);
+  }
+
+  const Register t1_mark = t1;
+
+  { // Lightweight locking
+
+    // Push lock to the lock stack and finish successfully. MUST branch to with flag == EQ
+    Label push;
+
+    const Register t2_top = t2;
+    const Register t3_t = t3;
+
+    // Check if lock-stack is full.
+    ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+    cmpw(t2_top, (unsigned)LockStack::end_offset() - 1);
+    br(Assembler::GT, slow_path);
+
+    // Check if recursive.
+    subw(t3_t, t2_top, oopSize);
+    ldr(t3_t, Address(rthread, t3_t));
+    cmp(obj, t3_t);
+    br(Assembler::EQ, push);
+
+    // Relaxed normal load to check for monitor. Optimization for monitor case.
+    ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
+    tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated);
+
+    // Not inflated
+    assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid a lea");
+
+    // Try to lock. Transition lock-bits 0b01 => 0b00
+    orr(t1_mark, t1_mark, markWord::unlocked_value);
+    eor(t3_t, t1_mark, markWord::unlocked_value);
+    cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword,
+            /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg);
+    br(Assembler::NE, slow_path);
+
+    bind(push);
+    // After successful lock, push object on lock-stack.
+    str(obj, Address(rthread, t2_top));
+    addw(t2_top, t2_top, oopSize);
+    strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset()));
+    b(locked);
+  }
+
+  { // Handle inflated monitor.
+    bind(inflated);
+
+    // mark contains the tagged ObjectMonitor*.
+    const Register t1_tagged_monitor = t1_mark;
+    const uintptr_t monitor_tag = markWord::monitor_value;
+    const Register t2_owner_addr = t2;
+    const Register t3_owner = t3;
+
+    // Compute owner address.
+    lea(t2_owner_addr, Address(t1_tagged_monitor, ObjectMonitor::owner_offset_in_bytes() - monitor_tag));
+
+    // CAS owner (null => current thread).
+ cmpxchg(t2_owner_addr, zr, rthread, Assembler::xword, /*acquire*/ true, + /*release*/ false, /*weak*/ false, t3_owner); + br(Assembler::EQ, locked); + + // Check if recursive. + cmp(t3_owner, rthread); + br(Assembler::NE, slow_path); + + // Recursive. + increment(Address(t1_tagged_monitor, ObjectMonitor::recursions_offset_in_bytes() - monitor_tag), 1); + } + + bind(locked); +#ifdef ASSERT + // Check that locked label is reached with Flags == EQ. + Label flag_correct; + br(Assembler::EQ, flag_correct); + stop("Fast Lock Flag != EQ"); +#endif + + bind(slow_path); +#ifdef ASSERT + // Check that slow_path label is reached with Flags == NE. + br(Assembler::NE, flag_correct); + stop("Fast Lock Flag != NE"); + bind(flag_correct); +#endif + // C2 uses the value of Flags (NE vs EQ) to determine the continuation. +} + +void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register t1, Register t2, + Register t3) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + assert_different_registers(obj, t1, t2, t3); + + // Handle inflated monitor. + Label inflated, inflated_load_monitor; + // Finish fast unlock successfully. MUST branch to with flag == EQ + Label unlocked; + // Finish fast unlock unsuccessfully. MUST branch to with flag == NE + Label slow_path; + + const Register t1_mark = t1; + const Register t2_top = t2; + const Register t3_t = t3; + + { // Lightweight unlock + + // Check if obj is top of lock-stack. + ldrw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset())); + subw(t2_top, t2_top, oopSize); + ldr(t3_t, Address(rthread, t2_top)); + cmp(obj, t3_t); + // Top of lock stack was not obj. Must be monitor. + br(Assembler::NE, inflated_load_monitor); + + // Pop lock-stack. + DEBUG_ONLY(str(zr, Address(rthread, t2_top));) + strw(t2_top, Address(rthread, JavaThread::lock_stack_top_offset())); + + // Check if recursive. + subw(t3_t, t2_top, oopSize); + ldr(t3_t, Address(rthread, t3_t)); + cmp(obj, t3_t); + br(Assembler::EQ, unlocked); + + // Not recursive. + // Load Mark. + ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); + + // Check header for monitor (0b10). + tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated); + + // Try to unlock. Transition lock bits 0b00 => 0b01 + assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); + orr(t3_t, t1_mark, markWord::unlocked_value); + cmpxchg(/*addr*/ obj, /*expected*/ t1_mark, /*new*/ t3_t, Assembler::xword, + /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg); + br(Assembler::EQ, unlocked); + + // Compare and exchange failed. + // Restore lock-stack and handle the unlock in runtime. + DEBUG_ONLY(str(obj, Address(rthread, t2_top));) + addw(t2_top, t2_top, oopSize); + str(t2_top, Address(rthread, JavaThread::lock_stack_top_offset())); + b(slow_path); + } + + + { // Handle inflated monitor. + bind(inflated_load_monitor); + ldr(t1_mark, Address(obj, oopDesc::mark_offset_in_bytes())); +#ifdef ASSERT + tbnz(t1_mark, exact_log2(markWord::monitor_value), inflated); + stop("Fast Unlock not monitor"); +#endif + + bind(inflated); + +#ifdef ASSERT + Label check_done; + subw(t2_top, t2_top, oopSize); + cmpw(t2_top, in_bytes(JavaThread::lock_stack_base_offset())); + br(Assembler::LT, check_done); + ldr(t3_t, Address(rthread, t2_top)); + cmp(obj, t3_t); + br(Assembler::NE, inflated); + stop("Fast Unlock lock on stack"); + bind(check_done); +#endif + + // mark contains the tagged ObjectMonitor*. 
+ const Register t1_monitor = t1_mark; + const uintptr_t monitor_tag = markWord::monitor_value; + + // Untag the monitor. + sub(t1_monitor, t1_mark, monitor_tag); + + const Register t2_recursions = t2; + Label not_recursive; + + // Check if recursive. + ldr(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset_in_bytes())); + cbz(t2_recursions, not_recursive); + + // Recursive unlock. + sub(t2_recursions, t2_recursions, 1u); + str(t2_recursions, Address(t1_monitor, ObjectMonitor::recursions_offset_in_bytes())); + // Set flag == EQ + cmp(t2_recursions, t2_recursions); + b(unlocked); + + bind(not_recursive); + + Label release; + const Register t2_owner_addr = t2; + + // Compute owner address. + lea(t2_owner_addr, Address(t1_monitor, ObjectMonitor::owner_offset_in_bytes())); + + // Check if the entry lists are empty. + ldr(rscratch1, Address(t1_monitor, ObjectMonitor::EntryList_offset_in_bytes())); + ldr(t3_t, Address(t1_monitor, ObjectMonitor::cxq_offset_in_bytes())); + orr(rscratch1, rscratch1, t3_t); + cmp(rscratch1, zr); + br(Assembler::EQ, release); + + // The owner may be anonymous and we removed the last obj entry in + // the lock-stack. This loses the information about the owner. + // Write the thread to the owner field so the runtime knows the owner. + str(rthread, Address(t2_owner_addr)); + b(slow_path); + + bind(release); + // Set owner to null. + // Release to satisfy the JMM + stlr(zr, t2_owner_addr); + } + + bind(unlocked); +#ifdef ASSERT + // Check that unlocked label is reached with Flags == EQ. + Label flag_correct; + br(Assembler::EQ, flag_correct); + stop("Fast Unlock Flag != EQ"); +#endif + + bind(slow_path); +#ifdef ASSERT + // Check that slow_path label is reached with Flags == NE. + br(Assembler::NE, flag_correct); + stop("Fast Unlock Flag != NE"); + bind(flag_correct); +#endif + // C2 uses the value of Flags (NE vs EQ) to determine the continuation. +} + // Search for str1 in str2 and return index or -1 void C2_MacroAssembler::string_indexof(Register str2, Register str1, Register cnt2, Register cnt1, @@ -875,3 +1307,30 @@ void C2_MacroAssembler::neon_compare(FloatRegister dst, BasicType bt, FloatRegis } } } + +void C2_MacroAssembler::load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp) { + C2LoadNKlassStub* stub = new (Compile::current()->comp_arena()) C2LoadNKlassStub(dst); + Compile::current()->output()->add_stub(stub); + + // Note: Don't clobber obj anywhere in that method! + + // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract + // obj-start, so that we can load from the object's mark-word instead. Usually the address + // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2 + // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and + // then passes that register as obj and 0 in disp. The following code extracts the base + // and offset to load the mark-word. + int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes(); + if (index == noreg) { + ldr(dst, Address(obj, offset)); + } else { + lea(dst, Address(obj, index, Address::lsl(scale))); + ldr(dst, Address(dst, offset)); + } + // NOTE: We can't use tbnz here, because the target is sometimes too far away + // and cannot be encoded. 
+ tst(dst, markWord::monitor_value); + br(Assembler::NE, stub->entry()); + bind(stub->continuation()); + lsr(dst, dst, markWord::klass_shift); +} diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index d7d381116b2..7cd7f48f743 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,12 @@ // C2_MacroAssembler contains high-level macros for C2 public: + // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. + void fast_lock(Register object, Register box, Register tmp, Register tmp2, Register tmp3); + void fast_unlock(Register object, Register box, Register tmp, Register tmp2); + // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file. + void fast_lock_lightweight(Register object, Register t1, Register t2, Register t3); + void fast_unlock_lightweight(Register object, Register t1, Register t2, Register t3); void string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, @@ -53,4 +59,6 @@ void neon_compare(FloatRegister dst, BasicType bt, FloatRegister src1, FloatRegister src2, int cond, bool isQ); + void load_nklass_compact(Register dst, Register obj, Register index, int scale, int disp); + #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index bbecb7d3581..a0064e6b78c 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -721,12 +721,12 @@ void InterpreterMacroAssembler::remove_activation( // // Kills: // r0 -// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, .. (param regs) // rscratch1, rscratch2 (scratch regs) void InterpreterMacroAssembler::lock_object(Register lock_reg) { assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); @@ -736,6 +736,8 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) const Register swap_reg = r0; const Register tmp = c_rarg2; const Register obj_reg = c_rarg3; // Will contain the oop + const Register tmp2 = c_rarg4; + const Register tmp3 = c_rarg5; const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); @@ -754,82 +756,92 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) br(Assembler::NE, slow_case); } - if (UseBiasedLocking) { - biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); - } - - // Load (object->mark() | 1) into swap_reg - ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - orr(swap_reg, rscratch1, 1); - - // Save (object->mark() | 1) into BasicLock's displaced header - str(swap_reg, Address(lock_reg, mark_offset)); - - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); - - Label fail; - if (PrintBiasedLockingStatistics) { - Label fast; - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); - bind(fast); - atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1, tmp); + if (LockingMode == LM_LIGHTWEIGHT) { + lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case); b(done); - bind(fail); } else { - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); - } + if (UseBiasedLocking) { + biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, done, &slow_case); + } - // Fast check for recursive lock. - // - // Can apply the optimization only if this is a stack lock - // allocated in this thread. For efficiency, we can focus on - // recently allocated stack locks (instead of reading the stack - // base and checking whether 'mark' points inside the current - // thread stack): - // 1) (mark & 7) == 0, and - // 2) sp <= mark < mark + os::pagesize() - // - // Warning: sp + os::pagesize can overflow the stack base. We must - // neither apply the optimization for an inflated lock allocated - // just above the thread stack (this is why condition 1 matters) - // nor apply the optimization if the stack lock is inside the stack - // of another thread. The latter is avoided even in case of overflow - // because we have guard pages at the end of all stacks. Hence, if - // we go over the stack base and hit the stack of another thread, - // this should not be in a writeable area that could contain a - // stack lock allocated by that thread. As a consequence, a stack - // lock less than page size away from sp is guaranteed to be - // owned by the current thread. - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - sp) & (7 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 3 bits clear. 
- // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg - // NOTE2: aarch64 does not like to subtract sp from rn so take a - // copy - mov(rscratch1, sp); - sub(swap_reg, swap_reg, rscratch1); - ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size())); - - // Save the test result, for recursive case, the result is zero - str(swap_reg, Address(lock_reg, mark_offset)); - - if (PrintBiasedLockingStatistics) { - br(Assembler::NE, slow_case); - atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), - rscratch2, rscratch1, tmp); - } - br(Assembler::EQ, done); + // Load (object->mark() | 1) into swap_reg + ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + orr(swap_reg, rscratch1, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + str(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + + Label fail; + if (PrintBiasedLockingStatistics) { + Label fast; + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); + bind(fast); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1, tmp); + b(done); + bind(fail); + } else { + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + } + // Fast check for recursive lock. + // + // Can apply the optimization only if this is a stack lock + // allocated in this thread. For efficiency, we can focus on + // recently allocated stack locks (instead of reading the stack + // base and checking whether 'mark' points inside the current + // thread stack): + // 1) (mark & 7) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // + // Warning: sp + os::pagesize can overflow the stack base. We must + // neither apply the optimization for an inflated lock allocated + // just above the thread stack (this is why condition 1 matters) + // nor apply the optimization if the stack lock is inside the stack + // of another thread. The latter is avoided even in case of overflow + // because we have guard pages at the end of all stacks. Hence, if + // we go over the stack base and hit the stack of another thread, + // this should not be in a writeable area that could contain a + // stack lock allocated by that thread. As a consequence, a stack + // lock less than page size away from sp is guaranteed to be + // owned by the current thread. + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. 
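The expression spelled out in the comment above is compact enough to be easy to misread, so here is a standalone sketch of it (not HotSpot code; it assumes 64-bit unsigned pointer arithmetic and an illustrative 4096-byte page). It shows how ((mark - sp) & (7 - os::vm_page_size())) checks the alignment and the one-page distance from sp in a single AND:

// Standalone sketch of the recursive stack-lock test described above.
// Names and the 4096-byte page size are illustrative only.
#include <cstdint>
#include <cstdio>

static bool looks_like_recursive_stack_lock(uintptr_t mark, uintptr_t sp) {
  const uintptr_t page_size = 4096;
  // (7 - page_size) wraps to a mask with bits 0..2 and bits >= 12 set, so the
  // result is zero iff (mark & 7) == 0 and 0 <= mark - sp < page_size.
  return ((mark - sp) & (7 - page_size)) == 0;
}

int main() {
  uintptr_t sp = 0x0000700000001000;
  std::printf("%d\n", looks_like_recursive_stack_lock(sp + 64, sp));    // 1: lock slot on this stack page
  std::printf("%d\n", looks_like_recursive_stack_lock(sp + 8192, sp));  // 0: more than a page above sp
  std::printf("%d\n", looks_like_recursive_stack_lock(sp + 66, sp));    // 0: not 8-byte aligned
  return 0;
}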
+ // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg + // NOTE2: aarch64 does not like to subtract sp from rn so take a + // copy + mov(rscratch1, sp); + sub(swap_reg, swap_reg, rscratch1); + ands(swap_reg, swap_reg, (uint64_t)(7 - os::vm_page_size())); + + // Save the test result, for recursive case, the result is zero + str(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + br(Assembler::NE, slow_case); + atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1, tmp); + } + br(Assembler::EQ, done); + } bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, - CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - lock_reg); + if (LockingMode == LM_LIGHTWEIGHT) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + obj_reg); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } bind(done); } @@ -851,7 +863,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); } else { Label done; @@ -859,12 +871,15 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) const Register swap_reg = r0; const Register header_reg = c_rarg2; // Will contain the old oopMark const Register obj_reg = c_rarg3; // Will contain the oop + const Register tmp_reg = c_rarg4; // Temporary used by lightweight_unlock save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %r0 - lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (LockingMode != LM_LIGHTWEIGHT) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %r0 + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(%c_rarg3) ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -872,20 +887,26 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Free entry str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - if (UseBiasedLocking) { - biased_locking_exit(obj_reg, header_reg, done); - } - - // Load the old header from BasicLock structure - ldr(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + if (LockingMode == LM_LIGHTWEIGHT) { + Label slow_case; + lightweight_unlock(obj_reg, header_reg, swap_reg, tmp_reg, slow_case); + b(done); + bind(slow_case); + } else { + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } - // Test for recursion - cbz(header_reg, done); + // Load the old header from BasicLock structure + ldr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); - // Atomic swap back the old header - cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + // Test for recursion + cbz(header_reg, done); + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + } // Call the runtime routine for slow case. 
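Throughout this hunk the old boolean UseHeavyMonitors checks become a three-way LockingMode dispatch, and the lightweight slow path calls InterpreterRuntime::monitorenter_obj with the object itself instead of monitorenter with the BasicObjectLock. As a quick orientation, a minimal sketch of that dispatch shape in plain C++ follows; the enumerator names mirror the patch, but the constants and the helper are illustrative, not the VM's:

// Illustrative-only sketch of the three locking modes used in this patch.
#include <cstdio>

enum LockingMode { LM_MONITOR = 0, LM_LEGACY = 1, LM_LIGHTWEIGHT = 2 };

static void lock_object(LockingMode mode) {
  switch (mode) {
    case LM_MONITOR:
      std::puts("no fast path: always call the runtime monitorenter");
      break;
    case LM_LEGACY:
      std::puts("stack-lock fast path: displaced mark word in a BasicLock on the frame");
      break;
    case LM_LIGHTWEIGHT:
      std::puts("lock-stack fast path: lightweight_lock, slow path gets the object itself");
      break;
  }
}

int main() {
  lock_object(LM_LIGHTWEIGHT);
  return 0;
}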
str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 676a548d039..72cafd48195 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -23,8 +23,6 @@ * */ -#include - #include "precompiled.hpp" #include "jvm.h" #include "asm/assembler.hpp" @@ -50,9 +48,11 @@ #include "runtime/icache.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/jniHandles.inline.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "runtime/thread.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/powerOfTwo.hpp" #ifdef COMPILER1 #include "c1/c1_LIRAssembler.hpp" @@ -64,6 +64,8 @@ #include "opto/output.hpp" #endif +#include + #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ #else @@ -2489,6 +2491,10 @@ void MacroAssembler::cmpxchg(Register addr, Register expected, mov(result, expected); lse_cas(result, new_val, addr, size, acquire, release, /*not_pair*/ true); compare_eq(result, expected, size); +#ifdef ASSERT + // Poison rscratch1 which is written on !UseLSE branch + mov(rscratch1, 0x1f1f1f1f1f1f1f1f); +#endif } else { Label retry_load, done; if ((VM_Version::features() & VM_Version::CPU_STXR_PREFETCH)) @@ -3805,9 +3811,46 @@ void MacroAssembler::load_method_holder(Register holder, Register method) { ldr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* } -void MacroAssembler::load_klass(Register dst, Register src) { - if (UseCompressedClassPointers) { +// Loads the obj's Klass* into dst. +// src and dst must be distinct registers +// Preserves all registers (incl src, rscratch1 and rscratch2), but clobbers condition flags +void MacroAssembler::load_nklass(Register dst, Register src) { + assert(UseCompressedClassPointers, "expects UseCompressedClassPointers"); + + if (!UseCompactObjectHeaders) { ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); + return; + } + + Label fast; + + // Check if we can take the (common) fast path, if obj is unlocked. + ldr(dst, Address(src, oopDesc::mark_offset_in_bytes())); + tbz(dst, exact_log2(markWord::monitor_value), fast); + + // Fetch displaced header + ldr(dst, Address(dst, OM_OFFSET_NO_MONITOR_VALUE_TAG(header))); + + // Fast-path: shift and decode Klass*. 
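At this point load_nklass holds either the object's own mark word or, for an inflated lock, the displaced header fetched from the ObjectMonitor, and the narrow Klass pointer is recovered with a single right shift by markWord::klass_shift. A standalone sketch of that decoding, assuming a 64-bit platform; the shift value of 32 and the monitor stand-in are assumptions for the example, only the fast-path/displaced-header shape follows the assembly:

// Sketch of decoding a narrow klass from a compact object header.
#include <cstdint>
#include <cassert>
#include <cstdio>

constexpr uint64_t monitor_value = 0b10;  // lock bits meaning "inflated"
constexpr unsigned klass_shift   = 32;    // illustrative position of the narrow klass

struct FakeMonitor { uint64_t displaced_header; };  // stand-in for ObjectMonitor::header

static uint32_t narrow_klass_of(uint64_t mark) {
  if ((mark & monitor_value) == monitor_value) {
    // Inflated: the real header was displaced into the monitor.
    FakeMonitor* mon = reinterpret_cast<FakeMonitor*>(mark & ~monitor_value);
    mark = mon->displaced_header;
  }
  return static_cast<uint32_t>(mark >> klass_shift);  // fast path: one shift
}

int main() {
  uint64_t plain_mark = (uint64_t{0x1234} << klass_shift) | 0b01;  // unlocked, klass 0x1234
  assert(narrow_klass_of(plain_mark) == 0x1234);

  FakeMonitor mon{plain_mark};
  uint64_t inflated_mark = reinterpret_cast<uint64_t>(&mon) | monitor_value;
  assert(narrow_klass_of(inflated_mark) == 0x1234);
  std::printf("ok\n");
  return 0;
}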
+ bind(fast); + lsr(dst, dst, markWord::klass_shift); +} + +void MacroAssembler::load_klass(Register dst, Register src, bool null_check_src) { + if (null_check_src) { + if (UseCompactObjectHeaders) { + null_check(src, oopDesc::mark_offset_in_bytes()); + } else { + null_check(src, oopDesc::klass_offset_in_bytes()); + } + } + + if (UseCompressedClassPointers) { + if (UseCompactObjectHeaders) { + load_nklass(dst, src); + } else { + ldrw(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } decode_klass_not_null(dst); } else { ldr(dst, Address(src, oopDesc::klass_offset_in_bytes())); @@ -3846,8 +3889,13 @@ void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { } void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) { + assert_different_registers(oop, trial_klass, tmp); if (UseCompressedClassPointers) { - ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + if (UseCompactObjectHeaders) { + load_nklass(tmp, oop); + } else { + ldrw(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + } if (CompressedKlassPointers::base() == NULL) { cmp(trial_klass, tmp, LSL, CompressedKlassPointers::shift()); return; @@ -3864,11 +3912,6 @@ void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) cmp(trial_klass, tmp); } -void MacroAssembler::load_prototype_header(Register dst, Register src) { - load_klass(dst, src); - ldr(dst, Address(dst, Klass::prototype_header_offset())); -} - void MacroAssembler::store_klass(Register dst, Register src) { // FIXME: Should this be a store release? concurrent gcs assumes // klass length is valid if klass field is not null. @@ -3887,6 +3930,11 @@ void MacroAssembler::store_klass_gap(Register dst, Register src) { } } +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ldr(dst, Address(dst, Klass::prototype_header_offset())); +} + // Algorithm must match CompressedOops::encode. void MacroAssembler::encode_heap_oop(Register d, Register s) { #ifdef ASSERT @@ -5366,3 +5414,124 @@ void MacroAssembler::spin_wait() { } } } + +// Implements lightweight-locking. +// +// - obj: the object to be locked +// - t1, t2, t3: temporary registers, will be destroyed +// - slow: branched to if locking fails, absolute offset may larger than 32KB (imm14 encoding). +void MacroAssembler::lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, t1, t2, t3, rscratch1); + + Label push; + const Register top = t1; + const Register mark = t2; + const Register t = t3; + + // Preload the markWord. It is important that this is the first + // instruction emitted as it is part of C1's null check semantics. + ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + + // Check if the lock-stack is full. + ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(top, (unsigned)LockStack::end_offset()); + br(Assembler::GE, slow); + + // Check for recursion. + subw(t, top, oopSize); + ldr(t, Address(rthread, t)); + cmp(obj, t); + br(Assembler::EQ, push); + + // Check header for monitor (0b10). + tst(mark, markWord::monitor_value); + br(Assembler::NE, slow); + + // Try to lock. 
Transition lock bits 0b01 => 0b00 + assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); + orr(mark, mark, markWord::unlocked_value); + eor(t, mark, markWord::unlocked_value); + cmpxchg(/*addr*/ obj, /*expected*/ mark, /*new*/ t, Assembler::xword, + /*acquire*/ true, /*release*/ false, /*weak*/ false, noreg); + br(Assembler::NE, slow); + + bind(push); + // After successful lock, push object on lock-stack. + str(obj, Address(rthread, top)); + addw(top, top, oopSize); + strw(top, Address(rthread, JavaThread::lock_stack_top_offset())); +} + +// Implements lightweight-unlocking. +// +// - obj: the object to be unlocked +// - t1, t2, t3: temporary registers +// - slow: branched to if unlocking fails, absolute offset may larger than 32KB (imm14 encoding). +void MacroAssembler::lightweight_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + // cmpxchg clobbers rscratch1. + assert_different_registers(obj, t1, t2, t3, rscratch1); + +#ifdef ASSERT + { + // Check for lock-stack underflow. + Label stack_ok; + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(t1, (unsigned)LockStack::start_offset()); + br(Assembler::GE, stack_ok); + STOP("Lock-stack underflow"); + bind(stack_ok); + } +#endif + + Label unlocked, push_and_slow; + const Register top = t1; + const Register mark = t2; + const Register t = t3; + + // Check if obj is top of lock-stack. + ldrw(top, Address(rthread, JavaThread::lock_stack_top_offset())); + subw(top, top, oopSize); + ldr(t, Address(rthread, top)); + cmp(obj, t); + br(Assembler::NE, slow); + + // Pop lock-stack. + DEBUG_ONLY(str(zr, Address(rthread, top));) + strw(top, Address(rthread, JavaThread::lock_stack_top_offset())); + + // Check if recursive. + subw(t, top, oopSize); + ldr(t, Address(rthread, t)); + cmp(obj, t); + br(Assembler::EQ, unlocked); + + // Not recursive. Check header for monitor (0b10). + ldr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + tbnz(mark, log2i_exact(markWord::monitor_value), push_and_slow); + +#ifdef ASSERT + // Check header not unlocked (0b01). + Label not_unlocked; + tbz(mark, log2i_exact(markWord::unlocked_value), not_unlocked); + stop("lightweight_unlock already unlocked"); + bind(not_unlocked); +#endif + + // Try to unlock. Transition lock bits 0b00 => 0b01 + assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea"); + orr(t, mark, markWord::unlocked_value); + cmpxchg(obj, mark, t, Assembler::xword, + /*acquire*/ false, /*release*/ true, /*weak*/ false, noreg); + br(Assembler::EQ, unlocked); + + bind(push_and_slow); + // Restore lock-stack and handle the unlock in runtime. + DEBUG_ONLY(str(obj, Address(rthread, top));) + addw(top, top, oopSize); + strw(top, Address(rthread, JavaThread::lock_stack_top_offset())); + b(slow); + + bind(unlocked); +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index e403289e22f..a150aa07713 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
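Next to the mark-word CAS, the other half of lightweight_lock/lightweight_unlock above is the per-thread lock stack: locking pushes the object, unlocking requires the object to be on top, and a repeated entry directly below the top marks a recursive lock. A minimal single-threaded sketch of just that bookkeeping; the header CAS, inflation and the fixed capacity are deliberately simplified assumptions:

// Single-threaded sketch of the lock-stack bookkeeping used by the
// lightweight locking fast path; header CAS and inflation are omitted.
#include <array>
#include <cassert>
#include <cstdio>

struct Obj { int id; };

class LockStack {
  std::array<Obj*, 8> _slots{};   // capacity is illustrative
  int _top = 0;
public:
  bool try_lock(Obj* o) {
    if (_top == (int)_slots.size()) return false;         // full -> slow path
    bool recursive = _top > 0 && _slots[_top - 1] == o;    // same object already on top
    if (!recursive) {
      // real code would now CAS the header from "unlocked" (0b01) to "locked" (0b00)
    }
    _slots[_top++] = o;                                    // push in both cases
    return true;
  }
  bool try_unlock(Obj* o) {
    if (_top == 0 || _slots[_top - 1] != o) return false;  // not ours / not on top -> slow path
    --_top;                                                // pop
    bool still_held = _top > 0 && _slots[_top - 1] == o;   // recursive unlock
    if (!still_held) {
      // real code would now CAS the header back to "unlocked" (0b01)
    }
    return true;
  }
};

int main() {
  Obj a{1};
  LockStack ls;
  assert(ls.try_lock(&a));
  assert(ls.try_lock(&a));     // recursive
  assert(ls.try_unlock(&a));   // inner unlock, still held
  assert(ls.try_unlock(&a));   // outer unlock
  assert(!ls.try_unlock(&a));  // unbalanced: would go to the slow path
  std::puts("ok");
  return 0;
}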
* @@ -823,7 +823,8 @@ class MacroAssembler: public Assembler { void load_method_holder(Register holder, Register method); // oop manipulations - void load_klass(Register dst, Register src); + void load_nklass(Register dst, Register src); + void load_klass(Register dst, Register src, bool null_check = false); void store_klass(Register dst, Register src); void cmp_klass(Register oop, Register trial_klass, Register tmp); @@ -850,10 +851,10 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. void store_heap_oop_null(Address dst); - void load_prototype_header(Register dst, Register src); - void store_klass_gap(Register dst, Register src); + void load_prototype_header(Register dst, Register src); + // This dummy is to prevent a call to store_heap_oop from // converting a zero (like NULL) into a Register by giving // the compiler two choices it can't resolve @@ -1420,6 +1421,9 @@ class MacroAssembler: public Assembler { // Code for java.lang.Thread::onSpinWait() intrinsic. void spin_wait(); + void lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow); + void lightweight_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow); + private: // Check the current thread doesn't need a cross modify fence. void verify_cross_modify_fence_not_required() PRODUCT_RETURN; diff --git a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp index 415d3774b9c..759b1fdeb08 100644 --- a/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/methodHandles_aarch64.cpp @@ -307,8 +307,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, __ null_check(receiver_reg); } else { // load receiver klass itself - __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); - __ load_klass(temp1_recv_klass, receiver_reg); + __ load_klass(temp1_recv_klass, receiver_reg, true); __ verify_klass_ptr(temp1_recv_klass); } BLOCK_COMMENT("check_receiver {"); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index da1584d7969..52d945a6191 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved. * Copyright (c) 2021, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
@@ -1753,6 +1753,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register obj_reg = r19; // Will contain the oop const Register lock_reg = r13; // Address of compiler lock object (BasicLock) const Register old_hdr = r13; // value of old header at unlock time + const Register lock_tmp = r14; // Temporary used by lightweight_lock/unlock const Register tmp = lr; Label slow_path_lock; @@ -1773,41 +1774,47 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ ldr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); - } - - // Load (object->mark() | 1) into swap_reg %r0 - __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ orr(swap_reg, rscratch1, 1); + if (LockingMode == LM_MONITOR) { + __ b(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp, false, lock_done, &slow_path_lock); + } - // Save (object->mark() | 1) into BasicLock's displaced header - __ str(swap_reg, Address(lock_reg, mark_word_offset)); + // Load (object->mark() | 1) into swap_reg %r0 + __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ orr(swap_reg, rscratch1, 1); - // src -> dest iff dest == r0 else r0 <- dest - { Label here; - __ cmpxchg_obj_header(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL); - } + // Save (object->mark() | 1) into BasicLock's displaced header + __ str(swap_reg, Address(lock_reg, mark_word_offset)); - // Hmm should this move to the slow path code area??? + // src -> dest iff dest == r0 else r0 <- dest + { Label here; + __ cmpxchg_obj_header(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL); + } - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) sp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - sp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg + // Hmm should this move to the slow path code area??? - __ sub(swap_reg, sp, swap_reg); - __ neg(swap_reg, swap_reg); - __ ands(swap_reg, swap_reg, 3 - os::vm_page_size()); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. 
+ // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg - // Save the test result, for recursive case, the result is zero - __ str(swap_reg, Address(lock_reg, mark_word_offset)); - __ br(Assembler::NE, slow_path_lock); + __ sub(swap_reg, sp, swap_reg); + __ neg(swap_reg, swap_reg); + __ ands(swap_reg, swap_reg, 3 - os::vm_page_size()); + // Save the test result, for recursive case, the result is zero + __ str(swap_reg, Address(lock_reg, mark_word_offset)); + __ br(Assembler::NE, slow_path_lock); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock); + } // Slow path will re-enter here __ bind(lock_done); @@ -1929,10 +1936,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ biased_locking_exit(obj_reg, old_hdr, done); } - // Simple recursive lock? - - __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - __ cbz(rscratch1, done); + if (LockingMode == LM_LEGACY) { + // Simple recursive lock? + __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ cbz(rscratch1, done); + } // Must save r0 if if it is live now because cmpxchg must use it if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { @@ -1940,15 +1948,22 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } - // get address of the stack lock - __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); - // get old displaced header - __ ldr(old_hdr, Address(r0, 0)); + if (LockingMode == LM_MONITOR) { + __ b(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { + // get address of the stack lock + __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ ldr(old_hdr, Address(r0, 0)); - // Atomic swap old header if oop still contains the stack lock - Label succeed; - __ cmpxchg_obj_header(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); - __ bind(succeed); + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchg_obj_header(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock); + __ bind(succeed); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, ""); + __ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock); + } // slow path re-enters here __ bind(unlock_done); diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp index 7cdaa89f2a3..3747dc50acd 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp @@ -3235,8 +3235,7 @@ void TemplateTable::invokevirtual_helper(Register index, __ bind(notFinal); // get receiver klass - __ null_check(recv, oopDesc::klass_offset_in_bytes()); - __ load_klass(r0, recv); + __ load_klass(r0, recv, true); // profile this call __ profile_virtual_call(r0, rlocals, r3); @@ -3325,8 +3324,7 @@ void TemplateTable::invokeinterface(int byte_no) { __ tbz(r3, ConstantPoolCacheEntry::is_vfinal_shift, notVFinal); // Get receiver klass into r3 - also a null check - __ null_check(r2, oopDesc::klass_offset_in_bytes()); - __ load_klass(r3, r2); + __ load_klass(r3, r2, true); Label subtype; __ check_klass_subtype(r3, r0, r4, subtype); @@ -3342,8 +3340,7 @@ void TemplateTable::invokeinterface(int byte_no) { // Get receiver klass into r3 - also a null check __ restore_locals(); - __ null_check(r2, oopDesc::klass_offset_in_bytes()); - __ load_klass(r3, 
r2); + __ load_klass(r3, r2, true); Label no_such_method; @@ -3538,12 +3535,17 @@ void TemplateTable::_new() { // The object is initialized before the header. If the object size is // zero, go directly to the header initialization. __ bind(initialize_object); - __ sub(r3, r3, sizeof(oopDesc)); + __ sub(r3, r3, oopDesc::base_offset_in_bytes()); __ cbz(r3, initialize_header); // Initialize object fields { - __ add(r2, r0, sizeof(oopDesc)); + __ add(r2, r0, oopDesc::base_offset_in_bytes()); + if (!is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong)) { + __ strw(zr, Address(__ post(r2, BytesPerInt))); + __ sub(r3, r3, BytesPerInt); + __ cbz(r3, initialize_header); + } Label loop; __ bind(loop); __ str(zr, Address(__ post(r2, BytesPerLong))); @@ -3553,15 +3555,16 @@ void TemplateTable::_new() { // initialize object header only. __ bind(initialize_header); - if (UseBiasedLocking) { + if (UseBiasedLocking || UseCompactObjectHeaders) { __ ldr(rscratch1, Address(r4, Klass::prototype_header_offset())); } else { __ mov(rscratch1, (intptr_t)markWord::prototype().value()); } __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); - __ store_klass_gap(r0, zr); // zero klass gap for compressed oops - __ store_klass(r0, r4); // store klass last - + if (!UseCompactObjectHeaders) { + __ store_klass_gap(r0, zr); // zero klass gap for compressed oops + __ store_klass(r0, r4); // store klass last + } { SkipIfEqual skip(_masm, &DTraceAllocProbes, false); // Trigger dtrace event for fastpath diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 8f03b7e4437..81e1d50eb9c 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -153,6 +153,7 @@ class VM_Version : public Abstract_VM_Version { static bool supports_fast_class_init_checks() { return true; } constexpr static bool supports_stack_watermark_barrier() { return true; } + constexpr static bool supports_recursive_lightweight_locking() { return true; } static void get_compatible_board(char *buf, int buflen); diff --git a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp index acef8d21abc..85eb1583b57 100644 --- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp @@ -212,7 +212,7 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) { const ptrdiff_t lookupSize = __ pc() - start_pc; // Reduce "estimate" such that "padding" does not drop below 8. - const ptrdiff_t estimate = 124; + const ptrdiff_t estimate = 128; const ptrdiff_t codesize = typecheckSize + lookupSize; slop_delta = (int)(estimate - codesize); slop_bytes += slop_delta; diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp index b0ace8d21f9..6e26ec9188a 100644 --- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp +++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp @@ -243,6 +243,9 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ b(_continuation); } +void LoadKlassStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); // Only needed with compact object headers. 
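The TemplateTable::_new hunk above switches from sizeof(oopDesc) to oopDesc::base_offset_in_bytes() and, when that offset is not 8-byte aligned, clears one 4-byte word first so the remaining loop can keep using 8-byte stores. A small sketch of that zeroing pattern on plain memory; the 12-byte offset and 24-byte instance size are just example values to exercise the unaligned branch:

// Sketch of zeroing an instance body whose start offset may not be
// 8-byte aligned: one 4-byte store, then 8-byte stores for the rest.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <cassert>

static void zero_fields(uint8_t* obj, size_t base_offset, size_t instance_size) {
  size_t p = base_offset;
  if (p % 8 != 0) {                   // e.g. a 12-byte header
    std::memset(obj + p, 0, 4);       // the single strw(zr, ...)
    p += 4;
  }
  assert((instance_size - p) % 8 == 0);  // instance sizes are word aligned
  for (; p < instance_size; p += 8) { // the str(zr, ...) loop
    std::memset(obj + p, 0, 8);
  }
}

int main() {
  uint8_t obj[32];
  std::memset(obj, 0xAB, sizeof(obj));
  zero_fields(obj, /*base_offset=*/12, /*instance_size=*/24);
  for (size_t i = 12; i < 24; i++) assert(obj[i] == 0);
  assert(obj[11] == 0xAB && obj[24] == 0xAB);  // header and tail untouched
  return 0;
}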
+} // Call return is directly after patch word int PatchingStub::_patch_info_offset = 0; diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index 1c21f835f75..38ac26be464 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -721,11 +721,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, break; case T_ADDRESS: - if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { - __ ldr_u32(dest->as_pointer_register(), as_Address(addr)); - } else { - __ ldr(dest->as_pointer_register(), as_Address(addr)); - } + __ ldr(dest->as_pointer_register(), as_Address(addr)); break; case T_INT: @@ -2454,6 +2450,21 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { __ bind(*op->stub()->continuation()); } +void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { + Register obj = op->obj()->as_pointer_register(); + Register result = op->result_opr()->as_pointer_register(); + + CodeEmitInfo* info = op->info(); + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + if (UseCompressedClassPointers) { // On 32 bit arm?? + __ ldr_u32(result, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + __ ldr(result, Address(obj, oopDesc::klass_offset_in_bytes())); + } +} void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp index 3d18e728363..c34daf0180b 100644 --- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp @@ -317,6 +317,9 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ b(_continuation); } +void LoadKlassStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); // Only needed with compact object headers. +} // Implementation of patching: // - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes). 
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 2b7094f71aa..844c953e543 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -812,12 +812,7 @@ int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType typ case T_LONG : __ ld(to_reg->as_register_lo(), offset, base); break; case T_METADATA: __ ld(to_reg->as_register(), offset, base); break; case T_ADDRESS: - if (offset == oopDesc::klass_offset_in_bytes() && UseCompressedClassPointers) { - __ lwz(to_reg->as_register(), offset, base); - __ decode_klass_not_null(to_reg->as_register()); - } else { - __ ld(to_reg->as_register(), offset, base); - } + __ ld(to_reg->as_register(), offset, base); break; case T_ARRAY : // fall through case T_OBJECT: @@ -2733,6 +2728,28 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { __ bind(*op->stub()->continuation()); } +void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { + Register obj = op->obj()->as_pointer_register(); + Register result = op->result_opr()->as_pointer_register(); + + CodeEmitInfo* info = op->info(); + if (info != NULL) { + if (info != NULL) { + if (!os::zero_page_read_protected() || !ImplicitNullChecks) { + explicit_null_check(obj, info); + } else { + add_debug_info_for_null_check_here(info); + } + } + } + + if (UseCompressedClassPointers) { + __ lwz(result, oopDesc::klass_offset_in_bytes(), obj); + __ decode_klass_not_null(result); + } else { + __ ld(result, oopDesc::klass_offset_in_bytes(), obj); + } +} void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); diff --git a/src/hotspot/cpu/ppc/c2_safepointPollStubTable_ppc.cpp b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp similarity index 80% rename from src/hotspot/cpu/ppc/c2_safepointPollStubTable_ppc.cpp rename to src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp index b65b91df1b8..f99f9e978cf 100644 --- a/src/hotspot/cpu/ppc/c2_safepointPollStubTable_ppc.cpp +++ b/src/hotspot/cpu/ppc/c2_CodeStubs_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2021 SAP SE. All rights reserved. + * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,19 +24,22 @@ */ #include "precompiled.hpp" -#include "macroAssembler_ppc.inline.hpp" -#include "opto/compile.hpp" -#include "opto/node.hpp" -#include "opto/output.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/c2_CodeStubs.hpp" #include "runtime/sharedRuntime.hpp" #define __ masm. -void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { + +int C2SafepointPollStub::max_size() const { + return 56; +} + +void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { assert(SharedRuntime::polling_page_return_handler_blob() != NULL, "polling page return stub not created yet"); address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); - __ bind(entry->_stub_label); + __ bind(entry()); // Using pc relative address computation. 
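The rename below replaces the per-compile safepoint poll stub table with individual code stubs that are registered from the .ad epilog code via C->output()->add_stub(...) and emitted after the main code, so the stub's entry() label is only bound inside its own emit(). The general shape of that deferred-stub pattern, sketched independently of the C2 classes; the class and member names here are invented for the example:

// Generic "deferred out-of-line stub" sketch: the main pass records stubs
// and branch targets, a later pass emits them and binds their entry labels.
#include <memory>
#include <string>
#include <vector>
#include <cstdio>

struct Label { int pos = -1; };           // -1 means "not bound yet"

class CodeStub {
public:
  virtual ~CodeStub() = default;
  Label& entry() { return _entry; }
  virtual void emit(std::string& code) = 0;
protected:
  Label _entry;
};

class SafepointPollStub : public CodeStub {
  int _safepoint_offset;
public:
  explicit SafepointPollStub(int off) : _safepoint_offset(off) {}
  void emit(std::string& code) override {
    _entry.pos = (int)code.size();        // bind(entry()) happens only here
    code += "  stub for poll at offset " + std::to_string(_safepoint_offset) + "\n";
  }
};

int main() {
  std::string code = "  ...main code with a safepoint poll...\n";
  std::vector<std::unique_ptr<CodeStub>> stubs;          // add_stub(...)
  stubs.push_back(std::make_unique<SafepointPollStub>(42));
  Label* target = &stubs.back()->entry();                // branch target recorded now
  for (auto& s : stubs) s->emit(code);                   // stubs emitted after the main code
  std::printf("%sbranch target bound at %d\n", code.c_str(), target->pos);
  return 0;
}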
{ Label next_pc; @@ -45,7 +48,7 @@ void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointP } int current_offset = __ offset(); // Code size should not depend on offset: see _stub_size computation in output.cpp - __ load_const32(R12, entry->_safepoint_offset - current_offset); + __ load_const32(R12, _safepoint_offset - current_offset); __ mflr(R0); __ add(R12, R12, R0); __ std(R12, in_bytes(JavaThread::saved_exception_pc_offset()), R16_thread); diff --git a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp index 239db8224c0..876ad046a0c 100644 --- a/src/hotspot/cpu/ppc/frame_ppc.inline.hpp +++ b/src/hotspot/cpu/ppc/frame_ppc.inline.hpp @@ -143,7 +143,6 @@ inline intptr_t* frame::interpreter_frame_mdp_addr() const { return (intptr_t*) &(get_ijava_state()->mdx); } -// Pointer beyond the "oldest/deepest" BasicObjectLock on stack. inline BasicObjectLock* frame::interpreter_frame_monitor_end() const { return (BasicObjectLock*) get_ijava_state()->monitors; } diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 70466fdf3c3..fb07b3f4465 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -1933,7 +1933,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register } } -// Add a InterpMonitorElem to stack (see frame_sparc.hpp). +// Add a monitor (see frame_ppc.hpp). void InterpreterMacroAssembler::add_monitor_to_stack(bool stack_is_empty, Register Rtemp1, Register Rtemp2) { // Very-local scratch registers. diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 06711461666..1259aeb1dfa 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -982,6 +982,7 @@ source_hpp %{ source %{ +#include "opto/c2_CodeStubs.hpp" #include "oops/klass.inline.hpp" void PhaseOutput::pd_perform_mach_node_analysis() { @@ -1621,7 +1622,9 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { Label dummy_label; Label* code_stub = &dummy_label; if (!UseSIGTRAP && !C->output()->in_scratch_emit_size()) { - code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset()); + C->output()->add_stub(stub); + code_stub = &stub->entry(); __ relocate(relocInfo::poll_return_type); } __ safepoint_poll(*code_stub, temp, true /* at_return */, true /* in_nmethod */); diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index 2210e05a410..13c13fe843d 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -4045,90 +4045,78 @@ void TemplateTable::athrow() { // at next monitor exit. void TemplateTable::monitorenter() { transition(atos, vtos); - __ verify_oop(R17_tos); - Register Rcurrent_monitor = R11_scratch1, - Rcurrent_obj = R12_scratch2, + Register Rcurrent_monitor = R3_ARG1, + Rcurrent_obj = R4_ARG2, Robj_to_lock = R17_tos, - Rscratch1 = R3_ARG1, - Rscratch2 = R4_ARG2, - Rscratch3 = R5_ARG3, - Rcurrent_obj_addr = R6_ARG4; + Rscratch1 = R11_scratch1, + Rscratch2 = R12_scratch2, + Rbot = R5_ARG3, + Rfree_slot = R6_ARG4; + + Label Lfound, Lallocate_new; + + __ ld(Rscratch1, _abi0(callers_sp), R1_SP); // load FP + __ li(Rfree_slot, 0); // Points to free slot or null. + + // Set up search loop - start with topmost monitor. 
+ __ mr(Rcurrent_monitor, R26_monitor); + __ addi(Rbot, Rscratch1, -frame::ijava_state_size); // ------------------------------------------------------------------------------ // Null pointer exception. - __ null_check_throw(Robj_to_lock, -1, R11_scratch1); + __ null_check_throw(Robj_to_lock, -1, Rscratch1); - // Try to acquire a lock on the object. - // Repeat until succeeded (i.e., until monitorenter returns true). + // Check if any slot is present => short cut to allocation if not. + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ beq(CCR0, Lallocate_new); // ------------------------------------------------------------------------------ // Find a free slot in the monitor block. - Label Lfound, Lexit, Lallocate_new; - ConditionRegister found_free_slot = CCR0, - found_same_obj = CCR1, - reached_limit = CCR6; + // Note: The order of the monitors is important for C2 OSR which derives the + // unlock order from it (see comments for interpreter_frame_monitor_*). { - Label Lloop; - Register Rlimit = Rcurrent_monitor; - - // Set up search loop - start with topmost monitor. - __ add(Rcurrent_obj_addr, BasicObjectLock::obj_offset_in_bytes(), R26_monitor); + Label Lloop, LnotFree, Lexit; - __ ld(Rlimit, 0, R1_SP); - __ addi(Rlimit, Rlimit, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes() - BasicObjectLock::obj_offset_in_bytes())); // Monitor base + __ bind(Lloop); + __ ld(Rcurrent_obj, BasicObjectLock::obj_offset_in_bytes(), Rcurrent_monitor); + // Exit if current entry is for same object; this guarantees, that new monitor + // used for recursive lock is above the older one. + __ cmpd(CCR0, Rcurrent_obj, Robj_to_lock); + __ beq(CCR0, Lexit); // recursive locking - // Check if any slot is present => short cut to allocation if not. - __ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit); - __ bgt(reached_limit, Lallocate_new); + __ cmpdi(CCR0, Rcurrent_obj, 0); + __ bne(CCR0, LnotFree); + __ mr(Rfree_slot, Rcurrent_monitor); // remember free slot closest to the bottom + __ bind(LnotFree); - // Pre-load topmost slot. - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize); - // The search loop. - __ bind(Lloop); - // Found free slot? - __ cmpdi(found_free_slot, Rcurrent_obj, 0); - // Is this entry for same obj? If so, stop the search and take the found - // free slot or allocate a new one to enable recursive locking. - __ cmpd(found_same_obj, Rcurrent_obj, Robj_to_lock); - __ cmpld(reached_limit, Rcurrent_obj_addr, Rlimit); - __ beq(found_free_slot, Lexit); - __ beq(found_same_obj, Lallocate_new); - __ bgt(reached_limit, Lallocate_new); - // Check if last allocated BasicLockObj reached. - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize); - // Next iteration if unchecked BasicObjectLocks exist on the stack. - __ b(Lloop); + __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes()); + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ bne(CCR0, Lloop); + __ bind(Lexit); } // ------------------------------------------------------------------------------ // Check if we found a free slot. 
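The rewritten search loop above walks the monitor block from the newest entry (R26_monitor) towards the frame bottom, remembers the free slot closest to the bottom, and stops as soon as it meets an entry for the same object, so a recursive lock always ends up in a slot above the older one. The same scan in plain C++; the container and its ordering are a stand-in, not the real frame layout:

// Sketch of the monitor-slot search in the rewritten monitorenter above.
// 'monitors' is ordered newest (top) first, like walking R26_monitor -> Rbot.
#include <cstdint>
#include <vector>
#include <cstdio>

struct BasicObjectLock { const void* obj = nullptr; };   // stand-in for the real layout

static size_t pick_slot(std::vector<BasicObjectLock>& monitors, const void* obj) {
  size_t free_slot = SIZE_MAX;
  for (size_t i = 0; i < monitors.size(); i++) {
    if (monitors[i].obj == obj) break;                    // recursive: keep the new lock above
    if (monitors[i].obj == nullptr) free_slot = i;        // remember slot closest to the bottom
  }
  if (free_slot == SIZE_MAX) {
    monitors.insert(monitors.begin(), BasicObjectLock()); // add_monitor_to_stack: new top slot
    free_slot = 0;
  }
  monitors[free_slot].obj = obj;
  return free_slot;
}

int main() {
  int a, b;
  std::vector<BasicObjectLock> m;                         // empty monitor block
  std::printf("%zu\n", pick_slot(m, &a));                 // 0: new top slot for a
  std::printf("%zu\n", pick_slot(m, &b));                 // 0: new top slot for b
  std::printf("%zu\n", pick_slot(m, &a));                 // 0: recursive lock for a goes above the old one
  return 0;
}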
- __ bind(Lexit); - - __ addi(Rcurrent_monitor, Rcurrent_obj_addr, -(frame::interpreter_frame_monitor_size() * wordSize) - BasicObjectLock::obj_offset_in_bytes()); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, - frame::interpreter_frame_monitor_size() * wordSize); - __ b(Lfound); + __ cmpdi(CCR0, Rfree_slot, 0); + __ bne(CCR0, Lfound); // We didn't find a free BasicObjLock => allocate one. - __ align(32, 12); __ bind(Lallocate_new); __ add_monitor_to_stack(false, Rscratch1, Rscratch2); - __ mr(Rcurrent_monitor, R26_monitor); - __ addi(Rcurrent_obj_addr, R26_monitor, BasicObjectLock::obj_offset_in_bytes()); + __ mr(Rfree_slot, R26_monitor); // ------------------------------------------------------------------------------ // We now have a slot to lock. __ bind(Lfound); // Increment bcp to point to the next bytecode, so exception handling for async. exceptions work correctly. - // The object has already been poped from the stack, so the expression stack looks correct. + // The object has already been popped from the stack, so the expression stack looks correct. __ addi(R14_bcp, R14_bcp, 1); - __ std(Robj_to_lock, 0, Rcurrent_obj_addr); - __ lock_object(Rcurrent_monitor, Robj_to_lock); + __ std(Robj_to_lock, BasicObjectLock::obj_offset_in_bytes(), Rfree_slot); + __ lock_object(Rfree_slot, Robj_to_lock); // Check if there's enough space on the stack for the monitors after locking. // This emits a single store. @@ -4142,46 +4130,40 @@ void TemplateTable::monitorexit() { transition(atos, vtos); __ verify_oop(R17_tos); - Register Rcurrent_monitor = R11_scratch1, - Rcurrent_obj = R12_scratch2, + Register Rcurrent_monitor = R3_ARG1, + Rcurrent_obj = R4_ARG2, Robj_to_lock = R17_tos, - Rcurrent_obj_addr = R3_ARG1, - Rlimit = R4_ARG2; + Rscratch = R11_scratch1, + Rbot = R12_scratch2; + Label Lfound, Lillegal_monitor_state; - // Check corner case: unbalanced monitorEnter / Exit. - __ ld(Rlimit, 0, R1_SP); - __ addi(Rlimit, Rlimit, - (frame::ijava_state_size + frame::interpreter_frame_monitor_size_in_bytes())); // Monitor base + __ ld(Rscratch, _abi0(callers_sp), R1_SP); // load FP + + // Set up search loop - start with topmost monitor. + __ mr(Rcurrent_monitor, R26_monitor); + __ addi(Rbot, Rscratch, -frame::ijava_state_size); // Null pointer check. - __ null_check_throw(Robj_to_lock, -1, R11_scratch1); + __ null_check_throw(Robj_to_lock, -1, Rscratch); - __ cmpld(CCR0, R26_monitor, Rlimit); - __ bgt(CCR0, Lillegal_monitor_state); + // Check corner case: unbalanced monitorEnter / Exit. + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ beq(CCR0, Lillegal_monitor_state); // Find the corresponding slot in the monitors stack section. { Label Lloop; - // Start with topmost monitor. - __ addi(Rcurrent_obj_addr, R26_monitor, BasicObjectLock::obj_offset_in_bytes()); - __ addi(Rlimit, Rlimit, BasicObjectLock::obj_offset_in_bytes()); - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize); - __ bind(Lloop); + __ ld(Rcurrent_obj, BasicObjectLock::obj_offset_in_bytes(), Rcurrent_monitor); // Is this entry for same obj? __ cmpd(CCR0, Rcurrent_obj, Robj_to_lock); __ beq(CCR0, Lfound); - // Check if last allocated BasicLockObj reached. - - __ ld(Rcurrent_obj, 0, Rcurrent_obj_addr); - __ cmpld(CCR0, Rcurrent_obj_addr, Rlimit); - __ addi(Rcurrent_obj_addr, Rcurrent_obj_addr, frame::interpreter_frame_monitor_size() * wordSize); - - // Next iteration if unchecked BasicObjectLocks exist on the stack. 
- __ ble(CCR0, Lloop); + __ addi(Rcurrent_monitor, Rcurrent_monitor, frame::interpreter_frame_monitor_size_in_bytes()); + __ cmpld(CCR0, Rcurrent_monitor, Rbot); + __ bne(CCR0, Lloop); } // Fell through without finding the basic obj lock => throw up! @@ -4191,8 +4173,6 @@ void TemplateTable::monitorexit() { __ align(32, 12); __ bind(Lfound); - __ addi(Rcurrent_monitor, Rcurrent_obj_addr, - -(frame::interpreter_frame_monitor_size() * wordSize) - BasicObjectLock::obj_offset_in_bytes()); __ unlock_object(Rcurrent_monitor); } diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp index 329c163f313..04bc01f1458 100644 --- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp @@ -270,6 +270,10 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ branch_optimized(Assembler::bcondAlways, _continuation); } +void LoadKlassStub::emit_code(LIR_Assembler* ce) { + Unimplemented(); // Only needed with compact object headers. +} + // Implementation of patching: // - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes). // - Replace original code with a call to the stub. diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index a7d5a4a1c42..eaf83d8b311 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -950,12 +950,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type, LIR_P } break; case T_ADDRESS: - if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { - __ z_llgf(dest->as_register(), disp_value, disp_reg, src); - __ decode_klass_not_null(dest->as_register()); - } else { - __ z_lg(dest->as_register(), disp_value, disp_reg, src); - } + __ z_lg(dest->as_register(), disp_value, disp_reg, src); break; case T_ARRAY : // fall through case T_OBJECT: @@ -2754,6 +2749,22 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { __ bind(*op->stub()->continuation()); } +void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { + Register obj = op->obj()->as_pointer_register(); + Register result = op->result_opr()->as_pointer_register(); + + CodeEmitInfo* info = op->info(); + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + + if (UseCompressedClassPointers) { + __ z_llgf(result, Address(obj, oopDesc::klass_offset_in_bytes())); + __ decode_klass_not_null(result); + } else { + __ z_lg(result, Address(obj, oopDesc::klass_offset_in_bytes())); + } +} void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 3c72c93ad40..53a61c77478 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -216,7 +216,7 @@ class Address { // No default displacement otherwise Register can be implicitly // converted to 0(Register) which is quite a different animal. 
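The constructor change just below turns Address(Register base, int disp) into an explicit constructor with a defaulted displacement; the explicit keyword is what keeps the warning in the comment above satisfied, because a bare Register still cannot silently become 0(Register). A tiny sketch of that C++ behaviour with stand-in types, purely to illustrate the language rule:

// Why 'explicit' allows a defaulted displacement without re-enabling the
// implicit Register -> Address conversion the comment above warns about.
struct Register { int encoding; };

struct Address {
  Register base;
  int disp;
  explicit Address(Register b, int d = 0) : base(b), disp(d) {}
};

static int load(const Address& a) { return a.base.encoding + a.disp; }

int main() {
  Register r0{0};
  int x = load(Address(r0));      // fine: explicit construction, disp defaults to 0
  // int y = load(r0);            // would not compile: no implicit conversion
  return x;
}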
- Address(Register base, int disp) + explicit Address(Register base, int disp = 0) : _base(base), _index(noreg), _xmmindex(xnoreg), diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 1ba5061a574..b886ea2642a 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -30,6 +30,7 @@ #include "c1/c1_Runtime1.hpp" #include "classfile/javaClasses.hpp" #include "nativeInst_x86.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "utilities/align.hpp" #include "utilities/macros.hpp" @@ -300,7 +301,6 @@ void MonitorExitStub::emit_code(LIR_Assembler* ce) { __ jmp(_continuation); } - // Implementation of patching: // - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) // - Replace original code with a call to the stub diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index e2454f32481..a2fcddb6c82 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -1184,7 +1184,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch LIR_Address* addr = src->as_address_ptr(); Address from_addr = as_Address(addr); - Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); if (addr->base()->type() == T_OBJECT) { __ verify_oop(addr->base()->as_pointer_register()); @@ -1257,11 +1256,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch break; case T_ADDRESS: - if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { - __ movl(dest->as_register(), from_addr); - } else { - __ movptr(dest->as_register(), from_addr); - } + __ movptr(dest->as_register(), from_addr); break; case T_INT: __ movl(dest->as_register(), from_addr); @@ -1367,12 +1362,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch if (!UseZGC) { __ verify_oop(dest->as_register()); } - } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { -#ifdef _LP64 - if (UseCompressedClassPointers) { - __ decode_klass_not_null(dest->as_register(), tmp_load_klass); - } -#endif } } @@ -1654,7 +1643,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { len, tmp1, tmp2, - arrayOopDesc::header_size(op->type()), + arrayOopDesc::base_offset_in_bytes(op->type()), array_element_size(op->type()), op->klass()->as_register(), *op->stub()->entry()); @@ -3079,6 +3068,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { Register length = op->length()->as_register(); Register tmp = op->tmp()->as_register(); Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Register tmp2 = UseCompactObjectHeaders ? 
rscratch2 : noreg; CodeStub* stub = op->stub(); int flags = op->flags(); @@ -3270,13 +3260,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // We don't know the array types are compatible if (basic_type != T_OBJECT) { // Simple test for basic type arrays - if (UseCompressedClassPointers) { - __ movl(tmp, src_klass_addr); - __ cmpl(tmp, dst_klass_addr); - } else { - __ movptr(tmp, src_klass_addr); - __ cmpptr(tmp, dst_klass_addr); - } + __ cmp_klass(src, dst, tmp, tmp2); __ jcc(Assembler::notEqual, *stub->entry()); } else { // For object arrays, if src is a sub class of dst then we can @@ -3436,18 +3420,13 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ encode_klass_not_null(tmp, rscratch1); } #endif - if (basic_type != T_OBJECT) { - - if (UseCompressedClassPointers) __ cmpl(tmp, dst_klass_addr); - else __ cmpptr(tmp, dst_klass_addr); + __ cmp_klass(tmp, dst, tmp2); __ jcc(Assembler::notEqual, halt); - if (UseCompressedClassPointers) __ cmpl(tmp, src_klass_addr); - else __ cmpptr(tmp, src_klass_addr); + __ cmp_klass(tmp, src, tmp2); __ jcc(Assembler::equal, known_ok); } else { - if (UseCompressedClassPointers) __ cmpl(tmp, dst_klass_addr); - else __ cmpptr(tmp, dst_klass_addr); + __ cmp_klass(tmp, dst, tmp2); __ jcc(Assembler::equal, known_ok); __ cmpptr(src, dst); __ jcc(Assembler::equal, known_ok); @@ -3509,11 +3488,11 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { Register obj = op->obj_opr()->as_register(); // may not be an oop Register hdr = op->hdr_opr()->as_register(); Register lock = op->lock_opr()->as_register(); - if (!UseFastLocking) { + if (LockingMode == LM_MONITOR) { __ jmp(*op->stub()->entry()); } else if (op->code() == lir_lock) { Register scratch = noreg; - if (UseBiasedLocking) { + if (UseBiasedLocking || LockingMode == LM_LIGHTWEIGHT) { scratch = op->scratch_opr()->as_register(); } assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); @@ -3532,6 +3511,34 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { __ bind(*op->stub()->continuation()); } +void LIR_Assembler::emit_load_klass(LIR_OpLoadKlass* op) { + Register obj = op->obj()->as_pointer_register(); + Register result = op->result_opr()->as_pointer_register(); + + CodeEmitInfo* info = op->info(); + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + +#ifdef _LP64 + if (UseCompactObjectHeaders) { + Register tmp = rscratch1; + assert_different_registers(tmp, obj); + assert_different_registers(tmp, result); + + // Check if we can take the (common) fast path, if obj is unlocked. 
+ __ movq(result, Address(obj, oopDesc::mark_offset_in_bytes())); + __ shrq(result, markWord::klass_shift); + __ decode_klass_not_null(result, tmp); + } else if (UseCompressedClassPointers) { + __ movl(result, Address(obj, oopDesc::klass_offset_in_bytes())); + __ decode_klass_not_null(result, rscratch1); + } else +#endif + { + __ movptr(result, Address(obj, oopDesc::klass_offset_in_bytes())); + } +} void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); @@ -3636,12 +3643,9 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ orptr(mdo_addr, TypeEntries::null_seen); } if (do_update) { -#ifndef ASSERT - __ jmpb(next); - } -#else __ jmp(next); } +#ifdef ASSERT } else { __ testptr(tmp, tmp); __ jcc(Assembler::notZero, update); diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index b99f16fea05..d28bbf76fcc 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -314,7 +314,7 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { LIR_Opr lock = new_register(T_INT); // Need a scratch register for biased locking on x86 LIR_Opr scratch = LIR_OprFact::illegalOpr; - if (UseBiasedLocking) { + if (UseBiasedLocking || LockingMode == LM_LIGHTWEIGHT) { scratch = new_register(T_INT); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index b022f11990c..7e84f8c0da2 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,17 +34,18 @@ #include "oops/markWord.hpp" #include "runtime/basicLock.hpp" #include "runtime/biasedLocking.hpp" +#include "runtime/globals.hpp" #include "runtime/os.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "utilities/globalDefinitions.hpp" int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { const Register rklass_decode_tmp = LP64_ONLY(rscratch1) NOT_LP64(noreg); const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(hdr == rax, "hdr must be rax, for the cmpxchg instruction"); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; + assert_different_registers(hdr, obj, disp_hdr, scratch); int null_check_offset = -1; verify_oop(obj); @@ -61,50 +62,64 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr jcc(Assembler::notZero, slow_case); } - if (UseBiasedLocking) { - assert(scratch != noreg, "should have scratch register at this point"); - biased_locking_enter(disp_hdr, obj, hdr, scratch, rklass_decode_tmp, false, done, &slow_case); - } + if (LockingMode == LM_LIGHTWEIGHT) { +#ifdef _LP64 + const Register thread = r15_thread; + lightweight_lock(disp_hdr, obj, hdr, thread, scratch, slow_case); +#else + // Implicit null check. + movptr(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + // Lacking registers and thread on x86_32. Always take slow path. 
+ jmp(slow_case); +#endif + } else { + Label done; + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + biased_locking_enter(disp_hdr, obj, hdr, scratch, rklass_decode_tmp, false, done, &slow_case); + } - // Load object header - movptr(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked - orptr(hdr, markWord::unlocked_value); - // save unlocked object header into the displaced header location on the stack - movptr(Address(disp_hdr, 0), hdr); - // test if object header is still the same (i.e. unlocked), and if so, store the - // displaced header address in the object header - if it is not the same, get the - // object header instead - MacroAssembler::lock(); // must be immediately before cmpxchg! - cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); - // if the object header was the same, we're done - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::equal, - ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + // Load object header + movptr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + orptr(hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + movptr(Address(disp_hdr, 0), hdr); + // test if object header is still the same (i.e. unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + MacroAssembler::lock(); // must be immediately before cmpxchg! + cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); + // if the object header was the same, we're done + if (PrintBiasedLockingStatistics) { + cond_inc32(Assembler::equal, + ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + } + jcc(Assembler::equal, done); + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) rsp <= hdr + // 3) hdr <= rsp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - rsp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + subptr(hdr, rsp); + andptr(hdr, aligned_mask - os::vm_page_size()); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + movptr(Address(disp_hdr, 0), hdr); + // otherwise we don't care about the result and handle locking via runtime call + jcc(Assembler::notZero, slow_case); + // done + bind(done); } - jcc(Assembler::equal, done); - // if the object header was not the same, it is now in the hdr register - // => test if it is a stack pointer into the same stack (recursive locking), i.e.: - // - // 1) (hdr & aligned_mask) == 0 - // 2) rsp <= hdr - // 3) hdr <= rsp + page_size - // - // these 3 tests can be done by evaluating the following expression: - // - // (hdr - rsp) & (aligned_mask - page_size) - // - // assuming both the stack pointer and page_size have their least - // significant 2 bits cleared and page_size is a power of 2 - subptr(hdr, rsp); - andptr(hdr, aligned_mask - os::vm_page_size()); - // for recursive locking, the result is zero => save it in the displaced header - // location (NULL in the displaced hdr location indicates recursive locking) - movptr(Address(disp_hdr, 0), hdr); - // otherwise we 
don't care about the result and handle locking via runtime call - jcc(Assembler::notZero, slow_case); - // done - bind(done); return null_check_offset; } @@ -114,35 +129,47 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(disp_hdr == rax, "disp_hdr must be rax, for the cmpxchg instruction"); assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; - if (UseBiasedLocking) { - // load object + if (LockingMode == LM_LIGHTWEIGHT) { movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - biased_locking_exit(obj, hdr, done); - } + verify_oop(obj); +#ifdef _LP64 + lightweight_unlock(obj, disp_hdr, r15_thread, hdr, slow_case); +#else + // Lacking registers and thread on x86_32. Always take slow path. + jmp(slow_case); +#endif + } else { + Label done; - // load displaced header - movptr(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is NULL we had recursive locking - testptr(hdr, hdr); - // if we had recursive locking, we are done - jcc(Assembler::zero, done); - if (!UseBiasedLocking) { - // load object - movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + if (UseBiasedLocking) { + // load object + movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + movptr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + testptr(hdr, hdr); + // if we had recursive locking, we are done + jcc(Assembler::zero, done); + if (!UseBiasedLocking) { + // load object + movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + MacroAssembler::lock(); // must be immediately before cmpxchg! + cmpxchgptr(hdr, Address(obj, hdr_offset)); + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + jcc(Assembler::notEqual, slow_case); + // done + bind(done); } - verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to - // the displaced header, get the object header instead - MacroAssembler::lock(); // must be immediately before cmpxchg! 
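The legacy (displaced-header) path above folds three checks, header is word-aligned, header >= rsp, and header < rsp + page_size, into the single expression (hdr - rsp) & (aligned_mask - page_size). A small self-contained sketch of that arithmetic, using made-up addresses and an assumed 4 KB page:

#include <cassert>
#include <cstdint>

// Sketch of the legacy recursive-lock test: the result is zero exactly when
// 'hdr' is a word-aligned address within one page above 'rsp'.
static bool looks_like_own_stack_lock(uintptr_t hdr, uintptr_t rsp,
                                      uintptr_t aligned_mask, uintptr_t page_size) {
  return ((hdr - rsp) & (aligned_mask - page_size)) == 0;
}

int main() {
  const uintptr_t aligned_mask = sizeof(void*) - 1;    // BytesPerWord - 1
  const uintptr_t page = 4096;                         // assumed page size
  uintptr_t rsp = 0x7fff0000;
  assert( looks_like_own_stack_lock(rsp + 64,        rsp, aligned_mask, page)); // recursive lock
  assert(!looks_like_own_stack_lock(rsp + page + 64, rsp, aligned_mask, page)); // too far away
  assert(!looks_like_own_stack_lock(rsp + 65,        rsp, aligned_mask, page)); // not aligned
  return 0;
}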
- cmpxchgptr(hdr, Address(obj, hdr_offset)); - // if the object header was not pointing to the displaced header, - // we do unlocking via runtime call - jcc(Assembler::notEqual, slow_case); - // done - bind(done); } @@ -157,32 +184,31 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { - assert_different_registers(obj, klass, len); - Register tmp_encode_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); - if (UseBiasedLocking && !len->is_valid()) { - assert_different_registers(obj, klass, len, t1, t2); + assert_different_registers(obj, klass, len, t1, t2); + if (UseCompactObjectHeaders || (UseBiasedLocking && !len->is_valid())) { movptr(t1, Address(klass, Klass::prototype_header_offset())); movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); } else { - // This assumes that all prototype bits fit in an int32_t - movptr(Address(obj, oopDesc::mark_offset_in_bytes ()), (int32_t)(intptr_t)markWord::prototype().value()); + movptr(Address(obj, oopDesc::mark_offset_in_bytes()), checked_cast(markWord::prototype().value())); } + + if (!UseCompactObjectHeaders) { #ifdef _LP64 - if (UseCompressedClassPointers) { // Take care not to kill klass - movptr(t1, klass); - encode_klass_not_null(t1, tmp_encode_klass); - movl(Address(obj, oopDesc::klass_offset_in_bytes()), t1); - } else + if (UseCompressedClassPointers) { // Take care not to kill klass + movptr(t1, klass); + encode_klass_not_null(t1, rscratch1); + movl(Address(obj, oopDesc::klass_offset_in_bytes()), t1); + } else #endif - { - movptr(Address(obj, oopDesc::klass_offset_in_bytes()), klass); + { + movptr(Address(obj, oopDesc::klass_offset_in_bytes()), klass); + } } - if (len->is_valid()) { movl(Address(obj, arrayOopDesc::length_offset_in_bytes()), len); } #ifdef _LP64 - else if (UseCompressedClassPointers) { + else if (UseCompressedClassPointers && !UseCompactObjectHeaders) { xorptr(t1, t1); store_klass_gap(obj, t1); } @@ -225,30 +251,31 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register const Register t1_zero = t1; const Register index = t2; const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below) + int hdr_size_aligned = align_up(hdr_size_in_bytes, BytesPerWord); // klass gap is already cleared by init_header(). 
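The initialize_object hunk above introduces hdr_size_aligned = align_up(hdr_size_in_bytes, BytesPerWord): with compact headers (no klass field or klass gap) the header can end at an offset that is not word-aligned, so field clearing must start at the next word boundary. A tiny sketch of that rounding, assuming an 8-byte word:

#include <cassert>
#include <cstddef>

// Sketch of align_up as used for hdr_size_aligned: round 'size' up to the
// next multiple of 'alignment' (alignment must be a power of two).
static size_t align_up_sz(size_t size, size_t alignment) {
  return (size + alignment - 1) & ~(alignment - 1);
}

int main() {
  const size_t BytesPerWord = 8;                   // assumed 64-bit word
  assert(align_up_sz(12, BytesPerWord) == 16);     // e.g. a header ending at offset 12
  assert(align_up_sz(16, BytesPerWord) == 16);     // already aligned: unchanged
  return 0;
}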
if (var_size_in_bytes != noreg) { mov(index, var_size_in_bytes); - initialize_body(obj, index, hdr_size_in_bytes, t1_zero); + initialize_body(obj, index, hdr_size_aligned, t1_zero); } else if (con_size_in_bytes <= threshold) { // use explicit null stores // code size = 2 + 3*n bytes (n = number of fields to clear) xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) - for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) + for (int i = hdr_size_aligned; i < con_size_in_bytes; i += BytesPerWord) movptr(Address(obj, i), t1_zero); - } else if (con_size_in_bytes > hdr_size_in_bytes) { + } else if (con_size_in_bytes > hdr_size_aligned) { // use loop to null out the fields // code size = 16 bytes for even n (n = number of fields to clear) // initialize last object field first if odd number of fields xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3); // initialize last object field if constant size is odd - if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0) + if (((con_size_in_bytes - hdr_size_aligned) & 4) != 0) movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero); // initialize remaining object fields: rdx is a multiple of 2 { Label loop; bind(loop); - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)), + movptr(Address(obj, index, Address::times_8, hdr_size_aligned - (1*BytesPerWord)), t1_zero); - NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)), + NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_aligned - (2*BytesPerWord)), t1_zero);) decrement(index); jcc(Assembler::notZero, loop); @@ -264,7 +291,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register verify_oop(obj); } -void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, Address::ScaleFactor f, Register klass, Label& slow_case) { +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, Address::ScaleFactor f, Register klass, Label& slow_case) { assert(obj == rax, "obj must be in rax, for cmpxchg"); assert_different_registers(obj, len, t1, t2, klass); @@ -277,7 +304,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, const Register arr_size = t2; // okay to be the same // align object end - movptr(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + movptr(arr_size, (int32_t)base_offset_in_bytes + MinObjAlignmentInBytesMask); lea(arr_size, Address(arr_size, len, f)); andptr(arr_size, ~MinObjAlignmentInBytesMask); @@ -287,7 +314,7 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, // clear rest of allocated space const Register len_zero = len; - initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + initialize_body(obj, arr_size, base_offset_in_bytes, len_zero); if (CURRENT_ENV->dtrace_alloc_probes()) { assert(obj == rax, "must be"); diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp new file mode 100644 index 00000000000..e8c44d90b45 --- /dev/null +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define __ masm. + +int C2SafepointPollStub::max_size() const { + return 33; +} + +void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + RuntimeAddress callback_addr(stub); + + __ bind(entry()); + InternalAddress safepoint_pc(masm.pc() - masm.offset() + _safepoint_offset); +#ifdef _LP64 + __ lea(rscratch1, safepoint_pc); + __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); +#else + const Register tmp1 = rcx; + const Register tmp2 = rdx; + __ push(tmp1); + __ push(tmp2); + + __ lea(tmp1, safepoint_pc); + __ get_thread(tmp2); + __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); + + __ pop(tmp2); + __ pop(tmp1); +#endif + __ jump(callback_addr); +} + +int C2FastUnlockLightweightStub::max_size() const { + return 128; +} + +void C2FastUnlockLightweightStub::emit(C2_MacroAssembler& masm) { + assert(_t == rax, "must be"); + + Label slow_path; + + { // Restore lock-stack and handle the unlock in runtime. + + __ bind(_push_and_slow_path); +#ifdef ASSERT + // The obj was only cleared in debug. + __ movl(_t, Address(_thread, JavaThread::lock_stack_top_offset())); + __ movptr(Address(_thread, _t), _obj); +#endif + __ addl(Address(_thread, JavaThread::lock_stack_top_offset()), oopSize); + } + + { // Slow path. + + __ bind(slow_path); + __ bind(_slow_path); + // Clear ZF. + __ testptr(_thread, _thread); + __ jmp(slow_path_continuation()); + } + + { // Handle monitor medium path. + + __ bind(_check_successor); + + Label fix_zf_and_unlocked; + const Register monitor = _mark; + +#ifndef _LP64 + __ jmpb(slow_path); +#else // _LP64 + const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); + const Address succ_address{monitor, ObjectMonitor::succ_offset() - monitor_tag}; + const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; + + // successor null check. + __ cmpptr(succ_address, NULL_WORD); + __ jccb(Assembler::equal, slow_path); + + // Release lock. + __ movptr(owner_address, NULL_WORD); + + // Fence. + // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. + __ lock(); __ addl(Address(rsp, 0), 0); + + // Recheck successor. 
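The medium path emitted just above follows the classic unlock hand-off: clear the owner, force a full fence (the locked addl to the top of stack), then re-check _succ; only if no successor appeared does it try to re-take the monitor and fall into the slow path. A hedged pseudo-C++ sketch of that protocol with std::atomic stand-ins for the ObjectMonitor fields; the type and field names here are illustrative only:

#include <atomic>

struct Thread;                                   // opaque stand-in for JavaThread

// Illustrative monitor with just the two fields the stub touches.
struct MiniMonitor {
  std::atomic<Thread*> owner{nullptr};
  std::atomic<Thread*> succ{nullptr};
};

// Returns true if the unlock completed on the medium path,
// false if the caller must take the slow path (wake a waiter).
static bool medium_path_unlock(MiniMonitor* m, Thread* self) {
  if (m->succ.load(std::memory_order_acquire) == nullptr) {
    return false;                                // no successor: slow path
  }
  m->owner.store(nullptr, std::memory_order_release);   // release the lock
  std::atomic_thread_fence(std::memory_order_seq_cst);  // the 'lock addl' in the stub
  if (m->succ.load(std::memory_order_relaxed) != nullptr) {
    return true;                                 // successor seen after release: handed off
  }
  // Nobody to hand off to: try to re-take the lock and go slow.
  Thread* expected = nullptr;
  if (m->owner.compare_exchange_strong(expected, self)) {
    return false;                                // re-locked, slow path will exit properly
  }
  return true;                                   // lost the race: someone else owns it now
}

int main() { MiniMonitor m; Thread* t = nullptr; (void)medium_path_unlock(&m, t); return 0; }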
+ __ cmpptr(succ_address, NULL_WORD); + // Observed a successor after the release -> fence we have handed off the monitor + __ jccb(Assembler::notEqual, fix_zf_and_unlocked); + + // Try to relock, if it fails the monitor has been handed over + // TODO: Caveat, this may fail due to deflation, which does + // not handle the monitor handoff. Currently only works + // due to the responsible thread. + __ xorptr(rax, rax); + __ lock(); __ cmpxchgptr(_thread, owner_address); + __ jccb (Assembler::equal, slow_path); +#endif + + __ bind(fix_zf_and_unlocked); + __ xorl(rax, rax); + __ jmp(unlocked_continuation()); + } +} + +#undef __ diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 579183c9303..a5cbedc6f83 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -28,13 +28,19 @@ #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" #include "oops/methodData.hpp" +#include "opto/c2_CodeStubs.hpp" #include "opto/c2_MacroAssembler.hpp" #include "opto/intrinsicnode.hpp" #include "opto/opcodes.hpp" +#include "opto/output.hpp" #include "opto/subnode.hpp" #include "runtime/biasedLocking.hpp" +#include "runtime/globals.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/stubRoutines.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/powerOfTwo.hpp" +#include "utilities/sizes.hpp" inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) { switch (vlen_in_bytes) { @@ -448,12 +454,13 @@ void C2_MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, R // rax,: tmp -- KILLED // scr: tmp -- KILLED void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, - Register scrReg, Register cx1Reg, Register cx2Reg, + Register scrReg, Register cx1Reg, Register cx2Reg, Register thread, BiasedLockingCounters* counters, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, Metadata* method_data, bool use_rtm, bool profile_rtm) { + assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_lock_lightweight"); // Ensure the register assignments are disjoint assert(tmpReg == rax, ""); @@ -515,29 +522,35 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral|biased - jccb(Assembler::notZero, IsInflated); + jcc(Assembler::notZero, IsInflated); - // Attempt stack-locking ... - orptr (tmpReg, markWord::unlocked_value); - movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS - lock(); - cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg - if (counters != NULL) { - cond_inc32(Assembler::equal, - ExternalAddress((address)counters->fast_path_entry_count_addr())); - } - jcc(Assembler::equal, DONE_LABEL); // Success - - // Recursive locking. - // The object is stack-locked: markword contains stack pointer to BasicLock. - // Locked by current thread if difference with current SP is less than one page. - subptr(tmpReg, rsp); - // Next instruction set ZFlag == 1 (Success) if difference is less then one page. 
- andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); - movptr(Address(boxReg, 0), tmpReg); - if (counters != NULL) { - cond_inc32(Assembler::equal, - ExternalAddress((address)counters->fast_path_entry_count_addr())); + if (LockingMode == LM_MONITOR) { + // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0. + testptr(objReg, objReg); + } else { + assert(LockingMode == LM_LEGACY, "must be"); + // Attempt stack-locking ... + orptr (tmpReg, markWord::unlocked_value); + movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS + lock(); + cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg + if (counters != NULL) { + cond_inc32(Assembler::equal, + ExternalAddress((address)counters->fast_path_entry_count_addr())); + } + jcc(Assembler::equal, DONE_LABEL); // Success + + // Recursive locking. + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + subptr(tmpReg, rsp); + // Next instruction set ZFlag == 1 (Success) if difference is less then one page. + andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) ); + movptr(Address(boxReg, 0), tmpReg); + if (counters != NULL) { + cond_inc32(Assembler::equal, + ExternalAddress((address)counters->fast_path_entry_count_addr())); + } } jmp(DONE_LABEL); @@ -661,6 +674,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp // Xcheck:jni is enabled. void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) { + assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight"); assert(boxReg == rax, ""); assert_different_registers(objReg, boxReg, tmpReg); @@ -686,13 +700,18 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t } #endif - cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header - jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock + if (LockingMode == LM_LEGACY) { + cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header + jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock + } movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword - testptr(tmpReg, markWord::monitor_value); // Inflated? - jccb (Assembler::zero, Stacked); + if (LockingMode != LM_MONITOR) { + testptr(tmpReg, markWord::monitor_value); // Inflated? + jcc(Assembler::zero, Stacked); + } // It's inflated. 
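For contrast with the lightweight scheme added below, the LM_LEGACY branches kept above still use the displaced-header protocol: copy (mark | unlocked) into the on-stack BasicLock, CAS the BasicLock address into the object's mark, and on unlock CAS the saved header back. A compact sketch under the assumption of a single uncontended thread, with no inflation or biased locking; constant values are placeholders for the real markWord bits:

#include <atomic>
#include <cassert>
#include <cstdint>

static const uintptr_t kUnlocked = 0x1;          // low lock bits 0b01 == unlocked (illustrative)

struct MiniObject { std::atomic<uintptr_t> mark{kUnlocked}; };
struct MiniBasicLock { uintptr_t displaced_header = 0; };

// Legacy stack-lock: store (mark | unlocked) in the BasicLock, then CAS the
// BasicLock's address into the object's mark word.
static bool legacy_lock(MiniObject* obj, MiniBasicLock* box) {
  uintptr_t mark = obj->mark.load() | kUnlocked;
  box->displaced_header = mark;
  return obj->mark.compare_exchange_strong(mark, reinterpret_cast<uintptr_t>(box));
}

// Legacy unlock: CAS the displaced header back, provided the mark still
// points at our BasicLock.
static bool legacy_unlock(MiniObject* obj, MiniBasicLock* box) {
  uintptr_t expected = reinterpret_cast<uintptr_t>(box);
  return obj->mark.compare_exchange_strong(expected, box->displaced_header);
}

int main() {
  MiniObject o; MiniBasicLock box;
  assert(legacy_lock(&o, &box));
  assert(legacy_unlock(&o, &box));
  assert(o.mark.load() == kUnlocked);             // header restored
  return 0;
}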
+ #if INCLUDE_RTM_OPT if (use_rtm) { Label L_regular_inflated_unlock; @@ -701,7 +720,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testptr(boxReg, boxReg); jccb(Assembler::notZero, L_regular_inflated_unlock); xend(); - jmpb(DONE_LABEL); + jmp(DONE_LABEL); bind(L_regular_inflated_unlock); } #endif @@ -736,19 +755,10 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); - jccb (Assembler::notZero, CheckSucc); + jccb (Assembler::notZero, DONE_LABEL); movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); jmpb (DONE_LABEL); - bind (Stacked); - // It's not inflated and it's not recursively stack-locked and it's not biased. - // It must be stack-locked. - // Try to reset the header to displaced header. - // The "box" value on the stack is stable, so we can reload - // and be assured we observe the same value as above. - movptr(tmpReg, Address(boxReg, 0)); - lock(); - cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box // Intention fall-thru into DONE_LABEL // DONE_LABEL is a hot target - we'd really like to place it at the @@ -832,15 +842,331 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL); - bind (Stacked); - movptr(tmpReg, Address (boxReg, 0)); // re-fetch - lock(); - cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box - #endif + if (LockingMode == LM_LEGACY) { + bind (Stacked); + movptr(tmpReg, Address (boxReg, 0)); // re-fetch + lock(); + cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box + // Intentional fall-thru into DONE_LABEL + } + bind(DONE_LABEL); } +void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg, + Register t, Register thread) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + assert(rax_reg == rax, "Used for CAS"); + assert_different_registers(obj, box, rax_reg, t, thread); + + // Handle inflated monitor. + Label inflated; + // Finish fast lock successfully. ZF value is irrelevant. + Label locked; + // Finish fast lock unsuccessfully. MUST jump with ZF == 0 + Label slow_path; + + if (UseObjectMonitorTable) { + // Clear cache in case fast locking succeeds. + movptr(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), 0); + } + + if (DiagnoseSyncOnValueBasedClasses != 0) { + load_klass(rax_reg, obj, t); + movl(rax_reg, Address(rax_reg, Klass::access_flags_offset())); + testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS); + jcc(Assembler::notZero, slow_path); + } + + const Register mark = t; + + { // Lightweight Lock + + Label push; + + const Register top = UseObjectMonitorTable ? rax_reg : box; + + // Load the mark. + movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + + // Prefetch top. + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + + // Check for monitor (0b10). + testptr(mark, markWord::monitor_value); + jcc(Assembler::notZero, inflated); + + // Check if lock-stack is full. + cmpl(top, LockStack::end_offset() - 1); + jcc(Assembler::greater, slow_path); + + // Check if recursive. + cmpptr(obj, Address(thread, top, Address::times_1, -oopSize)); + jccb(Assembler::equal, push); + + // Try to lock. 
Transition lock bits 0b01 => 0b00 + movptr(rax_reg, mark); + orptr(rax_reg, markWord::unlocked_value); + andptr(mark, ~(int32_t)markWord::unlocked_value); + lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, slow_path); + + if (UseObjectMonitorTable) { + // Need to reload top, clobbered by CAS. + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + } + bind(push); + // After successful lock, push object on lock-stack. + movptr(Address(thread, top), obj); + addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); + jmpb(locked); + } + + { // Handle inflated monitor. + bind(inflated); + + const Register monitor = t; + + if (!UseObjectMonitorTable) { + assert(mark == monitor, "should be the same here"); + } else { + // Uses ObjectMonitorTable. Look for the monitor in the om_cache. + // Fetch ObjectMonitor* from the cache or take the slow-path. + Label monitor_found; + + // Load cache address + lea(t, Address(thread, JavaThread::om_cache_oops_offset())); + + const int num_unrolled = 2; + for (int i = 0; i < num_unrolled; i++) { + cmpptr(obj, Address(t)); + jccb(Assembler::equal, monitor_found); + if (i + 1 != num_unrolled) { + increment(t, in_bytes(OMCache::oop_to_oop_difference())); + } + } + + // Loop after unrolling, advance iterator. + increment(t, in_bytes(OMCache::oop_to_oop_difference())); + + Label loop; + + // Search for obj in cache. + bind(loop); + + // Check for match. + cmpptr(obj, Address(t)); + jccb(Assembler::equal, monitor_found); + + // Search until null encountered, guaranteed _null_sentinel at end. + cmpptr(Address(t), 1); + jcc(Assembler::below, slow_path); // 0 check, but with ZF=0 when *t == 0 + increment(t, in_bytes(OMCache::oop_to_oop_difference())); + jmpb(loop); + + // Cache hit. + bind(monitor_found); + movptr(monitor, Address(t, OMCache::oop_to_monitor_difference())); + } + const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast(markWord::monitor_value)); + const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; + const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; + + Label monitor_locked; + // Lock the monitor. + + // CAS owner (null => current thread). + xorptr(rax_reg, rax_reg); + lock(); cmpxchgptr(thread, owner_address); + jccb(Assembler::equal, monitor_locked); + + // Check if recursive. + cmpptr(thread, rax_reg); + jccb(Assembler::notEqual, slow_path); + + // Recursive. + increment(recursions_address); + + bind(monitor_locked); + if (UseObjectMonitorTable) { + // Cache the monitor for unlock + movptr(Address(box, BasicLock::object_monitor_cache_offset_in_bytes()), monitor); + } + } + + bind(locked); + // Set ZF = 1 + xorl(rax_reg, rax_reg); + +#ifdef ASSERT + // Check that locked label is reached with ZF set. + Label zf_correct; + Label zf_bad_zero; + jcc(Assembler::zero, zf_correct); + jmp(zf_bad_zero); +#endif + + bind(slow_path); +#ifdef ASSERT + // Check that slow_path label is reached with ZF not set. + jcc(Assembler::notZero, zf_correct); + stop("Fast Lock ZF != 0"); + bind(zf_bad_zero); + stop("Fast Lock ZF != 1"); + bind(zf_correct); +#endif + // C2 uses the value of ZF to determine the continuation. 
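fast_lock_lightweight above drives the per-thread lock-stack directly in assembly: bail out to the monitor path if the mark is inflated, reject if the lock-stack is full, treat a repeat of the top entry as a recursive lock, otherwise CAS the mark from 0b01 to 0b00 and push the oop. The same decision sequence in plain C++, as a sketch only; the fixed capacity and names are assumptions, and the real lock stack is addressed through JavaThread offsets:

#include <cstdint>

using oop = uintptr_t;                            // stand-in for an object reference

// Illustrative per-thread lock stack: a small array of oops plus a top index.
struct LockStackSketch {
  static const int kCapacity = 8;                 // assumed; the real capacity differs
  oop entries[kCapacity];
  int top = 0;

  bool full() const              { return top == kCapacity; }
  bool is_recursive(oop o) const { return top > 0 && entries[top - 1] == o; }
  void push(oop o)               { entries[top++] = o; }
};

enum class FastLockResult { Locked, Recursive, Inflated, SlowPath };

// Sketch of the decision sequence the emitted code follows; the actual
// 0b01 -> 0b00 mark-word CAS is elided here.
static FastLockResult fast_lock_sketch(LockStackSketch& ls, oop obj, bool mark_has_monitor) {
  if (mark_has_monitor)     return FastLockResult::Inflated;   // take the monitor path
  if (ls.full())            return FastLockResult::SlowPath;   // no room on the lock stack
  if (ls.is_recursive(obj)) { ls.push(obj); return FastLockResult::Recursive; }
  // ... CAS mark 0b01 -> 0b00 would happen here; on success:
  ls.push(obj);
  return FastLockResult::Locked;
}

int main() {
  LockStackSketch ls;
  oop o = 0x1000;
  fast_lock_sketch(ls, o, false);                 // first acquisition: pushes o
  fast_lock_sketch(ls, o, false);                 // second: recognized as recursive
  return 0;
}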
+} + +void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + assert(reg_rax == rax, "Used for CAS"); + assert_different_registers(obj, reg_rax, t); + + // Handle inflated monitor. + Label inflated, inflated_check_lock_stack; + // Finish fast unlock successfully. MUST jump with ZF == 1 + Label unlocked; + + const Register mark = t; + const Register monitor = t; + const Register top = UseObjectMonitorTable ? t : reg_rax; + const Register box = reg_rax; + + Label dummy; + C2FastUnlockLightweightStub* stub = nullptr; + + if (!Compile::current()->output()->in_scratch_emit_size()) { + stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, thread); + Compile::current()->output()->add_stub(stub); + } + + Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path(); + Label& check_successor = stub == nullptr ? dummy : stub->check_successor(); + Label& slow_path = stub == nullptr ? dummy : stub->slow_path(); + + { // Lightweight Unlock + + // Load top. + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + + if (!UseObjectMonitorTable) { + // Prefetch mark. + movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + } + + // Check if obj is top of lock-stack. + cmpptr(obj, Address(thread, top, Address::times_1, -oopSize)); + // Top of lock stack was not obj. Must be monitor. + jcc(Assembler::notEqual, inflated_check_lock_stack); + + // Pop lock-stack. + DEBUG_ONLY(movptr(Address(thread, top, Address::times_1, -oopSize), 0);) + subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); + + // Check if recursive. + cmpptr(obj, Address(thread, top, Address::times_1, -2 * oopSize)); + jcc(Assembler::equal, unlocked); + + // We elide the monitor check, let the CAS fail instead. + + if (UseObjectMonitorTable) { + // Load mark. + movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + } + + // Try to unlock. Transition lock bits 0b00 => 0b01 + movptr(reg_rax, mark); + andptr(reg_rax, ~(int32_t)markWord::lock_mask); + orptr(mark, markWord::unlocked_value); + lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, push_and_slow_path); + jmp(unlocked); + } + + + { // Handle inflated monitor. + bind(inflated_check_lock_stack); +#ifdef ASSERT + Label check_done; + subl(top, oopSize); + cmpl(top, in_bytes(JavaThread::lock_stack_base_offset())); + jcc(Assembler::below, check_done); + cmpptr(obj, Address(thread, top)); + jccb(Assembler::notEqual, inflated_check_lock_stack); + stop("Fast Unlock lock on stack"); + bind(check_done); + if (UseObjectMonitorTable) { + movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes())); + } + testptr(mark, markWord::monitor_value); + jccb(Assembler::notZero, inflated); + stop("Fast Unlock not monitor"); +#endif + + bind(inflated); + + if (!UseObjectMonitorTable) { + assert(mark == monitor, "should be the same here"); + } else { + // Uses ObjectMonitorTable. Look for the monitor in our BasicLock on the stack. + movptr(monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes())); + // null check with ZF == 0, no valid pointer below alignof(ObjectMonitor*) + cmpptr(monitor, alignof(ObjectMonitor*)); + jcc(Assembler::below, slow_path); + } + const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 
0 : checked_cast(markWord::monitor_value)); + const Address recursions_address{monitor, ObjectMonitor::recursions_offset() - monitor_tag}; + const Address cxq_address{monitor, ObjectMonitor::cxq_offset() - monitor_tag}; + const Address EntryList_address{monitor, ObjectMonitor::EntryList_offset() - monitor_tag}; + const Address owner_address{monitor, ObjectMonitor::owner_offset() - monitor_tag}; + + Label recursive; + + // Check if recursive. + cmpptr(recursions_address, 0); + jccb(Assembler::notEqual, recursive); + + // Check if the entry lists are empty. + movptr(reg_rax, cxq_address); + orptr(reg_rax, EntryList_address); + jcc(Assembler::notZero, check_successor); + + // Release lock. + movptr(owner_address, NULL_WORD); + jmpb(unlocked); + + // Recursive unlock. + bind(recursive); + decrement(recursions_address); + xorl(t, t); + } + + bind(unlocked); + if (stub != nullptr) { + bind(stub->unlocked_continuation()); + } + +#ifdef ASSERT + // Check that unlocked label is reached with ZF set. + Label zf_correct; + jcc(Assembler::zero, zf_correct); + stop("Fast Unlock ZF != 1"); +#endif + + if (stub != nullptr) { + bind(stub->slow_path_continuation()); + } +#ifdef ASSERT + // Check that stub->continuation() label is reached with ZF not set. + jccb(Assembler::notZero, zf_correct); + stop("Fast Unlock ZF != 0"); + bind(zf_correct); +#endif + // C2 uses the value of ZF to determine the continuation. +} + //------------------------------------------------------------------------------------------- // Generic instructions support for use in .ad files C2 code generation @@ -3982,3 +4308,23 @@ void C2_MacroAssembler::rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XM evshufi64x2(xtmp3, src, src, 0xFF, vlen_enc); evpshufb(dst, ktmp, xtmp3, shuffle, true, vlen_enc); } + +#ifdef _LP64 +void C2_MacroAssembler::load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp) { + // Note: Don't clobber obj anywhere in that method! + + // The incoming address is pointing into obj-start + klass_offset_in_bytes. We need to extract + // obj-start, so that we can load from the object's mark-word instead. Usually the address + // comes as obj-start in obj and klass_offset_in_bytes in disp. However, sometimes C2 + // emits code that pre-computes obj-start + klass_offset_in_bytes into a register, and + // then passes that register as obj and 0 in disp. The following code extracts the base + // and offset to load the mark-word. + int offset = oopDesc::mark_offset_in_bytes() + disp - oopDesc::klass_offset_in_bytes(); + if (markWord::klass_shift == 32) { + movl(dst, Address(obj, index, scale, offset + 4)); + } else { + movq(dst, Address(obj, index, scale, offset)); + shrq(dst, markWord::klass_shift); + } +} +#endif diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 01830e5edff..2bd2279f02a 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -37,7 +37,7 @@ // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. // See full desription in macroAssembler_x86.cpp. 
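The inflated branch of fast_unlock_lightweight above reduces to three questions: is this a recursive monitor lock (recursions != 0), are the entry lists empty (cxq | EntryList == 0), and if neither, can the out-of-line successor stub hand the monitor off. A hedged sketch of that decision tree; the struct and field names are stand-ins for the ObjectMonitor offsets the code reads:

#include <cstdint>

// Minimal stand-in for the ObjectMonitor fields the fast path inspects.
struct MonitorSketch {
  intptr_t recursions = 0;
  void*    cxq        = nullptr;   // contention queue head
  void*    entry_list = nullptr;   // EntryList head
  void*    owner      = nullptr;
};

enum class UnlockPath { ReleasedDirectly, PoppedRecursion, CheckSuccessorStub };

static UnlockPath inflated_unlock_sketch(MonitorSketch* m) {
  if (m->recursions != 0) {                  // recursive monitor lock: just count down
    m->recursions--;
    return UnlockPath::PoppedRecursion;
  }
  if (m->cxq == nullptr && m->entry_list == nullptr) {
    m->owner = nullptr;                      // nobody waiting: release and we are done
    return UnlockPath::ReleasedDirectly;
  }
  return UnlockPath::CheckSuccessorStub;     // waiters exist: go to the out-of-line stub
}

int main() {
  MonitorSketch m;
  m.recursions = 1;
  inflated_unlock_sketch(&m);                // pops the recursion
  inflated_unlock_sketch(&m);                // now releases directly
  return 0;
}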
void fast_lock(Register obj, Register box, Register tmp, - Register scr, Register cx1, Register cx2, + Register scr, Register cx1, Register cx2, Register thread, BiasedLockingCounters* counters, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, @@ -45,6 +45,10 @@ bool use_rtm, bool profile_rtm); void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); + void fast_lock_lightweight(Register obj, Register box, Register rax_reg, + Register t, Register thread); + void fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread); + #if INCLUDE_RTM_OPT void rtm_counters_update(Register abort_status, Register rtm_counters); void branch_on_random_using_rdtsc(Register tmp, Register scr, int count, Label& brLabel); @@ -278,4 +282,6 @@ void rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, int vlen_enc); + void load_nklass_compact_c2(Register dst, Register obj, Register index, Address::ScaleFactor scale, int disp); + #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index d92885daeca..3a8b17fb137 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -57,7 +57,7 @@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md testptr(obj, obj); jccb(Assembler::notZero, update); orptr(mdo_addr, TypeEntries::null_seen); - jmpb(next); + jmp(next); bind(update); Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); @@ -1197,7 +1197,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx), "The argument is only for looks. It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); @@ -1231,74 +1231,83 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg, rklass_decode_tmp, false, done, &slow_case); } - // Load immediate 1 into swap_reg %rax - movl(swap_reg, (int32_t)1); + if (LockingMode == LM_LIGHTWEIGHT) { +#ifdef _LP64 + const Register thread = r15_thread; + lightweight_lock(lock_reg, obj_reg, swap_reg, thread, tmp_reg, slow_case); +#else + // Lacking registers and thread on x86_32. Always take slow path. 
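The jmpb -> jmp changes above (in emit_profile_type and profile_obj_type) are about branch reach: jmpb encodes an 8-bit displacement, and the extra code emitted for compact headers and lightweight locking can presumably push the jump target outside the +/-127-byte range of the short form, so the near form is used instead. A one-line illustration of the range check an assembler applies; the helper name is made up:

#include <cassert>
#include <cstdint>

// rel8 displacements cover -128..+127 bytes from the end of the instruction.
static bool fits_in_rel8(int64_t displacement) {
  return displacement >= -128 && displacement <= 127;
}

int main() {
  assert(fits_in_rel8(100));        // short jump (jmpb) is fine
  assert(!fits_in_rel8(180));       // needs the near form (jmp, rel32)
  return 0;
}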
+ jmp(slow_case); +#endif + jmp(done); + } else { + // Load immediate 1 into swap_reg %rax + movl(swap_reg, (int32_t)1); - // Load (object->mark() | 1) into swap_reg %rax - orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Load (object->mark() | 1) into swap_reg %rax + orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // Save (object->mark() | 1) into BasicLock's displaced header - movptr(Address(lock_reg, mark_offset), swap_reg); + // Save (object->mark() | 1) into BasicLock's displaced header + movptr(Address(lock_reg, mark_offset), swap_reg); - assert(lock_offset == 0, - "displaced header must be first word in BasicObjectLock"); + assert(lock_offset == 0, + "displaced header must be first word in BasicObjectLock"); - lock(); - cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::zero, - ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); - } - jcc(Assembler::zero, done); - - const int zero_bits = LP64_ONLY(7) NOT_LP64(3); - - // Fast check for recursive lock. - // - // Can apply the optimization only if this is a stack lock - // allocated in this thread. For efficiency, we can focus on - // recently allocated stack locks (instead of reading the stack - // base and checking whether 'mark' points inside the current - // thread stack): - // 1) (mark & zero_bits) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // - // Warning: rsp + os::pagesize can overflow the stack base. We must - // neither apply the optimization for an inflated lock allocated - // just above the thread stack (this is why condition 1 matters) - // nor apply the optimization if the stack lock is inside the stack - // of another thread. The latter is avoided even in case of overflow - // because we have guard pages at the end of all stacks. Hence, if - // we go over the stack base and hit the stack of another thread, - // this should not be in a writeable area that could contain a - // stack lock allocated by that thread. As a consequence, a stack - // lock less than page size away from rsp is guaranteed to be - // owned by the current thread. - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant bits clear. - // NOTE: the mark is in swap_reg %rax as the result of cmpxchg - subptr(swap_reg, rsp); - andptr(swap_reg, zero_bits - os::vm_page_size()); - - // Save the test result, for recursive case, the result is zero - movptr(Address(lock_reg, mark_offset), swap_reg); - - if (PrintBiasedLockingStatistics) { - cond_inc32(Assembler::zero, - ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); + lock(); + cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + if (PrintBiasedLockingStatistics) { + cond_inc32(Assembler::zero, + ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); + } + jcc(Assembler::zero, done); + + const int zero_bits = LP64_ONLY(7) NOT_LP64(3); + + // Fast check for recursive lock. + // + // Can apply the optimization only if this is a stack lock + // allocated in this thread. 
For efficiency, we can focus on + // recently allocated stack locks (instead of reading the stack + // base and checking whether 'mark' points inside the current + // thread stack): + // 1) (mark & zero_bits) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // + // Warning: rsp + os::pagesize can overflow the stack base. We must + // neither apply the optimization for an inflated lock allocated + // just above the thread stack (this is why condition 1 matters) + // nor apply the optimization if the stack lock is inside the stack + // of another thread. The latter is avoided even in case of overflow + // because we have guard pages at the end of all stacks. Hence, if + // we go over the stack base and hit the stack of another thread, + // this should not be in a writeable area that could contain a + // stack lock allocated by that thread. As a consequence, a stack + // lock less than page size away from rsp is guaranteed to be + // owned by the current thread. + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant bits clear. + // NOTE: the mark is in swap_reg %rax as the result of cmpxchg + subptr(swap_reg, rsp); + andptr(swap_reg, zero_bits - os::vm_page_size()); + + // Save the test result, for recursive case, the result is zero + movptr(Address(lock_reg, mark_offset), swap_reg); + + if (PrintBiasedLockingStatistics) { + cond_inc32(Assembler::zero, + ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr())); + } + jcc(Assembler::zero, done); } - jcc(Assembler::zero, done); - bind(slow_case); // Call the runtime routine for slow case call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); - bind(done); } } @@ -1320,10 +1329,10 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx), "The argument is only for looks. 
It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); } else { - Label done; + Label done, slow_case; const Register swap_reg = rax; // Must use rax for cmpxchg instruction const Register header_reg = LP64_ONLY(c_rarg2) NOT_LP64(rbx); // Will contain the old oopMark @@ -1331,9 +1340,11 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %rax - lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (LockingMode != LM_LIGHTWEIGHT) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %rax + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(%c_rarg3) movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -1341,28 +1352,38 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { // Free entry movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), (int32_t)NULL_WORD); - if (UseBiasedLocking) { - biased_locking_exit(obj_reg, header_reg, done); - } - - // Load the old header from BasicLock structure - movptr(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + if (LockingMode == LM_LIGHTWEIGHT) { +#ifdef _LP64 + lightweight_unlock(obj_reg, swap_reg, r15_thread, header_reg, slow_case); +#else + // Lacking registers and thread on x86_32. Always take slow path. + jmp(slow_case); +#endif + jmp(done); + } else { + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } - // Test for recursion - testptr(header_reg, header_reg); + // Load the old header from BasicLock structure + movptr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); - // zero for recursive case - jcc(Assembler::zero, done); + // Test for recursion + testptr(header_reg, header_reg); - // Atomic swap back the old header - lock(); - cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // zero for recursive case + jcc(Assembler::zero, done); - // zero for simple unlock of a stack-lock case - jcc(Assembler::zero, done); + // Atomic swap back the old header + lock(); + cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // zero for simple unlock of a stack-lock case + jcc(Assembler::zero, done); + } + bind(slow_case); // Call the runtime routine for slow case. movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), obj_reg); // restore obj call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index da4a40ba3f9..b9fbbd73ebc 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -54,6 +54,12 @@ #include "utilities/macros.hpp" #include "crc32c.h" +#ifdef COMPILER2 +#include "opto/c2_CodeStubs.hpp" +#include "opto/compile.hpp" +#include "opto/output.hpp" +#endif + #ifdef PRODUCT #define BLOCK_COMMENT(str) /* nothing */ #define STOP(error) stop(error) @@ -3771,12 +3777,25 @@ void MacroAssembler::eden_allocate(Register thread, Register obj, // Preserves the contents of address, destroys the contents length_in_bytes and temp. void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) { assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different"); - assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord"); + assert((offset_in_bytes & (BytesPerInt - 1)) == 0, "offset must be a multiple of BytesPerInt"); Label done; testptr(length_in_bytes, length_in_bytes); jcc(Assembler::zero, done); + // Emit single 32bit store to clear leading bytes, if necessary. + xorptr(temp, temp); // use _zero reg to clear memory (shorter code) +#ifdef _LP64 + if (!is_aligned(offset_in_bytes, BytesPerWord)) { + movl(Address(address, offset_in_bytes), temp); + offset_in_bytes += BytesPerInt; + decrement(length_in_bytes, BytesPerInt); + } + assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord"); + testptr(length_in_bytes, length_in_bytes); + jcc(Assembler::zero, done); +#endif + // initialize topmost word, divide index by 2, check if odd and test if zero // note: for the remaining code to work, index must be a multiple of BytesPerWord #ifdef ASSERT @@ -3789,7 +3808,6 @@ void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int } #endif Register index = length_in_bytes; - xorptr(temp, temp); // use _zero reg to clear memory (shorter code) if (UseIncDec) { shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set } else { @@ -4733,16 +4751,33 @@ void MacroAssembler::load_method_holder(Register holder, Register method) { movptr(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass* } -void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { +#ifdef _LP64 +void MacroAssembler::load_nklass_compact(Register dst, Register src) { + assert(UseCompactObjectHeaders, "expect compact object headers"); + if (markWord::klass_shift == 32) { + movl(dst, Address(src, oopDesc::mark_offset_in_bytes() + 4)); + } else { + movq(dst, Address(src, oopDesc::mark_offset_in_bytes())); + shrq(dst, markWord::klass_shift); + } +} +#endif + +void MacroAssembler::load_klass(Register dst, Register src, Register tmp, bool null_check_src) { assert_different_registers(src, tmp); assert_different_registers(dst, tmp); #ifdef _LP64 - if (UseCompressedClassPointers) { + if (UseCompactObjectHeaders) { + load_nklass_compact(dst, src); + decode_klass_not_null(dst, tmp); + } else if (UseCompressedClassPointers) { movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); decode_klass_not_null(dst, tmp); } else #endif + { movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); + } } void MacroAssembler::load_prototype_header(Register dst, Register src, Register tmp) { @@ -4751,6 +4786,7 @@ void MacroAssembler::load_prototype_header(Register dst, Register src, Register } void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { + assert(!UseCompactObjectHeaders, "not with 
compact headers"); assert_different_registers(src, tmp); assert_different_registers(dst, tmp); #ifdef _LP64 @@ -4759,7 +4795,46 @@ void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); } else #endif - movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); + movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); +} + +void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + // NOTE: We need to deal with possible ObjectMonitor in object header. + // Eventually we might be able to do simple movl & cmpl like in + // the CCP path below. + load_nklass_compact(tmp, obj); + cmpl(klass, tmp); + } else if (UseCompressedClassPointers) { + cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } else +#endif + { + cmpptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } +} + +void MacroAssembler::cmp_klass(Register src, Register dst, Register tmp1, Register tmp2) { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + // NOTE: We need to deal with possible ObjectMonitor in object header. + // Eventually we might be able to do simple movl & cmpl like in + // the CCP path below. + assert(tmp2 != noreg, "need tmp2"); + assert_different_registers(src, dst, tmp1, tmp2); + load_nklass_compact(tmp1, src); + load_nklass_compact(tmp2, dst); + cmpl(tmp1, tmp2); + } else if (UseCompressedClassPointers) { + movl(tmp1, Address(src, oopDesc::klass_offset_in_bytes())); + cmpl(tmp1, Address(dst, oopDesc::klass_offset_in_bytes())); + } else +#endif + { + movptr(tmp1, Address(src, oopDesc::klass_offset_in_bytes())); + cmpptr(tmp1, Address(dst, oopDesc::klass_offset_in_bytes())); + } } void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, @@ -4809,6 +4884,7 @@ void MacroAssembler::store_heap_oop_null(Address dst) { #ifdef _LP64 void MacroAssembler::store_klass_gap(Register dst, Register src) { + assert(!UseCompactObjectHeaders, "Don't use with compact headers"); if (UseCompressedClassPointers) { // Store to klass gap in destination movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); @@ -8686,3 +8762,115 @@ void MacroAssembler::get_thread(Register thread) { } #endif // !WIN32 || _LP64 + +// Implements lightweight-locking. +// +// obj: the object to be locked +// reg_rax: rax +// thread: the thread which attempts to lock obj +// tmp: a temporary register +void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register reg_rax, Register thread, Register tmp, Label& slow) { + assert(reg_rax == rax, ""); + assert_different_registers(basic_lock, obj, reg_rax, thread, tmp); + + Label push; + const Register top = tmp; + + // Preload the markWord. It is important that this is the first + // instruction emitted as it is part of C1's null check semantics. + movptr(reg_rax, Address(obj, oopDesc::mark_offset_in_bytes())); + + if (UseObjectMonitorTable) { + // Clear cache in case fast locking succeeds. + movptr(Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))), 0); + } + + // Load top. + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + + // Check if the lock-stack is full. + cmpl(top, LockStack::end_offset()); + jcc(Assembler::greaterEqual, slow); + + // Check for recursion. 
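The zero_memory hunk earlier in macroAssembler_x86.cpp relaxes the alignment requirement from BytesPerWord to BytesPerInt: with compact headers the first field to clear can start at an offset that is only 4-byte aligned, so one 32-bit store is emitted first to reach word alignment before the word-sized clearing loop runs. A sketch of that adjustment using memset as a stand-in for the emitted stores; sizes and the helper name are illustrative:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Sketch: if the starting offset is int- but not word-aligned, clear one
// 32-bit slot first, then continue clearing whole 64-bit words.
static void zero_from_offset(uint8_t* base, size_t offset, size_t length_in_bytes) {
  if (offset % sizeof(uint64_t) != 0) {            // leading int to reach word alignment
    std::memset(base + offset, 0, sizeof(uint32_t));
    offset += sizeof(uint32_t);
    length_in_bytes -= sizeof(uint32_t);
  }
  for (size_t i = 0; i < length_in_bytes; i += sizeof(uint64_t)) {
    std::memset(base + offset + i, 0, sizeof(uint64_t));   // word-sized stores
  }
}

int main() {
  uint8_t buf[64];
  zero_from_offset(buf, 12, 20);                   // offset 12 is int- but not word-aligned
  return 0;
}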
+ cmpptr(obj, Address(thread, top, Address::times_1, -oopSize)); + jcc(Assembler::equal, push); + + // Check header for monitor (0b10). + testptr(reg_rax, markWord::monitor_value); + jcc(Assembler::notZero, slow); + + // Try to lock. Transition lock bits 0b01 => 0b00 + movptr(tmp, reg_rax); + andptr(tmp, ~(int32_t)markWord::unlocked_value); + orptr(reg_rax, markWord::unlocked_value); + lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, slow); + + // Restore top, CAS clobbers register. + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + + bind(push); + // After successful lock, push object on lock-stack. + movptr(Address(thread, top), obj); + incrementl(top, oopSize); + movl(Address(thread, JavaThread::lock_stack_top_offset()), top); +} + +// Implements lightweight-unlocking. +// +// obj: the object to be unlocked +// reg_rax: rax +// thread: the thread +// tmp: a temporary register +void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow) { + assert(reg_rax == rax, ""); + assert_different_registers(obj, reg_rax, thread, tmp); + + Label unlocked, push_and_slow; + const Register top = tmp; + + // Check if obj is top of lock-stack. + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + cmpptr(obj, Address(thread, top, Address::times_1, -oopSize)); + jcc(Assembler::notEqual, slow); + + // Pop lock-stack. + DEBUG_ONLY(movptr(Address(thread, top, Address::times_1, -oopSize), 0);) + subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); + + // Check if recursive. + cmpptr(obj, Address(thread, top, Address::times_1, -2 * oopSize)); + jcc(Assembler::equal, unlocked); + + // Not recursive. Check header for monitor (0b10). + movptr(reg_rax, Address(obj, oopDesc::mark_offset_in_bytes())); + testptr(reg_rax, markWord::monitor_value); + jcc(Assembler::notZero, push_and_slow); + +#ifdef ASSERT + // Check header not unlocked (0b01). + Label not_unlocked; + testptr(reg_rax, markWord::unlocked_value); + jcc(Assembler::zero, not_unlocked); + stop("lightweight_unlock already unlocked"); + bind(not_unlocked); +#endif + + // Try to unlock. Transition lock bits 0b00 => 0b01 + movptr(tmp, reg_rax); + orptr(tmp, markWord::unlocked_value); + lock(); cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::equal, unlocked); + + bind(push_and_slow); + // Restore lock-stack and handle the unlock in runtime. +#ifdef ASSERT + movl(top, Address(thread, JavaThread::lock_stack_top_offset())); + movptr(Address(thread, top), obj); +#endif + addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); + jmp(slow); + + bind(unlocked); +} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index fcde34b2751..72046b34177 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -148,6 +148,8 @@ class MacroAssembler: public Assembler { void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } + void increment(Address dst, int value = 1) { LP64_ONLY(incrementq(dst, value)) NOT_LP64(incrementl(dst, value)) ; } + void decrement(Address dst, int value = 1) { LP64_ONLY(decrementq(dst, value)) NOT_LP64(decrementl(dst, value)) ; } void decrementl(Address dst, int value = 1); void decrementl(Register reg, int value = 1); @@ -339,9 +341,20 @@ class MacroAssembler: public Assembler { void load_method_holder(Register holder, Register method); // oop manipulations - void load_klass(Register dst, Register src, Register tmp); + void load_klass(Register dst, Register src, Register tmp, bool null_check_src = false); +#ifdef _LP64 + void load_nklass_compact(Register dst, Register src); +#endif void store_klass(Register dst, Register src, Register tmp); + // Compares the Klass pointer of an object to a given Klass (which might be narrow, + // depending on UseCompressedClassPointers). + void cmp_klass(Register klass, Register dst, Register tmp); + + // Compares the Klass pointer of two objects o1 and o2. Result is in the condition flags. + // Uses t1 and t2 as temporary registers. + void cmp_klass(Register src, Register dst, Register tmp1, Register tmp2); + void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, Register tmp1, Register thread_tmp); void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, @@ -1910,6 +1923,9 @@ class MacroAssembler: public Assembler { #endif // _LP64 void vallones(XMMRegister dst, int vector_len); + + void lightweight_lock(Register basic_lock, Register obj, Register reg_rax, Register thread, Register tmp, Label& slow); + void lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow); }; /** diff --git a/src/hotspot/cpu/x86/methodHandles_x86.cpp b/src/hotspot/cpu/x86/methodHandles_x86.cpp index f08dbaa4c83..1d3bfeda125 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.cpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp @@ -389,8 +389,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, __ null_check(receiver_reg); } else { // load receiver klass itself - __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); - __ load_klass(temp1_recv_klass, receiver_reg, temp2); + __ load_klass(temp1_recv_klass, receiver_reg, temp2, true); __ verify_klass_ptr(temp1_recv_klass); } BLOCK_COMMENT("check_receiver {"); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp index d067820fdc1..68ea6c0f554 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "runtime/sharedRuntime.hpp" +#include "utilities/globalDefinitions.hpp" #include "vmreg_x86.inline.hpp" #ifdef COMPILER1 #include "c1/c1_Runtime1.hpp" @@ -58,9 +59,18 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas __ movptr(result, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // check if locked - __ testptr(result, markWord::unlocked_value); - __ jcc(Assembler::zero, slowCase); + + if (LockingMode == LM_LIGHTWEIGHT) 
{ + if (!UseObjectMonitorTable) { + // check if monitor + __ testptr(result, markWord::monitor_value); + __ jcc(Assembler::notZero, slowCase); + } + } else { + // check if locked + __ testptr(result, markWord::unlocked_value); + __ jcc(Assembler::zero, slowCase); + } if (UseBiasedLocking) { // Check if biased and fall through to runtime if so @@ -73,8 +83,13 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas // Read the header and build a mask to get its hash field. // Depend on hash_mask being at most 32 bits and avoid the use of hash_mask_in_place // because it could be larger than 32 bits in a 64-bit vm. See markWord.hpp. - __ shrptr(result, markWord::hash_shift); - __ andptr(result, markWord::hash_mask); + if (UseCompactObjectHeaders) { + __ shrptr(result, markWord::hash_shift_compact); + __ andptr(result, markWord::hash_mask_compact); + } else { + __ shrptr(result, markWord::hash_shift); + __ andptr(result, markWord::hash_mask); + } #else __ andptr(result, markWord::hash_mask_in_place); #endif //_LP64 diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp index 492ec5962d0..d9063f7badd 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1823,41 +1823,49 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - // Note that oop_handle_reg is trashed during this call - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, noreg, false, lock_done, &slow_path_lock); - } + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, noreg, false, lock_done, &slow_path_lock); + } - // Load immediate 1 into swap_reg %rax, - __ movptr(swap_reg, 1); + // Load immediate 1 into swap_reg %rax, + __ movptr(swap_reg, 1); - // Load (object->mark() | 1) into swap_reg %rax, - __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Load (object->mark() | 1) into swap_reg %rax, + __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // Save (object->mark() | 1) into BasicLock's displaced header - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); + // Save (object->mark() | 1) into BasicLock's displaced header + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - // src -> dest iff dest == rax, else rax, <- dest - // *obj_reg = lock_reg iff *obj_reg == rax, else rax, = *(obj_reg) - __ lock(); - __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::equal, lock_done); + // src -> dest iff dest == rax, else rax, <- dest + // *obj_reg = lock_reg iff *obj_reg == rax, else rax, = *(obj_reg) + __ lock(); + __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ jcc(Assembler::equal, lock_done); - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) rsp <= mark < mark + 
os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %rax, as the result of cmpxchg + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %rax, as the result of cmpxchg - __ subptr(swap_reg, rsp); - __ andptr(swap_reg, 3 - os::vm_page_size()); + __ subptr(swap_reg, rsp); + __ andptr(swap_reg, 3 - os::vm_page_size()); - // Save the test result, for recursive case, the result is zero - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - __ jcc(Assembler::notEqual, slow_path_lock); + // Save the test result, for recursive case, the result is zero + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); + __ jcc(Assembler::notEqual, slow_path_lock); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + // Lacking registers and thread on x86_32. Always take slow path. + __ jmp(slow_path_lock); + } // Slow path will re-enter here __ bind(lock_done); @@ -1997,28 +2005,37 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ biased_locking_exit(obj_reg, rbx, done); } - // Simple recursive lock? + if (LockingMode == LM_LEGACY) { + // Simple recursive lock? - __ cmpptr(Address(rbp, lock_slot_rbp_offset), (int32_t)NULL_WORD); - __ jcc(Assembler::equal, done); + __ cmpptr(Address(rbp, lock_slot_rbp_offset), (int32_t)NULL_WORD); + __ jcc(Assembler::equal, done); + } // Must save rax, if if it is live now because cmpxchg must use it if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { save_native_result(masm, ret_type, stack_slots); } - // get old displaced header - __ movptr(rbx, Address(rbp, lock_slot_rbp_offset)); - - // get address of the stack lock - __ lea(rax, Address(rbp, lock_slot_rbp_offset)); - - // Atomic swap old header if oop still contains the stack lock - // src -> dest iff dest == rax, else rax, <- dest - // *obj_reg = rbx, iff *obj_reg == rax, else rax, = *(obj_reg) - __ lock(); - __ cmpxchgptr(rbx, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::notEqual, slow_path_unlock); + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { + // get old displaced header + __ movptr(rbx, Address(rbp, lock_slot_rbp_offset)); + + // get address of the stack lock + __ lea(rax, Address(rbp, lock_slot_rbp_offset)); + + // Atomic swap old header if oop still contains the stack lock + // src -> dest iff dest == rax, else rax, <- dest + // *obj_reg = rbx, iff *obj_reg == rax, else rax, = *(obj_reg) + __ lock(); + __ cmpxchgptr(rbx, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ jcc(Assembler::notEqual, slow_path_unlock); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ lightweight_unlock(obj_reg, swap_reg, thread, lock_reg, slow_path_unlock); + } // slow path re-enters here __ bind(unlock_done); diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 768f2dabdd3..3788987e69a 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ 
b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2072,41 +2072,48 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (UseBiasedLocking) { - __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, rscratch2, false, lock_done, &slow_path_lock); - } + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { + if (UseBiasedLocking) { + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, rscratch2, false, lock_done, &slow_path_lock); + } - // Load immediate 1 into swap_reg %rax - __ movl(swap_reg, 1); + // Load immediate 1 into swap_reg %rax + __ movl(swap_reg, 1); - // Load (object->mark() | 1) into swap_reg %rax - __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Load (object->mark() | 1) into swap_reg %rax + __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // Save (object->mark() | 1) into BasicLock's displaced header - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); + // Save (object->mark() | 1) into BasicLock's displaced header + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - // src -> dest iff dest == rax else rax <- dest - __ lock(); - __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::equal, lock_done); + // src -> dest iff dest == rax else rax <- dest + __ lock(); + __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ jcc(Assembler::equal, lock_done); - // Hmm should this move to the slow path code area??? + // Hmm should this move to the slow path code area??? - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. 
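The masked expression described in the comment above folds all three checks into one: with a power-of-two page size, ((mark - rsp) & (3 - page_size)) is zero exactly when the mark is 4-byte aligned and falls in [rsp, rsp + page_size). A self-contained sketch of that arithmetic, with made-up register values that are not taken from the patch:

    #include <cstdint>
    #include <cstdio>

    // True when 'mark' is 4-byte aligned and lies in [rsp, rsp + page_size),
    // i.e. it plausibly points at a BasicLock slot on the current stack page.
    static bool looks_like_own_stack_lock(uint64_t mark, uint64_t rsp, uint64_t page_size) {
      return ((mark - rsp) & (3 - page_size)) == 0;   // same expression as in the comment
    }

    int main() {
      const uint64_t page = 4096;
      const uint64_t rsp  = 0x00007ffd50001000ULL;    // pretend stack pointer
      printf("%d\n", looks_like_own_stack_lock(rsp + 0x40, rsp, page));     // 1: same page, aligned
      printf("%d\n", looks_like_own_stack_lock(rsp + 0x42, rsp, page));     // 0: low bits set
      printf("%d\n", looks_like_own_stack_lock(rsp + page + 8, rsp, page)); // 0: past this page
      printf("%d\n", looks_like_own_stack_lock(rsp - 8, rsp, page));        // 0: below rsp
      return 0;
    }

A zero result is what lets the legacy path store 0 as the displaced header and treat the lock as recursive.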
+ // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg - __ subptr(swap_reg, rsp); - __ andptr(swap_reg, 3 - os::vm_page_size()); + __ subptr(swap_reg, rsp); + __ andptr(swap_reg, 3 - os::vm_page_size()); - // Save the test result, for recursive case, the result is zero - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - __ jcc(Assembler::notEqual, slow_path_lock); + // Save the test result, for recursive case, the result is zero + __ movptr(Address(lock_reg, mark_word_offset), swap_reg); + __ jcc(Assembler::notEqual, slow_path_lock); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ lightweight_lock(lock_reg, obj_reg, swap_reg, r15_thread, rscratch1, slow_path_lock); + } // Slow path will re-enter here @@ -2231,26 +2238,34 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ biased_locking_exit(obj_reg, old_hdr, done); } - // Simple recursive lock? + if (LockingMode == LM_LEGACY) { + // Simple recursive lock? - __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); - __ jcc(Assembler::equal, done); + __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD); + __ jcc(Assembler::equal, done); + } // Must save rax if if it is live now because cmpxchg must use it if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { save_native_result(masm, ret_type, stack_slots); } + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { + // get address of the stack lock + __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); + // get old displaced header + __ movptr(old_hdr, Address(rax, 0)); - // get address of the stack lock - __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); - // get old displaced header - __ movptr(old_hdr, Address(rax, 0)); - - // Atomic swap old header if oop still contains the stack lock - __ lock(); - __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::notEqual, slow_path_unlock); + // Atomic swap old header if oop still contains the stack lock + __ lock(); + __ cmpxchgptr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ jcc(Assembler::notEqual, slow_path_unlock); + } else { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ lightweight_unlock(obj_reg, swap_reg, r15_thread, lock_reg, slow_path_unlock); + } // slow path re-enters here __ bind(unlock_done); diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp index a82954580f0..9c36710419e 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.cpp +++ b/src/hotspot/cpu/x86/templateTable_x86.cpp @@ -3657,9 +3657,8 @@ void TemplateTable::invokevirtual_helper(Register index, __ bind(notFinal); // get receiver klass - __ null_check(recv, oopDesc::klass_offset_in_bytes()); Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); - __ load_klass(rax, recv, tmp_load_klass); + __ load_klass(rax, recv, tmp_load_klass, true); // profile this call __ profile_virtual_call(rax, rlocals, rdx); @@ -3750,9 +3749,8 @@ void TemplateTable::invokeinterface(int byte_no) { __ jcc(Assembler::zero, notVFinal); // Get receiver klass into rlocals - also a null check - __ null_check(rcx, oopDesc::klass_offset_in_bytes()); Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); - __ load_klass(rlocals, rcx, tmp_load_klass); + __ load_klass(rlocals, rcx, tmp_load_klass, true); Label subtype; __ 
check_klass_subtype(rlocals, rax, rbcp, subtype); @@ -3774,8 +3772,7 @@ void TemplateTable::invokeinterface(int byte_no) { // Get receiver klass into rdx - also a null check __ restore_locals(); // restore r14 - __ null_check(rcx, oopDesc::klass_offset_in_bytes()); - __ load_klass(rdx, rcx, tmp_load_klass); + __ load_klass(rdx, rcx, tmp_load_klass, true); Label no_such_method; @@ -3993,7 +3990,8 @@ void TemplateTable::_new() { // The object is initialized before the header. If the object size is // zero, go directly to the header initialization. __ bind(initialize_object); - __ decrement(rdx, sizeof(oopDesc)); + int header_size = align_up(oopDesc::base_offset_in_bytes(), BytesPerLong); + __ decrement(rdx, header_size); __ jcc(Assembler::zero, initialize_header); // Initialize topmost object field, divide rdx by 8, check if odd and @@ -4015,15 +4013,15 @@ void TemplateTable::_new() { // initialize remaining object fields: rdx was a multiple of 8 { Label loop; __ bind(loop); - __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx); - NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx)); + __ movptr(Address(rax, rdx, Address::times_8, header_size - 1*oopSize), rcx); + NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, header_size - 2*oopSize), rcx)); __ decrement(rdx); __ jcc(Assembler::notZero, loop); } // initialize object header only. __ bind(initialize_header); - if (UseBiasedLocking) { + if (UseBiasedLocking || UseCompactObjectHeaders) { __ pop(rcx); // get saved klass back in the register. __ movptr(rbx, Address(rcx, Klass::prototype_header_offset())); __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx); @@ -4032,12 +4030,13 @@ void TemplateTable::_new() { (intptr_t)markWord::prototype().value()); // header __ pop(rcx); // get saved klass back in the register. } + if (!UseCompactObjectHeaders) { #ifdef _LP64 - __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code) - __ store_klass_gap(rax, rsi); // zero klass gap for compressed oops + __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code) + __ store_klass_gap(rax, rsi); // zero klass gap for compressed oops #endif - Register tmp_store_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); - __ store_klass(rax, rcx, tmp_store_klass); // klass + __ store_klass(rax, rcx, rscratch1); // klass + } { SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index eb064d50a02..89953671f19 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -1,7 +1,7 @@ // This project is a modified version of OpenJDK, licensed under GPL v2. // Modifications Copyright (C) 2025 ByteDance Inc. /* - * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1030,6 +1030,10 @@ enum Extended_Family { return true; } + constexpr static bool supports_recursive_lightweight_locking() { + return true; + } + // there are several insns to force cache line sync to memory which // we can use to ensure mapped non-volatile memory is up to date with // pending in-cache changes. 
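The templateTable::_new change above swaps sizeof(oopDesc) for align_up(oopDesc::base_offset_in_bytes(), BytesPerLong), because with compact headers the first field can start right after the 8-byte mark word. A standalone sketch of that computation follows; the concrete offsets are assumptions chosen for illustration, not values read out of the patch:

    #include <cstddef>
    #include <cstdio>

    // Same idea as HotSpot's align_up for a power-of-two alignment.
    static size_t align_up(size_t value, size_t alignment) {
      return (value + alignment - 1) & ~(alignment - 1);
    }

    int main() {
      const size_t BytesPerLong = 8;
      // Assumed header sizes (bytes before the first field), illustration only:
      size_t compact      = 8;   // klass folded into the 8-byte mark word
      size_t compressed   = 12;  // 8-byte mark + 4-byte narrow klass
      size_t uncompressed = 16;  // 8-byte mark + 8-byte Klass*

      // _new zeroes the body in 8-byte strides, so the header is rounded up first.
      printf("compact:      %zu\n", align_up(compact, BytesPerLong));       // 8
      printf("compressed:   %zu\n", align_up(compressed, BytesPerLong));    // 16
      printf("uncompressed: %zu\n", align_up(uncompressed, BytesPerLong));  // 16
      return 0;
    }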
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index 01005c73047..bac92b0ca85 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -721,7 +721,9 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { Label dummy_label; Label* code_stub = &dummy_label; if (!C->output()->in_scratch_emit_size()) { - code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset()); + C->output()->add_stub(stub); + code_stub = &stub->entry(); } __ relocate(relocInfo::poll_return_type); __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */); @@ -13695,15 +13697,16 @@ instruct RethrowException() // inlined locking and unlocking -instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{ +instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{ predicate(Compile::current()->use_rtm()); match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); + effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} ins_encode %{ + __ get_thread($thread$$Register); __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, $cx2$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register, _counters, _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); @@ -13711,20 +13714,22 @@ instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eD ins_pipe(pipe_slow); %} -instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ - predicate(!Compile::current()->use_rtm()); +instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{ + predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm()); match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP scr, USE_KILL box); + effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} ins_encode %{ + __ get_thread($thread$$Register); __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false); + $scr$$Register, noreg, noreg, $thread$$Register, _counters, NULL, NULL, NULL, false, false); %} ins_pipe(pipe_slow); %} instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ + predicate(LockingMode != LM_LIGHTWEIGHT); match(Set cr (FastUnlock object box)); effect(TEMP tmp, USE_KILL box); ins_cost(300); @@ -13735,7 +13740,31 @@ instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ ins_pipe(pipe_slow); %} +instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastLock object box)); + effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread); + ins_cost(300); + format %{ "FASTLOCK $object,$box\t! 
kills $box,$eax_reg,$tmp" %} + ins_encode %{ + __ get_thread($thread$$Register); + __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register); + %} + ins_pipe(pipe_slow); +%} +instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastUnlock object eax_reg)); + effect(TEMP tmp, USE_KILL eax_reg, TEMP thread); + ins_cost(300); + format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %} + ins_encode %{ + __ get_thread($thread$$Register); + __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register); + %} + ins_pipe(pipe_slow); +%} // ============================================================================ // Safepoint Instruction diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 654b92db5bf..eb566441cf8 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -998,7 +998,9 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const Label dummy_label; Label* code_stub = &dummy_label; if (!C->output()->in_scratch_emit_size()) { - code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset()); + C->output()->add_stub(stub); + code_stub = &stub->entry(); } __ relocate(relocInfo::poll_return_type); __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */); @@ -5190,6 +5192,7 @@ instruct loadKlass(rRegP dst, memory mem) // Load narrow Klass Pointer instruct loadNKlass(rRegN dst, memory mem) %{ + predicate(!UseCompactObjectHeaders); match(Set dst (LoadNKlass mem)); ins_cost(125); // XXX @@ -5200,6 +5203,21 @@ instruct loadNKlass(rRegN dst, memory mem) ins_pipe(ialu_reg_mem); // XXX %} +instruct loadNKlassCompactHeaders(rRegN dst, memory mem, rFlagsReg cr) +%{ + predicate(UseCompactObjectHeaders); + match(Set dst (LoadNKlass mem)); + effect(KILL cr); + ins_cost(125); // XXX + format %{ "movl $dst, $mem\t# compressed klass ptr" %} + ins_encode %{ + Register index = $mem$$index != 4 ? $mem$$index$$Register : noreg; + Address::ScaleFactor sf = (index != noreg) ? static_cast($mem$$scale) : Address::no_scale; + __ load_nklass_compact_c2($dst$$Register, $mem$$base$$Register, index, sf, $mem$$disp); + %} + ins_pipe(pipe_slow); // XXX +%} + // Load Float instruct loadF(regF dst, memory mem) %{ @@ -12225,6 +12243,7 @@ instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{ instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) %{ + predicate(!UseCompactObjectHeaders); match(Set cr (CmpN src (LoadNKlass mem))); format %{ "cmpl $mem, $src\t# compressed klass ptr" %} @@ -12923,7 +12942,7 @@ instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, format %{ "fastlock $object,$box\t! 
kills $box,$tmp,$scr,$cx1,$cx2" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, $cx2$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread, _counters, _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); @@ -12932,19 +12951,20 @@ instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, %} instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRegP scr, rRegP cx1) %{ - predicate(!Compile::current()->use_rtm()); + predicate(LockingMode != LM_LIGHTWEIGHT && !Compile::current()->use_rtm()); match(Set cr (FastLock object box)); effect(TEMP tmp, TEMP scr, TEMP cx1, USE_KILL box); ins_cost(300); format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, noreg, _counters, NULL, NULL, NULL, false, false); + $scr$$Register, $cx1$$Register, noreg, r15_thread, _counters, NULL, NULL, NULL, false, false); %} ins_pipe(pipe_slow); %} instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{ + predicate(LockingMode != LM_LIGHTWEIGHT); match(Set cr (FastUnlock object box)); effect(TEMP tmp, USE_KILL box); ins_cost(300); @@ -12955,6 +12975,30 @@ instruct cmpFastUnlock(rFlagsReg cr, rRegP object, rax_RegP box, rRegP tmp) %{ ins_pipe(pipe_slow); %} +instruct cmpFastLockLightweight(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI rax_reg, rRegP tmp) %{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastLock object box)); + effect(TEMP rax_reg, TEMP tmp, USE_KILL box); + ins_cost(300); + format %{ "fastlock $object,$box\t! kills $box,$rax_reg,$tmp" %} + ins_encode %{ + __ fast_lock_lightweight($object$$Register, $box$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread); + %} + ins_pipe(pipe_slow); +%} + +instruct cmpFastUnlockLightweight(rFlagsReg cr, rRegP object, rax_RegP rax_reg, rRegP tmp) %{ + predicate(LockingMode == LM_LIGHTWEIGHT); + match(Set cr (FastUnlock object rax_reg)); + effect(TEMP tmp, USE_KILL rax_reg); + ins_cost(300); + format %{ "fastunlock $object,$rax_reg\t! 
kills $rax_reg,$tmp" %} + ins_encode %{ + __ fast_unlock_lightweight($object$$Register, $rax_reg$$Register, $tmp$$Register, r15_thread); + %} + ins_pipe(pipe_slow); +%} + // ============================================================================ // Safepoint Instructions diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp index 333857afa69..b5f0a830a14 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.cpp +++ b/src/hotspot/cpu/zero/vm_version_zero.cpp @@ -115,6 +115,11 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); } + if ((LockingMode != LM_LEGACY) && (LockingMode != LM_MONITOR)) { + warning("Unsupported locking mode for this CPU."); + FLAG_SET_DEFAULT(LockingMode, LM_LEGACY); + } + // Not implemented UNSUPPORTED_OPTION(CriticalJNINatives); UNSUPPORTED_OPTION(UseCompiler); diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp index b73669f6450..b5011fbf87d 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp @@ -333,7 +333,7 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) { oop lockee = monitor->obj(); markWord disp = lockee->mark().set_unlocked(); monitor->lock()->set_displaced_header(disp); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); if (call_vm || lockee->cas_set_mark(markWord::from_pointer(monitor), disp) != disp) { // Is it simple recursive case? if (!call_vm && thread->is_lock_owned((address) disp.clear_lock_bits().to_pointer())) { diff --git a/src/hotspot/os/aix/os_aix.inline.hpp b/src/hotspot/os/aix/os_aix.inline.hpp index c0ec5faa564..489ae5a7e6e 100644 --- a/src/hotspot/os/aix/os_aix.inline.hpp +++ b/src/hotspot/os/aix/os_aix.inline.hpp @@ -47,4 +47,8 @@ inline bool os::must_commit_stack_guard_pages() { inline void os::map_stack_shadow_pages(address sp) { } +// Trim-native support, stubbed out for now, may be enabled later +inline bool os::can_trim_native_heap() { return false; } +inline bool os::trim_native_heap(os::size_change_t* rss_change) { return false; } + #endif // OS_AIX_OS_AIX_INLINE_HPP diff --git a/src/hotspot/os/bsd/os_bsd.inline.hpp b/src/hotspot/os/bsd/os_bsd.inline.hpp index 0b9fc65b0d8..f59f48e889f 100644 --- a/src/hotspot/os/bsd/os_bsd.inline.hpp +++ b/src/hotspot/os/bsd/os_bsd.inline.hpp @@ -51,4 +51,8 @@ inline bool os::must_commit_stack_guard_pages() { inline void os::map_stack_shadow_pages(address sp) { } +// Trim-native support, stubbed out for now, may be enabled later +inline bool os::can_trim_native_heap() { return false; } +inline bool os::trim_native_heap(os::size_change_t* rss_change) { return false; } + #endif // OS_BSD_OS_BSD_INLINE_HPP diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp index da9212dfe45..c4b964a87a4 100644 --- a/src/hotspot/os/linux/os_linux.cpp +++ b/src/hotspot/os/linux/os_linux.cpp @@ -5500,4 +5500,33 @@ int os::socket_available(int fd, jint *pbytes) { // is expected to return 0 on failure and 1 on success to the jdk. return (ret < 0) ? 
0 : 1; } + +bool os::trim_native_heap(os::size_change_t* rss_change) { +#ifdef __GLIBC__ + os::Linux::meminfo_t info1; + os::Linux::meminfo_t info2; + + bool have_info1 = rss_change != nullptr && + os::Linux::query_process_memory_info(&info1); + ::malloc_trim(0); + bool have_info2 = rss_change != nullptr && have_info1 && + os::Linux::query_process_memory_info(&info2); + ssize_t delta = (ssize_t) -1; + if (rss_change != nullptr) { + if (have_info1 && have_info2 && + info1.vmrss != -1 && info2.vmrss != -1 && + info1.vmswap != -1 && info2.vmswap != -1) { + // Note: query_process_memory_info returns values in K + rss_change->before = (info1.vmrss + info1.vmswap) * K; + rss_change->after = (info2.vmrss + info2.vmswap) * K; + } else { + rss_change->after = rss_change->before = SIZE_MAX; + } + } + + return true; +#else + return false; // musl +#endif +} #endif diff --git a/src/hotspot/os/linux/os_linux.inline.hpp b/src/hotspot/os/linux/os_linux.inline.hpp index b37eb917a46..07af39cd4e3 100644 --- a/src/hotspot/os/linux/os_linux.inline.hpp +++ b/src/hotspot/os/linux/os_linux.inline.hpp @@ -142,4 +142,13 @@ inline int os::unlink(const char *path) { } #endif +// Trim-native support +inline bool os::can_trim_native_heap() { +#ifdef __GLIBC__ + return true; +#else + return false; // musl +#endif +} + #endif // OS_LINUX_OS_LINUX_INLINE_HPP diff --git a/src/hotspot/os/linux/trimCHeapDCmd.cpp b/src/hotspot/os/linux/trimCHeapDCmd.cpp index ee93ac5e8c8..33dd6f3a5bd 100644 --- a/src/hotspot/os/linux/trimCHeapDCmd.cpp +++ b/src/hotspot/os/linux/trimCHeapDCmd.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2021 SAP SE. All rights reserved. - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022 SAP SE. All rights reserved. + * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,56 +24,29 @@ */ #include "precompiled.hpp" -#include "logging/log.hpp" -#include "runtime/os.hpp" +#include "runtime/os.inline.hpp" +#include "trimCHeapDCmd.hpp" #include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/ostream.hpp" -#include "trimCHeapDCmd.hpp" #include void TrimCLibcHeapDCmd::execute(DCmdSource source, TRAPS) { -#ifdef __GLIBC__ - stringStream ss_report(1024); // Note: before calling trim - - os::Linux::meminfo_t info1; - os::Linux::meminfo_t info2; - // Query memory before... - bool have_info1 = os::Linux::query_process_memory_info(&info1); - - _output->print_cr("Attempting trim..."); - ::malloc_trim(0); - _output->print_cr("Done."); - - // ...and after trim. 
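The glibc-only os::trim_native_heap() added above is essentially malloc_trim(0) bracketed by two RSS+swap samples. A rough standalone equivalent that parses /proc/self/status directly (Linux/glibc only, illustrative rather than the VM's query_process_memory_info() path):

    #include <malloc.h>   // glibc malloc_trim
    #include <cstdio>
    #include <cstring>

    // Returns the value (in kB) of a "Key:   <n> kB" line from /proc/self/status, or -1.
    static long read_status_kb(const char* key) {
      FILE* f = fopen("/proc/self/status", "r");
      if (f == nullptr) return -1;
      char line[256];
      long value = -1;
      while (fgets(line, sizeof(line), f) != nullptr) {
        if (strncmp(line, key, strlen(key)) == 0) {
          sscanf(line + strlen(key), "%ld", &value);
          break;
        }
      }
      fclose(f);
      return value;
    }

    int main() {
      long rss1 = read_status_kb("VmRSS:"), swap1 = read_status_kb("VmSwap:");
      malloc_trim(0);   // ask glibc to return unused arena pages to the kernel
      long rss2 = read_status_kb("VmRSS:"), swap2 = read_status_kb("VmSwap:");
      if (rss1 < 0 || swap1 < 0 || rss2 < 0 || swap2 < 0) {
        printf("(no details available)\n");
      } else {
        printf("RSS+Swap before: %ld K, after: %ld K\n", rss1 + swap1, rss2 + swap2);
      }
      return 0;
    }

Degrading to a "no details" report when a sample is missing matches the behavior the rewritten DCmd keeps below.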
- bool have_info2 = os::Linux::query_process_memory_info(&info2); - - // Print report both to output stream as well to UL - bool wrote_something = false; - if (have_info1 && have_info2) { - if (info1.vmsize != -1 && info2.vmsize != -1) { - ss_report.print_cr("Virtual size before: " SSIZE_FORMAT "k, after: " SSIZE_FORMAT "k, (" SSIZE_FORMAT "k)", - info1.vmsize, info2.vmsize, (info2.vmsize - info1.vmsize)); - wrote_something = true; - } - if (info1.vmrss != -1 && info2.vmrss != -1) { - ss_report.print_cr("RSS before: " SSIZE_FORMAT "k, after: " SSIZE_FORMAT "k, (" SSIZE_FORMAT "k)", - info1.vmrss, info2.vmrss, (info2.vmrss - info1.vmrss)); - wrote_something = true; - } - if (info1.vmswap != -1 && info2.vmswap != -1) { - ss_report.print_cr("Swap before: " SSIZE_FORMAT "k, after: " SSIZE_FORMAT "k, (" SSIZE_FORMAT "k)", - info1.vmswap, info2.vmswap, (info2.vmswap - info1.vmswap)); - wrote_something = true; + if (os::can_trim_native_heap()) { + os::size_change_t sc; + if (os::trim_native_heap(&sc)) { + _output->print("Trim native heap: "); + if (sc.after != SIZE_MAX) { + const size_t delta = sc.after < sc.before ? (sc.before - sc.after) : (sc.after - sc.before); + const char sign = sc.after < sc.before ? '-' : '+'; + _output->print_cr("RSS+Swap: " PROPERFMT "->" PROPERFMT " (%c" PROPERFMT ")", + PROPERFMTARGS(sc.before), PROPERFMTARGS(sc.after), sign, PROPERFMTARGS(delta)); + } else { + _output->print_cr("(no details available)."); + } } + } else { + _output->print_cr("Not available."); } - if (!wrote_something) { - ss_report.print_raw("No details available."); - } - - _output->print_raw(ss_report.base()); - log_info(os)("malloc_trim:\n%s", ss_report.base()); -#else - _output->print_cr("Not available."); -#endif } diff --git a/src/hotspot/os/windows/os_windows.inline.hpp b/src/hotspot/os/windows/os_windows.inline.hpp index 8dab6f44180..3b4d193d431 100644 --- a/src/hotspot/os/windows/os_windows.inline.hpp +++ b/src/hotspot/os/windows/os_windows.inline.hpp @@ -93,4 +93,8 @@ inline void os::PlatformMonitor::notify_all() { WakeAllConditionVariable(&_cond); } +// Trim-native support, stubbed out for now, may be enabled later +inline bool os::can_trim_native_heap() { return false; } +inline bool os::trim_native_heap(os::size_change_t* rss_change) { return false; } + #endif // OS_WINDOWS_OS_WINDOWS_INLINE_HPP diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 62cff4c7505..2bb294f20ea 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -880,6 +880,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_OpLoadKlass + case lir_load_klass: + { + LIR_OpLoadKlass* opLoadKlass = op->as_OpLoadKlass(); + assert(opLoadKlass != NULL, "must be"); + + do_input(opLoadKlass->_obj); + do_output(opLoadKlass->_result); + if (opLoadKlass->_info) do_info(opLoadKlass->_info); + break; + } + + // LIR_OpProfileCall: case lir_profile_call: { assert(op->as_OpProfileCall() != NULL, "must be"); @@ -1049,6 +1062,10 @@ void LIR_OpLock::emit_code(LIR_Assembler* masm) { } } +void LIR_OpLoadKlass::emit_code(LIR_Assembler* masm) { + masm->emit_load_klass(this); +} + #ifdef ASSERT void LIR_OpAssert::emit_code(LIR_Assembler* masm) { masm->emit_assert(this); @@ -1973,6 +1990,11 @@ void LIR_OpLock::print_instr(outputStream* out) const { out->print("[lbl:" INTPTR_FORMAT "]", p2i(stub()->entry())); } +void LIR_OpLoadKlass::print_instr(outputStream* out) const { + obj()->print(out); out->print(" "); + result_opr()->print(out); out->print(" "); +} + #ifdef ASSERT 
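Adding lir_load_klass touches three places that are easy to miss: the visitor must register the operands, the op needs an as_OpLoadKlass() downcast, and emit_code() must dispatch to the assembler. A toy, self-contained model of that shape (all names invented, not the C1 API):

    #include <cstdio>

    struct ToyAssembler {
      void emit_load_klass(int obj_operand, int* result_operand) {
        *result_operand = obj_operand + 1;  // stand-in for "decode klass from header"
        printf("emit_load_klass: obj=%d result=%d\n", obj_operand, *result_operand);
      }
    };

    struct ToyOp {
      virtual ~ToyOp() {}
      virtual void visit() = 0;                       // cf. LIR_OpVisitState::visit
      virtual void emit_code(ToyAssembler* masm) = 0; // cf. LIR_Op::emit_code
    };

    struct ToyOpLoadKlass : ToyOp {
      int  _obj;       // input operand
      int  _result;    // output operand
      bool _has_info;  // models the optional CodeEmitInfo (implicit null check)

      ToyOpLoadKlass(int obj, bool has_info)
        : _obj(obj), _result(0), _has_info(has_info) {}

      void visit() override {
        printf("do_input(obj)\n");
        printf("do_output(result)\n");
        if (_has_info) printf("do_info(info)\n");  // only when a null check is attached
      }
      void emit_code(ToyAssembler* masm) override {
        masm->emit_load_klass(_obj, &_result);     // cf. masm->emit_load_klass(this)
      }
    };

    int main() {
      ToyAssembler masm;
      ToyOpLoadKlass op(41, /*has_info=*/true);
      op.visit();          // what the register allocator's visit pass would do
      op.emit_code(&masm); // code emission
      return 0;
    }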
void LIR_OpAssert::print_instr(outputStream* out) const { print_condition(out, condition()); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index 2342e6117eb..1a3261eb266 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -877,6 +877,7 @@ class LIR_OpUpdateCRC32; class LIR_OpLock; class LIR_OpTypeCheck; class LIR_OpCompareAndSwap; +class LIR_OpLoadKlass; class LIR_OpProfileCall; class LIR_OpProfileType; #ifdef ASSERT @@ -922,6 +923,7 @@ enum LIR_Code { , lir_roundfp , lir_safepoint , lir_unwind + , lir_load_klass , end_op1 , begin_op2 , lir_cmp @@ -1132,6 +1134,7 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } virtual LIR_OpCompareAndSwap* as_OpCompareAndSwap() { return NULL; } + virtual LIR_OpLoadKlass* as_OpLoadKlass() { return NULL; } virtual LIR_OpProfileCall* as_OpProfileCall() { return NULL; } virtual LIR_OpProfileType* as_OpProfileType() { return NULL; } #ifdef ASSERT @@ -1804,6 +1807,23 @@ class LIR_OpLock: public LIR_Op { void print_instr(outputStream* out) const PRODUCT_RETURN; }; +class LIR_OpLoadKlass: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _obj; + public: + LIR_OpLoadKlass(LIR_Opr obj, LIR_Opr result, CodeEmitInfo* info) + : LIR_Op(lir_load_klass, result, info) + , _obj(obj) + {} + + LIR_Opr obj() const { return _obj; } + + virtual LIR_OpLoadKlass* as_OpLoadKlass() { return this; } + virtual void emit_code(LIR_Assembler* masm); + void print_instr(outputStream* out) const PRODUCT_RETURN; +}; class LIR_OpDelay: public LIR_Op { friend class LIR_OpVisitState; @@ -2249,6 +2269,9 @@ class LIR_List: public CompilationResourceObj { void xadd(LIR_Opr src, LIR_Opr add, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xadd, src, add, res, tmp)); } void xchg(LIR_Opr src, LIR_Opr set, LIR_Opr res, LIR_Opr tmp) { append(new LIR_Op2(lir_xchg, src, set, res, tmp)); } + + void load_klass(LIR_Opr obj, LIR_Opr result, CodeEmitInfo* info) { append(new LIR_OpLoadKlass(obj, result, info)); } + #ifdef ASSERT void lir_assert(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, const char* msg, bool halt) { append(new LIR_OpAssert(condition, opr1, opr2, msg, halt)); } #endif diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index 02c79160d04..3bf298f40b7 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -197,6 +197,7 @@ class LIR_Assembler: public CompilationResourceObj { void emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null); void emit_compare_and_swap(LIR_OpCompareAndSwap* op); void emit_lock(LIR_OpLock* op); + void emit_load_klass(LIR_OpLoadKlass* op); void emit_call(LIR_OpJavaCall* op); void emit_rtcall(LIR_OpRTCall* op); void emit_profile_call(LIR_OpProfileCall* op); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 077f10ce32c..5bff671f574 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -638,7 +638,7 @@ void LIRGenerator::monitor_exit(LIR_Opr object, LIR_Opr lock, LIR_Opr new_hdr, L // setup registers LIR_Opr hdr = lock; lock = new_hdr; - CodeStub* slow_path = new MonitorExitStub(lock, UseFastLocking, monitor_no); + CodeStub* slow_path = new MonitorExitStub(lock, LockingMode != LM_MONITOR, monitor_no); __ 
load_stack_address_monitor(monitor_no, lock); __ unlock_object(hdr, object, lock, scratch, slow_path); } @@ -1257,13 +1257,17 @@ void LIRGenerator::do_isInstance(Intrinsic* x) { __ move(call_result, result); } +void LIRGenerator::load_klass(LIR_Opr obj, LIR_Opr klass, CodeEmitInfo* null_check_info) { + __ load_klass(obj, klass, null_check_info); +} + // Example: object.getClass () void LIRGenerator::do_getClass(Intrinsic* x) { assert(x->number_of_arguments() == 1, "wrong type"); LIRItem rcvr(x->argument_at(0), this); rcvr.load_item(); - LIR_Opr temp = new_register(T_METADATA); + LIR_Opr temp = new_register(T_ADDRESS); LIR_Opr result = rlock_result(x); // need to perform the null check on the rcvr @@ -1272,10 +1276,9 @@ void LIRGenerator::do_getClass(Intrinsic* x) { info = state_for(x); } - // FIXME T_ADDRESS should actually be T_METADATA but it can't because the - // meaning of these two is mixed up (see JDK-8026837). - __ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_ADDRESS), temp, info); - __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_ADDRESS), temp); + LIR_Opr klass = new_register(T_METADATA); + load_klass(rcvr.result(), klass, info); + __ move_wide(new LIR_Address(klass, in_bytes(Klass::java_mirror_offset()), T_ADDRESS), temp); // mirror = ((OopHandle)mirror)->resolve(); access_load(IN_NATIVE, T_OBJECT, LIR_OprFact::address(new LIR_Address(temp, T_OBJECT)), result); @@ -1355,7 +1358,7 @@ void LIRGenerator::do_getObjectSize(Intrinsic* x) { value.load_item(); LIR_Opr klass = new_register(T_METADATA); - __ move(new LIR_Address(value.result(), oopDesc::klass_offset_in_bytes(), T_ADDRESS), klass, NULL); + load_klass(value.result(), klass, NULL); LIR_Opr layout = new_register(T_INT); __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); @@ -3765,7 +3768,7 @@ LIR_Opr LIRGenerator::mask_boolean(LIR_Opr array, LIR_Opr value, CodeEmitInfo*& __ logical_and(value, LIR_OprFact::intConst(1), value_fixed); } LIR_Opr klass = new_register(T_METADATA); - __ move(new LIR_Address(array, oopDesc::klass_offset_in_bytes(), T_ADDRESS), klass, null_check_info); + load_klass(array, klass, null_check_info); null_check_info = NULL; LIR_Opr layout = new_register(T_INT); __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index 67c986cb4e7..3fcaeb58b9d 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -239,6 +239,8 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void move_to_phi(PhiResolver* resolver, Value cur_val, Value sux_val); void move_to_phi(ValueStack* cur_state); + void load_klass(LIR_Opr obj, LIR_Opr klass, CodeEmitInfo* null_check_info); + // platform dependent LIR_Opr getThreadPointer(); diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index fb178432d8b..c4acf3d452a 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -699,7 +699,7 @@ JRT_END JRT_BLOCK_ENTRY(void, Runtime1::monitorenter(JavaThread* current, oopDesc* obj, BasicObjectLock* lock)) NOT_PRODUCT(_monitorenter_slowcase_cnt++;) - if (!UseFastLocking) { + if (LockingMode == LM_MONITOR) { lock->set_obj(obj); } assert(obj == lock->obj(), "must match"); diff --git a/src/hotspot/share/cds/archiveBuilder.cpp b/src/hotspot/share/cds/archiveBuilder.cpp index 
699926fcfe0..5c5a56c0c96 100644 --- a/src/hotspot/share/cds/archiveBuilder.cpp +++ b/src/hotspot/share/cds/archiveBuilder.cpp @@ -39,6 +39,7 @@ #include "memory/memRegion.hpp" #include "memory/resourceArea.hpp" #include "oops/instanceKlass.hpp" +#include "oops/klass.inline.hpp" #include "oops/objArrayKlass.hpp" #include "oops/oopHandle.inline.hpp" #include "runtime/arguments.hpp" @@ -731,6 +732,13 @@ void ArchiveBuilder::make_klasses_shareable() { for (int i = 0; i < klasses()->length(); i++) { Klass* k = klasses()->at(i); k->remove_java_mirror(); +#ifdef _LP64 + if (UseCompactObjectHeaders) { + Klass* requested_k = to_requested(k); + narrowKlass nk = CompressedKlassPointers::encode_not_null(requested_k, _requested_static_archive_bottom); + k->set_prototype_header(markWord::prototype().set_narrow_klass(nk)); + } +#endif //_LP64 if (k->is_objArray_klass()) { // InstanceKlass and TypeArrayKlass will in turn call remove_unshareable_info // on their array classes. @@ -772,11 +780,17 @@ uintx ArchiveBuilder::any_to_offset(address p) const { // Update a Java object to point its Klass* to the new location after // shared archive has been compacted. void ArchiveBuilder::relocate_klass_ptr(oop o) { +#ifdef _LP64 assert(DumpSharedSpaces, "sanity"); Klass* k = get_relocated_klass(o->klass()); Klass* requested_k = to_requested(k); narrowKlass nk = CompressedKlassPointers::encode_not_null(requested_k, _requested_static_archive_bottom); - o->set_narrow_klass(nk); + if (UseCompactObjectHeaders) { + o->set_mark(o->mark().set_narrow_klass(nk)); + } else { + o->set_narrow_klass(nk); + } +#endif // _LP64 } // RelocateBufferToRequested --- Relocate all the pointers in rw/ro, diff --git a/src/hotspot/share/cds/filemap.cpp b/src/hotspot/share/cds/filemap.cpp index c21625b7378..9c2405e275b 100644 --- a/src/hotspot/share/cds/filemap.cpp +++ b/src/hotspot/share/cds/filemap.cpp @@ -217,6 +217,7 @@ void FileMapHeader::populate(FileMapInfo* mapinfo, size_t core_region_alignment) _core_region_alignment = core_region_alignment; _obj_alignment = ObjectAlignmentInBytes; _compact_strings = CompactStrings; + _compact_headers = UseCompactObjectHeaders; if (HeapShared::is_heap_object_archiving_allowed()) { _narrow_oop_mode = CompressedOops::mode(); _narrow_oop_base = CompressedOops::base(); @@ -279,6 +280,7 @@ void FileMapHeader::print(outputStream* st) { st->print_cr("- narrow_oop_base: " INTPTR_FORMAT, p2i(_narrow_oop_base)); st->print_cr("- narrow_oop_shift %d", _narrow_oop_shift); st->print_cr("- compact_strings: %d", _compact_strings); + st->print_cr("- compact_headers: %d", _compact_headers); st->print_cr("- max_heap_size: " UINTX_FORMAT, _max_heap_size); st->print_cr("- narrow_oop_mode: %d", _narrow_oop_mode); st->print_cr("- narrow_klass_shift: %d", _narrow_klass_shift); @@ -2285,6 +2287,14 @@ bool FileMapHeader::validate() { return false; } + if (compact_headers() != UseCompactObjectHeaders) { + log_info(cds)("The shared archive file's UseCompactObjectHeaders setting (%s)" + " does not equal the current UseCompactObjectHeaders setting (%s).", + _compact_headers ? "enabled" : "disabled", + UseCompactObjectHeaders ? 
"enabled" : "disabled"); + return false; + } + if (!_use_optimized_module_handling) { MetaspaceShared::disable_optimized_module_handling(); log_info(cds)("optimized module handling: disabled because archive was created without optimized module handling"); diff --git a/src/hotspot/share/cds/filemap.hpp b/src/hotspot/share/cds/filemap.hpp index 9dc2b2309f2..ec968c48d54 100644 --- a/src/hotspot/share/cds/filemap.hpp +++ b/src/hotspot/share/cds/filemap.hpp @@ -193,6 +193,7 @@ class FileMapHeader: private CDSFileMapHeaderBase { address _narrow_oop_base; // compressed oop encoding base int _narrow_oop_shift; // compressed oop encoding shift bool _compact_strings; // value of CompactStrings + bool _compact_headers; // value of UseCompactObjectHeaders uintx _max_heap_size; // java max heap size during dumping CompressedOops::Mode _narrow_oop_mode; // compressed oop encoding mode int _narrow_klass_shift; // save narrow klass base and shift @@ -258,6 +259,7 @@ class FileMapHeader: private CDSFileMapHeaderBase { address narrow_oop_base() const { return _narrow_oop_base; } int narrow_oop_shift() const { return _narrow_oop_shift; } bool compact_strings() const { return _compact_strings; } + bool compact_headers() const { return _compact_headers; } uintx max_heap_size() const { return _max_heap_size; } CompressedOops::Mode narrow_oop_mode() const { return _narrow_oop_mode; } int narrow_klass_shift() const { return _narrow_klass_shift; } diff --git a/src/hotspot/share/cds/heapShared.cpp b/src/hotspot/share/cds/heapShared.cpp index 3e50dd7506b..a89f7cf6a78 100644 --- a/src/hotspot/share/cds/heapShared.cpp +++ b/src/hotspot/share/cds/heapShared.cpp @@ -58,6 +58,7 @@ #include "runtime/init.hpp" #include "runtime/java.hpp" #include "runtime/javaCalls.hpp" +#include "runtime/safepoint.hpp" #include "runtime/safepointVerifiers.hpp" #include "utilities/bitMap.inline.hpp" #include "utilities/copy.hpp" @@ -275,7 +276,16 @@ oop HeapShared::archive_heap_object(oop obj) { // identity_hash for all shared objects, so they are less likely to be written // into during run time, increasing the potential of memory sharing. 
int hash_original = obj->identity_hash(); - archived_oop->set_mark(markWord::prototype().copy_set_hash(hash_original)); + if (UseCompactObjectHeaders) { + markWord mark = obj->mark(); + if (mark.has_displaced_mark_helper()) { + mark = mark.displaced_mark_helper(); + } + narrowKlass nklass = mark.narrow_klass(); + archived_oop->set_mark(markWord::prototype().copy_set_hash(hash_original) LP64_ONLY(.set_narrow_klass(nklass))); + } else { + archived_oop->set_mark(markWord::prototype().copy_set_hash(hash_original)); + } assert(archived_oop->mark().is_unlocked(), "sanity"); DEBUG_ONLY(int hash_archived = archived_oop->identity_hash()); @@ -418,10 +428,14 @@ void HeapShared::copy_roots() { // This is copied from MemAllocator::finish if (UseBiasedLocking) { oopDesc::set_mark(mem, k->prototype_header()); + } else if (UseCompactObjectHeaders) { + oopDesc::release_set_mark(mem, k->prototype_header()); } else { oopDesc::set_mark(mem, markWord::prototype()); } - oopDesc::release_set_klass(mem, k); + if (!UseCompactObjectHeaders) { + oopDesc::release_set_klass(mem, k); + } } { // This is copied from ObjArrayAllocator::initialize diff --git a/src/hotspot/share/ci/ciKlass.cpp b/src/hotspot/share/ci/ciKlass.cpp index be9f903ff49..29185e44906 100644 --- a/src/hotspot/share/ci/ciKlass.cpp +++ b/src/hotspot/share/ci/ciKlass.cpp @@ -249,3 +249,23 @@ const char* ciKlass::external_name() const { return get_Klass()->external_name(); ) } + +// ------------------------------------------------------------------ +// ciKlass::prototype_header_offset +juint ciKlass::prototype_header_offset() { + assert(is_loaded(), "must be loaded"); + + VM_ENTRY_MARK; + Klass* this_klass = get_Klass(); + return in_bytes(this_klass->prototype_header_offset()); +} + +// ------------------------------------------------------------------ +// ciKlass::prototype_header +uintptr_t ciKlass::prototype_header() { + assert(is_loaded(), "must be loaded"); + + VM_ENTRY_MARK; + Klass* this_klass = get_Klass(); + return (uintptr_t)this_klass->prototype_header().to_pointer(); +} diff --git a/src/hotspot/share/ci/ciKlass.hpp b/src/hotspot/share/ci/ciKlass.hpp index 467284b7cde..e180ee5b99c 100644 --- a/src/hotspot/share/ci/ciKlass.hpp +++ b/src/hotspot/share/ci/ciKlass.hpp @@ -129,6 +129,9 @@ class ciKlass : public ciType { void print_name_on(outputStream* st); const char* external_name() const; + + juint prototype_header_offset(); + uintptr_t prototype_header(); }; #endif // SHARE_CI_CIKLASS_HPP diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp index 5835c5c1f1f..e438f402d13 100644 --- a/src/hotspot/share/classfile/classFileParser.cpp +++ b/src/hotspot/share/classfile/classFileParser.cpp @@ -2112,7 +2112,11 @@ void ClassFileParser::ClassAnnotationCollector::apply_to(InstanceKlass* ik) { ik->set_has_value_based_class_annotation(); if (DiagnoseSyncOnValueBasedClasses) { ik->set_is_value_based(); - ik->set_prototype_header(markWord::prototype()); + if (UseCompactObjectHeaders) { + ik->set_prototype_header(markWord::prototype() LP64_ONLY(.set_klass(ik))); + } else { + ik->set_prototype_header(markWord::prototype()); + } } } } diff --git a/src/hotspot/share/classfile/stringTable.cpp b/src/hotspot/share/classfile/stringTable.cpp index ab74df425a7..2da3e9c9508 100644 --- a/src/hotspot/share/classfile/stringTable.cpp +++ b/src/hotspot/share/classfile/stringTable.cpp @@ -49,6 +49,7 @@ #include "runtime/safepointVerifiers.hpp" #include "runtime/timerTrace.hpp" #include 
"runtime/interfaceSupport.inline.hpp" +#include "runtime/trimNativeHeap.hpp" #include "services/diagnosticCommand.hpp" #include "utilities/concurrentHashTable.inline.hpp" #include "utilities/concurrentHashTableTasks.inline.hpp" @@ -148,11 +149,9 @@ class StringTableLookupJchar : StackObj { uintx get_hash() const { return _hash; } - bool equals(WeakHandle* value, bool* is_dead) { + bool equals(WeakHandle* value) { oop val_oop = value->peek(); if (val_oop == NULL) { - // dead oop, mark this hash dead for cleaning - *is_dead = true; return false; } bool equals = java_lang_String::equals(val_oop, _str, _len); @@ -163,6 +162,10 @@ class StringTableLookupJchar : StackObj { _found = Handle(_thread, value->resolve()); return true; } + bool is_dead(WeakHandle* value) { + oop val_oop = value->peek(); + return val_oop == NULL; + } }; class StringTableLookupOop : public StackObj { @@ -180,11 +183,9 @@ class StringTableLookupOop : public StackObj { return _hash; } - bool equals(WeakHandle* value, bool* is_dead) { + bool equals(WeakHandle* value) { oop val_oop = value->peek(); if (val_oop == NULL) { - // dead oop, mark this hash dead for cleaning - *is_dead = true; return false; } bool equals = java_lang_String::equals(_find(), val_oop); @@ -195,6 +196,11 @@ class StringTableLookupOop : public StackObj { _found = Handle(_thread, value->resolve()); return true; } + + bool is_dead(WeakHandle* value) { + oop val_oop = value->peek(); + return val_oop == NULL; + } }; static size_t ceil_log2(size_t val) { @@ -429,6 +435,7 @@ void StringTable::clean_dead_entries(JavaThread* jt) { StringTableDeleteCheck stdc; StringTableDoDelete stdd; + NativeHeapTrimmer::SuspendMark sm("stringtable"); { TraceTime timer("Clean", TRACETIME_LOG(Debug, stringtable, perf)); while(bdt.do_task(jt, stdc, stdd)) { diff --git a/src/hotspot/share/classfile/symbolTable.cpp b/src/hotspot/share/classfile/symbolTable.cpp index a321d94bbd2..7d33905216f 100644 --- a/src/hotspot/share/classfile/symbolTable.cpp +++ b/src/hotspot/share/classfile/symbolTable.cpp @@ -37,6 +37,7 @@ #include "runtime/atomic.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/timerTrace.hpp" +#include "runtime/trimNativeHeap.hpp" #include "services/diagnosticCommand.hpp" #include "utilities/concurrentHashTable.inline.hpp" #include "utilities/concurrentHashTableTasks.inline.hpp" @@ -371,7 +372,11 @@ class SymbolTableLookup : StackObj { uintx get_hash() const { return _hash; } - bool equals(Symbol** value, bool* is_dead) { + // Note: When equals() returns "true", the symbol's refcount is incremented. This is + // needed to ensure that the symbol is kept alive before equals() returns to the caller, + // so that another thread cannot clean the symbol up concurrently. The caller is + // responsible for decrementing the refcount, when the symbol is no longer needed. 
+ bool equals(Symbol** value) { assert(value != NULL, "expected valid value"); assert(*value != NULL, "value should point to a symbol"); Symbol *sym = *value; @@ -381,14 +386,15 @@ class SymbolTableLookup : StackObj { return true; } else { assert(sym->refcount() == 0, "expected dead symbol"); - *is_dead = true; return false; } } else { - *is_dead = (sym->refcount() == 0); return false; } } + bool is_dead(Symbol** value) { + return (*value)->refcount() == 0; + } }; class SymbolTableGet : public StackObj { @@ -696,6 +702,7 @@ void SymbolTable::clean_dead_entries(JavaThread* jt) { SymbolTableDeleteCheck stdc; SymbolTableDoDelete stdd; + NativeHeapTrimmer::SuspendMark sm("symboltable"); { TraceTime timer("Clean", TRACETIME_LOG(Debug, symboltable, perf)); while (bdt.do_task(jt, stdc, stdd)) { diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp index 3d4a5d4da97..9ebc2614033 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.cpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.cpp @@ -82,6 +82,7 @@ #include "gc/shared/locationPrinter.inline.hpp" #include "gc/shared/oopStorageParState.hpp" #include "gc/shared/preservedMarks.inline.hpp" +#include "gc/shared/slidingForwarding.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "gc/shared/referenceProcessor.inline.hpp" #include "gc/shared/taskTerminator.hpp" @@ -1774,6 +1775,8 @@ jint G1CollectedHeap::initialize() { G1InitLogger::print(); + SlidingForwarding::initialize(heap_rs.region(), HeapRegion::GrainWords); + return JNI_OK; } diff --git a/src/hotspot/share/gc/g1/g1FullCollector.cpp b/src/hotspot/share/gc/g1/g1FullCollector.cpp index 0522528ebef..1ef680218f6 100644 --- a/src/hotspot/share/gc/g1/g1FullCollector.cpp +++ b/src/hotspot/share/gc/g1/g1FullCollector.cpp @@ -40,6 +40,7 @@ #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/preservedMarks.hpp" #include "gc/shared/referenceProcessor.hpp" +#include "gc/shared/slidingForwarding.hpp" #include "gc/shared/verifyOption.hpp" #include "gc/shared/weakProcessor.inline.hpp" #include "gc/shared/workerPolicy.hpp" @@ -201,11 +202,15 @@ void G1FullCollector::collect() { // Don't add any more derived pointers during later phases deactivate_derived_pointers(); + SlidingForwarding::begin(); + phase2_prepare_compaction(); phase3_adjust_pointers(); phase4_do_compaction(); + + SlidingForwarding::end(); } void G1FullCollector::complete_collection() { @@ -309,6 +314,7 @@ void G1FullCollector::phase1_mark_live_objects() { void G1FullCollector::phase2_prepare_compaction() { GCTraceTime(Info, gc, phases) info("Phase 2: Prepare for compaction", scope()->timer()); + G1FullGCPrepareTask task(this); run_task(&task); diff --git a/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp b/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp index 273db274a56..32dbb579b74 100644 --- a/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCAdjustTask.cpp @@ -39,10 +39,11 @@ #include "memory/iterator.inline.hpp" #include "runtime/atomic.hpp" +template class G1AdjustLiveClosure : public StackObj { - G1AdjustClosure* _adjust_closure; + G1AdjustClosure* _adjust_closure; public: - G1AdjustLiveClosure(G1AdjustClosure* cl) : + G1AdjustLiveClosure(G1AdjustClosure* cl) : _adjust_closure(cl) { } size_t apply(oop object) { @@ -61,7 +62,17 @@ class G1AdjustRegionClosure : public HeapRegionClosure { _worker_id(worker_id) { } bool do_heap_region(HeapRegion* r) { - G1AdjustClosure cl(_collector); + if (UseAltGCForwarding) { + return 
do_heap_region_impl(r); + } else { + return do_heap_region_impl(r); + } + } + + private: + template + bool do_heap_region_impl(HeapRegion* r) { + G1AdjustClosure cl(_collector); if (r->is_humongous()) { // Special handling for humongous regions to get somewhat better // work distribution. @@ -71,7 +82,7 @@ class G1AdjustRegionClosure : public HeapRegionClosure { // Closed archive regions never change references and only contain // references into other closed regions and are always live. Free // regions do not contain objects to iterate. So skip both. - G1AdjustLiveClosure adjust(&cl); + G1AdjustLiveClosure adjust(&cl); r->apply_to_marked_objects(_bitmap, &adjust); } return false; @@ -83,13 +94,13 @@ G1FullGCAdjustTask::G1FullGCAdjustTask(G1FullCollector* collector) : _root_processor(G1CollectedHeap::heap(), collector->workers()), _references_done(false), _weak_proc_task(collector->workers()), - _hrclaimer(collector->workers()), - _adjust(collector) { + _hrclaimer(collector->workers()) { // Need cleared claim bits for the roots processing ClassLoaderDataGraph::clear_claimed_marks(); } -void G1FullGCAdjustTask::work(uint worker_id) { +template +void G1FullGCAdjustTask::work_impl(uint worker_id) { Ticks start = Ticks::now(); ResourceMark rm; @@ -97,20 +108,29 @@ void G1FullGCAdjustTask::work(uint worker_id) { G1FullGCMarker* marker = collector()->marker(worker_id); marker->preserved_stack()->adjust_during_full_gc(); + G1AdjustClosure adjust(collector()); // Adjust the weak roots. if (!Atomic::cmpxchg(&_references_done, false, true)) { - G1CollectedHeap::heap()->ref_processor_stw()->weak_oops_do(&_adjust); + G1CollectedHeap::heap()->ref_processor_stw()->weak_oops_do(&adjust); } AlwaysTrueClosure always_alive; - _weak_proc_task.work(worker_id, &always_alive, &_adjust); + _weak_proc_task.work(worker_id, &always_alive, &adjust); - CLDToOopClosure adjust_cld(&_adjust, ClassLoaderData::_claim_strong); - CodeBlobToOopClosure adjust_code(&_adjust, CodeBlobToOopClosure::FixRelocations); - _root_processor.process_all_roots(&_adjust, &adjust_cld, &adjust_code); + CLDToOopClosure adjust_cld(&adjust, ClassLoaderData::_claim_strong); + CodeBlobToOopClosure adjust_code(&adjust, CodeBlobToOopClosure::FixRelocations); + _root_processor.process_all_roots(&adjust, &adjust_cld, &adjust_code); // Now adjust pointers region by region G1AdjustRegionClosure blk(collector(), worker_id); G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&blk, &_hrclaimer, worker_id); log_task("Adjust task", worker_id, start); } + +void G1FullGCAdjustTask::work(uint worker_id) { + if (UseAltGCForwarding) { + work_impl(worker_id); + } else { + work_impl(worker_id); + } +} diff --git a/src/hotspot/share/gc/g1/g1FullGCAdjustTask.hpp b/src/hotspot/share/gc/g1/g1FullGCAdjustTask.hpp index 56c5957cd26..b548f198cfd 100644 --- a/src/hotspot/share/gc/g1/g1FullGCAdjustTask.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCAdjustTask.hpp @@ -39,8 +39,9 @@ class G1FullGCAdjustTask : public G1FullGCTask { volatile bool _references_done; WeakProcessor::Task _weak_proc_task; HeapRegionClaimer _hrclaimer; - G1AdjustClosure _adjust; + template + void work_impl(uint worker_id); public: G1FullGCAdjustTask(G1FullCollector* collector); void work(uint worker_id); diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp index a7aec46d2ee..e410914d8e5 100644 --- a/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCCompactTask.cpp @@ -31,6 +31,7 @@ #include 
"gc/g1/g1FullGCCompactTask.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/shared/gcTraceTime.inline.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "logging/log.hpp" #include "oops/oop.inline.hpp" #include "utilities/ticks.hpp" @@ -58,14 +59,16 @@ class G1ResetSkipCompactingClosure : public HeapRegionClosure { } }; -size_t G1FullGCCompactTask::G1CompactRegionClosure::apply(oop obj) { +template +size_t G1FullGCCompactTask::G1CompactRegionClosure::apply(oop obj) { size_t size = obj->size(); - HeapWord* destination = cast_from_oop(obj->forwardee()); - if (destination == NULL) { + if (!SlidingForwarding::is_forwarded(obj)) { // Object not moving return size; } + HeapWord* destination = cast_from_oop(SlidingForwarding::forwardee(obj)); + // copy object and reinit its mark HeapWord* obj_addr = cast_from_oop(obj); assert(obj_addr != destination, "everything in this pass should be moving"); @@ -79,8 +82,13 @@ size_t G1FullGCCompactTask::G1CompactRegionClosure::apply(oop obj) { void G1FullGCCompactTask::compact_region(HeapRegion* hr) { assert(!hr->is_pinned(), "Should be no pinned region in compaction queue"); assert(!hr->is_humongous(), "Should be no humongous regions in compaction queue"); - G1CompactRegionClosure compact(collector()->mark_bitmap()); - hr->apply_to_marked_objects(collector()->mark_bitmap(), &compact); + if (UseAltGCForwarding) { + G1CompactRegionClosure compact(collector()->mark_bitmap()); + hr->apply_to_marked_objects(collector()->mark_bitmap(), &compact); + } else { + G1CompactRegionClosure compact(collector()->mark_bitmap()); + hr->apply_to_marked_objects(collector()->mark_bitmap(), &compact); + } // Clear the liveness information for this region if necessary i.e. if we actually look at it // for bitmap verification. Otherwise it is sufficient that we move the TAMS to bottom(). if (G1VerifyBitmaps) { diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactTask.hpp b/src/hotspot/share/gc/g1/g1FullGCCompactTask.hpp index 5f96796acca..29b7c8fbb6e 100644 --- a/src/hotspot/share/gc/g1/g1FullGCCompactTask.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCCompactTask.hpp @@ -48,6 +48,7 @@ class G1FullGCCompactTask : public G1FullGCTask { void work(uint worker_id); void serial_compaction(); + template class G1CompactRegionClosure : public StackObj { G1CMBitMap* _bitmap; diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp b/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp index 499ae94c12b..0369e64f9a7 100644 --- a/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "gc/g1/g1FullGCCompactionPoint.hpp" #include "gc/g1/heapRegion.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "oops/oop.inline.hpp" #include "utilities/debug.hpp" @@ -93,6 +94,7 @@ void G1FullGCCompactionPoint::switch_region() { initialize_values(true); } +template void G1FullGCCompactionPoint::forward(oop object, size_t size) { assert(_current_region != NULL, "Must have been initialized"); @@ -103,8 +105,10 @@ void G1FullGCCompactionPoint::forward(oop object, size_t size) { // Store a forwarding pointer if the object should be moved. 
if (cast_from_oop(object) != _compaction_top) { - object->forward_to(cast_to_oop(_compaction_top)); + SlidingForwarding::forward_to(object, cast_to_oop(_compaction_top)); } else { + assert(!SlidingForwarding::is_forwarded(object), "should not be forwarded"); + /* if (object->forwardee() != NULL) { // Object should not move but mark-word is used so it looks like the // object is forwarded. Need to clear the mark and it's no problem @@ -123,6 +127,7 @@ void G1FullGCCompactionPoint::forward(oop object, size_t size) { p2i(object), object->mark().value(), markWord::prototype_for_klass(object->klass()).value()); } assert(object->forwardee() == NULL, "should be forwarded to NULL"); + */ } // Update compaction values. @@ -132,6 +137,9 @@ void G1FullGCCompactionPoint::forward(oop object, size_t size) { } } +template void G1FullGCCompactionPoint::forward(oop object, size_t size); +template void G1FullGCCompactionPoint::forward(oop object, size_t size); + void G1FullGCCompactionPoint::add(HeapRegion* hr) { _compaction_regions->append(hr); } diff --git a/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.hpp b/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.hpp index 5f732dcad89..383a98dfa42 100644 --- a/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCCompactionPoint.hpp @@ -51,6 +51,7 @@ class G1FullGCCompactionPoint : public CHeapObj { bool is_initialized(); void initialize(HeapRegion* hr, bool init_threshold); void update(); + template void forward(oop object, size_t size); void add(HeapRegion* hr); diff --git a/src/hotspot/share/gc/g1/g1FullGCOopClosures.hpp b/src/hotspot/share/gc/g1/g1FullGCOopClosures.hpp index 1690c99ea24..bb8377a1a6a 100644 --- a/src/hotspot/share/gc/g1/g1FullGCOopClosures.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCOopClosures.hpp @@ -77,6 +77,7 @@ class G1MarkAndPushClosure : public OopIterateClosure { virtual void do_cld(ClassLoaderData* cld); }; +template class G1AdjustClosure : public BasicOopIterateClosure { G1FullCollector* _collector; diff --git a/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp index 6ed5385dd6c..ed8e3eedc2b 100644 --- a/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCOopClosures.inline.hpp @@ -32,6 +32,7 @@ #include "gc/g1/g1ConcurrentMarkBitMap.inline.hpp" #include "gc/g1/g1FullGCMarker.inline.hpp" #include "gc/g1/heapRegionRemSet.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "memory/iterator.inline.hpp" #include "memory/universe.hpp" #include "oops/access.inline.hpp" @@ -63,7 +64,8 @@ inline void G1MarkAndPushClosure::do_cld(ClassLoaderData* cld) { _marker->follow_cld(cld); } -template inline void G1AdjustClosure::adjust_pointer(T* p) { +template +template inline void G1AdjustClosure::adjust_pointer(T* p) { T heap_oop = RawAccess<>::oop_load(p); if (CompressedOops::is_null(heap_oop)) { return; @@ -77,24 +79,28 @@ template inline void G1AdjustClosure::adjust_pointer(T* p) { return; } - oop forwardee = obj->forwardee(); - if (forwardee == NULL) { + if (!SlidingForwarding::is_forwarded(obj)) { // Not forwarded, return current reference. 
+ /* assert(obj->mark() == markWord::prototype_for_klass(obj->klass()) || // Correct mark obj->mark_must_be_preserved() || // Will be restored by PreservedMarksSet (UseBiasedLocking && obj->has_bias_pattern()), // Will be restored by BiasedLocking "Must have correct prototype or be preserved, obj: " PTR_FORMAT ", mark: " PTR_FORMAT ", prototype: " PTR_FORMAT, p2i(obj), obj->mark().value(), markWord::prototype_for_klass(obj->klass()).value()); + */ return; } // Forwarded, just update. + oop forwardee = SlidingForwarding::forwardee(obj); assert(G1CollectedHeap::heap()->is_in_reserved(forwardee), "should be in object space"); RawAccess::oop_store(p, forwardee); } -inline void G1AdjustClosure::do_oop(oop* p) { do_oop_work(p); } -inline void G1AdjustClosure::do_oop(narrowOop* p) { do_oop_work(p); } +template +inline void G1AdjustClosure::do_oop(oop* p) { do_oop_work(p); } +template +inline void G1AdjustClosure::do_oop(narrowOop* p) { do_oop_work(p); } inline bool G1IsAliveClosure::do_object_b(oop p) { return _bitmap->is_marked(p) || _collector->is_skip_marking(p); diff --git a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp index cc486aa8e7a..1e680c71cf4 100644 --- a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp +++ b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.cpp @@ -34,6 +34,7 @@ #include "gc/g1/heapRegion.inline.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/referenceProcessor.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "logging/log.hpp" #include "memory/iterator.inline.hpp" #include "oops/oop.inline.hpp" @@ -156,35 +157,46 @@ void G1FullGCPrepareTask::G1CalculatePointersClosure::reset_region_metadata(Heap } } -G1FullGCPrepareTask::G1PrepareCompactLiveClosure::G1PrepareCompactLiveClosure(G1FullGCCompactionPoint* cp) : +template +G1FullGCPrepareTask::G1PrepareCompactLiveClosure::G1PrepareCompactLiveClosure(G1FullGCCompactionPoint* cp) : _cp(cp) { } -size_t G1FullGCPrepareTask::G1PrepareCompactLiveClosure::apply(oop object) { +template +size_t G1FullGCPrepareTask::G1PrepareCompactLiveClosure::apply(oop object) { size_t size = object->size(); - _cp->forward(object, size); + _cp->forward(object, size); return size; } -size_t G1FullGCPrepareTask::G1RePrepareClosure::apply(oop obj) { +template +size_t G1FullGCPrepareTask::G1RePrepareClosure::apply(oop obj) { // We only re-prepare objects forwarded within the current region, so // skip objects that are already forwarded to another region. - oop forwarded_to = obj->forwardee(); - if (forwarded_to != NULL && !_current->is_in(forwarded_to)) { - return obj->size(); + if (SlidingForwarding::is_forwarded(obj)) { + oop forwarded_to = SlidingForwarding::forwardee(obj); + assert(forwarded_to != NULL, "must have forwardee"); + if (!_current->is_in(forwarded_to)) { + return obj->size(); + } } - // Get size and forward. 
size_t size = obj->size(); - _cp->forward(obj, size); + _cp->forward(obj, size); return size; } void G1FullGCPrepareTask::G1CalculatePointersClosure::prepare_for_compaction_work(G1FullGCCompactionPoint* cp, HeapRegion* hr) { - G1PrepareCompactLiveClosure prepare_compact(cp); - hr->set_compaction_top(hr->bottom()); - hr->apply_to_marked_objects(_bitmap, &prepare_compact); + if (UseAltGCForwarding) { + G1PrepareCompactLiveClosure prepare_compact(cp); + hr->set_compaction_top(hr->bottom()); + hr->apply_to_marked_objects(_bitmap, &prepare_compact); + } else { + G1PrepareCompactLiveClosure prepare_compact(cp); + hr->set_compaction_top(hr->bottom()); + hr->apply_to_marked_objects(_bitmap, &prepare_compact); + } } void G1FullGCPrepareTask::G1CalculatePointersClosure::prepare_for_compaction(HeapRegion* hr) { @@ -197,7 +209,8 @@ void G1FullGCPrepareTask::G1CalculatePointersClosure::prepare_for_compaction(Hea prepare_for_compaction_work(_cp, hr); } -void G1FullGCPrepareTask::prepare_serial_compaction() { +template +void G1FullGCPrepareTask::prepare_serial_compaction_impl() { GCTraceTime(Debug, gc, phases) debug("Phase 2: Prepare Serial Compaction", collector()->scope()->timer()); // At this point we know that no regions were completely freed by // the parallel compaction. That means that the last region of @@ -221,7 +234,7 @@ void G1FullGCPrepareTask::prepare_serial_compaction() { cp->initialize(current, false); } else { assert(!current->is_humongous(), "Should be no humongous regions in compaction queue"); - G1RePrepareClosure re_prepare(cp, current); + G1RePrepareClosure re_prepare(cp, current); current->set_compaction_top(current->bottom()); current->apply_to_marked_objects(collector()->mark_bitmap(), &re_prepare); } @@ -229,6 +242,14 @@ void G1FullGCPrepareTask::prepare_serial_compaction() { cp->update(); } +void G1FullGCPrepareTask::prepare_serial_compaction() { + if (UseAltGCForwarding) { + prepare_serial_compaction_impl(); + } else { + prepare_serial_compaction_impl(); + } +} + bool G1FullGCPrepareTask::G1CalculatePointersClosure::freed_regions() { if (_regions_freed) { return true; diff --git a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.hpp b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.hpp index d694fc6ffca..ed74ee90b8e 100644 --- a/src/hotspot/share/gc/g1/g1FullGCPrepareTask.hpp +++ b/src/hotspot/share/gc/g1/g1FullGCPrepareTask.hpp @@ -42,6 +42,10 @@ class G1FullGCPrepareTask : public G1FullGCTask { void set_freed_regions(); +private: + template + void prepare_serial_compaction_impl(); + public: G1FullGCPrepareTask(G1FullCollector* collector); void work(uint worker_id); @@ -74,6 +78,7 @@ class G1FullGCPrepareTask : public G1FullGCTask { bool freed_regions(); }; + template class G1PrepareCompactLiveClosure : public StackObj { G1FullGCCompactionPoint* _cp; @@ -82,6 +87,7 @@ class G1FullGCPrepareTask : public G1FullGCTask { size_t apply(oop object); }; + template class G1RePrepareClosure : public StackObj { G1FullGCCompactionPoint* _cp; HeapRegion* _current; diff --git a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp index d5bfadb545b..2f9558ba7a8 100644 --- a/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp +++ b/src/hotspot/share/gc/g1/g1OopClosures.inline.hpp @@ -234,7 +234,7 @@ void G1ParCopyClosure::do_oop_work(T* p) { oop forwardee; markWord m = obj->mark(); if (m.is_marked()) { - forwardee = cast_to_oop(m.decode_pointer()); + forwardee = obj->forwardee(m); } else { forwardee = _par_scan_state->copy_to_survivor_space(state, obj, m); } 
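The hunk above replaces the raw `cast_to_oop(m.decode_pointer())` with `obj->forwardee(m)`, which decodes the forwardee from the mark word the caller has already loaded. A minimal sketch of that idea follows, assuming HotSpot's existing `markWord::decode_pointer()` and `cast_to_oop()` helpers; the patch's real overload lives on `oopDesc` and may differ in detail.

inline oop forwardee_from_mark(oop obj, markWord m) {
  // 'm' was loaded once by the caller and tested with m.is_marked(); decoding
  // from that same value avoids a second load of the header, which under
  // compact object headers can be overwritten by a concurrent promotion.
  assert(m.is_marked(), "caller must have observed a forwarded mark");
  return cast_to_oop(m.decode_pointer());
}

The same pattern motivates `forward_safe_klass(mark)` further down: during young GC the header still carries a full forwarding pointer, so any metadata normally derived from the header has to come from the mark word that was read before the forwarding race could happen.
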
diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp index 896c891ae74..1b0ffe7d7bb 100644 --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.cpp @@ -197,7 +197,7 @@ void G1ParScanThreadState::do_oop_evac(T* p) { markWord m = obj->mark(); if (m.is_marked()) { - obj = cast_to_oop(m.decode_pointer()); + obj = obj->forwardee(m); } else { obj = do_copy_to_survivor_space(region_attr, obj, m); } @@ -211,7 +211,7 @@ void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) { oop from_obj = task.to_source_array(); assert(_g1h->is_in_reserved(from_obj), "must be in heap."); - assert(from_obj->is_objArray(), "must be obj array"); + assert(UseCompactObjectHeaders || from_obj->is_objArray(), "must be obj array"); assert(from_obj->is_forwarded(), "must be forwarded"); oop to_obj = from_obj->forwardee(); @@ -241,7 +241,7 @@ MAYBE_INLINE_EVACUATION void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr, oop from_obj, oop to_obj) { - assert(from_obj->is_objArray(), "precondition"); + assert(UseCompactObjectHeaders || from_obj->is_objArray(), "precondition"); assert(from_obj->is_forwarded(), "precondition"); assert(from_obj->forwardee() == to_obj, "precondition"); assert(from_obj != to_obj, "should not be scanning self-forwarded objects"); @@ -360,22 +360,22 @@ G1HeapRegionAttr G1ParScanThreadState::next_region_attr(G1HeapRegionAttr const r } void G1ParScanThreadState::report_promotion_event(G1HeapRegionAttr const dest_attr, - oop const old, size_t word_sz, uint age, + Klass* klass, size_t word_sz, uint age, HeapWord * const obj_ptr, uint node_index) const { PLAB* alloc_buf = _plab_allocator->alloc_buffer(dest_attr, node_index); if (alloc_buf->contains(obj_ptr)) { - _g1h->_gc_tracer_stw->report_promotion_in_new_plab_event(old->klass(), word_sz * HeapWordSize, age, + _g1h->_gc_tracer_stw->report_promotion_in_new_plab_event(klass, word_sz * HeapWordSize, age, dest_attr.type() == G1HeapRegionAttr::Old, alloc_buf->word_sz() * HeapWordSize); } else { - _g1h->_gc_tracer_stw->report_promotion_outside_plab_event(old->klass(), word_sz * HeapWordSize, age, + _g1h->_gc_tracer_stw->report_promotion_outside_plab_event(klass, word_sz * HeapWordSize, age, dest_attr.type() == G1HeapRegionAttr::Old); } } NOINLINE HeapWord* G1ParScanThreadState::allocate_copy_slow(G1HeapRegionAttr* dest_attr, - oop old, + Klass* klass, size_t word_sz, uint age, uint node_index) { @@ -398,7 +398,7 @@ HeapWord* G1ParScanThreadState::allocate_copy_slow(G1HeapRegionAttr* dest_attr, update_numa_stats(node_index); if (_g1h->_gc_tracer_stw->should_report_promotion_events()) { // The events are checked individually as part of the actual commit - report_promotion_event(*dest_attr, old, word_sz, age, obj_ptr, node_index); + report_promotion_event(*dest_attr, klass, word_sz, age, obj_ptr, node_index); } } return obj_ptr; @@ -423,7 +423,13 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio // Get the klass once. We'll need it again later, and this avoids // re-decoding when it's compressed. - Klass* klass = old->klass(); + // NOTE: With compact headers, it is not safe to load the Klass* from o, because + // that would access the mark-word, and the mark-word might change at any time by + // concurrent promotion. The promoted mark-word would point to the forwardee, which + // may not yet have completed copying. 
Therefore we must load the Klass* from + // the mark-word that we have already loaded. This is safe, because we have checked + // that this is not yet forwarded in the caller. + Klass* klass = old->forward_safe_klass(old_mark); const size_t word_sz = old->size_given_klass(klass); uint age = 0; @@ -436,7 +442,7 @@ oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const regio // PLAB allocations should succeed most of the time, so we'll // normally check against NULL once and that's it. if (obj_ptr == NULL) { - obj_ptr = allocate_copy_slow(&dest_attr, old, word_sz, age, node_index); + obj_ptr = allocate_copy_slow(&dest_attr, klass, word_sz, age, node_index); if (obj_ptr == NULL) { // This will either forward-to-self, or detect that someone else has // installed a forwarding pointer. @@ -593,7 +599,7 @@ NOINLINE oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m) { assert(_g1h->is_in_cset(old), "Object " PTR_FORMAT " should be in the CSet", p2i(old)); - oop forward_ptr = old->forward_to_atomic(old, m, memory_order_relaxed); + oop forward_ptr = old->forward_to_self_atomic(m, memory_order_relaxed); if (forward_ptr == NULL) { // Forward-to-self succeeded. We are the "owner" of the object. HeapRegion* r = _g1h->heap_region_containing(old); diff --git a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp index 94f91ff2a1c..699905f2165 100644 --- a/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp +++ b/src/hotspot/share/gc/g1/g1ParScanThreadState.hpp @@ -172,7 +172,7 @@ class G1ParScanThreadState : public CHeapObj { void start_partial_objarray(G1HeapRegionAttr dest_dir, oop from, oop to); HeapWord* allocate_copy_slow(G1HeapRegionAttr* dest_attr, - oop old, + Klass* klass, size_t word_sz, uint age, uint node_index); @@ -205,7 +205,7 @@ class G1ParScanThreadState : public CHeapObj { inline G1HeapRegionAttr next_region_attr(G1HeapRegionAttr const region_attr, markWord const m, uint& age); void report_promotion_event(G1HeapRegionAttr const dest_attr, - oop const old, size_t word_sz, uint age, + Klass* klass, size_t word_sz, uint age, HeapWord * const obj_ptr, uint node_index) const; void trim_queue_to_threshold(uint threshold); diff --git a/src/hotspot/share/gc/g1/heapRegion.cpp b/src/hotspot/share/gc/g1/heapRegion.cpp index 26d40d7438c..e675cd71c77 100644 --- a/src/hotspot/share/gc/g1/heapRegion.cpp +++ b/src/hotspot/share/gc/g1/heapRegion.cpp @@ -860,12 +860,21 @@ HeapWord* HeapRegion::cross_threshold(HeapWord* start, HeapWord* end) { return _bot_part.threshold(); } -void HeapRegion::object_iterate(ObjectClosure* blk) { +template +void HeapRegion::object_iterate_impl(ObjectClosure* blk) { HeapWord* p = bottom(); while (p < top()) { if (block_is_obj(p)) { blk->do_object(cast_to_oop(p)); } - p += block_size(p); + p += block_size(p); + } +} + +void HeapRegion::object_iterate(ObjectClosure* blk) { + if (!UseCompactObjectHeaders || G1CollectedHeap::heap()->collector_state()->in_full_gc()) { + object_iterate_impl(blk); + } else { + object_iterate_impl(blk); } } diff --git a/src/hotspot/share/gc/g1/heapRegion.hpp b/src/hotspot/share/gc/g1/heapRegion.hpp index ea63f65a8e6..39ff9133f99 100644 --- a/src/hotspot/share/gc/g1/heapRegion.hpp +++ b/src/hotspot/share/gc/g1/heapRegion.hpp @@ -145,6 +145,9 @@ class HeapRegion : public CHeapObj { // This version synchronizes with other calls to par_allocate_impl(). 
inline HeapWord* par_allocate_impl(size_t min_word_size, size_t desired_word_size, size_t* actual_word_size); + template + void object_iterate_impl(ObjectClosure* blk); + public: HeapWord* block_start(const void* p); @@ -183,6 +186,7 @@ class HeapRegion : public CHeapObj { // Returns the object size for all valid block starts // and the amount of unallocated words if called on top() + template size_t block_size(const HeapWord* p) const; // Scans through the region using the bitmap to determine what diff --git a/src/hotspot/share/gc/g1/heapRegion.inline.hpp b/src/hotspot/share/gc/g1/heapRegion.inline.hpp index 54da24902a1..790bcc5a42d 100644 --- a/src/hotspot/share/gc/g1/heapRegion.inline.hpp +++ b/src/hotspot/share/gc/g1/heapRegion.inline.hpp @@ -177,13 +177,27 @@ inline bool HeapRegion::is_obj_dead(const oop obj, const G1CMBitMap* const prev_ !is_closed_archive(); } +template inline size_t HeapRegion::block_size(const HeapWord *addr) const { if (addr == top()) { return pointer_delta(end(), addr); } if (block_is_obj(addr)) { - return cast_to_oop(addr)->size(); + oop obj = cast_to_oop(addr); +#ifdef _LP64 +#ifdef ASSERT + if (RESOLVE) { + assert(UseCompactObjectHeaders && !G1CollectedHeap::heap()->collector_state()->in_full_gc(), "Illegal/excessive resolve during full-GC"); + } else { + assert(!UseCompactObjectHeaders || G1CollectedHeap::heap()->collector_state()->in_full_gc() || !obj->is_forwarded(), "Missing resolve when forwarded during normal GC"); + } +#endif + if (RESOLVE && obj->is_forwarded()) { + obj = obj->forwardee(); + } +#endif + return obj->size(); } return block_size_using_bitmap(addr, G1CollectedHeap::heap()->concurrent_mark()->prev_mark_bitmap()); diff --git a/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp b/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp index c00890d38e1..c1dfb66ed96 100644 --- a/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp +++ b/src/hotspot/share/gc/parallel/mutableNUMASpace.cpp @@ -109,7 +109,7 @@ void MutableNUMASpace::ensure_parsability() { size_t touched_words = words_to_fill; #ifndef ASSERT if (!ZapUnusedHeapArea) { - touched_words = MIN2((size_t)align_object_size(typeArrayOopDesc::header_size(T_INT)), + touched_words = MIN2((size_t)align_object_size(align_up(typeArrayOopDesc::base_offset_in_bytes(T_INT), HeapWordSize) / HeapWordSize), touched_words); } #endif diff --git a/src/hotspot/share/gc/parallel/mutableSpace.cpp b/src/hotspot/share/gc/parallel/mutableSpace.cpp index 363e115d10d..b06a9610bf0 100644 --- a/src/hotspot/share/gc/parallel/mutableSpace.cpp +++ b/src/hotspot/share/gc/parallel/mutableSpace.cpp @@ -233,11 +233,36 @@ void MutableSpace::oop_iterate(OopIterateClosure* cl) { } } -void MutableSpace::object_iterate(ObjectClosure* cl) { +template +void MutableSpace::object_iterate_impl(ObjectClosure* cl) { HeapWord* p = bottom(); while (p < top()) { - cl->do_object(cast_to_oop(p)); - p += cast_to_oop(p)->size(); + oop obj = cast_to_oop(p); + // When promotion-failure occurs during Young GC, eden/from space is not cleared, + // so we can encounter objects with "forwarded" markword. + // They are essentially dead, so skipping them + if (!obj->is_forwarded()) { + cl->do_object(obj); + p += obj->size(); + } else { + assert(obj->forwardee() != obj, "must not be self-forwarded"); + if (COMPACT_HEADERS) { + // It is safe to use the forwardee here. Parallel GC only uses + // header-based forwarding during promotion. Full GC doesn't + // use the object header for forwarding at all. 
+ p += obj->forwardee()->size(); + } else { + p += obj->size(); + } + } + } +} + +void MutableSpace::object_iterate(ObjectClosure* cl) { + if (UseCompactObjectHeaders) { + object_iterate_impl(cl); + } else { + object_iterate_impl(cl); } } diff --git a/src/hotspot/share/gc/parallel/mutableSpace.hpp b/src/hotspot/share/gc/parallel/mutableSpace.hpp index b6bb131828f..fc4b607403a 100644 --- a/src/hotspot/share/gc/parallel/mutableSpace.hpp +++ b/src/hotspot/share/gc/parallel/mutableSpace.hpp @@ -67,6 +67,9 @@ class MutableSpace: public CHeapObj { void set_last_setup_region(MemRegion mr) { _last_setup_region = mr; } MemRegion last_setup_region() const { return _last_setup_region; } + template + void object_iterate_impl(ObjectClosure* cl); + public: virtual ~MutableSpace(); MutableSpace(size_t page_size); diff --git a/src/hotspot/share/gc/parallel/psOldGen.cpp b/src/hotspot/share/gc/parallel/psOldGen.cpp index a8b871f2011..2e4ebd581d2 100644 --- a/src/hotspot/share/gc/parallel/psOldGen.cpp +++ b/src/hotspot/share/gc/parallel/psOldGen.cpp @@ -408,7 +408,7 @@ class VerifyObjectStartArrayClosure : public ObjectClosure { _old_gen(old_gen), _start_array(start_array) { } virtual void do_object(oop obj) { - HeapWord* test_addr = cast_from_oop(obj) + 1; + HeapWord* test_addr = cast_from_oop(obj); guarantee(_start_array->object_start(test_addr) == cast_from_oop(obj), "ObjectStartArray cannot find start of object"); guarantee(_start_array->is_block_allocated(cast_from_oop(obj)), "ObjectStartArray missing block allocation"); } diff --git a/src/hotspot/share/gc/parallel/psPromotionLAB.cpp b/src/hotspot/share/gc/parallel/psPromotionLAB.cpp index 53fac0e98a5..46e8d28ae56 100644 --- a/src/hotspot/share/gc/parallel/psPromotionLAB.cpp +++ b/src/hotspot/share/gc/parallel/psPromotionLAB.cpp @@ -47,7 +47,7 @@ void PSPromotionLAB::initialize(MemRegion lab) { // Initialize after VM starts up because header_size depends on compressed // oops. - filler_header_size = align_object_size(typeArrayOopDesc::header_size(T_INT)); + filler_header_size = align_object_size((arrayOopDesc::base_offset_in_bytes(T_INT) + BytesPerWord) / BytesPerWord); // We can be initialized to a zero size! if (free() > 0) { @@ -83,17 +83,22 @@ void PSPromotionLAB::flush() { // so they can always fill with an array. HeapWord* tlab_end = end() + filler_header_size; typeArrayOop filler_oop = (typeArrayOop) cast_to_oop(top()); - filler_oop->set_mark(markWord::prototype()); - filler_oop->set_klass(Universe::intArrayKlassObj()); - const size_t array_length = - pointer_delta(tlab_end, top()) - typeArrayOopDesc::header_size(T_INT); - assert( (array_length * (HeapWordSize/sizeof(jint))) < (size_t)max_jint, "array too big in PSPromotionLAB"); - filler_oop->set_length((int)(array_length * (HeapWordSize/sizeof(jint)))); + if (UseCompactObjectHeaders) { + filler_oop->set_mark(Universe::intArrayKlassObj()->prototype_header()); + } else { + filler_oop->set_mark(markWord::prototype()); + filler_oop->set_klass(Universe::intArrayKlassObj()); + } + int header_size = arrayOopDesc::base_offset_in_bytes(T_INT); + const size_t array_length_bytes = pointer_delta(tlab_end, top(), 1) - header_size; + assert((array_length_bytes / sizeof(jint)) < (size_t)max_jint, "array too big in PSPromotionLAB"); + filler_oop->set_length((int)(array_length_bytes / sizeof(jint))); #ifdef ASSERT // Note that we actually DO NOT want to use the aligned header size! 
- HeapWord* elt_words = cast_from_oop(filler_oop) + typeArrayOopDesc::header_size(T_INT); - Copy::fill_to_words(elt_words, array_length, 0xDEAABABE); + const size_t array_length_words = pointer_delta(tlab_end, top()) - heap_word_size(header_size); + HeapWord* elt_words = cast_from_oop(filler_oop) + heap_word_size(header_size); + Copy::fill_to_words(elt_words, array_length_words, 0xDEAABABE); #endif set_bottom(NULL); diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.cpp b/src/hotspot/share/gc/parallel/psPromotionManager.cpp index c5f321f5455..f7336d680c0 100644 --- a/src/hotspot/share/gc/parallel/psPromotionManager.cpp +++ b/src/hotspot/share/gc/parallel/psPromotionManager.cpp @@ -314,7 +314,7 @@ void PSPromotionManager::process_array_chunk(PartialArrayScanTask task) { assert(PSChunkLargeArrays, "invariant"); oop old = task.to_source_array(); - assert(old->is_objArray(), "invariant"); + assert(UseCompactObjectHeaders || old->is_objArray(), "invariant"); assert(old->is_forwarded(), "invariant"); TASKQUEUE_STATS_ONLY(++_array_chunks_processed); @@ -352,7 +352,7 @@ oop PSPromotionManager::oop_promotion_failed(oop obj, markWord obj_mark) { // this started. If it is the same (i.e., no forwarding // pointer has been installed), then this thread owns // it. - if (obj->cas_forward_to(obj, obj_mark)) { + if (obj->forward_to_self_atomic(obj_mark) == NULL) { // We won any races, we "own" this object. assert(obj == obj->forwardee(), "Sanity"); @@ -360,7 +360,10 @@ oop PSPromotionManager::oop_promotion_failed(oop obj, markWord obj_mark) { push_contents(obj); - _preserved_marks->push_if_necessary(obj, obj_mark); + // Save the markWord of promotion-failed objs in _preserved_marks for later + // restoration. This way we don't have to walk the young-gen to locate + // these promotion-failed objs. 
+ _preserved_marks->push_always(obj, obj_mark); } else { // We lost, someone else "owns" this object guarantee(obj->is_forwarded(), "Object must be forwarded if the cas failed."); diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.hpp index e94be81b621..50f2ba234c9 100644 --- a/src/hotspot/share/gc/parallel/psPromotionManager.hpp +++ b/src/hotspot/share/gc/parallel/psPromotionManager.hpp @@ -104,7 +104,8 @@ class PSPromotionManager { void push_depth(ScannerTask task); - inline void promotion_trace_event(oop new_obj, oop old_obj, size_t obj_size, + inline void promotion_trace_event(oop new_obj, oop old_obj, + Klass* klass, size_t obj_size, uint age, bool tenured, const PSPromotionLAB* lab); diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp index 5eb6a20b0b7..2fc87d22ad4 100644 --- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp +++ b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp @@ -61,7 +61,7 @@ inline void PSPromotionManager::claim_or_forward_depth(T* p) { } inline void PSPromotionManager::promotion_trace_event(oop new_obj, oop old_obj, - size_t obj_size, + Klass* klass, size_t obj_size, uint age, bool tenured, const PSPromotionLAB* lab) { // Skip if memory allocation failed @@ -73,14 +73,14 @@ inline void PSPromotionManager::promotion_trace_event(oop new_obj, oop old_obj, if (gc_tracer->should_report_promotion_in_new_plab_event()) { size_t obj_bytes = obj_size * HeapWordSize; size_t lab_size = lab->capacity(); - gc_tracer->report_promotion_in_new_plab_event(old_obj->klass(), obj_bytes, + gc_tracer->report_promotion_in_new_plab_event(klass, obj_bytes, age, tenured, lab_size); } } else { // Promotion of object directly to heap if (gc_tracer->should_report_promotion_outside_plab_event()) { size_t obj_bytes = obj_size * HeapWordSize; - gc_tracer->report_promotion_outside_plab_event(old_obj->klass(), obj_bytes, + gc_tracer->report_promotion_outside_plab_event(klass, obj_bytes, age, tenured); } } @@ -144,7 +144,7 @@ inline oop PSPromotionManager::copy_to_survivor_space(oop o) { // other thread. OrderAccess::acquire(); // Return the already installed forwardee. - return cast_to_oop(m.decode_pointer()); + return o->forwardee(m); } } @@ -160,7 +160,14 @@ inline oop PSPromotionManager::copy_unmarked_to_survivor_space(oop o, oop new_obj = NULL; bool new_obj_is_tenured = false; - size_t new_obj_size = o->size(); + // NOTE: With compact headers, it is not safe to load the Klass* from o, because + // that would access the mark-word, and the mark-word might change at any time by + // concurrent promotion. The promoted mark-word would point to the forwardee, which + // may not yet have completed copying. Therefore we must load the Klass* from + // the mark-word that we have already loaded. This is safe, because we have checked + // that this is not yet forwarded in the caller. + Klass* klass = o->forward_safe_klass(test_mark); + size_t new_obj_size = o->size_given_klass(klass); // Find the objects age, MT safe. uint age = (test_mark.has_displaced_mark_helper() /* o->has_displaced_mark() */) ? 
@@ -175,7 +182,7 @@ inline oop PSPromotionManager::copy_unmarked_to_survivor_space(oop o, if (new_obj_size > (YoungPLABSize / 2)) { // Allocate this object directly new_obj = cast_to_oop(young_space()->cas_allocate(new_obj_size)); - promotion_trace_event(new_obj, o, new_obj_size, age, false, NULL); + promotion_trace_event(new_obj, o, klass, new_obj_size, age, false, NULL); } else { // Flush and fill _young_lab.flush(); @@ -185,7 +192,7 @@ inline oop PSPromotionManager::copy_unmarked_to_survivor_space(oop o, _young_lab.initialize(MemRegion(lab_base, YoungPLABSize)); // Try the young lab allocation again. new_obj = cast_to_oop(_young_lab.allocate(new_obj_size)); - promotion_trace_event(new_obj, o, new_obj_size, age, false, &_young_lab); + promotion_trace_event(new_obj, o, klass, new_obj_size, age, false, &_young_lab); } else { _young_gen_is_full = true; } @@ -211,7 +218,7 @@ inline oop PSPromotionManager::copy_unmarked_to_survivor_space(oop o, if (new_obj_size > (OldPLABSize / 2)) { // Allocate this object directly new_obj = cast_to_oop(old_gen()->allocate(new_obj_size)); - promotion_trace_event(new_obj, o, new_obj_size, age, true, NULL); + promotion_trace_event(new_obj, o, klass, new_obj_size, age, true, NULL); } else { // Flush and fill _old_lab.flush(); @@ -228,7 +235,7 @@ inline oop PSPromotionManager::copy_unmarked_to_survivor_space(oop o, _old_lab.initialize(MemRegion(lab_base, OldPLABSize)); // Try the old lab allocation again. new_obj = cast_to_oop(_old_lab.allocate(new_obj_size)); - promotion_trace_event(new_obj, o, new_obj_size, age, true, &_old_lab); + promotion_trace_event(new_obj, o, klass, new_obj_size, age, true, &_old_lab); } } } diff --git a/src/hotspot/share/gc/parallel/psScavenge.cpp b/src/hotspot/share/gc/parallel/psScavenge.cpp index f324e4a9b73..9bc9738204f 100644 --- a/src/hotspot/share/gc/parallel/psScavenge.cpp +++ b/src/hotspot/share/gc/parallel/psScavenge.cpp @@ -705,19 +705,11 @@ bool PSScavenge::invoke_no_policy() { return !promotion_failure_occurred; } -// This method iterates over all objects in the young generation, -// removing all forwarding references. It then restores any preserved marks. void PSScavenge::clean_up_failed_promotion() { - ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); - PSYoungGen* young_gen = heap->young_gen(); - - RemoveForwardedPointerClosure remove_fwd_ptr_closure; - young_gen->object_iterate(&remove_fwd_ptr_closure); - PSPromotionManager::restore_preserved_marks(); // Reset the PromotionFailureALot counters. 
- NOT_PRODUCT(heap->reset_promotion_should_fail();) + NOT_PRODUCT(ParallelScavengeHeap::heap()->reset_promotion_should_fail();) } bool PSScavenge::should_attempt_scavenge() { diff --git a/src/hotspot/share/gc/serial/defNewGeneration.cpp b/src/hotspot/share/gc/serial/defNewGeneration.cpp index 1a24a86dcf6..f625bc9cc65 100644 --- a/src/hotspot/share/gc/serial/defNewGeneration.cpp +++ b/src/hotspot/share/gc/serial/defNewGeneration.cpp @@ -682,7 +682,7 @@ void DefNewGeneration::handle_promotion_failure(oop old) { _promotion_failed_info.register_copy_failure(old->size()); _preserved_marks_set.get()->push_if_necessary(old, old->mark()); // forward to self - old->forward_to(old); + old->forward_to_self(); _promo_failure_scan_stack.push(old); diff --git a/src/hotspot/share/gc/serial/genMarkSweep.cpp b/src/hotspot/share/gc/serial/genMarkSweep.cpp index 38ebfef2803..d9bb575716d 100644 --- a/src/hotspot/share/gc/serial/genMarkSweep.cpp +++ b/src/hotspot/share/gc/serial/genMarkSweep.cpp @@ -45,6 +45,7 @@ #include "gc/shared/modRefBarrierSet.hpp" #include "gc/shared/referencePolicy.hpp" #include "gc/shared/referenceProcessorPhaseTimes.hpp" +#include "gc/shared/slidingForwarding.hpp" #include "gc/shared/space.hpp" #include "gc/shared/strongRootsScope.hpp" #include "gc/shared/weakProcessor.hpp" @@ -93,6 +94,8 @@ void GenMarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, bool clear_all_so mark_sweep_phase1(clear_all_softrefs); + SlidingForwarding::begin(); + mark_sweep_phase2(); // Don't add any more derived pointers during phase3 @@ -111,6 +114,8 @@ void GenMarkSweep::invoke_at_safepoint(ReferenceProcessor* rp, bool clear_all_so // (Should this be in general part?) gch->save_marks(); + SlidingForwarding::end(); + deallocate_stacks(); // If compaction completely evacuated the young generation then we @@ -271,18 +276,31 @@ void GenMarkSweep::mark_sweep_phase3() { // Need new claim bits for the pointer adjustment tracing. 
ClassLoaderDataGraph::clear_claimed_marks(); - { - StrongRootsScope srs(0); - - gch->full_process_roots(true, // this is the adjust phase - GenCollectedHeap::SO_AllCodeCache, - false, // all roots - &adjust_pointer_closure, - &adjust_cld_closure); + if (UseAltGCForwarding) { + AdjustPointerClosure adjust_pointer_closure; + CLDToOopClosure adjust_cld_closure(&adjust_pointer_closure, ClassLoaderData::_claim_strong); + { + StrongRootsScope srs(0); + gch->full_process_roots(true, // this is the adjust phase + GenCollectedHeap::SO_AllCodeCache, + false, // all roots + &adjust_pointer_closure, + &adjust_cld_closure); + } + gch->gen_process_weak_roots(&adjust_pointer_closure); + } else { + AdjustPointerClosure adjust_pointer_closure; + CLDToOopClosure adjust_cld_closure(&adjust_pointer_closure, ClassLoaderData::_claim_strong); + { + StrongRootsScope srs(0); + gch->full_process_roots(true, // this is the adjust phase + GenCollectedHeap::SO_AllCodeCache, + false, // all roots + &adjust_pointer_closure, + &adjust_cld_closure); + } + gch->gen_process_weak_roots(&adjust_pointer_closure); } - - gch->gen_process_weak_roots(&adjust_pointer_closure); - adjust_marks(); GenAdjustPointersClosure blk; gch->generation_iterate(&blk, true); diff --git a/src/hotspot/share/gc/serial/markSweep.cpp b/src/hotspot/share/gc/serial/markSweep.cpp index ca8a9fe80a4..e1f0082ea49 100644 --- a/src/hotspot/share/gc/serial/markSweep.cpp +++ b/src/hotspot/share/gc/serial/markSweep.cpp @@ -62,7 +62,6 @@ MarkSweep::FollowRootClosure MarkSweep::follow_root_closure; MarkAndPushClosure MarkSweep::mark_and_push_closure; CLDToOopClosure MarkSweep::follow_cld_closure(&mark_and_push_closure, ClassLoaderData::_claim_strong); -CLDToOopClosure MarkSweep::adjust_cld_closure(&adjust_pointer_closure, ClassLoaderData::_claim_strong); template inline void MarkSweep::KeepAliveClosure::do_oop_work(T* p) { mark_and_push(p); @@ -144,8 +143,9 @@ template inline void MarkSweep::follow_root(T* p) { void MarkSweep::FollowRootClosure::do_oop(oop* p) { follow_root(p); } void MarkSweep::FollowRootClosure::do_oop(narrowOop* p) { follow_root(p); } +template void PreservedMark::adjust_pointer() { - MarkSweep::adjust_pointer(&_obj); + MarkSweep::adjust_pointer(&_obj); } void PreservedMark::restore() { @@ -173,22 +173,29 @@ void MarkSweep::set_ref_processor(ReferenceProcessor* rp) { mark_and_push_closure.set_ref_discoverer(_ref_processor); } -AdjustPointerClosure MarkSweep::adjust_pointer_closure; - -void MarkSweep::adjust_marks() { +template +void MarkSweep::adjust_marks_impl() { assert( _preserved_oop_stack.size() == _preserved_mark_stack.size(), "inconsistent preserved oop stacks"); // adjust the oops we saved earlier for (size_t i = 0; i < _preserved_count; i++) { - _preserved_marks[i].adjust_pointer(); + _preserved_marks[i].adjust_pointer(); } // deal with the overflow stack StackIterator iter(_preserved_oop_stack); while (!iter.is_empty()) { oop* p = iter.next_addr(); - adjust_pointer(p); + adjust_pointer(p); + } +} + +void MarkSweep::adjust_marks() { + if (UseAltGCForwarding) { + adjust_marks_impl(); + } else { + adjust_marks_impl(); } } diff --git a/src/hotspot/share/gc/serial/markSweep.hpp b/src/hotspot/share/gc/serial/markSweep.hpp index e09d7d06833..7b9355cfaca 100644 --- a/src/hotspot/share/gc/serial/markSweep.hpp +++ b/src/hotspot/share/gc/serial/markSweep.hpp @@ -50,7 +50,6 @@ class STWGCTimer; // declared at end class PreservedMark; class MarkAndPushClosure; -class AdjustPointerClosure; class MarkSweep : AllStatic { // @@ -84,7 +83,6 @@ class 
MarkSweep : AllStatic { // // Friend decls // - friend class AdjustPointerClosure; friend class KeepAliveClosure; friend class VM_MarkSweep; @@ -124,8 +122,6 @@ class MarkSweep : AllStatic { static MarkAndPushClosure mark_and_push_closure; static FollowStackClosure follow_stack_closure; static CLDToOopClosure follow_cld_closure; - static AdjustPointerClosure adjust_pointer_closure; - static CLDToOopClosure adjust_cld_closure; // Accessors static uint total_invocations() { return _total_invocations; } @@ -139,9 +135,12 @@ class MarkSweep : AllStatic { static void preserve_mark(oop p, markWord mark); // Save the mark word so it can be restored later + template + static void adjust_marks_impl(); // Adjust the pointers in the preserved marks table static void adjust_marks(); // Adjust the pointers in the preserved marks table static void restore_marks(); // Restore the marks that we saved in preserve_mark + template static int adjust_pointers(oop obj); static void follow_stack(); // Empty marking stack. @@ -150,7 +149,8 @@ class MarkSweep : AllStatic { static void follow_cld(ClassLoaderData* cld); - template static inline void adjust_pointer(T* p); + template + static inline void adjust_pointer(T* p); // Check mark and maybe push on marking stack template static void mark_and_push(T* p); @@ -185,6 +185,7 @@ class MarkAndPushClosure: public OopIterateClosure { } }; +template class AdjustPointerClosure: public BasicOopIterateClosure { public: template void do_oop_work(T* p); @@ -204,6 +205,7 @@ class PreservedMark { _mark = mark; } + template void adjust_pointer(); void restore(); }; diff --git a/src/hotspot/share/gc/serial/markSweep.inline.hpp b/src/hotspot/share/gc/serial/markSweep.inline.hpp index 1e06cf60ad4..77f89a215e9 100644 --- a/src/hotspot/share/gc/serial/markSweep.inline.hpp +++ b/src/hotspot/share/gc/serial/markSweep.inline.hpp @@ -28,6 +28,7 @@ #include "gc/serial/markSweep.hpp" #include "classfile/classLoaderData.inline.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "memory/universe.hpp" #include "oops/markWord.inline.hpp" #include "oops/access.inline.hpp" @@ -40,7 +41,7 @@ inline void MarkSweep::mark_object(oop obj) { // some marks may contain information we need to preserve so we store them away // and overwrite the mark. We'll restore it at the end of markSweep. markWord mark = obj->mark(); - obj->set_mark(markWord::prototype().set_marked()); + obj->set_mark(obj->prototype_mark().set_marked()); if (obj->mark_must_be_preserved(mark)) { preserve_mark(obj, mark); @@ -74,35 +75,36 @@ inline void MarkAndPushClosure::do_oop(narrowOop* p) { do_oop_work(p); } inline void MarkAndPushClosure::do_klass(Klass* k) { MarkSweep::follow_klass(k); } inline void MarkAndPushClosure::do_cld(ClassLoaderData* cld) { MarkSweep::follow_cld(cld); } -template inline void MarkSweep::adjust_pointer(T* p) { +template +inline void MarkSweep::adjust_pointer(T* p) { T heap_oop = RawAccess<>::oop_load(p); if (!CompressedOops::is_null(heap_oop)) { oop obj = CompressedOops::decode_not_null(heap_oop); assert(Universe::heap()->is_in(obj), "should be in heap"); - oop new_obj = cast_to_oop(obj->mark().decode_pointer()); - - assert(new_obj != NULL || // is forwarding ptr? - obj->mark() == markWord::prototype() || // not gc marked? - (UseBiasedLocking && obj->mark().has_bias_pattern()), - // not gc marked? 
- "should be forwarded"); - - if (new_obj != NULL) { + if (SlidingForwarding::is_forwarded(obj)) { + oop new_obj = SlidingForwarding::forwardee(obj); + assert(new_obj != NULL, "must be forwarded"); assert(is_object_aligned(new_obj), "oop must be aligned"); RawAccess::oop_store(p, new_obj); } } } +template template -void AdjustPointerClosure::do_oop_work(T* p) { MarkSweep::adjust_pointer(p); } -inline void AdjustPointerClosure::do_oop(oop* p) { do_oop_work(p); } -inline void AdjustPointerClosure::do_oop(narrowOop* p) { do_oop_work(p); } +void AdjustPointerClosure::do_oop_work(T* p) { MarkSweep::adjust_pointer(p); } + +template +inline void AdjustPointerClosure::do_oop(oop* p) { do_oop_work(p); } +template +inline void AdjustPointerClosure::do_oop(narrowOop* p) { do_oop_work(p); } +template inline int MarkSweep::adjust_pointers(oop obj) { - return obj->oop_iterate_size(&MarkSweep::adjust_pointer_closure); + AdjustPointerClosure adjust_pointer_closure; + return obj->oop_iterate_size(&adjust_pointer_closure); } #endif // SHARE_GC_SERIAL_MARKSWEEP_INLINE_HPP diff --git a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp index 663ff91372b..b1828fcdb17 100644 --- a/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp +++ b/src/hotspot/share/gc/shared/c1/barrierSetC1.cpp @@ -334,7 +334,7 @@ void BarrierSetC1::generate_referent_check(LIRAccess& access, LabelObj* cont) { if (gen_type_check) { // We have determined that offset == referent_offset && src != null. // if (src->_klass->_reference_type == REF_NONE) -> continue - __ move(new LIR_Address(base_reg, oopDesc::klass_offset_in_bytes(), T_ADDRESS), src_klass); + gen->load_klass(base_reg, src_klass, NULL); LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); LIR_Opr reference_type = gen->new_register(T_INT); __ move(reference_type_addr, reference_type); diff --git a/src/hotspot/share/gc/shared/collectedHeap.cpp b/src/hotspot/share/gc/shared/collectedHeap.cpp index 96b3eb94681..aee5f2bb2ce 100644 --- a/src/hotspot/share/gc/shared/collectedHeap.cpp +++ b/src/hotspot/share/gc/shared/collectedHeap.cpp @@ -208,10 +208,6 @@ bool CollectedHeap::is_oop(oop object) const { return false; } - if (is_in(object->klass_or_null())) { - return false; - } - return true; } @@ -230,8 +226,10 @@ CollectedHeap::CollectedHeap() : { const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT)); const size_t elements_per_word = HeapWordSize / sizeof(jint); - _filler_array_max_size = align_object_size(filler_array_hdr_size() + - max_len / elements_per_word); + int header_size_in_bytes = arrayOopDesc::base_offset_in_bytes(T_INT); + assert(header_size_in_bytes % sizeof(jint) == 0, "must be aligned to int"); + int header_size_in_ints = header_size_in_bytes / sizeof(jint); + _filler_array_max_size = align_object_size((header_size_in_ints + max_len) / elements_per_word); NOT_PRODUCT(_promotion_failure_alot_count = 0;) NOT_PRODUCT(_promotion_failure_alot_gc_number = 0;) @@ -391,18 +389,17 @@ size_t CollectedHeap::max_tlab_size() const { // We actually lose a little by dividing first, // but that just makes the TLAB somewhat smaller than the biggest array, // which is fine, since we'll be able to fill that. 
- size_t max_int_size = typeArrayOopDesc::header_size(T_INT) + + int header_size_in_bytes = typeArrayOopDesc::base_offset_in_bytes(T_INT); + assert(header_size_in_bytes % sizeof(jint) == 0, "header size must align to int"); + size_t max_int_size = header_size_in_bytes / HeapWordSize + sizeof(jint) * ((juint) max_jint / (size_t) HeapWordSize); return align_down(max_int_size, MinObjAlignment); } -size_t CollectedHeap::filler_array_hdr_size() { - return align_object_offset(arrayOopDesc::header_size(T_INT)); // align to Long -} - size_t CollectedHeap::filler_array_min_size() { - return align_object_size(filler_array_hdr_size()); // align to MinObjAlignment + int aligned_header_size_words = align_up(arrayOopDesc::base_offset_in_bytes(T_INT), HeapWordSize) / HeapWordSize; + return align_object_size(aligned_header_size_words); // align to MinObjAlignment } #ifdef ASSERT @@ -415,8 +412,9 @@ void CollectedHeap::fill_args_check(HeapWord* start, size_t words) void CollectedHeap::zap_filler_array(HeapWord* start, size_t words, bool zap) { if (ZapFillerObjects && zap) { - Copy::fill_to_words(start + filler_array_hdr_size(), - words - filler_array_hdr_size(), 0XDEAFBABE); + int payload_start = align_up(arrayOopDesc::base_offset_in_bytes(T_INT), HeapWordSize) / HeapWordSize; + Copy::fill_to_words(start + payload_start, + words - payload_start, 0XDEAFBABE); } } #endif // ASSERT @@ -427,8 +425,9 @@ CollectedHeap::fill_with_array(HeapWord* start, size_t words, bool zap) assert(words >= filler_array_min_size(), "too small for an array"); assert(words <= filler_array_max_size(), "too big for a single object"); - const size_t payload_size = words - filler_array_hdr_size(); - const size_t len = payload_size * HeapWordSize / sizeof(jint); + const size_t payload_size_bytes = words * HeapWordSize - arrayOopDesc::base_offset_in_bytes(T_INT); + assert(payload_size_bytes % sizeof(jint) == 0, "must be int aligned"); + const size_t len = payload_size_bytes / sizeof(jint); assert((int)len >= 0, "size too large " SIZE_FORMAT " becomes %d", words, (int)len); ObjArrayAllocator allocator(Universe::intArrayKlassObj(), words, (int)len, /* do_zero */ false); @@ -481,10 +480,6 @@ void CollectedHeap::fill_with_dummy_object(HeapWord* start, HeapWord* end, bool CollectedHeap::fill_with_object(start, end, zap); } -size_t CollectedHeap::min_dummy_object_size() const { - return oopDesc::header_size(); -} - size_t CollectedHeap::tlab_alloc_reserve() const { size_t min_size = min_dummy_object_size(); return min_size > (size_t)MinObjAlignment ? align_object_size(min_size) : 0; diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp index e4d444b8c95..928072550a6 100644 --- a/src/hotspot/share/gc/shared/collectedHeap.hpp +++ b/src/hotspot/share/gc/shared/collectedHeap.hpp @@ -140,7 +140,6 @@ class CollectedHeap : public CHeapObj { bool* gc_overhead_limit_was_exceeded) = 0; // Filler object utilities. 
- static inline size_t filler_array_hdr_size(); static inline size_t filler_array_min_size(); DEBUG_ONLY(static void fill_args_check(HeapWord* start, size_t words);) @@ -275,7 +274,11 @@ class CollectedHeap : public CHeapObj { } virtual void fill_with_dummy_object(HeapWord* start, HeapWord* end, bool zap); - virtual size_t min_dummy_object_size() const; + + static size_t min_dummy_object_size() { + return oopDesc::header_size(); + } + size_t tlab_alloc_reserve() const; // Some heaps may offer a contiguous region for shared non-blocking diff --git a/src/hotspot/share/gc/shared/gc_globals.hpp b/src/hotspot/share/gc/shared/gc_globals.hpp index 61d66749d6d..a86e1b7a6bc 100644 --- a/src/hotspot/share/gc/shared/gc_globals.hpp +++ b/src/hotspot/share/gc/shared/gc_globals.hpp @@ -694,7 +694,10 @@ product(uintx, GCDrainStackTargetSize, 64, \ "Number of entries we will try to leave on the stack " \ "during parallel gc") \ - range(0, max_juint) + range(0, max_juint) \ + \ + product(bool, UseAltGCForwarding, false, EXPERIMENTAL, \ + "Use alternative GC forwarding that preserves object headers") \ // end of GC_FLAGS diff --git a/src/hotspot/share/gc/shared/genCollectedHeap.cpp b/src/hotspot/share/gc/shared/genCollectedHeap.cpp index 528aacf3b5b..c2eb106c40b 100644 --- a/src/hotspot/share/gc/shared/genCollectedHeap.cpp +++ b/src/hotspot/share/gc/shared/genCollectedHeap.cpp @@ -52,6 +52,7 @@ #include "gc/shared/oopStorageSet.inline.hpp" #include "gc/shared/oopStorageParState.inline.hpp" #include "gc/shared/scavengableNMethods.hpp" +#include "gc/shared/slidingForwarding.hpp" #include "gc/shared/space.hpp" #include "gc/shared/strongRootsScope.hpp" #include "gc/shared/weakProcessor.hpp" @@ -137,6 +138,8 @@ jint GenCollectedHeap::initialize() { GCInitLogger::print(); + SlidingForwarding::initialize(_reserved, SpaceAlignment / HeapWordSize); + return JNI_OK; } diff --git a/src/hotspot/share/gc/shared/memAllocator.cpp b/src/hotspot/share/gc/shared/memAllocator.cpp index a23a2e70bf6..a50b2e67c82 100644 --- a/src/hotspot/share/gc/shared/memAllocator.cpp +++ b/src/hotspot/share/gc/shared/memAllocator.cpp @@ -376,7 +376,9 @@ void MemAllocator::mem_clear(HeapWord* mem) const { assert(mem != NULL, "cannot initialize NULL object"); const size_t hs = oopDesc::header_size(); assert(_word_size >= hs, "unexpected object size"); - oopDesc::set_klass_gap(mem, 0); + if (!UseCompactObjectHeaders) { + oopDesc::set_klass_gap(mem, 0); + } Copy::fill_to_aligned_words(mem + hs, _word_size - hs); } @@ -384,6 +386,8 @@ oop MemAllocator::finish(HeapWord* mem) const { assert(mem != NULL, "NULL object pointer"); if (UseBiasedLocking) { oopDesc::set_mark(mem, _klass->prototype_header()); + } else if (UseCompactObjectHeaders) { + oopDesc::release_set_mark(mem, _klass->prototype_header()); } else { // May be bootstrapping oopDesc::set_mark(mem, markWord::prototype()); @@ -391,7 +395,9 @@ oop MemAllocator::finish(HeapWord* mem) const { // Need a release store to ensure array/class length, mark word, and // object zeroing are visible before setting the klass non-NULL, for // concurrent collectors. 
- oopDesc::release_set_klass(mem, _klass); + if (!UseCompactObjectHeaders) { + oopDesc::release_set_klass(mem, _klass); + } return cast_to_oop(mem); } @@ -405,7 +411,7 @@ MemRegion ObjArrayAllocator::obj_memory_range(oop obj) const { return MemAllocator::obj_memory_range(obj); } ArrayKlass* array_klass = ArrayKlass::cast(_klass); - const size_t hs = arrayOopDesc::header_size(array_klass->element_type()); + const size_t hs = align_up(arrayOopDesc::base_offset_in_bytes(array_klass->element_type()), HeapWordSize) / HeapWordSize; return MemRegion(cast_from_oop(obj) + hs, _word_size - hs); } diff --git a/src/hotspot/share/gc/shared/preservedMarks.cpp b/src/hotspot/share/gc/shared/preservedMarks.cpp index 8dfd7cf0ff4..0b4933cdb10 100644 --- a/src/hotspot/share/gc/shared/preservedMarks.cpp +++ b/src/hotspot/share/gc/shared/preservedMarks.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "gc/shared/preservedMarks.inline.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "gc/shared/workgroup.hpp" #include "memory/allocation.inline.hpp" #include "memory/resourceArea.hpp" @@ -39,18 +40,27 @@ void PreservedMarks::restore() { assert_empty(); } -void PreservedMarks::adjust_during_full_gc() { +template +void PreservedMarks::adjust_during_full_gc_impl() { StackIterator iter(_stack); while (!iter.is_empty()) { OopAndMarkWord* elem = iter.next_addr(); oop obj = elem->get_oop(); - if (obj->is_forwarded()) { - elem->set_oop(obj->forwardee()); + if (SlidingForwarding::is_forwarded(obj)) { + elem->set_oop(SlidingForwarding::forwardee(obj)); } } } +void PreservedMarks::adjust_during_full_gc() { + if (UseAltGCForwarding) { + adjust_during_full_gc_impl(); + } else { + adjust_during_full_gc_impl(); + } +} + void PreservedMarks::restore_and_increment(volatile size_t* const total_size_addr) { const size_t stack_size = size(); restore(); @@ -124,8 +134,10 @@ class RestorePreservedMarksTask : public AbstractGangTask { ~RestorePreservedMarksTask() { assert(_total_size == _total_size_before, "total_size = %zu before = %zu", _total_size, _total_size_before); - - log_trace(gc)("Restored %zu marks", _total_size); + size_t mem_size = _total_size * (sizeof(oop) + sizeof(markWord)); + log_trace(gc)("Restored %zu marks, occupying %zu %s", _total_size, + byte_size_in_proper_unit(mem_size), + proper_unit_for_byte_size(mem_size)); } }; diff --git a/src/hotspot/share/gc/shared/preservedMarks.hpp b/src/hotspot/share/gc/shared/preservedMarks.hpp index d04d22ef3c9..fae3aa305a2 100644 --- a/src/hotspot/share/gc/shared/preservedMarks.hpp +++ b/src/hotspot/share/gc/shared/preservedMarks.hpp @@ -54,10 +54,14 @@ class PreservedMarks { inline bool should_preserve_mark(oop obj, markWord m) const; + template + void adjust_during_full_gc_impl(); + public: size_t size() const { return _stack.size(); } inline void push(oop obj, markWord m); inline void push_if_necessary(oop obj, markWord m); + inline void push_always(oop obj, markWord m); // Iterate over the stack, restore all preserved marks, and // reclaim the memory taken up by the stack segments. 
void restore(); diff --git a/src/hotspot/share/gc/shared/preservedMarks.inline.hpp b/src/hotspot/share/gc/shared/preservedMarks.inline.hpp index 44bb26f87e6..9e5d4b42194 100644 --- a/src/hotspot/share/gc/shared/preservedMarks.inline.hpp +++ b/src/hotspot/share/gc/shared/preservedMarks.inline.hpp @@ -47,8 +47,14 @@ inline void PreservedMarks::push_if_necessary(oop obj, markWord m) { } } +inline void PreservedMarks::push_always(oop obj, markWord m) { + assert(!m.is_marked(), "precondition"); + OopAndMarkWord elem(obj, m); + _stack.push(elem); +} + inline void PreservedMarks::init_forwarded_mark(oop obj) { - obj->init_mark(); + obj->forward_safe_init_mark(); } inline PreservedMarks::PreservedMarks() diff --git a/src/hotspot/share/gc/shared/slidingForwarding.cpp b/src/hotspot/share/gc/shared/slidingForwarding.cpp new file mode 100644 index 00000000000..bf82246686f --- /dev/null +++ b/src/hotspot/share/gc/shared/slidingForwarding.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2021, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/gc_globals.hpp" +#include "gc/shared/slidingForwarding.hpp" +#include "utilities/fastHash.hpp" +#include "utilities/ostream.hpp" +#include "utilities/powerOfTwo.hpp" + +// We cannot use 0, because that may already be a valid base address in zero-based heaps. +// 0x1 is safe because heap base addresses must be aligned by much larger alignment +HeapWord* const SlidingForwarding::UNUSED_BASE = reinterpret_cast(0x1); + +HeapWord* SlidingForwarding::_heap_start = nullptr; +size_t SlidingForwarding::_region_size_words = 0; +size_t SlidingForwarding::_heap_start_region_bias = 0; +size_t SlidingForwarding::_num_regions = 0; +uint SlidingForwarding::_region_size_bytes_shift = 0; +uintptr_t SlidingForwarding::_region_mask = 0; +HeapWord** SlidingForwarding::_biased_bases[SlidingForwarding::NUM_TARGET_REGIONS] = { nullptr, nullptr }; +HeapWord** SlidingForwarding::_bases_table = nullptr; +SlidingForwarding::FallbackTable* SlidingForwarding::_fallback_table = nullptr; + +void SlidingForwarding::initialize(MemRegion heap, size_t region_size_words) { +#ifdef _LP64 + if (UseAltGCForwarding) { + _heap_start = heap.start(); + + // If the heap is small enough to fit directly into the available offset bits, + // and we are running Serial GC, we can treat the whole heap as a single region + // if it happens to be aligned to allow biasing. 
+ size_t rounded_heap_size = round_up_power_of_2(heap.byte_size()); + + if (UseSerialGC && (heap.word_size() <= (1 << NUM_OFFSET_BITS)) && + is_aligned((uintptr_t)_heap_start, rounded_heap_size)) { + _num_regions = 1; + _region_size_words = heap.word_size(); + _region_size_bytes_shift = log2i_exact(rounded_heap_size); + } else { + _num_regions = align_up(pointer_delta(heap.end(), heap.start()), region_size_words) / region_size_words; + _region_size_words = region_size_words; + _region_size_bytes_shift = log2i_exact(_region_size_words) + LogHeapWordSize; + } + _heap_start_region_bias = (uintptr_t)_heap_start >> _region_size_bytes_shift; + _region_mask = ~((uintptr_t(1) << _region_size_bytes_shift) - 1); + + guarantee((_heap_start_region_bias << _region_size_bytes_shift) == (uintptr_t)_heap_start, "must be aligned: _heap_start_region_bias: " SIZE_FORMAT ", _region_size_byte_shift: %u, _heap_start: " PTR_FORMAT, _heap_start_region_bias, _region_size_bytes_shift, p2i(_heap_start)); + + assert(_region_size_words >= 1, "regions must be at least a word large"); + assert(_bases_table == nullptr, "should not be initialized yet"); + assert(_fallback_table == nullptr, "should not be initialized yet"); + } +#endif +} + +void SlidingForwarding::begin() { +#ifdef _LP64 + if (UseAltGCForwarding) { + assert(_bases_table == nullptr, "should not be initialized yet"); + assert(_fallback_table == nullptr, "should not be initialized yet"); + + size_t max = _num_regions * NUM_TARGET_REGIONS; + _bases_table = NEW_C_HEAP_ARRAY(HeapWord*, max, mtGC); + HeapWord** biased_start = _bases_table - _heap_start_region_bias; + _biased_bases[0] = biased_start; + _biased_bases[1] = biased_start + _num_regions; + for (size_t i = 0; i < max; i++) { + _bases_table[i] = UNUSED_BASE; + } + } +#endif +} + +void SlidingForwarding::end() { +#ifdef _LP64 + if (UseAltGCForwarding) { + assert(_bases_table != nullptr, "should be initialized"); + FREE_C_HEAP_ARRAY(HeapWord*, _bases_table); + _bases_table = nullptr; + delete _fallback_table; + _fallback_table = nullptr; + } +#endif +} + +void SlidingForwarding::fallback_forward_to(HeapWord* from, HeapWord* to) { + if (_fallback_table == nullptr) { + _fallback_table = new FallbackTable(); + } + _fallback_table->forward_to(from, to); +} + +HeapWord* SlidingForwarding::fallback_forwardee(HeapWord* from) { + assert(_fallback_table != nullptr, "fallback table must be present"); + return _fallback_table->forwardee(from); +} + +SlidingForwarding::FallbackTable::FallbackTable() { + for (uint i = 0; i < TABLE_SIZE; i++) { + _table[i]._next = nullptr; + _table[i]._from = nullptr; + _table[i]._to = nullptr; + } +} + +SlidingForwarding::FallbackTable::~FallbackTable() { + for (uint i = 0; i < TABLE_SIZE; i++) { + FallbackTableEntry* entry = _table[i]._next; + while (entry != nullptr) { + FallbackTableEntry* next = entry->_next; + FREE_C_HEAP_OBJ(entry); + entry = next; + } + } +} + +size_t SlidingForwarding::FallbackTable::home_index(HeapWord* from) { + uint64_t val = reinterpret_cast(from); + uint64_t hash = FastHash::get_hash64(val, UCONST64(0xAAAAAAAAAAAAAAAA)); + return hash >> (64 - log2i_exact(TABLE_SIZE)); +} + +void SlidingForwarding::FallbackTable::forward_to(HeapWord* from, HeapWord* to) { + size_t idx = home_index(from); + FallbackTableEntry* head = &_table[idx]; +#ifdef ASSERT + // Search existing entry in chain starting at idx. 
+ for (FallbackTableEntry* entry = head; entry != nullptr; entry = entry->_next) { + assert(entry->_from != from, "Don't re-forward entries into the fallback-table"); + } +#endif + // No entry found, create new one and insert after head. + FallbackTableEntry* new_entry = NEW_C_HEAP_OBJ(FallbackTableEntry, mtGC); + *new_entry = *head; + head->_next = new_entry; + head->_from = from; + head->_to = to; +} + +HeapWord* SlidingForwarding::FallbackTable::forwardee(HeapWord* from) const { + size_t idx = home_index(from); + const FallbackTableEntry* entry = &_table[idx]; + while (entry != nullptr) { + if (entry->_from == from) { + return entry->_to; + } + entry = entry->_next; + } + return nullptr; +} diff --git a/src/hotspot/share/gc/shared/slidingForwarding.hpp b/src/hotspot/share/gc/shared/slidingForwarding.hpp new file mode 100644 index 00000000000..def4732b94e --- /dev/null +++ b/src/hotspot/share/gc/shared/slidingForwarding.hpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2021, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHARED_SLIDINGFORWARDING_HPP +#define SHARE_GC_SHARED_SLIDINGFORWARDING_HPP + +#include "memory/allocation.hpp" +#include "memory/memRegion.hpp" +#include "oops/markWord.hpp" +#include "oops/oopsHierarchy.hpp" + +/** + * SlidingForwarding is a method to store forwarding information in a compressed form into the object header, + * that has been specifically designed for sliding compaction GCs and compact object headers. With compact object + * headers, we store the compressed class pointer in the header, which would be overwritten by full forwarding + * pointer, if we allow the legacy forwarding code to act. This would lose the class information for the object, + * which is required later in GC cycle to iterate the reference fields and get the object size for copying. + * + * SlidingForwarding requires only small side tables and guarantees constant-time access and modification. + * + * The idea is to use a pointer compression scheme very similar to the one that is used for compressed oops. + * We divide the heap into number of logical regions. Each region spans maximum of 2^NUM_OFFSET_BITS words. + * + * The key advantage of sliding compaction for encoding efficiency: it can forward objects from one region to a + * maximum of two regions. This is an intuitive property: when we slide the compact region full of data, it can + * only span two adjacent regions. 
This property allows us to use a side table to record the addresses of + * two target regions. The table holds N*2 entries for N logical regions. For each region, it gives the base + * addresses of the two target regions, or a special placeholder if not used. A single bit in the forwarding + * indicates which of the two "to" regions the object is forwarded into. + * + * This encoding efficiency allows us to store the forwarding information in the object header _together_ with the + * compressed class pointer. + * + * When recording the sliding forwarding, the mark word would look roughly like this: + * + * 64 32 0 + * [................................OOOOOOOOOOOOOOOOOOOOOOOOOOOOAFTT] + * ^----- normal lock bits, would record "object is forwarded" + * ^------- fallback bit (explained below) + * ^-------- alternate region select + * ^------------------------------------ in-region offset + * ^-------------------------------------------------------------------- protected area, *not touched* by this code, useful for + * compressed class pointer with compact object headers + * + * Adding a forwarding then generally works as follows: + * 1. Compute the "to" offset in the "to" region; this gives "offset". + * 2. Check whether the primary base-table entry for the "from" region holds the "to" region base, and use it. + * If it is not usable, continue to the next step. If it is usable, set "alternate" = "false" and jump to (4). + * 3. Check whether the alternate base-table entry for the "from" region holds the "to" region base, and use it. + * This gives us "alternate" = "true". This should always succeed for sliding forwarding. + * 4. Compute the mark word from "offset" and "alternate", and write it out. + * + * Similarly, looking up the target address for a given original object address generally works as follows: + * 1. Load the mark from the object, and decode "offset" and "alternate" from there. + * 2. Compute the "from" region index from the object address. + * 3. Look up the "to" region base in the base table at either the primary or the alternate index, using the "alternate" flag. + * 4. Compute the "to" address from the "to" region base and "offset". + * + * This algorithm is broken by G1 last-ditch serial compaction: there, objects from a single region can be + * forwarded to more than two regions. To deal with that, we initialize a fallback hashtable for + * storing those extra forwardings, and set another bit in the header to indicate that the forwardee is not + * encoded but should be looked up in the hashtable. G1 serial compaction is not very common - it is the + * last-ditch GC that is used when the JVM is scrambling to squeeze more space out of the heap, and at + * that point, ultimate performance is no longer the main concern. + */ +class SlidingForwarding : public AllStatic { +private: + + /* + * A simple hash table that acts as a fallback for the sliding forwarding. + * This is used in the case of G1 serial compaction, which violates the + * assumption of sliding forwarding that objects from any one region are only + * ever forwarded to one of two target regions. At this point, the GC is + * scrambling to free up more Java heap memory, and therefore performance + * is not the major concern. + * + * The implementation is a straightforward open hashtable. + * It is a single-threaded (not thread-safe) implementation, and that + * is sufficient because G1 serial compaction is single-threaded.
+ */ + class FallbackTable : public CHeapObj<mtGC> { + private: + struct FallbackTableEntry { + FallbackTableEntry* _next; + HeapWord* _from; + HeapWord* _to; + }; + + static const uint TABLE_SIZE = 1024; + FallbackTableEntry _table[TABLE_SIZE]; + + static size_t home_index(HeapWord* from); + + public: + FallbackTable(); + ~FallbackTable(); + + void forward_to(HeapWord* from, HeapWord* to); + HeapWord* forwardee(HeapWord* from) const; + }; + + static const uintptr_t MARK_LOWER_HALF_MASK = right_n_bits(32); + + // We need the lowest two bits to indicate a forwarded object. + // The next bit indicates that the forwardee should be looked-up in a fallback-table. + static const int FALLBACK_SHIFT = markWord::lock_bits; + static const int FALLBACK_BITS = 1; + static const int FALLBACK_MASK = right_n_bits(FALLBACK_BITS) << FALLBACK_SHIFT; + + // Next bit selects the target region + static const int ALT_REGION_SHIFT = FALLBACK_SHIFT + FALLBACK_BITS; + static const int ALT_REGION_BITS = 1; + // This will be "2" always, but expose it as named constant for clarity + static const size_t NUM_TARGET_REGIONS = 1 << ALT_REGION_BITS; + + // The offset bits start then + static const int OFFSET_BITS_SHIFT = ALT_REGION_SHIFT + ALT_REGION_BITS; + + // How many bits we use for the offset + static const int NUM_OFFSET_BITS = 32 - OFFSET_BITS_SHIFT; + + // Indicates an unused base address in the target base table. + static HeapWord* const UNUSED_BASE; + + static HeapWord* _heap_start; + static size_t _region_size_words; + + static size_t _heap_start_region_bias; + static size_t _num_regions; + static uint _region_size_bytes_shift; + static uintptr_t _region_mask; + + // The target base table memory. + static HeapWord** _bases_table; + // Entries into the target base tables, biased to the start of the heap. + static HeapWord** _biased_bases[NUM_TARGET_REGIONS]; + + static FallbackTable* _fallback_table; + + static inline size_t biased_region_index_containing(HeapWord* addr); + + static inline uintptr_t encode_forwarding(HeapWord* from, HeapWord* to); + static inline HeapWord* decode_forwarding(HeapWord* from, uintptr_t encoded); + + static void fallback_forward_to(HeapWord* from, HeapWord* to); + static HeapWord* fallback_forwardee(HeapWord* from); + + static inline void forward_to_impl(oop from, oop to); + static inline oop forwardee_impl(oop from); + +public: + static void initialize(MemRegion heap, size_t region_size_words); + + static void begin(); + static void end(); + + static inline bool is_forwarded(oop obj); + static inline bool is_not_forwarded(oop obj); + + template <bool ALT_FWD> + static inline void forward_to(oop from, oop to); + template <bool ALT_FWD> + static inline oop forwardee(oop from); +}; + +#endif // SHARE_GC_SHARED_SLIDINGFORWARDING_HPP diff --git a/src/hotspot/share/gc/shared/slidingForwarding.inline.hpp b/src/hotspot/share/gc/shared/slidingForwarding.inline.hpp new file mode 100644 index 00000000000..e81b345d281 --- /dev/null +++ b/src/hotspot/share/gc/shared/slidingForwarding.inline.hpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2021, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation.
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef SHARE_GC_SHARED_SLIDINGFORWARDING_INLINE_HPP +#define SHARE_GC_SHARED_SLIDINGFORWARDING_INLINE_HPP + +#include "gc/shared/gc_globals.hpp" +#include "gc/shared/slidingForwarding.hpp" +#include "oops/markWord.hpp" +#include "oops/oop.inline.hpp" +#include "utilities/macros.hpp" + +inline bool SlidingForwarding::is_forwarded(oop obj) { + return obj->is_forwarded(); } + +inline bool SlidingForwarding::is_not_forwarded(oop obj) { + return !obj->is_forwarded(); +} + +size_t SlidingForwarding::biased_region_index_containing(HeapWord* addr) { + return (uintptr_t)addr >> _region_size_bytes_shift; +} + +uintptr_t SlidingForwarding::encode_forwarding(HeapWord* from, HeapWord* to) { + static_assert(NUM_TARGET_REGIONS == 2, "Only implemented for this amount"); + + size_t from_reg_idx = biased_region_index_containing(from); + HeapWord* to_region_base = (HeapWord*)((uintptr_t)to & _region_mask); + + HeapWord** base = &_biased_bases[0][from_reg_idx]; + uintptr_t alternate = 0; + if (*base == to_region_base) { + // Primary is good + } else if (*base == UNUSED_BASE) { + // Primary is free + *base = to_region_base; + } else { + base = &_biased_bases[1][from_reg_idx]; + if (*base == to_region_base) { + // Alternate is good + } else if (*base == UNUSED_BASE) { + // Alternate is free + *base = to_region_base; + } else { + // Neither primary nor alternate fits. + // This happens only in the following rare situations: + // - In Serial GC, sometimes when compact-top switches spaces, because the + // region boundaries are virtual and objects can cross regions + // - In G1 serial compaction, because tails of various compaction chains + // are distributed across the remainders of already compacted regions. + return (1 << FALLBACK_SHIFT) | markWord::marked_value; + } + alternate = 1; + } + + size_t offset = pointer_delta(to, to_region_base); + assert(offset < _region_size_words, "Offset should be within the region.
from: " PTR_FORMAT + ", to: " PTR_FORMAT ", to_region_base: " PTR_FORMAT ", offset: " SIZE_FORMAT, + p2i(from), p2i(to), p2i(to_region_base), offset); + + uintptr_t encoded = (offset << OFFSET_BITS_SHIFT) | + (alternate << ALT_REGION_SHIFT) | + markWord::marked_value; + + assert(to == decode_forwarding(from, encoded), "must be reversible"); + assert((encoded & ~MARK_LOWER_HALF_MASK) == 0, "must encode to lowest 32 bits"); + return encoded; +} + +HeapWord* SlidingForwarding::decode_forwarding(HeapWord* from, uintptr_t encoded) { + assert((encoded & markWord::lock_mask_in_place) == markWord::marked_value, "must be marked as forwarded"); + assert((encoded & FALLBACK_MASK) == 0, "must not be fallback-forwarded"); + assert((encoded & ~MARK_LOWER_HALF_MASK) == 0, "must decode from lowest 32 bits"); + size_t alternate = (encoded >> ALT_REGION_SHIFT) & right_n_bits(ALT_REGION_BITS); + assert(alternate < NUM_TARGET_REGIONS, "Sanity"); + uintptr_t offset = (encoded >> OFFSET_BITS_SHIFT); + + size_t from_idx = biased_region_index_containing(from); + HeapWord* base = _biased_bases[alternate][from_idx]; + assert(base != UNUSED_BASE, "must not be unused base"); + HeapWord* decoded = base + offset; + assert(decoded >= _heap_start, + "Address must be above heap start. encoded: " INTPTR_FORMAT ", alt_region: " SIZE_FORMAT ", base: " PTR_FORMAT, + encoded, alternate, p2i(base)); + + return decoded; +} + +inline void SlidingForwarding::forward_to_impl(oop from, oop to) { + assert(_bases_table != nullptr, "call begin() before forwarding"); + + markWord from_header = from->mark(); + if (from_header.has_displaced_mark_helper()) { + from_header = from_header.displaced_mark_helper(); + } + + HeapWord* from_hw = cast_from_oop(from); + HeapWord* to_hw = cast_from_oop(to); + uintptr_t encoded = encode_forwarding(from_hw, to_hw); + markWord new_header = markWord((from_header.value() & ~MARK_LOWER_HALF_MASK) | encoded); + from->set_mark(new_header); + + if ((encoded & FALLBACK_MASK) != 0) { + fallback_forward_to(from_hw, to_hw); + } +} + +template +inline void SlidingForwarding::forward_to(oop obj, oop fwd) { +#ifdef _LP64 + if (ALT_FWD) { + assert(_bases_table != nullptr, "expect sliding forwarding initialized"); + forward_to_impl(obj, fwd); + assert(forwardee(obj) == fwd, "must be forwarded to correct forwardee"); + } else +#endif + { + obj->forward_to(fwd); + } +} + +inline oop SlidingForwarding::forwardee_impl(oop from) { + assert(_bases_table != nullptr, "call begin() before asking for forwarding"); + + markWord header = from->mark(); + HeapWord* from_hw = cast_from_oop(from); + if ((header.value() & FALLBACK_MASK) != 0) { + HeapWord* to = fallback_forwardee(from_hw); + return cast_to_oop(to); + } + uintptr_t encoded = header.value() & MARK_LOWER_HALF_MASK; + HeapWord* to = decode_forwarding(from_hw, encoded); + return cast_to_oop(to); +} + +template +inline oop SlidingForwarding::forwardee(oop obj) { +#ifdef _LP64 + if (ALT_FWD) { + assert(_bases_table != nullptr, "expect sliding forwarding initialized"); + return forwardee_impl(obj); + } else +#endif + { + return obj->forwardee(); + } +} + +#endif // SHARE_GC_SHARED_SLIDINGFORWARDING_INLINE_HPP diff --git a/src/hotspot/share/gc/shared/space.cpp b/src/hotspot/share/gc/shared/space.cpp index 8e95115c2e7..5db419dd21a 100644 --- a/src/hotspot/share/gc/shared/space.cpp +++ b/src/hotspot/share/gc/shared/space.cpp @@ -29,6 +29,7 @@ #include "gc/shared/collectedHeap.inline.hpp" #include "gc/shared/genCollectedHeap.hpp" #include "gc/shared/genOopClosures.inline.hpp" 
+#include "gc/shared/slidingForwarding.inline.hpp" #include "gc/shared/space.hpp" #include "gc/shared/space.inline.hpp" #include "gc/shared/spaceDecorator.inline.hpp" @@ -346,6 +347,7 @@ void CompactibleSpace::clear(bool mangle_space) { _compaction_top = bottom(); } +template HeapWord* CompactibleSpace::forward(oop q, size_t size, CompactPoint* cp, HeapWord* compact_top) { // q is alive @@ -370,13 +372,13 @@ HeapWord* CompactibleSpace::forward(oop q, size_t size, // store the forwarding pointer into the mark word if (cast_from_oop(q) != compact_top) { - q->forward_to(cast_to_oop(compact_top)); + SlidingForwarding::forward_to(q, cast_to_oop(compact_top)); assert(q->is_gc_marked(), "encoding the pointer should preserve the mark"); } else { // if the object isn't moving we can just set the mark to the default // mark and handle it specially later on. q->init_mark(); - assert(q->forwardee() == NULL, "should be forwarded to NULL"); + assert(SlidingForwarding::is_not_forwarded(q), "should not be forwarded"); } compact_top += size; @@ -393,7 +395,11 @@ HeapWord* CompactibleSpace::forward(oop q, size_t size, #if INCLUDE_SERIALGC void ContiguousSpace::prepare_for_compaction(CompactPoint* cp) { - scan_and_forward(this, cp); + if (UseAltGCForwarding) { + scan_and_forward(this, cp); + } else { + scan_and_forward(this, cp); + } } void CompactibleSpace::adjust_pointers() { @@ -402,11 +408,19 @@ void CompactibleSpace::adjust_pointers() { return; // Nothing to do. } - scan_and_adjust_pointers(this); + if (UseAltGCForwarding) { + scan_and_adjust_pointers(this); + } else { + scan_and_adjust_pointers(this); + } } void CompactibleSpace::compact() { - scan_and_compact(this); + if (UseAltGCForwarding) { + scan_and_compact(this); + } else { + scan_and_compact(this); + } } #endif // INCLUDE_SERIALGC @@ -586,22 +600,30 @@ void ContiguousSpace::allocate_temporary_filler(int factor) { } size = align_object_size(size); - const size_t array_header_size = typeArrayOopDesc::header_size(T_INT); + const size_t array_header_size = (arrayOopDesc::base_offset_in_bytes(T_INT) + BytesPerWord) / BytesPerWord; if (size >= align_object_size(array_header_size)) { size_t length = (size - array_header_size) * (HeapWordSize / sizeof(jint)); // allocate uninitialized int array typeArrayOop t = (typeArrayOop) cast_to_oop(allocate(size)); assert(t != NULL, "allocation should succeed"); - t->set_mark(markWord::prototype()); - t->set_klass(Universe::intArrayKlassObj()); + if (UseCompactObjectHeaders) { + t->set_mark(Universe::intArrayKlassObj()->prototype_header()); + } else { + t->set_mark(markWord::prototype()); + t->set_klass(Universe::intArrayKlassObj()); + } t->set_length((int)length); } else { assert(size == CollectedHeap::min_fill_size(), "size for smallest fake object doesn't match"); instanceOop obj = (instanceOop) cast_to_oop(allocate(size)); - obj->set_mark(markWord::prototype()); - obj->set_klass_gap(0); - obj->set_klass(vmClasses::Object_klass()); + if (UseCompactObjectHeaders) { + obj->set_mark(vmClasses::Object_klass()->prototype_header()); + } else { + obj->set_mark(markWord::prototype()); + obj->set_klass_gap(0); + obj->set_klass(vmClasses::Object_klass()); + } } } diff --git a/src/hotspot/share/gc/shared/space.hpp b/src/hotspot/share/gc/shared/space.hpp index f0e4e80bd2e..d80ee860e12 100644 --- a/src/hotspot/share/gc/shared/space.hpp +++ b/src/hotspot/share/gc/shared/space.hpp @@ -431,7 +431,8 @@ class CompactibleSpace: public Space { // If the forwarding crosses "cp->threshold", invokes the "cross_threshold" // function 
of the then-current compaction space, and updates "cp->threshold // accordingly". - virtual HeapWord* forward(oop q, size_t size, CompactPoint* cp, + template + HeapWord* forward(oop q, size_t size, CompactPoint* cp, HeapWord* compact_top); // Return a size with adjustments as required of the space. @@ -460,17 +461,17 @@ class CompactibleSpace: public Space { #if INCLUDE_SERIALGC // Frequently calls adjust_obj_size(). - template + template static inline void scan_and_adjust_pointers(SpaceType* space); #endif // Frequently calls obj_size(). - template + template static inline void scan_and_compact(SpaceType* space); // Frequently calls scanned_block_is_obj() and scanned_block_size(). // Requires the scan_limit() function. - template + template static inline void scan_and_forward(SpaceType* space, CompactPoint* cp); }; @@ -481,7 +482,7 @@ class GenSpaceMangler; class ContiguousSpace: public CompactibleSpace { friend class VMStructs; // Allow scan_and_forward function to call (private) overrides for auxiliary functions on this class - template + template friend void CompactibleSpace::scan_and_forward(SpaceType* space, CompactPoint* cp); private: diff --git a/src/hotspot/share/gc/shared/space.inline.hpp b/src/hotspot/share/gc/shared/space.inline.hpp index ce2423ece3f..850e52f1ada 100644 --- a/src/hotspot/share/gc/shared/space.inline.hpp +++ b/src/hotspot/share/gc/shared/space.inline.hpp @@ -133,7 +133,7 @@ class DeadSpacer : StackObj { }; -template +template inline void CompactibleSpace::scan_and_forward(SpaceType* space, CompactPoint* cp) { // Compute the new addresses for the live objects and store it in the mark // Used by universe::mark_sweep_phase2() @@ -168,7 +168,7 @@ inline void CompactibleSpace::scan_and_forward(SpaceType* space, CompactPoint* c // prefetch beyond cur_obj Prefetch::write(cur_obj, interval); size_t size = space->scanned_block_size(cur_obj); - compact_top = cp->space->forward(cast_to_oop(cur_obj), size, cp, compact_top); + compact_top = cp->space->forward(cast_to_oop(cur_obj), size, cp, compact_top); cur_obj += size; end_of_live = cur_obj; } else { @@ -184,7 +184,7 @@ inline void CompactibleSpace::scan_and_forward(SpaceType* space, CompactPoint* c // we don't have to compact quite as often. if (cur_obj == compact_top && dead_spacer.insert_deadspace(cur_obj, end)) { oop obj = cast_to_oop(cur_obj); - compact_top = cp->space->forward(obj, obj->size(), cp, compact_top); + compact_top = cp->space->forward(obj, obj->size(), cp, compact_top); end_of_live = end; } else { // otherwise, it really is a free region. 
@@ -215,7 +215,7 @@ inline void CompactibleSpace::scan_and_forward(SpaceType* space, CompactPoint* c cp->space->set_compaction_top(compact_top); } -template +template inline void CompactibleSpace::scan_and_adjust_pointers(SpaceType* space) { // adjust all the interior pointers to point at the new locations of objects // Used by MarkSweep::mark_sweep_phase3() @@ -234,7 +234,7 @@ inline void CompactibleSpace::scan_and_adjust_pointers(SpaceType* space) { if (cur_obj < first_dead || cast_to_oop(cur_obj)->is_gc_marked()) { // cur_obj is alive // point all the oops to the new location - size_t size = MarkSweep::adjust_pointers(cast_to_oop(cur_obj)); + size_t size = MarkSweep::adjust_pointers(cast_to_oop(cur_obj)); size = space->adjust_obj_size(size); debug_only(prev_obj = cur_obj); cur_obj += size; @@ -288,7 +288,7 @@ inline void CompactibleSpace::clear_empty_region(SpaceType* space) { } } -template +template inline void CompactibleSpace::scan_and_compact(SpaceType* space) { // Copy all live objects to their new location // Used by MarkSweep::mark_sweep_phase4() @@ -329,7 +329,7 @@ inline void CompactibleSpace::scan_and_compact(SpaceType* space) { // size and destination size_t size = space->obj_size(cur_obj); - HeapWord* compaction_top = cast_from_oop(cast_to_oop(cur_obj)->forwardee()); + HeapWord* compaction_top = cast_from_oop(SlidingForwarding::forwardee(cast_to_oop(cur_obj))); // prefetch beyond compaction_top Prefetch::write(compaction_top, copy_interval); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp index 87cf1c41d3f..b4e7bc7cc31 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp @@ -197,7 +197,7 @@ void ShenandoahAsserts::assert_correct(void* interior_loc, oop obj, const char* file, line); } - Klass* obj_klass = obj->klass_or_null(); + Klass* obj_klass = obj->forward_safe_klass(); if (obj_klass == NULL) { print_failure(_safe_unknown, obj, interior_loc, NULL, "Shenandoah assert_correct failed", "Object klass pointer should not be NULL", @@ -229,7 +229,7 @@ void ShenandoahAsserts::assert_correct(void* interior_loc, oop obj, const char* file, line); } - if (obj_klass != fwd->klass()) { + if (obj_klass != fwd->forward_safe_klass()) { print_failure(_safe_oop, obj, interior_loc, NULL, "Shenandoah assert_correct failed", "Forwardee klass disagrees with object class", file, line); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp index 26c31356f44..f8efd52e81c 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.cpp @@ -27,6 +27,7 @@ #include "compiler/oopMap.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/preservedMarks.inline.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" #include "gc/shared/tlab_globals.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahConcurrentGC.hpp" @@ -222,6 +223,8 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { // until all phases run together. 
ShenandoahHeapLocker lock(heap->lock()); + SlidingForwarding::begin(); + phase2_calculate_target_addresses(worker_slices); OrderAccess::fence(); @@ -236,6 +239,7 @@ void ShenandoahFullGC::do_it(GCCause::Cause gc_cause) { _preserved_marks->restore(heap->workers()); BiasedLocking::restore_marks(); _preserved_marks->reclaim(); + SlidingForwarding::end(); } // Resize metaspace @@ -297,6 +301,7 @@ void ShenandoahFullGC::phase1_mark_heap() { heap->parallel_cleaning(true /* full_gc */); } +template class ShenandoahPrepareForCompactionObjectClosure : public ObjectClosure { private: PreservedMarks* const _preserved_marks; @@ -365,7 +370,7 @@ class ShenandoahPrepareForCompactionObjectClosure : public ObjectClosure { assert(_compact_point + obj_size <= _to_region->end(), "must fit"); shenandoah_assert_not_forwarded(NULL, p); _preserved_marks->push_if_necessary(p, p->mark()); - p->forward_to(cast_to_oop(_compact_point)); + SlidingForwarding::forward_to(p, cast_to_oop(_compact_point)); _compact_point += obj_size; } }; @@ -396,6 +401,16 @@ class ShenandoahPrepareForCompactionTask : public AbstractGangTask { } void work(uint worker_id) { + if (UseAltGCForwarding) { + work_impl(worker_id); + } else { + work_impl(worker_id); + } + } + +private: + template + void work_impl(uint worker_id) { ShenandoahParallelWorkerSession worker_session(worker_id); ShenandoahHeapRegionSet* slice = _worker_slices[worker_id]; ShenandoahHeapRegionSetIterator it(slice); @@ -411,7 +426,7 @@ class ShenandoahPrepareForCompactionTask : public AbstractGangTask { GrowableArray empty_regions((int)_heap->num_regions()); - ShenandoahPrepareForCompactionObjectClosure cl(_preserved_marks->get(worker_id), empty_regions, from_region); + ShenandoahPrepareForCompactionObjectClosure cl(_preserved_marks->get(worker_id), empty_regions, from_region); while (from_region != NULL) { assert(is_candidate_region(from_region), "Sanity"); @@ -437,7 +452,8 @@ class ShenandoahPrepareForCompactionTask : public AbstractGangTask { } }; -void ShenandoahFullGC::calculate_target_humongous_objects() { +template +void ShenandoahFullGC::calculate_target_humongous_objects_impl() { ShenandoahHeap* heap = ShenandoahHeap::heap(); // Compute the new addresses for humongous objects. We need to do this after addresses @@ -473,7 +489,7 @@ void ShenandoahFullGC::calculate_target_humongous_objects() { if (start >= to_begin && start != r->index()) { // Fits into current window, and the move is non-trivial. Record the move then, and continue scan. 
_preserved_marks->get(0)->push_if_necessary(old_obj, old_obj->mark()); - old_obj->forward_to(cast_to_oop(heap->get_region(start)->bottom())); + SlidingForwarding::forward_to(old_obj, cast_to_oop(heap->get_region(start)->bottom())); to_end = start; continue; } @@ -485,6 +501,14 @@ void ShenandoahFullGC::calculate_target_humongous_objects() { } } +void ShenandoahFullGC::calculate_target_humongous_objects() { + if (UseAltGCForwarding) { + calculate_target_humongous_objects_impl(); + } else { + calculate_target_humongous_objects_impl(); + } +} + class ShenandoahEnsureHeapActiveClosure: public ShenandoahHeapRegionClosure { private: ShenandoahHeap* const _heap; @@ -722,6 +746,7 @@ void ShenandoahFullGC::phase2_calculate_target_addresses(ShenandoahHeapRegionSet } } +template class ShenandoahAdjustPointersClosure : public MetadataVisitingOopIterateClosure { private: ShenandoahHeap* const _heap; @@ -733,8 +758,8 @@ class ShenandoahAdjustPointersClosure : public MetadataVisitingOopIterateClosure if (!CompressedOops::is_null(o)) { oop obj = CompressedOops::decode_not_null(o); assert(_ctx->is_marked(obj), "must be marked"); - if (obj->is_forwarded()) { - oop forw = obj->forwardee(); + if (SlidingForwarding::is_forwarded(obj)) { + oop forw = SlidingForwarding::forwardee(obj); RawAccess::oop_store(p, forw); } } @@ -749,10 +774,11 @@ class ShenandoahAdjustPointersClosure : public MetadataVisitingOopIterateClosure void do_oop(narrowOop* p) { do_oop_work(p); } }; +template class ShenandoahAdjustPointersObjectClosure : public ObjectClosure { private: ShenandoahHeap* const _heap; - ShenandoahAdjustPointersClosure _cl; + ShenandoahAdjustPointersClosure _cl; public: ShenandoahAdjustPointersObjectClosure() : @@ -775,9 +801,11 @@ class ShenandoahAdjustPointersTask : public AbstractGangTask { _heap(ShenandoahHeap::heap()) { } - void work(uint worker_id) { +private: + template + void work_impl(uint worker_id) { ShenandoahParallelWorkerSession worker_session(worker_id); - ShenandoahAdjustPointersObjectClosure obj_cl; + ShenandoahAdjustPointersObjectClosure obj_cl; ShenandoahHeapRegion* r = _regions.next(); while (r != NULL) { if (!r->is_humongous_continuation() && r->has_live()) { @@ -786,24 +814,45 @@ class ShenandoahAdjustPointersTask : public AbstractGangTask { r = _regions.next(); } } + +public: + void work(uint worker_id) { + if (UseAltGCForwarding) { + work_impl(worker_id); + } else { + work_impl(worker_id); + } + } }; class ShenandoahAdjustRootPointersTask : public AbstractGangTask { private: ShenandoahRootAdjuster* _rp; PreservedMarksSet* _preserved_marks; + public: ShenandoahAdjustRootPointersTask(ShenandoahRootAdjuster* rp, PreservedMarksSet* preserved_marks) : AbstractGangTask("Shenandoah Adjust Root Pointers"), _rp(rp), _preserved_marks(preserved_marks) {} - void work(uint worker_id) { +private: + template + void work_impl(uint worker_id) { ShenandoahParallelWorkerSession worker_session(worker_id); - ShenandoahAdjustPointersClosure cl; + ShenandoahAdjustPointersClosure cl; _rp->roots_do(worker_id, &cl); _preserved_marks->get(worker_id)->adjust_during_full_gc(); } + +public: + void work(uint worker_id) { + if (UseAltGCForwarding) { + work_impl(worker_id); + } else { + work_impl(worker_id); + } + } }; void ShenandoahFullGC::phase3_update_references() { @@ -830,6 +879,7 @@ void ShenandoahFullGC::phase3_update_references() { workers->run_task(&adjust_pointers_task); } +template class ShenandoahCompactObjectsClosure : public ObjectClosure { private: ShenandoahHeap* const _heap; @@ -842,9 +892,9 @@ class 
ShenandoahCompactObjectsClosure : public ObjectClosure { void do_object(oop p) { assert(_heap->complete_marking_context()->is_marked(p), "must be marked"); size_t size = (size_t)p->size(); - if (p->is_forwarded()) { + if (SlidingForwarding::is_forwarded(p)) { HeapWord* compact_from = cast_from_oop(p); - HeapWord* compact_to = cast_from_oop(p->forwardee()); + HeapWord* compact_to = cast_from_oop(SlidingForwarding::forwardee(p)); Copy::aligned_conjoint_words(compact_from, compact_to, size); oop new_obj = cast_to_oop(compact_to); new_obj->init_mark(); @@ -864,11 +914,13 @@ class ShenandoahCompactObjectsTask : public AbstractGangTask { _worker_slices(worker_slices) { } - void work(uint worker_id) { +private: + template + void work_impl(uint worker_id) { ShenandoahParallelWorkerSession worker_session(worker_id); ShenandoahHeapRegionSetIterator slice(_worker_slices[worker_id]); - ShenandoahCompactObjectsClosure cl(worker_id); + ShenandoahCompactObjectsClosure cl(worker_id); ShenandoahHeapRegion* r = slice.next(); while (r != NULL) { assert(!r->is_humongous(), "must not get humongous regions here"); @@ -879,6 +931,15 @@ class ShenandoahCompactObjectsTask : public AbstractGangTask { r = slice.next(); } } + +public: + void work(uint worker_id) { + if (UseAltGCForwarding) { + work_impl(worker_id); + } else { + work_impl(worker_id); + } + } }; class ShenandoahPostCompactClosure : public ShenandoahHeapRegionClosure { @@ -931,7 +992,8 @@ class ShenandoahPostCompactClosure : public ShenandoahHeapRegionClosure { } }; -void ShenandoahFullGC::compact_humongous_objects() { +template +void ShenandoahFullGC::compact_humongous_objects_impl() { // Compact humongous regions, based on their fwdptr objects. // // This code is serial, because doing the in-slice parallel sliding is tricky. In most cases, @@ -944,7 +1006,7 @@ void ShenandoahFullGC::compact_humongous_objects() { ShenandoahHeapRegion* r = heap->get_region(c - 1); if (r->is_humongous_start()) { oop old_obj = cast_to_oop(r->bottom()); - if (!old_obj->is_forwarded()) { + if (SlidingForwarding::is_not_forwarded(old_obj)) { // No need to move the object, it stays at the same slot continue; } @@ -953,7 +1015,7 @@ void ShenandoahFullGC::compact_humongous_objects() { size_t old_start = r->index(); size_t old_end = old_start + num_regions - 1; - size_t new_start = heap->heap_region_index_containing(old_obj->forwardee()); + size_t new_start = heap->heap_region_index_containing(SlidingForwarding::forwardee(old_obj)); size_t new_end = new_start + num_regions - 1; assert(old_start != new_start, "must be real move"); assert(r->is_stw_move_allowed(), "Region " SIZE_FORMAT " should be movable", r->index()); @@ -995,6 +1057,14 @@ void ShenandoahFullGC::compact_humongous_objects() { } } +void ShenandoahFullGC::compact_humongous_objects() { + if (UseAltGCForwarding) { + compact_humongous_objects_impl(); + } else { + compact_humongous_objects_impl(); + } +} + // This is slightly different to ShHeap::reset_next_mark_bitmap: // we need to remain able to walk pinned regions. 
// Since pinned region do not move and don't get compacted, we will get holes with diff --git a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.hpp b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.hpp index 1c1653e59ec..af8c25bc17f 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahFullGC.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahFullGC.hpp @@ -55,6 +55,7 @@ class VM_ShenandoahFullGC; class ShenandoahDegenGC; class ShenandoahFullGC : public ShenandoahGC { + template friend class ShenandoahPrepareForCompactionObjectClosure; friend class VM_ShenandoahFullGC; friend class ShenandoahDegenGC; @@ -83,7 +84,11 @@ class ShenandoahFullGC : public ShenandoahGC { void phase4_compact_objects(ShenandoahHeapRegionSet** worker_slices); void distribute_slices(ShenandoahHeapRegionSet** worker_slices); + template + void calculate_target_humongous_objects_impl(); void calculate_target_humongous_objects(); + template + void compact_humongous_objects_impl(); void compact_humongous_objects(); }; diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp index b2d85ddf7df..ebee8f91bec 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -32,6 +32,7 @@ #include "gc/shared/locationPrinter.inline.hpp" #include "gc/shared/memAllocator.hpp" #include "gc/shared/plab.hpp" +#include "gc/shared/slidingForwarding.hpp" #include "gc/shared/tlab_globals.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" @@ -401,6 +402,8 @@ jint ShenandoahHeap::initialize() { ShenandoahInitLogger::print(); + SlidingForwarding::initialize(_heap_region, ShenandoahHeapRegion::region_size_words()); + return JNI_OK; } @@ -951,7 +954,7 @@ class ShenandoahConcurrentEvacuateRegionObjectClosure : public ObjectClosure { void do_object(oop p) { shenandoah_assert_marked(NULL, p); - if (!p->is_forwarded()) { + if (!ShenandoahForwarding::is_forwarded(p)) { _heap->evacuate_object(p, _thread); } } @@ -1239,7 +1242,7 @@ class ObjectIterateScanRootClosure : public BasicOopIterateClosure { // There may be dead oops in weak roots in concurrent root phase, do not touch them. return; } - obj = ShenandoahBarrierSet::resolve_forwarded_not_null(obj); + obj = ShenandoahBarrierSet::barrier_set()->load_reference_barrier(obj); assert(oopDesc::is_oop(obj), "must be a valid oop"); if (!_bitmap->is_marked(obj)) { @@ -1295,6 +1298,7 @@ void ShenandoahHeap::object_iterate(ObjectClosure* cl) { while (! oop_stack.is_empty()) { oop obj = oop_stack.pop(); assert(oopDesc::is_oop(obj), "must be a valid oop"); + shenandoah_assert_not_in_cset_except(NULL, obj, cancelled_gc()); cl->do_object(obj); obj->oop_iterate(&oops); } @@ -1348,7 +1352,7 @@ class ShenandoahObjectIterateParScanClosure : public BasicOopIterateClosure { // There may be dead oops in weak roots in concurrent root phase, do not touch them. 
return; } - obj = ShenandoahBarrierSet::resolve_forwarded_not_null(obj); + obj = ShenandoahBarrierSet::barrier_set()->load_reference_barrier(obj); assert(oopDesc::is_oop(obj), "Must be a valid oop"); if (_bitmap->par_mark(obj)) { diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp index cacca535dbd..02be85dfbab 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp @@ -202,7 +202,7 @@ inline oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) { assert(ShenandoahThreadLocalData::is_evac_allowed(thread), "must be enclosed in oom-evac scope"); - size_t size = p->size(); + size_t size = p->forward_safe_size(); assert(!heap_region_containing(p)->is_humongous(), "never evacuate humongous objects"); @@ -419,7 +419,7 @@ inline void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, oop obj = cast_to_oop(cs); assert(oopDesc::is_oop(obj), "sanity"); assert(ctx->is_marked(obj), "object expected to be marked"); - int size = obj->size(); + size_t size = obj->forward_safe_size(); cl->do_object(obj); cs += size; } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp b/src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp index 4748688b2da..73f620b1fd1 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp @@ -97,7 +97,7 @@ class ShenandoahVerifyOopClosure : public BasicOopIterateClosure { T o = RawAccess<>::oop_load(p); if (!CompressedOops::is_null(o)) { oop obj = CompressedOops::decode_not_null(o); - if (is_instance_ref_klass(obj->klass())) { + if (is_instance_ref_klass(obj->forward_safe_klass())) { obj = ShenandoahForwarding::get_forwardee(obj); } // Single threaded verification can use faster non-atomic stack and bitmap @@ -124,7 +124,7 @@ class ShenandoahVerifyOopClosure : public BasicOopIterateClosure { "oop must be aligned"); ShenandoahHeapRegion *obj_reg = _heap->heap_region_containing(obj); - Klass* obj_klass = obj->klass_or_null(); + Klass* obj_klass = obj->forward_safe_klass(); // Verify that obj is not in dead space: { @@ -139,11 +139,11 @@ class ShenandoahVerifyOopClosure : public BasicOopIterateClosure { "Object start should be within the region"); if (!obj_reg->is_humongous()) { - check(ShenandoahAsserts::_safe_unknown, obj, (obj_addr + obj->size()) <= obj_reg->top(), + check(ShenandoahAsserts::_safe_unknown, obj, (obj_addr + obj->forward_safe_size()) <= obj_reg->top(), "Object end should be within the region"); } else { size_t humongous_start = obj_reg->index(); - size_t humongous_end = humongous_start + (obj->size() >> ShenandoahHeapRegion::region_size_words_shift()); + size_t humongous_end = humongous_start + (obj->forward_safe_size() >> ShenandoahHeapRegion::region_size_words_shift()); for (size_t idx = humongous_start + 1; idx < humongous_end; idx++) { check(ShenandoahAsserts::_safe_unknown, obj, _heap->get_region(idx)->is_humongous_continuation(), "Humongous object is in continuation that fits it"); @@ -160,7 +160,7 @@ class ShenandoahVerifyOopClosure : public BasicOopIterateClosure { // skip break; case ShenandoahVerifier::_verify_liveness_complete: - Atomic::add(&_ld[obj_reg->index()], (uint) obj->size(), memory_order_relaxed); + Atomic::add(&_ld[obj_reg->index()], (uint) obj->forward_safe_size(), memory_order_relaxed); // fallthrough for fast failure for un-live regions: case 
ShenandoahVerifier::_verify_liveness_conservative: check(ShenandoahAsserts::_safe_oop, obj, obj_reg->has_live(), @@ -201,7 +201,7 @@ class ShenandoahVerifyOopClosure : public BasicOopIterateClosure { HeapWord *fwd_addr = cast_from_oop(fwd); check(ShenandoahAsserts::_safe_oop, obj, fwd_addr < fwd_reg->top(), "Forwardee start should be within the region"); - check(ShenandoahAsserts::_safe_oop, obj, (fwd_addr + fwd->size()) <= fwd_reg->top(), + check(ShenandoahAsserts::_safe_oop, obj, (fwd_addr + fwd->forward_safe_size()) <= fwd_reg->top(), "Forwardee end should be within the region"); oop fwd2 = ShenandoahForwarding::get_forwardee_raw_unchecked(fwd); @@ -304,7 +304,8 @@ class ShenandoahVerifyOopClosure : public BasicOopIterateClosure { */ void verify_oops_from(oop obj) { _loc = obj; - obj->oop_iterate(this); + Klass* klass = obj->forward_safe_klass(); + obj->oop_iterate_backwards(this, klass); _loc = NULL; } @@ -584,7 +585,7 @@ class ShenandoahVerifierMarkedRegionTask : public AbstractGangTask { // Verify everything reachable from that object too, hopefully realizing // everything was already marked, and never touching further: - if (!is_instance_ref_klass(obj->klass())) { + if (!is_instance_ref_klass(obj->forward_safe_klass())) { cl.verify_oops_from(obj); (*processed)++; } diff --git a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp index 30df287bdc0..93c8bada903 100644 --- a/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp +++ b/src/hotspot/share/gc/z/c2/zBarrierSetC2.cpp @@ -296,7 +296,7 @@ void ZBarrierSetC2::clone_at_expansion(PhaseMacroExpand* phase, ArrayCopyNode* a assert(src_offset == dest_offset, "should be equal"); jlong offset = src_offset->get_long(); if (offset != arrayOopDesc::base_offset_in_bytes(T_OBJECT)) { - assert(!UseCompressedClassPointers, "should only happen without compressed class pointers"); + assert(!UseCompressedClassPointers || UseCompactObjectHeaders, "should only happen without compressed class pointers or with compact object headers"); assert((arrayOopDesc::base_offset_in_bytes(T_OBJECT) - offset) == BytesPerLong, "unexpected offset"); length = phase->transform_later(new SubLNode(length, phase->longcon(1))); // Size is in longs src_offset = phase->longcon(arrayOopDesc::base_offset_in_bytes(T_OBJECT)); diff --git a/src/hotspot/share/gc/z/zObjArrayAllocator.cpp b/src/hotspot/share/gc/z/zObjArrayAllocator.cpp index 2e30bd8c694..4147e168d7e 100644 --- a/src/hotspot/share/gc/z/zObjArrayAllocator.cpp +++ b/src/hotspot/share/gc/z/zObjArrayAllocator.cpp @@ -43,14 +43,16 @@ oop ZObjArrayAllocator::finish(HeapWord* mem) const { // A max segment size of 64K was chosen because microbenchmarking // suggested that it offered a good trade-off between allocation // time and time-to-safepoint - const size_t segment_max = ZUtils::bytes_to_words(64 * K); - const size_t skip = arrayOopDesc::header_size(ArrayKlass::cast(_klass)->element_type()); - size_t remaining = _word_size - skip; + const size_t segment_max = 64 * K; + const size_t skip = arrayOopDesc::base_offset_in_bytes(ArrayKlass::cast(_klass)->element_type()); + size_t byte_size = _word_size * BytesPerWord; + size_t remaining = byte_size - skip; + char* const start = reinterpret_cast(mem); while (remaining > 0) { // Clear segment const size_t segment = MIN2(remaining, segment_max); - Copy::zero_to_words(mem + (_word_size - remaining), segment); + Copy::zero_to_bytes(start + (byte_size - remaining), segment); remaining -= segment; if (remaining > 0) { diff --git 
a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index d66ed24d862..4dd02d6dac4 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -741,7 +741,6 @@ JRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter(JavaThread* current, B #endif JRT_END - JRT_LEAF(void, InterpreterRuntime::monitorexit(BasicObjectLock* elem)) oop obj = elem->obj(); assert(Universe::heap()->is_in(obj), "must be an object"); diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp index c32431784aa..4773b75095e 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.hpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp @@ -106,6 +106,7 @@ class InterpreterRuntime: AllStatic { public: // Synchronization static void monitorenter(JavaThread* current, BasicObjectLock* elem); + static void monitorenter_obj(JavaThread* current, oopDesc* obj); static void monitorexit (BasicObjectLock* elem); static void throw_illegal_monitor_state_exception(JavaThread* current); diff --git a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp index b93bb30f0c8..e50e62d90ee 100644 --- a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp +++ b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp @@ -627,7 +627,7 @@ void BytecodeInterpreter::run(interpreterState istate) { // Traditional lightweight locking. markWord displaced = rcvr->mark().set_unlocked(); mon->lock()->set_displaced_header(displaced); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); if (call_vm || rcvr->cas_set_mark(markWord::from_pointer(mon), displaced) != displaced) { // Is it simple recursive case? if (!call_vm && THREAD->is_lock_owned((address) displaced.clear_lock_bits().to_pointer())) { @@ -723,7 +723,7 @@ void BytecodeInterpreter::run(interpreterState istate) { // traditional lightweight locking markWord displaced = lockee->mark().set_unlocked(); entry->lock()->set_displaced_header(displaced); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); if (call_vm || lockee->cas_set_mark(markWord::from_pointer(entry), displaced) != displaced) { // Is it simple recursive case? if (!call_vm && THREAD->is_lock_owned((address) displaced.clear_lock_bits().to_pointer())) { @@ -1633,7 +1633,7 @@ void BytecodeInterpreter::run(interpreterState istate) { // traditional lightweight locking markWord displaced = lockee->mark().set_unlocked(); entry->lock()->set_displaced_header(displaced); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); if (call_vm || lockee->cas_set_mark(markWord::from_pointer(entry), displaced) != displaced) { // Is it simple recursive case? 
if (!call_vm && THREAD->is_lock_owned((address) displaced.clear_lock_bits().to_pointer())) { @@ -1665,7 +1665,7 @@ void BytecodeInterpreter::run(interpreterState istate) { assert(!UseBiasedLocking, "Not implemented"); // If it isn't recursive we either must swap old header or call the runtime - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); if (header.to_pointer() != NULL || call_vm) { markWord old_header = markWord::encode(lock); if (call_vm || lockee->cas_set_mark(header, old_header) != old_header) { @@ -1955,10 +1955,12 @@ void BytecodeInterpreter::run(interpreterState istate) { // Initialize header assert(!UseBiasedLocking, "Not implemented"); - obj->set_mark(markWord::prototype()); - obj->set_klass_gap(0); - obj->set_klass(ik); - + if (UseCompactObjectHeaders) { + oopDesc::release_set_mark(result, ik->prototype_header()); + } else { + obj->set_mark(markWord::prototype()); + obj->set_klass(ik); + } // Must prevent reordering of stores for object initialization // with stores that publish the new object. OrderAccess::storestore(); @@ -3146,7 +3148,7 @@ void BytecodeInterpreter::run(interpreterState istate) { illegal_state_oop = Handle(THREAD, THREAD->pending_exception()); THREAD->clear_pending_exception(); } - } else if (UseHeavyMonitors) { + } else if (LockingMode == LM_MONITOR) { InterpreterRuntime::monitorexit(base); if (THREAD->has_pending_exception()) { if (!suppress_error) illegal_state_oop = Handle(THREAD, THREAD->pending_exception()); diff --git a/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp b/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp index 3f60cf6729e..5bedc8c0339 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.cpp @@ -22,12 +22,12 @@ * */ #include "precompiled.hpp" -#include "jfr/leakprofiler/chains/bitset.inline.hpp" #include "jfr/leakprofiler/chains/bfsClosure.hpp" #include "jfr/leakprofiler/chains/dfsClosure.hpp" #include "jfr/leakprofiler/chains/edge.hpp" #include "jfr/leakprofiler/chains/edgeStore.hpp" #include "jfr/leakprofiler/chains/edgeQueue.hpp" +#include "jfr/leakprofiler/chains/jfrbitset.hpp" #include "jfr/leakprofiler/utilities/granularTimer.hpp" #include "jfr/leakprofiler/utilities/unifiedOopRef.inline.hpp" #include "logging/log.hpp" @@ -37,7 +37,7 @@ #include "oops/oop.inline.hpp" #include "utilities/align.hpp" -BFSClosure::BFSClosure(EdgeQueue* edge_queue, EdgeStore* edge_store, BitSet* mark_bits) : +BFSClosure::BFSClosure(EdgeQueue* edge_queue, EdgeStore* edge_store, JFRBitSet* mark_bits) : _edge_queue(edge_queue), _edge_store(edge_store), _mark_bits(mark_bits), diff --git a/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.hpp b/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.hpp index 4cc6a716427..19daf344f09 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.hpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/bfsClosure.hpp @@ -25,10 +25,10 @@ #ifndef SHARE_JFR_LEAKPROFILER_CHAINS_BFSCLOSURE_HPP #define SHARE_JFR_LEAKPROFILER_CHAINS_BFSCLOSURE_HPP +#include "jfr/leakprofiler/chains/jfrbitset.hpp" #include "jfr/leakprofiler/utilities/unifiedOopRef.hpp" #include "memory/iterator.hpp" -class BitSet; class Edge; class EdgeStore; class EdgeQueue; @@ -38,7 +38,7 @@ class BFSClosure : public BasicOopIterateClosure { private: EdgeQueue* _edge_queue; EdgeStore* _edge_store; - BitSet* _mark_bits; + JFRBitSet* _mark_bits; const Edge* _current_parent; mutable size_t _current_frontier_level; mutable size_t 
_next_frontier_idx; @@ -65,7 +65,7 @@ class BFSClosure : public BasicOopIterateClosure { public: virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS_EXCEPT_REFERENT; } - BFSClosure(EdgeQueue* edge_queue, EdgeStore* edge_store, BitSet* mark_bits); + BFSClosure(EdgeQueue* edge_queue, EdgeStore* edge_store, JFRBitSet* mark_bits); void process(); void do_root(UnifiedOopRef ref); diff --git a/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp b/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp index 8161e172215..5670166574e 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.cpp @@ -23,10 +23,10 @@ */ #include "precompiled.hpp" -#include "jfr/leakprofiler/chains/bitset.inline.hpp" #include "jfr/leakprofiler/chains/dfsClosure.hpp" #include "jfr/leakprofiler/chains/edge.hpp" #include "jfr/leakprofiler/chains/edgeStore.hpp" +#include "jfr/leakprofiler/chains/jfrbitset.hpp" #include "jfr/leakprofiler/chains/rootSetClosure.hpp" #include "jfr/leakprofiler/utilities/granularTimer.hpp" #include "jfr/leakprofiler/utilities/rootType.hpp" @@ -40,7 +40,7 @@ UnifiedOopRef DFSClosure::_reference_stack[max_dfs_depth]; void DFSClosure::find_leaks_from_edge(EdgeStore* edge_store, - BitSet* mark_bits, + JFRBitSet* mark_bits, const Edge* start_edge) { assert(edge_store != NULL, "invariant"); assert(mark_bits != NULL," invariant"); @@ -52,7 +52,7 @@ void DFSClosure::find_leaks_from_edge(EdgeStore* edge_store, } void DFSClosure::find_leaks_from_root_set(EdgeStore* edge_store, - BitSet* mark_bits) { + JFRBitSet* mark_bits) { assert(edge_store != NULL, "invariant"); assert(mark_bits != NULL, "invariant"); @@ -68,7 +68,7 @@ void DFSClosure::find_leaks_from_root_set(EdgeStore* edge_store, rs.process(); } -DFSClosure::DFSClosure(EdgeStore* edge_store, BitSet* mark_bits, const Edge* start_edge) +DFSClosure::DFSClosure(EdgeStore* edge_store, JFRBitSet* mark_bits, const Edge* start_edge) :_edge_store(edge_store), _mark_bits(mark_bits), _start_edge(start_edge), _max_depth(max_dfs_depth), _depth(0), _ignore_root_set(false) { } @@ -80,24 +80,23 @@ void DFSClosure::closure_impl(UnifiedOopRef reference, const oop pointee) { if (GranularTimer::is_finished()) { return; } + if (_depth == 0 && _ignore_root_set) { // Root set is already marked, but we want // to continue, so skip is_marked check. assert(_mark_bits->is_marked(pointee), "invariant"); - } else { + _reference_stack[_depth] = reference; + } else { if (_mark_bits->is_marked(pointee)) { return; } + _mark_bits->mark_obj(pointee); + _reference_stack[_depth] = reference; + // is the pointee a sample object? + if (pointee->mark().is_marked()) { + add_chain(); + } } - _reference_stack[_depth] = reference; - _mark_bits->mark_obj(pointee); - assert(_mark_bits->is_marked(pointee), "invariant"); - - // is the pointee a sample object? 
- if (pointee->mark().is_marked()) { - add_chain(); - } - assert(_max_depth >= 1, "invariant"); if (_depth < _max_depth - 1) { _depth++; diff --git a/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.hpp b/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.hpp index ad99f7d2320..be0cd2a5d7e 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.hpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/dfsClosure.hpp @@ -25,10 +25,10 @@ #ifndef SHARE_JFR_LEAKPROFILER_CHAINS_DFSCLOSURE_HPP #define SHARE_JFR_LEAKPROFILER_CHAINS_DFSCLOSURE_HPP +#include "jfr/leakprofiler/chains/jfrbitset.hpp" #include "jfr/leakprofiler/utilities/unifiedOopRef.hpp" #include "memory/iterator.hpp" -class BitSet; class Edge; class EdgeStore; class EdgeQueue; @@ -41,13 +41,13 @@ class DFSClosure : public BasicOopIterateClosure { static UnifiedOopRef _reference_stack[max_dfs_depth]; EdgeStore* _edge_store; - BitSet* _mark_bits; + JFRBitSet* _mark_bits; const Edge*_start_edge; size_t _max_depth; size_t _depth; bool _ignore_root_set; - DFSClosure(EdgeStore* edge_store, BitSet* mark_bits, const Edge* start_edge); + DFSClosure(EdgeStore* edge_store, JFRBitSet* mark_bits, const Edge* start_edge); void add_chain(); void closure_impl(UnifiedOopRef reference, const oop pointee); @@ -55,8 +55,8 @@ class DFSClosure : public BasicOopIterateClosure { public: virtual ReferenceIterationMode reference_iteration_mode() { return DO_FIELDS_EXCEPT_REFERENT; } - static void find_leaks_from_edge(EdgeStore* edge_store, BitSet* mark_bits, const Edge* start_edge); - static void find_leaks_from_root_set(EdgeStore* edge_store, BitSet* mark_bits); + static void find_leaks_from_edge(EdgeStore* edge_store, JFRBitSet* mark_bits, const Edge* start_edge); + static void find_leaks_from_root_set(EdgeStore* edge_store, JFRBitSet* mark_bits); void do_root(UnifiedOopRef ref); virtual void do_oop(oop* ref); diff --git a/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.cpp b/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.cpp index 011dce70da4..c9bee7d1ab6 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.cpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,8 +25,10 @@ #include "precompiled.hpp" #include "jfr/leakprofiler/chains/edgeStore.hpp" #include "jfr/leakprofiler/chains/edgeUtils.hpp" +#include "jfr/leakprofiler/sampling/objectSample.hpp" #include "jfr/leakprofiler/utilities/unifiedOopRef.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" StoredEdge::StoredEdge(const Edge* parent, UnifiedOopRef reference) : Edge(parent, reference), _gc_root_id(0), _skip_length(0) {} @@ -36,15 +38,6 @@ StoredEdge::StoredEdge(const StoredEdge& edge) : Edge(edge), _gc_root_id(edge._g traceid EdgeStore::_edge_id_counter = 0; -EdgeStore::EdgeStore() : _edges(NULL) { - _edges = new EdgeHashTable(this); -} - -EdgeStore::~EdgeStore() { - assert(_edges != NULL, "invariant"); - delete _edges; -} - bool EdgeStore::is_empty() const { return !_edges->has_entries(); } @@ -224,15 +217,91 @@ bool EdgeStore::put_edges(StoredEdge** previous, const Edge** current, size_t li return NULL == *current; } -// Install the immediate edge into the mark word of the leak candidate object +static GrowableArray<const StoredEdge*>* _leak_context_edges = nullptr; + +EdgeStore::EdgeStore() : _edges(new EdgeHashTable(this)) {} + +EdgeStore::~EdgeStore() { + assert(_edges != NULL, "invariant"); + delete _edges; + delete _leak_context_edges; + _leak_context_edges = nullptr; +} + +static int leak_context_edge_idx(const ObjectSample* sample) { + assert(sample != nullptr, "invariant"); + return static_cast<int>(sample->object()->mark().value()) >> markWord::lock_bits; +} + +bool EdgeStore::has_leak_context(const ObjectSample* sample) const { + const int idx = leak_context_edge_idx(sample); + if (idx == 0) { + return false; + } + assert(idx > 0, "invariant"); + assert(_leak_context_edges != nullptr, "invariant"); + assert(idx < _leak_context_edges->length(), "invariant"); + assert(_leak_context_edges->at(idx) != nullptr, "invariant"); + return true; +} + +const StoredEdge* EdgeStore::get(const ObjectSample* sample) const { + assert(sample != nullptr, "invariant"); + if (_leak_context_edges != nullptr) { + assert(SafepointSynchronize::is_at_safepoint(), "invariant"); + const int idx = leak_context_edge_idx(sample); + if (idx > 0) { + assert(idx < _leak_context_edges->length(), "invariant"); + const StoredEdge* const edge = _leak_context_edges->at(idx); + assert(edge != nullptr, "invariant"); + return edge; + } + } + return get(UnifiedOopRef::encode_in_native(sample->object_addr())); +} + +#ifdef ASSERT +// max_idx to ensure idx fits in lower 32-bits of markword together with lock bits.
+static constexpr const int max_idx = right_n_bits(32 - markWord::lock_bits); + +static void store_idx_precondition(oop sample_object, int idx) { + assert(sample_object != NULL, "invariant"); + assert(sample_object->mark().is_marked(), "invariant"); + assert(idx > 0, "invariant"); + assert(idx <= max_idx, "invariant"); +} +#endif + +static void store_idx_in_markword(oop sample_object, int idx) { + DEBUG_ONLY(store_idx_precondition(sample_object, idx);) + const markWord idx_mark_word(sample_object->mark().value() | idx << markWord::lock_bits); + sample_object->set_mark(idx_mark_word); + assert(sample_object->mark().is_marked(), "must still be marked"); +} + +static const int initial_size = 64; + +static int save(const StoredEdge* edge) { + assert(edge != nullptr, "invariant"); + if (_leak_context_edges == nullptr) { + _leak_context_edges = new (ResourceObj::C_HEAP, mtTracing) GrowableArray<const StoredEdge*>(initial_size, mtTracing); + _leak_context_edges->append(nullptr); // next idx now at 1, for disambiguation in markword. + } + return _leak_context_edges->append(edge); +} + +// We associate the leak context edge with the leak candidate object by saving the +// edge in an array and storing the array idx (shifted) into the markword of the candidate object. +static void associate_with_candidate(const StoredEdge* leak_context_edge) { + assert(leak_context_edge != nullptr, "invariant"); + store_idx_in_markword(leak_context_edge->pointee(), save(leak_context_edge)); +} + StoredEdge* EdgeStore::associate_leak_context_with_candidate(const Edge* edge) { assert(edge != NULL, "invariant"); assert(!contains(edge->reference()), "invariant"); StoredEdge* const leak_context_edge = put(edge->reference()); - oop sample_object = edge->pointee(); - assert(sample_object != NULL, "invariant"); - assert(sample_object->mark().is_marked(), "invariant"); - sample_object->set_mark(markWord::from_pointer(leak_context_edge)); + associate_with_candidate(leak_context_edge); return leak_context_edge; } diff --git a/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.hpp b/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.hpp index f948525a5b9..e920fd64ea9 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.hpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/edgeStore.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -31,6 +31,7 @@ #include "memory/allocation.hpp" typedef u8 traceid; +class ObjectSample; class StoredEdge : public Edge { private: @@ -79,6 +80,7 @@ class EdgeStore : public CHeapObj<mtTracing> { void on_unlink(EdgeEntry* entry); StoredEdge* get(UnifiedOopRef reference) const; + const StoredEdge* get(const ObjectSample* sample) const; StoredEdge* put(UnifiedOopRef reference); traceid gc_root_id(const Edge* edge) const; @@ -90,6 +92,7 @@ class EdgeStore : public CHeapObj<mtTracing> { void store_gc_root_id_in_leak_context_edge(StoredEdge* leak_context_edge, const Edge* root) const; StoredEdge* link_new_edge(StoredEdge** previous, const Edge** current); void link_with_existing_chain(const StoredEdge* current_stored, StoredEdge** previous, size_t previous_length); + bool has_leak_context(const ObjectSample* sample) const; template <typename T> void iterate(T& functor) const { _edges->iterate_value(functor); } diff --git a/src/hotspot/share/jfr/leakprofiler/chains/jfrbitset.hpp b/src/hotspot/share/jfr/leakprofiler/chains/jfrbitset.hpp new file mode 100644 index 00000000000..d786a87e3b8 --- /dev/null +++ b/src/hotspot/share/jfr/leakprofiler/chains/jfrbitset.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_JFR_LEAKPROFILER_JFRBITSET_HPP +#define SHARE_JFR_LEAKPROFILER_JFRBITSET_HPP + +#include "memory/allocation.hpp" +#include "utilities/objectBitSet.inline.hpp" + +typedef ObjectBitSet<mtTracing> JFRBitSet; + +#endif // SHARE_JFR_LEAKPROFILER_JFRBITSET_HPP diff --git a/src/hotspot/share/jfr/leakprofiler/chains/objectSampleMarker.hpp b/src/hotspot/share/jfr/leakprofiler/chains/objectSampleMarker.hpp index 2d496ef9dc7..e902cbe5069 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/objectSampleMarker.hpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/objectSampleMarker.hpp @@ -66,11 +66,22 @@ class ObjectSampleMarker : public StackObj { void mark(oop obj) { assert(obj != NULL, "invariant"); // save the original markWord - _store->push(ObjectSampleMarkWord(obj, obj->mark())); + markWord mark = obj->mark(); + _store->push(ObjectSampleMarkWord(obj, mark)); // now we will set the mark word to "marked" in order to quickly // identify sample objects during the reachability search from gc roots.
assert(!obj->mark().is_marked(), "should only mark an object once"); - obj->set_mark(markWord::prototype().set_marked()); +#ifdef _LP64 + if (UseCompactObjectHeaders) { + if (mark.has_displaced_mark_helper()) { + mark = mark.displaced_mark_helper(); + } + obj->set_mark(markWord::prototype().set_marked().set_narrow_klass(mark.narrow_klass())); + } else +#endif + { + obj->set_mark(markWord::prototype().set_marked()); + } assert(obj->mark().is_marked(), "invariant"); } }; diff --git a/src/hotspot/share/jfr/leakprofiler/chains/pathToGcRootsOperation.cpp b/src/hotspot/share/jfr/leakprofiler/chains/pathToGcRootsOperation.cpp index 7710481dd7f..b6fd556a03e 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/pathToGcRootsOperation.cpp +++ b/src/hotspot/share/jfr/leakprofiler/chains/pathToGcRootsOperation.cpp @@ -27,11 +27,11 @@ #include "gc/shared/gc_globals.hpp" #include "jfr/leakprofiler/leakProfiler.hpp" #include "jfr/leakprofiler/chains/bfsClosure.hpp" -#include "jfr/leakprofiler/chains/bitset.inline.hpp" #include "jfr/leakprofiler/chains/dfsClosure.hpp" #include "jfr/leakprofiler/chains/edge.hpp" #include "jfr/leakprofiler/chains/edgeQueue.hpp" #include "jfr/leakprofiler/chains/edgeStore.hpp" +#include "jfr/leakprofiler/chains/jfrbitset.hpp" #include "jfr/leakprofiler/chains/objectSampleMarker.hpp" #include "jfr/leakprofiler/chains/rootSetClosure.hpp" #include "jfr/leakprofiler/chains/edgeStore.hpp" @@ -84,7 +84,7 @@ void PathToGcRootsOperation::doit() { assert(_cutoff_ticks > 0, "invariant"); // The bitset used for marking is dimensioned as a function of the heap size - BitSet mark_bits; + JFRBitSet mark_bits; // The edge queue is dimensioned as a fraction of the heap size const size_t edge_queue_reservation_size = edge_queue_memory_reservation(); diff --git a/src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp b/src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp index 0cb3841a64f..2fa5a996194 100644 --- a/src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp +++ b/src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -120,7 +120,7 @@ void EventEmitter::link_sample_with_edge(const ObjectSample* sample, EdgeStore* assert(!sample->is_dead(), "invariant"); assert(edge_store != NULL, "invariant"); if (SafepointSynchronize::is_at_safepoint()) { - if (!sample->object()->mark().is_marked()) { + if (edge_store->has_leak_context(sample)) { // Associated with an edge (chain) already during heap traversal. 
return; } @@ -137,21 +137,12 @@ void EventEmitter::write_event(const ObjectSample* sample, EdgeStore* edge_store assert(edge_store != NULL, "invariant"); assert(_jfr_thread_local != NULL, "invariant"); - traceid gc_root_id = 0; - const Edge* edge = NULL; - if (SafepointSynchronize::is_at_safepoint()) { - if (!sample->object()->mark().is_marked()) { - edge = (const Edge*)(sample->object())->mark().to_pointer(); - } - } - if (edge == NULL) { - edge = edge_store->get(UnifiedOopRef::encode_in_native(sample->object_addr())); - } else { - gc_root_id = edge_store->gc_root_id(edge); - } + const StoredEdge* const edge = edge_store->get(sample); assert(edge != NULL, "invariant"); + assert(edge->pointee() == sample->object(), "invariant"); const traceid object_id = edge_store->get_id(edge); assert(object_id != 0, "invariant"); + const traceid gc_root_id = edge->gc_root_id(); Tickspan object_age = Ticks(_start_time.value()) - sample->allocation_time(); diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp b/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp index dcb4f7b5a5d..eafb2ee74c6 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVM.cpp @@ -2121,7 +2121,7 @@ C2V_VMENTRY_0(jint, arrayBaseOffset, (JNIEnv* env, jobject, jobject kind)) JVMCI_THROW_0(NullPointerException); } BasicType type = JVMCIENV->kindToBasicType(JVMCIENV->wrap(kind), JVMCI_CHECK_0); - return arrayOopDesc::header_size(type) * HeapWordSize; + return arrayOopDesc::base_offset_in_bytes(type); C2V_END C2V_VMENTRY_0(jint, arrayIndexScale, (JNIEnv* env, jobject, jobject kind)) diff --git a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp index 41478060acc..9bcc0d5c064 100644 --- a/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp +++ b/src/hotspot/share/jvmci/jvmciCompilerToVMInit.cpp @@ -111,10 +111,20 @@ void CompilerToVM::Data::initialize(JVMCI_TRAPS) { Universe_collectedHeap = Universe::heap(); Universe_base_vtable_size = Universe::base_vtable_size(); - Universe_narrow_oop_base = CompressedOops::base(); - Universe_narrow_oop_shift = CompressedOops::shift(); - Universe_narrow_klass_base = CompressedKlassPointers::base(); - Universe_narrow_klass_shift = CompressedKlassPointers::shift(); + if (UseCompressedOops) { + Universe_narrow_oop_base = CompressedOops::base(); + Universe_narrow_oop_shift = CompressedOops::shift(); + } else { + Universe_narrow_oop_base = nullptr; + Universe_narrow_oop_shift = 0; + } + if (UseCompressedClassPointers) { + Universe_narrow_klass_base = CompressedKlassPointers::base(); + Universe_narrow_klass_shift = CompressedKlassPointers::shift(); + } else { + Universe_narrow_klass_base = nullptr; + Universe_narrow_klass_shift = 0; + } Universe_non_oop_bits = Universe::non_oop_word(); Universe_verify_oop_mask = Universe::verify_oop_mask(); Universe_verify_oop_bits = Universe::verify_oop_bits(); @@ -217,7 +227,6 @@ JVMCIObjectArray CompilerToVM::initialize_intrinsics(JVMCI_TRAPS) { do_bool_flag(Inline) \ do_intx_flag(JVMCICounterSize) \ do_bool_flag(JVMCIPrintProperties) \ - do_bool_flag(JVMCIUseFastLocking) \ do_intx_flag(ObjectAlignmentInBytes) \ do_bool_flag(PrintInlining) \ do_bool_flag(ReduceInitialCardMarks) \ diff --git a/src/hotspot/share/jvmci/jvmci_globals.cpp b/src/hotspot/share/jvmci/jvmci_globals.cpp index 7bc2eeb3965..7acd15a5381 100644 --- a/src/hotspot/share/jvmci/jvmci_globals.cpp +++ b/src/hotspot/share/jvmci/jvmci_globals.cpp @@ -101,6 +101,10 @@ bool 
JVMCIGlobals::check_jvmci_flags_are_consistent() { "-XX:+BootstrapJVMCI is not compatible with -XX:TieredStopAtLevel=%d\n", TieredStopAtLevel); return false; } + if (UseCompactObjectHeaders) { + log_warning(jvmci)("-XX:+UseCompactObjectHeaders not supported by JVMCI, disabling UseCompactObjectHeaders"); + FLAG_SET_DEFAULT(UseCompactObjectHeaders, false); + } } if (!EnableJVMCI) { @@ -116,7 +120,6 @@ bool JVMCIGlobals::check_jvmci_flags_are_consistent() { CHECK_NOT_SET(JVMCITraceLevel, EnableJVMCI) CHECK_NOT_SET(JVMCICounterSize, EnableJVMCI) CHECK_NOT_SET(JVMCICountersExcludeCompiler, EnableJVMCI) - CHECK_NOT_SET(JVMCIUseFastLocking, EnableJVMCI) CHECK_NOT_SET(JVMCINMethodSizeLimit, EnableJVMCI) CHECK_NOT_SET(JVMCIPrintProperties, EnableJVMCI) CHECK_NOT_SET(UseJVMCINativeLibrary, EnableJVMCI) diff --git a/src/hotspot/share/jvmci/jvmci_globals.hpp b/src/hotspot/share/jvmci/jvmci_globals.hpp index a1db06f8352..a427961d4e5 100644 --- a/src/hotspot/share/jvmci/jvmci_globals.hpp +++ b/src/hotspot/share/jvmci/jvmci_globals.hpp @@ -103,9 +103,6 @@ class fileStream; product(bool, JVMCICountersExcludeCompiler, true, EXPERIMENTAL, \ "Exclude JVMCI compiler threads from benchmark counters") \ \ - develop(bool, JVMCIUseFastLocking, true, \ - "Use fast inlined locking code") \ - \ product(intx, JVMCINMethodSizeLimit, (80*K)*wordSize, EXPERIMENTAL, \ "Maximum size of a compiled method.") \ range(0, max_jint) \ diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp index 3f57d487bae..29e4d666195 100644 --- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp @@ -110,7 +110,7 @@ nonstatic_field(Array, _length, int) \ nonstatic_field(Array, _data[0], Klass*) \ \ - volatile_nonstatic_field(BasicLock, _displaced_header, markWord) \ + volatile_nonstatic_field(BasicLock, _metadata, uintptr_t) \ \ static_field(CodeCache, _low_bound, address) \ static_field(CodeCache, _high_bound, address) \ @@ -190,6 +190,8 @@ nonstatic_field(JavaThread, _jni_environment, JNIEnv) \ nonstatic_field(JavaThread, _poll_data, SafepointMechanism::ThreadData) \ nonstatic_field(JavaThread, _stack_overflow_state._reserved_stack_activation, address) \ + nonstatic_field(JavaThread, _lock_stack, LockStack) \ + nonstatic_field(LockStack, _top, uint32_t) \ \ static_field(java_lang_Class, _klass_offset, int) \ static_field(java_lang_Class, _array_klass_offset, int) \ @@ -452,6 +454,7 @@ declare_constant(BranchData::not_taken_off_set) \ \ declare_constant_with_value("CardTable::dirty_card", CardTable::dirty_card_val()) \ + declare_constant_with_value("LockStack::_end_offset", LockStack::end_offset()) \ \ declare_constant(CodeInstaller::VERIFIED_ENTRY) \ declare_constant(CodeInstaller::UNVERIFIED_ENTRY) \ @@ -580,6 +583,10 @@ declare_constant(InstanceKlass::being_initialized) \ declare_constant(InstanceKlass::fully_initialized) \ \ + declare_constant(LockingMode::LM_MONITOR) \ + declare_constant(LockingMode::LM_LEGACY) \ + declare_constant(LockingMode::LM_LIGHTWEIGHT) \ + \ /*********************************/ \ /* InstanceKlass _misc_flags */ \ /*********************************/ \ @@ -628,6 +635,9 @@ declare_constant(MultiBranchData::per_case_cell_count) \ \ declare_constant(ReceiverTypeData::nonprofiled_count_off_set) \ + \ + declare_constant(ObjectMonitor::ANONYMOUS_OWNER) \ + \ declare_constant(ReceiverTypeData::receiver_type_row_cell_count) \ declare_constant(ReceiverTypeData::receiver0_offset) \ declare_constant(ReceiverTypeData::count0_offset) \ 
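The arrayBaseOffset change above depends on base offsets being exact byte values: with compact object headers the first array element can start at an offset that is not a multiple of HeapWordSize (for example 12 for a byte array), which the old header_size(type) * HeapWordSize computation cannot express. The following standalone sketch of the layout arithmetic is plain C++ for illustration, not HotSpot code; the 8-byte mark word, 4-byte narrowKlass and length fields, and the UseCompressedClassPointers setting are assumptions made only for this example.

// Illustrative model only -- not HotSpot code. Field sizes are assumptions:
// an 8-byte mark word, a 4-byte narrowKlass (folded into the mark word when
// compact headers are enabled), and a 4-byte array length.
#include <cstddef>
#include <cstdio>

static size_t align_up(size_t v, size_t a) { return (v + a - 1) & ~(a - 1); }

// Byte offset of the first array element, assuming UseCompressedClassPointers.
static size_t base_offset(bool compact_headers, bool align_elements_to_8) {
  size_t hdr = compact_headers ? 8        // mark word only
                               : 8 + 4;   // mark word + narrowKlass
  hdr += 4;                               // array length (jint)
  return align_elements_to_8 ? align_up(hdr, 8) : hdr;
}

int main() {
  // byte[]: 16 with legacy headers, 12 with compact headers -- a value that a
  // words-based header size can no longer represent exactly.
  std::printf("byte[] base: legacy=%zu compact=%zu\n",
              base_offset(false, false), base_offset(true, false));
  // long[]: both layouts align the first element to 8 bytes, so 16 either way.
  std::printf("long[] base: legacy=%zu compact=%zu\n",
              base_offset(false, true), base_offset(true, true));
  return 0;
}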
diff --git a/src/hotspot/share/logging/logTag.hpp b/src/hotspot/share/logging/logTag.hpp index 53cd1d36433..77a7ea4d3d9 100644 --- a/src/hotspot/share/logging/logTag.hpp +++ b/src/hotspot/share/logging/logTag.hpp @@ -117,6 +117,7 @@ LOG_TAG(module) \ LOG_TAG(monitorinflation) \ LOG_TAG(monitormismatch) \ + LOG_TAG(monitortable) \ LOG_TAG(nestmates) \ LOG_TAG(nmethod) \ LOG_TAG(nmt) \ @@ -187,6 +188,7 @@ LOG_TAG(timer) \ LOG_TAG(tlab) \ LOG_TAG(tracking) \ + LOG_TAG(trimnative) /* trim native heap */ \ LOG_TAG(unload) /* Trace unloading of classes */ \ LOG_TAG(unshareable) \ LOG_TAG(update) \ diff --git a/src/hotspot/share/memory/arena.cpp b/src/hotspot/share/memory/arena.cpp index 313c79dbd49..790e86603dc 100644 --- a/src/hotspot/share/memory/arena.cpp +++ b/src/hotspot/share/memory/arena.cpp @@ -30,6 +30,7 @@ #include "runtime/task.hpp" #include "runtime/threadCritical.hpp" #include "services/memTracker.hpp" +#include "runtime/trimNativeHeap.hpp" #include "utilities/ostream.hpp" //-------------------------------------------------------------------------------------- @@ -136,6 +137,7 @@ class ChunkPool: public CHeapObj { } static void clean() { + NativeHeapTrimmer::SuspendMark sm("chunk pool cleaner"); enum { BlocksToKeep = 5 }; _tiny_pool->free_all_but(BlocksToKeep); _small_pool->free_all_but(BlocksToKeep); diff --git a/src/hotspot/share/oops/arrayOop.hpp b/src/hotspot/share/oops/arrayOop.hpp index 100497f52cb..ef4267fa748 100644 --- a/src/hotspot/share/oops/arrayOop.hpp +++ b/src/hotspot/share/oops/arrayOop.hpp @@ -50,8 +50,7 @@ class arrayOopDesc : public oopDesc { // Returns the aligned header_size_in_bytes. This is not equivalent to // sizeof(arrayOopDesc) which should not appear in the code. static int header_size_in_bytes() { - size_t hs = align_up(length_offset_in_bytes() + sizeof(int), - HeapWordSize); + size_t hs = length_offset_in_bytes() + sizeof(int); #ifdef ASSERT // make sure it isn't called before UseCompressedOops is initialized. static size_t arrayoopdesc_hs = 0; @@ -71,6 +70,11 @@ class arrayOopDesc : public oopDesc { // aligned 0 mod 8. The typeArrayOop itself must be aligned at least this // strongly. static bool element_type_should_be_aligned(BasicType type) { +#ifdef _LP64 + if (type == T_OBJECT || type == T_ARRAY) { + return !UseCompressedOops; + } +#endif return type == T_DOUBLE || type == T_LONG; } @@ -79,13 +83,15 @@ class arrayOopDesc : public oopDesc { // declared nonstatic fields in arrayOopDesc if not compressed, otherwise // it occupies the second half of the _klass field in oopDesc. static int length_offset_in_bytes() { - return UseCompressedClassPointers ? klass_gap_offset_in_bytes() : - sizeof(arrayOopDesc); + return oopDesc::base_offset_in_bytes(); } // Returns the offset of the first element. static int base_offset_in_bytes(BasicType type) { - return header_size(type) * HeapWordSize; + size_t typesize_in_bytes = header_size_in_bytes(); + return (int)(element_type_should_be_aligned(type) + ? align_up(typesize_in_bytes, BytesPerLong) + : typesize_in_bytes); } // Returns the address of the first element. The elements in the array will not @@ -127,6 +133,7 @@ class arrayOopDesc : public oopDesc { // Returns the header size in words aligned to the requirements of the // array object type. static int header_size(BasicType type) { + assert(!UseCompactObjectHeaders, "Don't use this with compact headers"); size_t typesize_in_bytes = header_size_in_bytes(); return (int)(element_type_should_be_aligned(type) ? 
align_object_offset(typesize_in_bytes/HeapWordSize) @@ -141,16 +148,15 @@ class arrayOopDesc : public oopDesc { assert(type >= 0 && type < T_CONFLICT, "wrong type"); assert(type2aelembytes(type) != 0, "wrong type"); - const size_t max_element_words_per_size_t = - align_down((SIZE_MAX/HeapWordSize - header_size(type)), MinObjAlignment); - const size_t max_elements_per_size_t = - HeapWordSize * max_element_words_per_size_t / type2aelembytes(type); + const size_t max_size_bytes = align_down(SIZE_MAX - base_offset_in_bytes(type), MinObjAlignmentInBytes); + const size_t max_elements_per_size_t = max_size_bytes / type2aelembytes(type); if ((size_t)max_jint < max_elements_per_size_t) { // It should be ok to return max_jint here, but parts of the code // (CollectedHeap, Klass::oop_oop_iterate(), and more) uses an int for // passing around the size (in words) of an object. So, we need to avoid // overflowing an int when we add the header. See CRs 4718400 and 7110613. - return align_down(max_jint - header_size(type), MinObjAlignment); + int header_size_words = align_up(base_offset_in_bytes(type), HeapWordSize) / HeapWordSize; + return align_down(max_jint - header_size_words, MinObjAlignment); } return (int32_t)max_elements_per_size_t; } diff --git a/src/hotspot/share/oops/instanceKlass.cpp b/src/hotspot/share/oops/instanceKlass.cpp index c86ccd83544..61b5ebaa2af 100644 --- a/src/hotspot/share/oops/instanceKlass.cpp +++ b/src/hotspot/share/oops/instanceKlass.cpp @@ -2596,7 +2596,13 @@ void InstanceKlass::restore_unshareable_info(ClassLoaderData* loader_data, Handl // Initialize @ValueBased class annotation if (DiagnoseSyncOnValueBasedClasses && has_value_based_class_annotation()) { set_is_value_based(); - set_prototype_header(markWord::prototype()); + markWord prototype = markWord::prototype(); +#ifdef _LP64 + if (UseCompactObjectHeaders) { + prototype = prototype.set_klass(this); + } +#endif + set_prototype_header(prototype); } } diff --git a/src/hotspot/share/oops/instanceOop.hpp b/src/hotspot/share/oops/instanceOop.hpp index 005cf5a9e88..ffe86149140 100644 --- a/src/hotspot/share/oops/instanceOop.hpp +++ b/src/hotspot/share/oops/instanceOop.hpp @@ -32,15 +32,9 @@ class instanceOopDesc : public oopDesc { public: - // aligned header size. - static int header_size() { return sizeof(instanceOopDesc)/HeapWordSize; } - // If compressed, the offset of the fields of the instance may not be aligned. static int base_offset_in_bytes() { - return (UseCompressedClassPointers) ? 
- klass_gap_offset_in_bytes() : - sizeof(instanceOopDesc); - + return oopDesc::base_offset_in_bytes(); } }; diff --git a/src/hotspot/share/oops/instanceRefKlass.hpp b/src/hotspot/share/oops/instanceRefKlass.hpp index 4156559df05..d87013abcdd 100644 --- a/src/hotspot/share/oops/instanceRefKlass.hpp +++ b/src/hotspot/share/oops/instanceRefKlass.hpp @@ -126,7 +126,7 @@ class InstanceRefKlass: public InstanceKlass { static void oop_oop_iterate_fields_except_referent(oop obj, OopClosureType* closure, Contains& contains); template - static void trace_reference_gc(const char *s, oop obj) NOT_DEBUG_RETURN; + void trace_reference_gc(const char *s, oop obj) NOT_DEBUG_RETURN; public: // Update non-static oop maps so 'referent', 'nextPending' and diff --git a/src/hotspot/share/oops/instanceRefKlass.inline.hpp b/src/hotspot/share/oops/instanceRefKlass.inline.hpp index 4b729e6dd13..616c115e6b1 100644 --- a/src/hotspot/share/oops/instanceRefKlass.inline.hpp +++ b/src/hotspot/share/oops/instanceRefKlass.inline.hpp @@ -185,7 +185,7 @@ void InstanceRefKlass::trace_reference_gc(const char *s, oop obj) { T* discovered_addr = (T*) java_lang_ref_Reference::discovered_addr_raw(obj); log_develop_trace(gc, ref)("InstanceRefKlass %s for obj " PTR_FORMAT, s, p2i(obj)); - if (java_lang_ref_Reference::is_phantom(obj)) { + if (reference_type() == REF_PHANTOM) { log_develop_trace(gc, ref)(" referent_addr/* " PTR_FORMAT " / " PTR_FORMAT, p2i(referent_addr), p2i((oop)HeapAccess::oop_load(referent_addr))); } else { diff --git a/src/hotspot/share/oops/klass.cpp b/src/hotspot/share/oops/klass.cpp index f50b533ff0d..cb4b7439c62 100644 --- a/src/hotspot/share/oops/klass.cpp +++ b/src/hotspot/share/oops/klass.cpp @@ -200,12 +200,22 @@ void* Klass::operator new(size_t size, ClassLoaderData* loader_data, size_t word return Metaspace::allocate(loader_data, word_size, MetaspaceObj::ClassType, THREAD); } +static markWord make_prototype(Klass* kls) { + markWord prototype = markWord::prototype(); +#ifdef _LP64 + if (UseCompactObjectHeaders) { + prototype = prototype.set_klass(kls); + } +#endif + return prototype; +} + // "Normal" instantiation is preceeded by a MetaspaceObj allocation // which zeros out memory - calloc equivalent. // The constructor is also used from CppVtableCloner, // which doesn't zero out the memory before calling the constructor. 
Klass::Klass(KlassID id) : _id(id), - _prototype_header(markWord::prototype()), + _prototype_header(make_prototype(this)), #if HOTSPOT_TARGET_CLASSLIB == 8 _shared_class_path_index(-1), _alt_kernel_ver(0) { diff --git a/src/hotspot/share/oops/klass.inline.hpp b/src/hotspot/share/oops/klass.inline.hpp index fe0ecb09e52..66c085727ad 100644 --- a/src/hotspot/share/oops/klass.inline.hpp +++ b/src/hotspot/share/oops/klass.inline.hpp @@ -52,7 +52,7 @@ inline bool Klass::is_loader_alive() const { } inline void Klass::set_prototype_header(markWord header) { - assert(!header.has_bias_pattern() || is_instance_klass(), "biased locking currently only supported for Java instances"); + assert(UseCompactObjectHeaders || !header.has_bias_pattern() || is_instance_klass(), "biased locking currently only supported for Java instances"); _prototype_header = header; } diff --git a/src/hotspot/share/oops/markWord.cpp b/src/hotspot/share/oops/markWord.cpp index acc007d2d48..43ad5ae9cf6 100644 --- a/src/hotspot/share/oops/markWord.cpp +++ b/src/hotspot/share/oops/markWord.cpp @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "oops/markWord.hpp" +#include "runtime/basicLock.inline.hpp" #include "runtime/thread.inline.hpp" #include "runtime/objectMonitor.inline.hpp" #include "utilities/ostream.hpp" @@ -67,7 +68,7 @@ void markWord::print_on(outputStream* st, bool print_monitor_info) const { } else if (has_monitor()) { // last bits = 10 // have to check has_monitor() before is_locked() st->print(" monitor(" INTPTR_FORMAT ")=", value()); - if (print_monitor_info) { + if (print_monitor_info && !UseObjectMonitorTable) { ObjectMonitor* mon = monitor(); if (mon == NULL) { st->print("NULL (this should never be seen!)"); diff --git a/src/hotspot/share/oops/markWord.hpp b/src/hotspot/share/oops/markWord.hpp index 797cd398093..56d3d24cd89 100644 --- a/src/hotspot/share/oops/markWord.hpp +++ b/src/hotspot/share/oops/markWord.hpp @@ -25,6 +25,7 @@ #ifndef SHARE_OOPS_MARKWORD_HPP #define SHARE_OOPS_MARKWORD_HPP +#include "gc/shared/gc_globals.hpp" #include "metaprogramming/integralConstant.hpp" #include "metaprogramming/primitiveConversions.hpp" #include "oops/oopsHierarchy.hpp" @@ -44,6 +45,10 @@ // unused:25 hash:31 -->| unused_gap:1 age:4 biased_lock:1 lock:2 (normal object) // JavaThread*:54 epoch:2 unused_gap:1 age:4 biased_lock:1 lock:2 (biased object) // +// 64 bits (with compact headers): +// ------------------------------- +// nklass:32 hash:25 -->| unused_gap:1 age:4 self-fwded:1 lock:2 (normal object) +// // - hash contains the identity hash value: largest value is // 31 bits, see os::random(). Also, 64-bit vm's require // a hash value no bigger than 32 bits because they will not @@ -81,9 +86,9 @@ // // [ptr | 00] locked ptr points to real header on stack // [header | 0 | 01] unlocked regular object header -// [ptr | 10] monitor inflated lock (header is wapped out) +// [ptr | 10] monitor inflated lock (header is swapped out) // [ptr | 11] marked used to mark an object -// [0 ............ 0| 00] inflating inflation in progress +// [0 ............ 0| 00] inflating inflation in progress (stack-locking in use) // // We assume that stack/thread pointers have the lowest two bits cleared. 
// @@ -94,6 +99,7 @@ class BasicLock; class ObjectMonitor; class JavaThread; +class Klass; class outputStream; class markWord { @@ -129,18 +135,31 @@ class markWord { static const int age_bits = 4; static const int lock_bits = 2; static const int biased_lock_bits = 1; - static const int max_hash_bits = BitsPerWord - age_bits - lock_bits - biased_lock_bits; + static const int self_forwarded_bits = 1; + static const int max_hash_bits = BitsPerWord - age_bits - lock_bits - self_forwarded_bits; static const int hash_bits = max_hash_bits > 31 ? 31 : max_hash_bits; + static const int hash_bits_compact = max_hash_bits > 25 ? 25 : max_hash_bits; + // Used only without compact headers. static const int unused_gap_bits = LP64_ONLY(1) NOT_LP64(0); static const int epoch_bits = 2; +#ifdef _LP64 + // Used only with compact headers. + static const int klass_bits = 32; +#endif // The biased locking code currently requires that the age bits be // contiguous to the lock bits. static const int lock_shift = 0; static const int biased_lock_shift = lock_bits; - static const int age_shift = lock_bits + biased_lock_bits; + static const int self_forwarded_shift = lock_shift + lock_bits; + static const int age_shift = self_forwarded_shift + self_forwarded_bits; static const int unused_gap_shift = age_shift + age_bits; static const int hash_shift = unused_gap_shift + unused_gap_bits; + static const int hash_shift_compact = age_shift + age_bits; +#ifdef _LP64 + // Used only with compact headers. + static const int klass_shift = hash_shift_compact + hash_bits_compact; +#endif static const int epoch_shift = hash_shift; static const uintptr_t lock_mask = right_n_bits(lock_bits); @@ -148,6 +167,8 @@ class markWord { static const uintptr_t biased_lock_mask = right_n_bits(lock_bits + biased_lock_bits); static const uintptr_t biased_lock_mask_in_place= biased_lock_mask << lock_shift; static const uintptr_t biased_lock_bit_in_place = 1 << biased_lock_shift; + static const uintptr_t self_forwarded_mask = right_n_bits(self_forwarded_bits); + static const uintptr_t self_forwarded_mask_in_place = self_forwarded_mask << self_forwarded_shift; static const uintptr_t age_mask = right_n_bits(age_bits); static const uintptr_t age_mask_in_place = age_mask << age_shift; static const uintptr_t epoch_mask = right_n_bits(epoch_bits); @@ -155,6 +176,13 @@ class markWord { static const uintptr_t hash_mask = right_n_bits(hash_bits); static const uintptr_t hash_mask_in_place = hash_mask << hash_shift; + static const uintptr_t hash_mask_compact = right_n_bits(hash_bits_compact); + static const uintptr_t hash_mask_compact_in_place = hash_mask_compact << hash_shift_compact; + +#ifdef _LP64 + static const uintptr_t klass_mask = right_n_bits(klass_bits); + static const uintptr_t klass_mask_in_place = klass_mask << klass_shift; +#endif // Alignment of JavaThread pointers encoded in object header required by biased locking static const size_t biased_lock_alignment = 2 << (epoch_shift + epoch_bits); @@ -236,6 +264,7 @@ class markWord { // check for and avoid overwriting a 0 value installed by some // other thread. (They should spin or block instead. The 0 value // is transient and *should* be short-lived). + // Fast-locking does not use INFLATING. static markWord INFLATING() { return zero(); } // inflate-in-progress // Should this header be preserved during GC? 
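For orientation, the bit assignments implied by the shift constants above are: lock in bits 0-1, the self-forwarded bit at bit 2, age in bits 3-6, a 25-bit hash starting at bit 7, and the 32-bit narrow klass in the upper half, so klass_shift lands on a byte boundary as later asserted. The sketch below is plain C++ for illustration, not HotSpot code; the packed values are made up, and only the field widths are taken from the constants above.

// Illustrative model only -- not HotSpot code. Widths follow the constants
// introduced above: lock:2, self-forwarded:1, age:4, hash:25, nklass:32 = 64.
#include <cstdint>
#include <cstdio>

namespace compact_mark_model {
  constexpr int lock_bits           = 2;
  constexpr int self_forwarded_bits = 1;
  constexpr int age_bits            = 4;
  constexpr int hash_bits_compact   = 25;
  constexpr int klass_bits          = 32;

  constexpr int lock_shift           = 0;
  constexpr int self_forwarded_shift = lock_shift + lock_bits;
  constexpr int age_shift            = self_forwarded_shift + self_forwarded_bits;
  constexpr int hash_shift_compact   = age_shift + age_bits;                   // 7
  constexpr int klass_shift          = hash_shift_compact + hash_bits_compact; // 32

  constexpr uint64_t mask(int bits) { return (uint64_t(1) << bits) - 1; }

  constexpr uint64_t pack(uint32_t nklass, uint32_t hash, unsigned age, unsigned lock) {
    return (uint64_t(nklass) << klass_shift)
         | ((uint64_t(hash) & mask(hash_bits_compact)) << hash_shift_compact)
         | ((uint64_t(age)  & mask(age_bits)) << age_shift)
         | ((uint64_t(lock) & mask(lock_bits)) << lock_shift);
  }
}

int main() {
  using namespace compact_mark_model;
  static_assert(klass_shift + klass_bits == 64, "fields must exactly fill the word");
  static_assert(klass_shift % 8 == 0, "narrow klass must be byte-aligned in the word");

  const uint64_t m = pack(0x00c0ffee, 0x1abcdef, 3, 0x1 /* unlocked */);
  std::printf("mark   = 0x%016llx\n", (unsigned long long) m);
  std::printf("nklass = 0x%08x\n", (unsigned)((m >> klass_shift) & mask(klass_bits)));
  std::printf("hash   = 0x%07x\n", (unsigned)((m >> hash_shift_compact) & mask(hash_bits_compact)));
  std::printf("age    = %u\n",     (unsigned)((m >> age_shift) & mask(age_bits)));
  return 0;
}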
@@ -267,29 +296,51 @@ class markWord { return markWord(value() | unlocked_value); } bool has_locker() const { - return ((value() & lock_mask_in_place) == locked_value); + assert(LockingMode == LM_LEGACY, "should only be called with legacy stack locking"); + return (value() & lock_mask_in_place) == locked_value; } BasicLock* locker() const { assert(has_locker(), "check"); return (BasicLock*) value(); } + + bool is_fast_locked() const { + assert(LockingMode == LM_LIGHTWEIGHT, "should only be called with new lightweight locking"); + return (value() & lock_mask_in_place) == locked_value; + } + markWord set_fast_locked() const { + return markWord(value() & ~lock_mask_in_place); + } + bool has_monitor() const { return ((value() & monitor_value) != 0); } ObjectMonitor* monitor() const { assert(has_monitor(), "check"); + assert(!UseObjectMonitorTable, "Lightweight locking with OM table does not use markWord for monitors"); // Use xor instead of &~ to provide one extra tag-bit check. return (ObjectMonitor*) (value() ^ monitor_value); } bool has_displaced_mark_helper() const { - return ((value() & unlocked_value) == 0); + intptr_t lockbits = value() & lock_mask_in_place; + if (LockingMode == LM_LIGHTWEIGHT) { + return !UseObjectMonitorTable && lockbits == monitor_value; + } + // monitor (0b10) | stack-locked (0b00)? + return (lockbits & unlocked_value) == 0; } markWord displaced_mark_helper() const; void set_displaced_mark_helper(markWord m) const; markWord copy_set_hash(intptr_t hash) const { - uintptr_t tmp = value() & (~hash_mask_in_place); - tmp |= ((hash & hash_mask) << hash_shift); - return markWord(tmp); + if (UseCompactObjectHeaders) { + uintptr_t tmp = value() & (~hash_mask_compact_in_place); + tmp |= ((hash & hash_mask_compact) << hash_shift_compact); + return markWord(tmp); + } else { + uintptr_t tmp = value() & (~hash_mask_in_place); + tmp |= ((hash & hash_mask) << hash_shift); + return markWord(tmp); + } } // it is only used to be stored into BasicLock as the // indicator that the lock is using heavyweight monitor @@ -302,9 +353,15 @@ class markWord { return from_pointer(lock); } static markWord encode(ObjectMonitor* monitor) { + assert(!UseObjectMonitorTable, "Lightweight locking with OM table does not use markWord for monitors"); uintptr_t tmp = (uintptr_t) monitor; return markWord(tmp | monitor_value); } + + markWord set_has_monitor() const { + return markWord((value() & ~lock_mask_in_place) | monitor_value); + } + static markWord encode(JavaThread* thread, uint age, int bias_epoch) { uintptr_t tmp = (uintptr_t) thread; assert(UseBiasedLocking && ((tmp & (epoch_mask_in_place | age_mask_in_place | biased_lock_mask_in_place)) == 0), "misaligned JavaThread pointer"); @@ -329,13 +386,27 @@ class markWord { // hash operations intptr_t hash() const { - return mask_bits(value() >> hash_shift, hash_mask); + if (UseCompactObjectHeaders) { + return mask_bits(value() >> hash_shift_compact, hash_mask_compact); + } else { + return mask_bits(value() >> hash_shift, hash_mask); + } } bool has_no_hash() const { return hash() == no_hash; } +#ifdef _LP64 + inline markWord actual_mark() const; + inline Klass* klass() const; + inline Klass* klass_or_null() const; + inline Klass* safe_klass() const; + inline markWord set_klass(const Klass* klass) const; + inline narrowKlass narrow_klass() const; + inline markWord set_narrow_klass(const narrowKlass klass) const; +#endif + // Prototype mark for initialization static markWord prototype() { return markWord( no_hash_in_place | no_lock_in_place ); @@ -352,6 
+423,19 @@ class markWord { // Recover address of oop from encoded form used in mark inline void* decode_pointer() { if (UseBiasedLocking && has_bias_pattern()) return NULL; return (void*)clear_lock_bits().value(); } + +#ifdef _LP64 + inline bool self_forwarded() const { + bool self_fwd = mask_bits(value(), self_forwarded_mask_in_place) != 0; + assert(!self_fwd || UseAltGCForwarding, "Only set self-fwd bit when using alt GC forwarding"); + return self_fwd; + } + + inline markWord set_self_forwarded() const { + assert(UseAltGCForwarding, "Only call this with alt GC forwarding"); + return markWord(value() | self_forwarded_mask_in_place | marked_value); + } +#endif }; // Support atomic operations. diff --git a/src/hotspot/share/oops/markWord.inline.hpp b/src/hotspot/share/oops/markWord.inline.hpp index 07c2bbd74e2..f49f0c98821 100644 --- a/src/hotspot/share/oops/markWord.inline.hpp +++ b/src/hotspot/share/oops/markWord.inline.hpp @@ -30,6 +30,7 @@ #include "oops/klass.hpp" #include "oops/oop.inline.hpp" #include "runtime/globals.hpp" +#include "runtime/safepoint.hpp" // Should this header be preserved during GC? inline bool markWord::must_be_preserved(const oopDesc* obj) const { @@ -70,9 +71,59 @@ inline bool markWord::must_be_preserved_for_promotion_failure(const oopDesc* obj inline markWord markWord::prototype_for_klass(const Klass* klass) { markWord prototype_header = klass->prototype_header(); - assert(prototype_header == prototype() || prototype_header.has_bias_pattern(), "corrupt prototype header"); + assert(UseCompactObjectHeaders || prototype_header == prototype() || prototype_header.has_bias_pattern(), "corrupt prototype header"); return prototype_header; } +#ifdef _LP64 +markWord markWord::actual_mark() const { + assert(UseCompactObjectHeaders, "only safe when using compact headers"); + if (has_displaced_mark_helper()) { + return displaced_mark_helper(); + } else { + return *this; + } +} + +narrowKlass markWord::narrow_klass() const { + assert(UseCompactObjectHeaders, "only used with compact object headers"); + return narrowKlass(value() >> klass_shift); +} + +Klass* markWord::klass() const { + assert(UseCompactObjectHeaders, "only used with compact object headers"); + assert(!CompressedKlassPointers::is_null(narrow_klass()), "narrow klass must not be null: " INTPTR_FORMAT, value()); + return CompressedKlassPointers::decode_not_null(narrow_klass()); +} + +Klass* markWord::klass_or_null() const { + assert(UseCompactObjectHeaders, "only used with compact object headers"); + return CompressedKlassPointers::decode(narrow_klass()); +} + +markWord markWord::set_narrow_klass(const narrowKlass nklass) const { + assert(UseCompactObjectHeaders, "only used with compact object headers"); + return markWord((value() & ~klass_mask_in_place) | ((uintptr_t) nklass << klass_shift)); +} + +Klass* markWord::safe_klass() const { + assert(UseCompactObjectHeaders, "only used with compact object headers"); + assert(SafepointSynchronize::is_at_safepoint(), "only call at safepoint"); + markWord m = *this; + if (m.has_displaced_mark_helper()) { + m = m.displaced_mark_helper(); + } + return CompressedKlassPointers::decode_not_null(m.narrow_klass()); +} + +markWord markWord::set_klass(const Klass* klass) const { + assert(UseCompactObjectHeaders, "only used with compact object headers"); + assert(UseCompressedClassPointers, "expect compressed klass pointers"); + // TODO: Don't cast to non-const, change CKP::encode() to accept const Klass* instead. 
+ narrowKlass nklass = CompressedKlassPointers::encode(const_cast(klass)); + return set_narrow_klass(nklass); +} +#endif + #endif // SHARE_OOPS_MARKWORD_INLINE_HPP diff --git a/src/hotspot/share/oops/objArrayKlass.cpp b/src/hotspot/share/oops/objArrayKlass.cpp index da75ec35ecb..20f7dd84cb0 100644 --- a/src/hotspot/share/oops/objArrayKlass.cpp +++ b/src/hotspot/share/oops/objArrayKlass.cpp @@ -156,7 +156,7 @@ ObjArrayKlass::ObjArrayKlass(int n, Klass* element_klass, Symbol* name) : ArrayK } int ObjArrayKlass::oop_size(oop obj) const { - assert(obj->is_objArray(), "must be object array"); + assert(UseCompactObjectHeaders || obj->is_objArray(), "must be object array"); return objArrayOop(obj)->object_size(); } diff --git a/src/hotspot/share/oops/objArrayKlass.inline.hpp b/src/hotspot/share/oops/objArrayKlass.inline.hpp index 443a3eac09f..f3809ffc6d8 100644 --- a/src/hotspot/share/oops/objArrayKlass.inline.hpp +++ b/src/hotspot/share/oops/objArrayKlass.inline.hpp @@ -70,7 +70,7 @@ void ObjArrayKlass::oop_oop_iterate_elements_bounded( template void ObjArrayKlass::oop_oop_iterate(oop obj, OopClosureType* closure) { - assert (obj->is_array(), "obj must be array"); + assert (UseCompactObjectHeaders || obj->is_array(), "obj must be array"); objArrayOop a = objArrayOop(obj); if (Devirtualizer::do_metadata(closure)) { diff --git a/src/hotspot/share/oops/objArrayOop.hpp b/src/hotspot/share/oops/objArrayOop.hpp index 5952c058ef5..56ec973cc2c 100644 --- a/src/hotspot/share/oops/objArrayOop.hpp +++ b/src/hotspot/share/oops/objArrayOop.hpp @@ -47,29 +47,9 @@ class objArrayOopDesc : public arrayOopDesc { } private: - // Give size of objArrayOop in HeapWords minus the header - static int array_size(int length) { - const uint OopsPerHeapWord = HeapWordSize/heapOopSize; - assert(OopsPerHeapWord >= 1 && (HeapWordSize % heapOopSize == 0), - "Else the following (new) computation would be in error"); - uint res = ((uint)length + OopsPerHeapWord - 1)/OopsPerHeapWord; -#ifdef ASSERT - // The old code is left in for sanity-checking; it'll - // go away pretty soon. XXX - // Without UseCompressedOops, this is simply: - // oop->length() * HeapWordsPerOop; - // With narrowOops, HeapWordsPerOop is 1/2 or equal 0 as an integer. - // The oop elements are aligned up to wordSize - const uint HeapWordsPerOop = heapOopSize/HeapWordSize; - uint old_res; - if (HeapWordsPerOop > 0) { - old_res = length * HeapWordsPerOop; - } else { - old_res = align_up((uint)length, OopsPerHeapWord)/OopsPerHeapWord; - } - assert(res == old_res, "Inconsistency between old and new."); -#endif // ASSERT - return res; + // Give size of objArrayOop in bytes minus the header + static size_t array_size_in_bytes(int length) { + return (size_t)length * heapOopSize; } public: @@ -89,16 +69,15 @@ class objArrayOopDesc : public arrayOopDesc { oop atomic_compare_exchange_oop(int index, oop exchange_value, oop compare_value); // Sizing - static int header_size() { return arrayOopDesc::header_size(T_OBJECT); } int object_size() { return object_size(length()); } static int object_size(int length) { // This returns the object size in HeapWords. 
- uint asz = array_size(length); - uint osz = align_object_size(header_size() + asz); - assert(osz >= asz, "no overflow"); - assert((int)osz > 0, "no overflow"); - return (int)osz; + size_t asz = array_size_in_bytes(length); + size_t size_words = align_up(base_offset_in_bytes() + asz, HeapWordSize) / HeapWordSize; + size_t osz = align_object_size(size_words); + assert(osz < max_jint, "no overflow"); + return checked_cast(osz); } Klass* element_klass(); diff --git a/src/hotspot/share/oops/oop.cpp b/src/hotspot/share/oops/oop.cpp index 57ef04220be..260cdd5989b 100644 --- a/src/hotspot/share/oops/oop.cpp +++ b/src/hotspot/share/oops/oop.cpp @@ -105,7 +105,7 @@ bool oopDesc::is_oop(oop obj, bool ignore_mark_word) { } // Header verification: the mark is typically non-zero. If we're - // at a safepoint, it must not be zero. + // at a safepoint, it must not be zero, except when using the new lightweight locking. // Outside of a safepoint, the header could be changing (for example, // another thread could be inflating a lock on this object). if (ignore_mark_word) { @@ -114,7 +114,7 @@ bool oopDesc::is_oop(oop obj, bool ignore_mark_word) { if (obj->mark().value() != 0) { return true; } - return !SafepointSynchronize::is_at_safepoint(); + return LockingMode == LM_LIGHTWEIGHT || !SafepointSynchronize::is_at_safepoint(); } // used only for asserts and guarantees @@ -139,26 +139,29 @@ bool oopDesc::is_objArray_noinline() const { return is_objArray(); bool oopDesc::is_typeArray_noinline() const { return is_typeArray(); } bool oopDesc::has_klass_gap() { - // Only has a klass gap when compressed class pointers are used. - return UseCompressedClassPointers; + // Only has a klass gap when compressed class pointers are used, but + // only if not using compact headers.. + return UseCompressedClassPointers && !UseCompactObjectHeaders; } #if INCLUDE_CDS_JAVA_HEAP void oopDesc::set_narrow_klass(narrowKlass nk) { assert(DumpSharedSpaces, "Used by CDS only. Do not abuse!"); assert(UseCompressedClassPointers, "must be"); + assert(!UseCompactObjectHeaders, "not with compact headers"); _metadata._compressed_klass = nk; } #endif void* oopDesc::load_klass_raw(oop obj) { - if (UseCompressedClassPointers) { - narrowKlass narrow_klass = obj->_metadata._compressed_klass; - if (narrow_klass == 0) return NULL; - return (void*)CompressedKlassPointers::decode_raw(narrow_klass); - } else { - return obj->_metadata._klass; - } + // TODO: Remove method altogether and replace with calls to obj->klass() ? + // OTOH, we may eventually get rid of locking in header, and then no + // longer have to deal with that anymore. 
+#ifdef _LP64 + return obj->klass(); +#else + return obj->_metadata._klass; +#endif } void* oopDesc::load_oop_raw(oop obj, int offset) { diff --git a/src/hotspot/share/oops/oop.hpp b/src/hotspot/share/oops/oop.hpp index c77b09adf0d..676f66221a8 100644 --- a/src/hotspot/share/oops/oop.hpp +++ b/src/hotspot/share/oops/oop.hpp @@ -57,17 +57,24 @@ class oopDesc { narrowKlass _compressed_klass; } _metadata; - public: +public: inline markWord mark() const; + inline markWord mark_acquire() const; inline markWord* mark_addr() const; inline void set_mark(markWord m); static inline void set_mark(HeapWord* mem, markWord m); inline void release_set_mark(markWord m); + static inline void release_set_mark(HeapWord* mem, markWord m); inline markWord cas_set_mark(markWord new_mark, markWord old_mark); inline markWord cas_set_mark(markWord new_mark, markWord old_mark, atomic_memory_order order); + inline markWord resolve_mark() const; + + // Returns the prototype mark that should be used for this object. + inline markWord prototype_mark() const; + // Used only to re-initialize the mark word (e.g., of promoted // objects during a GC) -- requires a valid klass pointer inline void init_mark(); @@ -86,7 +93,14 @@ class oopDesc { static inline void set_klass_gap(HeapWord* mem, int z); // size of object header, aligned to platform wordSize - static int header_size() { return sizeof(oopDesc)/HeapWordSize; } + static int header_size() { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + return sizeof(markWord) / HeapWordSize; + } else +#endif + return sizeof(oopDesc)/HeapWordSize; + } // Returns whether this is an instance of k or an instance of a subclass of k inline bool is_a(Klass* k) const; @@ -98,6 +112,20 @@ class oopDesc { // to be able to figure out the size of an object knowing its klass. inline int size_given_klass(Klass* klass); + // The following set of methods is used to access the mark-word and related + // properties when the object may be forwarded. Be careful where and when + // using this method. It assumes that the forwardee is installed in + // the header as a plain pointer (or self-forwarded). In particular, + // those methods can not deal with the sliding-forwarding that is used + // in Serial, G1 and Shenandoah full-GCs. +private: + inline Klass* forward_safe_klass_impl(markWord m) const; +public: + inline Klass* forward_safe_klass() const; + inline size_t forward_safe_size(); + inline Klass* forward_safe_klass(markWord m) const; + inline void forward_safe_init_mark(); + // type test operations (inlined in oop.inline.hpp) inline bool is_instance() const; inline bool is_array() const; @@ -248,15 +276,17 @@ class oopDesc { void verify_forwardee(oop forwardee) NOT_DEBUG_RETURN; inline void forward_to(oop p); - inline bool cas_forward_to(oop p, markWord compare, atomic_memory_order order = memory_order_conservative); + inline void forward_to_self(); // Like "forward_to", but inserts the forwarding pointer atomically. // Exactly one thread succeeds in inserting the forwarding pointer, and // this call returns "NULL" for that thread; any other thread has the // value of the forwarding pointer returned and does not modify "this". 
inline oop forward_to_atomic(oop p, markWord compare, atomic_memory_order order = memory_order_conservative); + inline oop forward_to_self_atomic(markWord compare, atomic_memory_order order = memory_order_conservative); inline oop forwardee() const; + inline oop forwardee(markWord header) const; // Age of object during scavenge inline uint age() const; @@ -302,12 +332,36 @@ class oopDesc { // for code generation static int mark_offset_in_bytes() { return offset_of(oopDesc, _mark); } - static int klass_offset_in_bytes() { return offset_of(oopDesc, _metadata._klass); } static int klass_gap_offset_in_bytes() { assert(has_klass_gap(), "only applicable to compressed klass pointers"); + assert(!UseCompactObjectHeaders, "don't use klass_offset_in_bytes() with compact headers"); return klass_offset_in_bytes() + sizeof(narrowKlass); } + static int klass_offset_in_bytes() { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + STATIC_ASSERT(markWord::klass_shift % 8 == 0); + return mark_offset_in_bytes() + markWord::klass_shift / 8; + } else +#endif + return offset_of(oopDesc, _metadata._klass); + } + + static int base_offset_in_bytes() { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + // With compact headers, the Klass* field is not used for the Klass* + // and is used for the object fields instead. + assert(sizeof(markWord) == 8, "sanity"); + return sizeof(markWord); + } else if (UseCompressedClassPointers) { + return sizeof(markWord) + sizeof(narrowKlass); + } else +#endif + return sizeof(oopDesc); + } + // for error reporting static void* load_klass_raw(oop obj); static void* load_oop_raw(oop obj, int offset); diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp index 0b706f1aa3b..f72096efc2c 100644 --- a/src/hotspot/share/oops/oop.inline.hpp +++ b/src/hotspot/share/oops/oop.inline.hpp @@ -36,6 +36,8 @@ #include "oops/oopsHierarchy.hpp" #include "runtime/atomic.hpp" #include "runtime/globals.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/objectMonitor.inline.hpp" #include "utilities/align.hpp" #include "utilities/debug.hpp" #include "utilities/macros.hpp" @@ -49,6 +51,9 @@ markWord oopDesc::mark() const { return markWord(v); } +markWord oopDesc::mark_acquire() const { + return Atomic::load_acquire(&_mark); +} markWord* oopDesc::mark_addr() const { return (markWord*) &_mark; } @@ -65,6 +70,10 @@ void oopDesc::release_set_mark(markWord m) { HeapAccess::store_at(as_oop(), mark_offset_in_bytes(), m.value()); } +void oopDesc::release_set_mark(HeapWord* mem, markWord m) { + Atomic::release_store((markWord*)(((char*)mem) + mark_offset_in_bytes()), m); +} + markWord oopDesc::cas_set_mark(markWord new_mark, markWord old_mark) { uintptr_t v = HeapAccess<>::atomic_cmpxchg_at(as_oop(), mark_offset_in_bytes(), old_mark.value(), new_mark.value()); return markWord(v); @@ -74,37 +83,67 @@ markWord oopDesc::cas_set_mark(markWord new_mark, markWord old_mark, atomic_memo return Atomic::cmpxchg(&_mark, old_mark, new_mark, order); } +markWord oopDesc::resolve_mark() const { + assert(LockingMode != LM_LEGACY, "Not safe with legacy stack-locking"); + markWord hdr = mark(); + if (hdr.has_displaced_mark_helper()) { + hdr = hdr.displaced_mark_helper(); + } + return hdr; +} + +markWord oopDesc::prototype_mark() const { + if (UseCompactObjectHeaders) { + return klass()->prototype_header(); + } else { + return markWord::prototype(); + } +} + void oopDesc::init_mark() { set_mark(markWord::prototype_for_klass(klass())); } Klass* oopDesc::klass() const { - if 
(UseCompressedClassPointers) { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + assert(UseCompressedClassPointers, "only with compressed class pointers"); + return mark().klass(); + } else if (UseCompressedClassPointers) { return CompressedKlassPointers::decode_not_null(_metadata._compressed_klass); - } else { - return _metadata._klass; - } + } else +#endif + return _metadata._klass; } Klass* oopDesc::klass_or_null() const { - if (UseCompressedClassPointers) { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + assert(UseCompressedClassPointers, "only with compressed class pointers"); + return mark().klass_or_null(); + } else if (UseCompressedClassPointers) { return CompressedKlassPointers::decode(_metadata._compressed_klass); - } else { - return _metadata._klass; - } + } else +#endif + return _metadata._klass; } Klass* oopDesc::klass_or_null_acquire() const { - if (UseCompressedClassPointers) { - narrowKlass nklass = Atomic::load_acquire(&_metadata._compressed_klass); - return CompressedKlassPointers::decode(nklass); - } else { - return Atomic::load_acquire(&_metadata._klass); - } +#ifdef _LP64 + if (UseCompactObjectHeaders) { + assert(UseCompressedClassPointers, "only with compressed class pointers"); + return mark_acquire().klass_or_null(); + } else if (UseCompressedClassPointers) { + narrowKlass nklass = Atomic::load_acquire(&_metadata._compressed_klass); + return CompressedKlassPointers::decode(nklass); + } else +#endif + return Atomic::load_acquire(&_metadata._klass); } void oopDesc::set_klass(Klass* k) { assert(Universe::is_bootstrapping() || (k != NULL && k->is_klass()), "incorrect Klass"); + assert(!UseCompactObjectHeaders, "don't set Klass* with compact headers"); if (UseCompressedClassPointers) { _metadata._compressed_klass = CompressedKlassPointers::encode_not_null(k); } else { @@ -114,6 +153,7 @@ void oopDesc::set_klass(Klass* k) { void oopDesc::release_set_klass(HeapWord* mem, Klass* k) { assert(Universe::is_bootstrapping() || (k != NULL && k->is_klass()), "incorrect Klass"); + assert(!UseCompactObjectHeaders, "don't set Klass* with compact headers"); char* raw_mem = ((char*)mem + klass_offset_in_bytes()); if (UseCompressedClassPointers) { Atomic::release_store((narrowKlass*)raw_mem, @@ -124,16 +164,19 @@ void oopDesc::release_set_klass(HeapWord* mem, Klass* k) { } int oopDesc::klass_gap() const { + assert(!UseCompactObjectHeaders, "don't get Klass* gap with compact headers"); return *(int*)(((intptr_t)this) + klass_gap_offset_in_bytes()); } void oopDesc::set_klass_gap(HeapWord* mem, int v) { + assert(!UseCompactObjectHeaders, "don't set Klass* gap with compact headers"); if (UseCompressedClassPointers) { *(int*)(((char*)mem) + klass_gap_offset_in_bytes()) = v; } } void oopDesc::set_klass_gap(int v) { + assert(!UseCompactObjectHeaders, "don't set Klass* gap with compact headers"); set_klass_gap((HeapWord*)this, v); } @@ -201,6 +244,53 @@ int oopDesc::size_given_klass(Klass* klass) { return s; } +#ifdef _LP64 +Klass* oopDesc::forward_safe_klass_impl(markWord m) const { + assert(UseCompactObjectHeaders, "Only get here with compact headers"); + if (m.is_marked()) { + oop fwd = forwardee(m); + markWord m2 = fwd->mark(); + assert(!m2.is_marked() || m2.self_forwarded(), "no double forwarding: this: " PTR_FORMAT " (" INTPTR_FORMAT "), fwd: " PTR_FORMAT " (" INTPTR_FORMAT ")", p2i(this), m.value(), p2i(fwd), m2.value()); + m = m2; + } + return m.actual_mark().klass(); +} +#endif + +Klass* oopDesc::forward_safe_klass(markWord m) const { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + 
return forward_safe_klass_impl(m); + } else +#endif + { + return klass(); + } +} + +Klass* oopDesc::forward_safe_klass() const { +#ifdef _LP64 + if (UseCompactObjectHeaders) { + return forward_safe_klass_impl(mark()); + } else +#endif + { + return klass(); + } +} + +size_t oopDesc::forward_safe_size() { + return size_given_klass(forward_safe_klass()); +} + +void oopDesc::forward_safe_init_mark() { + if (UseCompactObjectHeaders) { + set_mark(forward_safe_klass()->prototype_header()); + } else { + init_mark(); + } +} + bool oopDesc::is_instance() const { return klass()->is_instance_klass(); } bool oopDesc::is_array() const { return klass()->is_array_klass(); } bool oopDesc::is_objArray() const { return klass()->is_objArray_klass(); } @@ -272,29 +362,81 @@ bool oopDesc::is_forwarded() const { // Used by scavengers void oopDesc::forward_to(oop p) { + assert(p != cast_to_oop(this) || !UseAltGCForwarding, "Must not be called with self-forwarding"); verify_forwardee(p); markWord m = markWord::encode_pointer_as_mark(p); - assert(m.decode_pointer() == p, "encoding must be reversable"); + assert(forwardee(m) == p, "encoding must be reversable"); set_mark(m); } -// Used by parallel scavengers -bool oopDesc::cas_forward_to(oop p, markWord compare, atomic_memory_order order) { - verify_forwardee(p); - markWord m = markWord::encode_pointer_as_mark(p); - assert(m.decode_pointer() == p, "encoding must be reversable"); - return cas_set_mark(m, compare, order) == compare; +void oopDesc::forward_to_self() { +#ifdef _LP64 + if (UseAltGCForwarding) { + markWord m = mark(); + // If mark is displaced, we need to preserve the real header during GC. + // It will be restored to the displaced header after GC. + assert(SafepointSynchronize::is_at_safepoint(), "we can only safely fetch the displaced header at safepoint"); + if (m.has_displaced_mark_helper()) { + m = m.displaced_mark_helper(); + } + m = m.set_self_forwarded(); + assert(forwardee(m) == cast_to_oop(this), "encoding must be reversible"); + set_mark(m); + } else +#endif + { + forward_to(oop(this)); + } } oop oopDesc::forward_to_atomic(oop p, markWord compare, atomic_memory_order order) { + assert(p != cast_to_oop(this) || !UseAltGCForwarding, "Must not be called with self-forwarding"); verify_forwardee(p); markWord m = markWord::encode_pointer_as_mark(p); - assert(m.decode_pointer() == p, "encoding must be reversable"); + assert(forwardee(m) == p, "encoding must be reversable"); markWord old_mark = cas_set_mark(m, compare, order); if (old_mark == compare) { return NULL; } else { - return cast_to_oop(old_mark.decode_pointer()); + return forwardee(old_mark); + } +} + +oop oopDesc::forward_to_self_atomic(markWord compare, atomic_memory_order order) { +#ifdef _LP64 + if (UseAltGCForwarding) { + markWord m = compare; + // If mark is displaced, we need to preserve the real header during GC. + // It will be restored to the displaced header after GC. 
+ assert(SafepointSynchronize::is_at_safepoint(), "we can only safely fetch the displaced header at safepoint"); + if (m.has_displaced_mark_helper()) { + m = m.displaced_mark_helper(); + } + m = m.set_self_forwarded(); + assert(forwardee(m) == cast_to_oop(this), "encoding must be reversible"); + markWord old_mark = cas_set_mark(m, compare, order); + if (old_mark == compare) { + return nullptr; + } else { + assert(old_mark.is_marked(), "must be marked here"); + return forwardee(old_mark); + } + } else +#endif + { + return forward_to_atomic(cast_to_oop(this), compare, order); + } +} + +oop oopDesc::forwardee(markWord header) const { + assert(header.is_marked(), "only decode when actually forwarded"); +#ifdef _LP64 + if (header.self_forwarded()) { + return cast_to_oop(this); + } else +#endif + { + return cast_to_oop(header.decode_pointer()); } } @@ -302,7 +444,7 @@ oop oopDesc::forward_to_atomic(oop p, markWord compare, atomic_memory_order orde // The forwardee is used when copying during scavenge and mark-sweep. // It does need to clear the low two locking- and GC-related bits. oop oopDesc::forwardee() const { - return cast_to_oop(mark().decode_pointer()); + return forwardee(mark()); } // The following method needs to be MT safe. @@ -357,7 +499,7 @@ void oopDesc::oop_iterate_backwards(OopClosureType* cl) { template void oopDesc::oop_iterate_backwards(OopClosureType* cl, Klass* k) { - assert(k == klass(), "wrong klass"); + assert(UseCompactObjectHeaders || k == klass(), "wrong klass"); OopIteratorClosureDispatch::oop_oop_iterate_backwards(cl, this, k); } diff --git a/src/hotspot/share/oops/typeArrayKlass.cpp b/src/hotspot/share/oops/typeArrayKlass.cpp index 57bec046822..26e742e57f1 100644 --- a/src/hotspot/share/oops/typeArrayKlass.cpp +++ b/src/hotspot/share/oops/typeArrayKlass.cpp @@ -228,9 +228,9 @@ Klass* TypeArrayKlass::array_klass_or_null() { } int TypeArrayKlass::oop_size(oop obj) const { - assert(obj->is_typeArray(),"must be a type array"); + assert(UseCompactObjectHeaders || obj->is_typeArray(),"must be a type array"); typeArrayOop t = typeArrayOop(obj); - return t->object_size(); + return t->object_size(this); } void TypeArrayKlass::initialize(TRAPS) { diff --git a/src/hotspot/share/oops/typeArrayKlass.inline.hpp b/src/hotspot/share/oops/typeArrayKlass.inline.hpp index 098f9e73993..4d32c343f12 100644 --- a/src/hotspot/share/oops/typeArrayKlass.inline.hpp +++ b/src/hotspot/share/oops/typeArrayKlass.inline.hpp @@ -35,7 +35,7 @@ class OopIterateClosure; inline void TypeArrayKlass::oop_oop_iterate_impl(oop obj, OopIterateClosure* closure) { - assert(obj->is_typeArray(),"must be a type array"); + assert(UseCompactObjectHeaders || obj->is_typeArray(),"must be a type array"); // Performance tweak: We skip processing the klass pointer since all // TypeArrayKlasses are guaranteed processed via the null class loader. 
} diff --git a/src/hotspot/share/oops/typeArrayOop.hpp b/src/hotspot/share/oops/typeArrayOop.hpp index dd6e718703c..2af7be778a4 100644 --- a/src/hotspot/share/oops/typeArrayOop.hpp +++ b/src/hotspot/share/oops/typeArrayOop.hpp @@ -131,7 +131,7 @@ class typeArrayOopDesc : public arrayOopDesc { } public: - inline int object_size(); + inline int object_size(const TypeArrayKlass* tk) const; }; #endif // SHARE_OOPS_TYPEARRAYOOP_HPP diff --git a/src/hotspot/share/oops/typeArrayOop.inline.hpp b/src/hotspot/share/oops/typeArrayOop.inline.hpp index 9d2e7ea0fd2..cf4cfc6995e 100644 --- a/src/hotspot/share/oops/typeArrayOop.inline.hpp +++ b/src/hotspot/share/oops/typeArrayOop.inline.hpp @@ -31,8 +31,7 @@ #include "oops/oop.inline.hpp" #include "oops/arrayOop.hpp" -int typeArrayOopDesc::object_size() { - TypeArrayKlass* tk = TypeArrayKlass::cast(klass()); +int typeArrayOopDesc::object_size(const TypeArrayKlass* tk) const { return object_size(tk->layout_helper(), length()); } diff --git a/src/hotspot/share/oops/weakHandle.hpp b/src/hotspot/share/oops/weakHandle.hpp index c2b589958d2..38dc0f4d613 100644 --- a/src/hotspot/share/oops/weakHandle.hpp +++ b/src/hotspot/share/oops/weakHandle.hpp @@ -50,8 +50,8 @@ class WeakHandle { WeakHandle(OopStorage* storage, Handle obj); WeakHandle(OopStorage* storage, oop obj); - inline oop resolve() const; - inline oop peek() const; + oop resolve() const; + oop peek() const; void release(OopStorage* storage) const; bool is_null() const { return _obj == NULL; } diff --git a/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp b/src/hotspot/share/opto/c2_CodeStubs.cpp similarity index 50% rename from src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp rename to src/hotspot/share/opto/c2_CodeStubs.cpp index c3d4850a5db..84b59e686d6 100644 --- a/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp +++ b/src/hotspot/share/opto/c2_CodeStubs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,38 +23,33 @@ */ #include "precompiled.hpp" -#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeBlob.hpp" +#include "opto/c2_CodeStubs.hpp" +#include "opto/c2_MacroAssembler.hpp" #include "opto/compile.hpp" -#include "opto/node.hpp" #include "opto/output.hpp" -#include "runtime/sharedRuntime.hpp" -#define __ masm. 
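The klass_offset_in_bytes()/base_offset_in_bytes() arithmetic introduced in oopDesc and oop.inline.hpp above boils down to three 64-bit header shapes. The following is a minimal standalone sketch of that arithmetic; HeaderLayout and the klass_shift value of 32 are illustrative assumptions, not the markWord constants from this patch.

  #include <cassert>
  #include <cstdio>

  // Simplified model of the three 64-bit header layouts discussed above.
  struct HeaderLayout {
    bool compact_headers;        // narrow klass id lives inside the mark word
    bool compressed_class_ptrs;  // 4-byte narrowKlass follows the mark word
  };

  static int klass_offset_in_bytes(const HeaderLayout& h) {
    const int mark_offset = 0;
    const int klass_shift = 32;              // assumed bit position of the klass bits
    assert(klass_shift % 8 == 0);
    if (h.compact_headers) {
      return mark_offset + klass_shift / 8;  // klass bits overlap the mark word
    }
    return mark_offset + 8;                  // Klass*/narrowKlass right after the mark
  }

  static int base_offset_in_bytes(const HeaderLayout& h) {
    if (h.compact_headers)       return 8;      // fields start right after the mark word
    if (h.compressed_class_ptrs) return 8 + 4;  // mark + narrowKlass
    return 8 + 8;                               // mark + full Klass*
  }

  int main() {
    HeaderLayout modes[] = { {true, true}, {false, true}, {false, false} };
    for (const HeaderLayout& m : modes) {
      std::printf("compact=%d compressed=%d klass@%d fields@%d\n",
                  m.compact_headers, m.compressed_class_ptrs,
                  klass_offset_in_bytes(m), base_offset_in_bytes(m));
    }
    return 0;
  }

With compact headers the narrowKlass slot after the mark word disappears entirely, which is why klass_gap_offset_in_bytes() and the set_klass*/klass_gap accessors above now assert !UseCompactObjectHeaders.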
-void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { - assert(SharedRuntime::polling_page_return_handler_blob() != NULL, - "polling page return stub not created yet"); - address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +C2CodeStubList::C2CodeStubList() : + _stubs(Compile::current()->comp_arena(), 2, 0, NULL) {} - RuntimeAddress callback_addr(stub); +void C2CodeStubList::emit(CodeBuffer& cb) { + C2_MacroAssembler masm(&cb); + for (int i = _stubs.length() - 1; i >= 0; i--) { + C2CodeStub* stub = _stubs.at(i); + int max_size = stub->max_size(); + // Make sure there is enough space in the code buffer + if (cb.insts()->maybe_expand_to_ensure_remaining(max_size) && cb.blob() == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } - __ bind(entry->_stub_label); - InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); -#ifdef _LP64 - __ lea(rscratch1, safepoint_pc); - __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); -#else - const Register tmp1 = rcx; - const Register tmp2 = rdx; - __ push(tmp1); - __ push(tmp2); + DEBUG_ONLY(int size_before = cb.insts_size();) - __ lea(tmp1, safepoint_pc); - __ get_thread(tmp2); - __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); + stub->emit(masm); - __ pop(tmp2); - __ pop(tmp1); -#endif - __ jump(callback_addr); + DEBUG_ONLY(int actual_size = cb.insts_size() - size_before;) + assert(max_size >= actual_size, "Expected stub size (%d) must be larger than or equal to actual stub size (%d)", max_size, actual_size); + } + _stubs.clear(); } -#undef __ diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp new file mode 100644 index 00000000000..33701499276 --- /dev/null +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "asm/assembler.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/allocation.hpp" +#include "opto/c2_MacroAssembler.hpp" +#include "utilities/growableArray.hpp" + +#ifndef SHARE_OPTO_C2_CODESTUBS_HPP +#define SHARE_OPTO_C2_CODESTUBS_HPP + +class C2CodeStub : public ResourceObj { +private: + Label _entry; + Label _continuation; + +protected: + C2CodeStub() : + _entry(), + _continuation() {} + ~C2CodeStub() {} + +public: + Label& entry() { return _entry; } + Label& continuation() { return _continuation; } + + virtual void emit(C2_MacroAssembler& masm) = 0; + virtual int max_size() const = 0; +}; + +class C2CodeStubList { +private: + GrowableArray _stubs; + +public: + C2CodeStubList(); + ~C2CodeStubList() {} + void add_stub(C2CodeStub* stub) { _stubs.append(stub); } + void emit(CodeBuffer& cb); +}; + +class C2SafepointPollStub : public C2CodeStub { +private: + uintptr_t _safepoint_offset; +public: + C2SafepointPollStub(uintptr_t safepoint_offset) : + _safepoint_offset(safepoint_offset) {} + int max_size() const; + void emit(C2_MacroAssembler& masm); +}; + +class C2CheckLockStackStub : public C2CodeStub { +public: + C2CheckLockStackStub() : C2CodeStub() {} + int max_size() const; + void emit(C2_MacroAssembler& masm); +}; + +class C2FastUnlockLightweightStub : public C2CodeStub { +private: + Register _obj; + Register _mark; + Register _t; + Register _thread; + Label _slow_path; + Label _push_and_slow_path; + Label _check_successor; + Label _unlocked_continuation; +public: + C2FastUnlockLightweightStub(Register obj, Register mark, Register t, Register thread) : C2CodeStub(), + _obj(obj), _mark(mark), _t(t), _thread(thread) {} + int max_size() const; + void emit(C2_MacroAssembler& masm); + Label& slow_path() { return _slow_path; } + Label& push_and_slow_path() { return _push_and_slow_path; } + Label& check_successor() { return _check_successor; } + Label& unlocked_continuation() { return _unlocked_continuation; } + Label& slow_path_continuation() { return continuation(); } +}; + +#ifdef _LP64 +class C2HandleAnonOMOwnerStub : public C2CodeStub { +private: + Register _monitor; + Register _tmp; +public: + C2HandleAnonOMOwnerStub(Register monitor, Register tmp = noreg) : C2CodeStub(), + _monitor(monitor), _tmp(tmp) {} + Register monitor() { return _monitor; } + Register tmp() { return _tmp; } + int max_size() const; + void emit(C2_MacroAssembler& masm); +}; +#endif + +#endif // SHARE_OPTO_C2_CODESTUBS_HPP diff --git a/src/hotspot/share/opto/c2_MacroAssembler.hpp b/src/hotspot/share/opto/c2_MacroAssembler.hpp index b6ba7429b16..f9422a551f4 100644 --- a/src/hotspot/share/opto/c2_MacroAssembler.hpp +++ b/src/hotspot/share/opto/c2_MacroAssembler.hpp @@ -27,6 +27,7 @@ #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" class C2_MacroAssembler: public MacroAssembler { diff --git a/src/hotspot/share/opto/callnode.cpp b/src/hotspot/share/opto/callnode.cpp index a6d50b44770..cd1268320d7 100644 --- a/src/hotspot/share/opto/callnode.cpp +++ b/src/hotspot/share/opto/callnode.cpp @@ -1676,12 +1676,12 @@ void AllocateNode::compute_MemBar_redundancy(ciMethod* initializer) } Node *AllocateNode::make_ideal_mark(PhaseGVN *phase, Node* obj, Node* control, Node* mem) { Node* mark_node = NULL; - // For now only enable fast locking for non-array types - if (UseBiasedLocking && Opcode() == Op_Allocate) { + if ((UseBiasedLocking && Opcode() == Op_Allocate) || UseCompactObjectHeaders) { Node* klass_node 
= in(AllocateNode::KlassNode); Node* proto_adr = phase->transform(new AddPNode(klass_node, klass_node, phase->MakeConX(in_bytes(Klass::prototype_header_offset())))); mark_node = LoadNode::make(*phase, control, mem, proto_adr, TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(), MemNode::unordered); } else { + // For now only enable fast locking for non-array types mark_node = phase->MakeConX(markWord::prototype().value()); } return mark_node; diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 550f6c6d70a..70fd1d287c6 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -1649,6 +1649,10 @@ Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_cr } } if (flat->isa_klassptr()) { + if (UseCompactObjectHeaders) { + if (flat->offset() == in_bytes(Klass::prototype_header_offset())) + alias_type(idx)->set_rewritable(false); + } if (flat->offset() == in_bytes(Klass::super_check_offset_offset())) alias_type(idx)->set_rewritable(false); if (flat->offset() == in_bytes(Klass::modifier_flags_offset())) diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index c3f23da4044..b68bc83ed62 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -4243,21 +4243,31 @@ bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) { Node* no_ctrl = NULL; Node* header = make_load(no_ctrl, header_addr, TypeX_X, TypeX_X->basic_type(), MemNode::unordered); - // Test the header to see if it is unlocked. - Node *lock_mask = _gvn.MakeConX(markWord::biased_lock_mask_in_place); - Node *lmasked_header = _gvn.transform(new AndXNode(header, lock_mask)); - Node *unlocked_val = _gvn.MakeConX(markWord::unlocked_value); - Node *chk_unlocked = _gvn.transform(new CmpXNode( lmasked_header, unlocked_val)); - Node *test_unlocked = _gvn.transform(new BoolNode( chk_unlocked, BoolTest::ne)); + if (!UseObjectMonitorTable) { + // Test the header to see if it is safe to read w.r.t. locking. + Node *lock_mask = _gvn.MakeConX(markWord::lock_mask_in_place); + Node *lmasked_header = _gvn.transform(new AndXNode(header, lock_mask)); + if (LockingMode == LM_LIGHTWEIGHT) { + Node *monitor_val = _gvn.MakeConX(markWord::monitor_value); + Node *chk_monitor = _gvn.transform(new CmpXNode(lmasked_header, monitor_val)); + Node *test_monitor = _gvn.transform(new BoolNode(chk_monitor, BoolTest::eq)); + + generate_slow_guard(test_monitor, slow_region); + } else { + Node *unlocked_val = _gvn.MakeConX(markWord::unlocked_value); + Node *chk_unlocked = _gvn.transform(new CmpXNode(lmasked_header, unlocked_val)); + Node *test_not_unlocked = _gvn.transform(new BoolNode(chk_unlocked, BoolTest::ne)); - generate_slow_guard(test_unlocked, slow_region); + generate_slow_guard(test_not_unlocked, slow_region); + } + } // Get the hash value and check to see that it has been properly assigned. // We depend on hash_mask being at most 32 bits and avoid the use of // hash_mask_in_place because it could be larger than 32 bits in a 64-bit // vm: see markWord.hpp. - Node *hash_mask = _gvn.intcon(markWord::hash_mask); - Node *hash_shift = _gvn.intcon(markWord::hash_shift); + Node *hash_mask = _gvn.intcon(UseCompactObjectHeaders ? markWord::hash_mask_compact : markWord::hash_mask); + Node *hash_shift = _gvn.intcon(UseCompactObjectHeaders ? 
markWord::hash_shift_compact : markWord::hash_shift); Node *hshifted_header= _gvn.transform(new URShiftXNode(header, hash_shift)); // This hack lets the hash bits live anywhere in the mark object now, as long // as the shift drops the relevant bits into the low 32 bits. Note that @@ -6985,31 +6995,31 @@ bool LibraryCallKit::inline_digestBase_implCompress(vmIntrinsics::ID id) { switch(id) { case vmIntrinsics::_md5_implCompress: assert(UseMD5Intrinsics, "need MD5 instruction support"); - state = get_state_from_digest_object(digestBase_obj, "[I"); + state = get_state_from_digest_object(digestBase_obj, T_INT); stubAddr = StubRoutines::md5_implCompress(); stubName = "md5_implCompress"; break; case vmIntrinsics::_sha_implCompress: assert(UseSHA1Intrinsics, "need SHA1 instruction support"); - state = get_state_from_digest_object(digestBase_obj, "[I"); + state = get_state_from_digest_object(digestBase_obj, T_INT); stubAddr = StubRoutines::sha1_implCompress(); stubName = "sha1_implCompress"; break; case vmIntrinsics::_sha2_implCompress: assert(UseSHA256Intrinsics, "need SHA256 instruction support"); - state = get_state_from_digest_object(digestBase_obj, "[I"); + state = get_state_from_digest_object(digestBase_obj, T_INT); stubAddr = StubRoutines::sha256_implCompress(); stubName = "sha256_implCompress"; break; case vmIntrinsics::_sha5_implCompress: assert(UseSHA512Intrinsics, "need SHA512 instruction support"); - state = get_state_from_digest_object(digestBase_obj, "[J"); + state = get_state_from_digest_object(digestBase_obj, T_LONG); stubAddr = StubRoutines::sha512_implCompress(); stubName = "sha512_implCompress"; break; case vmIntrinsics::_sha3_implCompress: assert(UseSHA3Intrinsics, "need SHA3 instruction support"); - state = get_state_from_digest_object(digestBase_obj, "[B"); + state = get_state_from_digest_object(digestBase_obj, T_BYTE); stubAddr = StubRoutines::sha3_implCompress(); stubName = "sha3_implCompress"; digest_length = get_digest_length_from_digest_object(digestBase_obj); @@ -7073,7 +7083,7 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) { const char* klass_digestBase_name = NULL; const char* stub_name = NULL; address stub_addr = NULL; - const char* state_type = "[I"; + BasicType elem_type = T_INT; switch (predicate) { case 0: @@ -7102,7 +7112,7 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) { klass_digestBase_name = "sun/security/provider/SHA5"; stub_name = "sha512_implCompressMB"; stub_addr = StubRoutines::sha512_implCompressMB(); - state_type = "[J"; + elem_type = T_LONG; } break; case 4: @@ -7110,7 +7120,7 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) { klass_digestBase_name = "sun/security/provider/SHA3"; stub_name = "sha3_implCompressMB"; stub_addr = StubRoutines::sha3_implCompressMB(); - state_type = "[B"; + elem_type = T_BYTE; } break; default: @@ -7128,21 +7138,21 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) { ciKlass* klass_digestBase = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make(klass_digestBase_name)); assert(klass_digestBase->is_loaded(), "predicate checks that this class is loaded"); ciInstanceKlass* instklass_digestBase = klass_digestBase->as_instance_klass(); - return inline_digestBase_implCompressMB(digestBase_obj, instklass_digestBase, state_type, stub_addr, stub_name, src_start, ofs, limit); + return inline_digestBase_implCompressMB(digestBase_obj, instklass_digestBase, elem_type, stub_addr, stub_name, src_start, ofs, limit); } return false; } 
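The reworked inline_native_hashcode() above does two things with the mark word: it first inspects the lock bits to decide whether the header may be read inline at all, then shifts and masks out the identity hash, switching to the *_compact constants when the klass bits share the word. A standalone sketch of both steps follows; every numeric constant here is an assumed example value, not the real markWord definition.

  #include <cstdint>
  #include <cstdio>

  // Step 1: under lightweight locking only an inflated monitor forces the slow
  // path; otherwise anything that is not "unlocked" does.
  static bool header_hash_is_readable(uint64_t mark, bool lightweight_locking) {
    const uint64_t lock_mask      = 0x3;  // assumed lock_mask_in_place
    const uint64_t unlocked_value = 0x1;  // assumed unlocked_value
    const uint64_t monitor_value  = 0x2;  // assumed monitor_value
    const uint64_t lock_bits = mark & lock_mask;
    return lightweight_locking ? (lock_bits != monitor_value)
                               : (lock_bits == unlocked_value);
  }

  // Step 2: shift the hash field down and mask it, as the URShiftX/AndI nodes do.
  static uint32_t identity_hash(uint64_t mark, unsigned hash_shift, uint32_t hash_mask) {
    return static_cast<uint32_t>(mark >> hash_shift) & hash_mask;
  }

  int main() {
    const unsigned shift         = 8;           // assumed hash_shift
    const unsigned shift_compact = 11;          // assumed hash_shift_compact
    const uint32_t mask          = 0x7FFFFFFF;  // assumed 31-bit hash_mask
    const uint64_t unlocked_mark = ((uint64_t)0x1234567u << shift) | 0x1;
    std::printf("readable=%d hash=0x%x\n",
                header_hash_is_readable(unlocked_mark, /*lightweight_locking=*/true),
                (unsigned)identity_hash(unlocked_mark, shift, mask));
    const uint64_t compact_mark = ((uint64_t)0x1234567u << shift_compact) | 0x1;
    std::printf("readable=%d hash=0x%x\n",
                header_hash_is_readable(compact_mark, true),
                (unsigned)identity_hash(compact_mark, shift_compact, mask));
    return 0;
  }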
//------------------------------inline_digestBase_implCompressMB----------------------- bool LibraryCallKit::inline_digestBase_implCompressMB(Node* digestBase_obj, ciInstanceKlass* instklass_digestBase, - const char* state_type, address stubAddr, const char *stubName, + BasicType elem_type, address stubAddr, const char *stubName, Node* src_start, Node* ofs, Node* limit) { const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_digestBase); const TypeOopPtr* xtype = aklass->as_instance_type(); Node* digest_obj = new CheckCastPPNode(control(), digestBase_obj, xtype); digest_obj = _gvn.transform(digest_obj); - Node* state = get_state_from_digest_object(digest_obj, state_type); + Node* state = get_state_from_digest_object(digest_obj, elem_type); if (state == NULL) return false; Node* digest_length = NULL; @@ -7173,13 +7183,20 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(Node* digestBase_obj, ciIn } //------------------------------get_state_from_digest_object----------------------- -Node * LibraryCallKit::get_state_from_digest_object(Node *digest_object, const char *state_type) { +Node * LibraryCallKit::get_state_from_digest_object(Node *digest_object, BasicType elem_type) { + const char* state_type; + switch (elem_type) { + case T_BYTE: state_type = "[B"; break; + case T_INT: state_type = "[I"; break; + case T_LONG: state_type = "[J"; break; + default: ShouldNotReachHere(); + } Node* digest_state = load_field_from_object(digest_object, "state", state_type); assert (digest_state != NULL, "wrong version of sun.security.provider.MD5/SHA/SHA2/SHA5/SHA3"); if (digest_state == NULL) return (Node *) NULL; // now have the array, need to get the start address of the state array - Node* state = array_element_address(digest_state, intcon(0), T_INT); + Node* state = array_element_address(digest_state, intcon(0), elem_type); return state; } diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index 3c317be2b6d..5eedf6540c0 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -290,9 +290,9 @@ class LibraryCallKit : public GraphKit { bool inline_digestBase_implCompress(vmIntrinsics::ID id); bool inline_digestBase_implCompressMB(int predicate); bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass, - const char* state_type, address stubAddr, const char *stubName, + BasicType elem_type, address stubAddr, const char *stubName, Node* src_start, Node* ofs, Node* limit); - Node* get_state_from_digest_object(Node *digestBase_object, const char* state_type); + Node* get_state_from_digest_object(Node *digestBase_object, BasicType elem_type); Node* get_digest_length_from_digest_object(Node *digestBase_object); Node* inline_digestBase_implCompressMB_predicate(int predicate); bool inline_encodeISOArray(bool ascii); diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp index 829cc4c8aed..098b793e827 100644 --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -1677,7 +1677,10 @@ PhaseMacroExpand::initialize_object(AllocateNode* alloc, } rawmem = make_store(control, rawmem, object, oopDesc::mark_offset_in_bytes(), mark_node, TypeX_X->basic_type()); - rawmem = make_store(control, rawmem, object, oopDesc::klass_offset_in_bytes(), klass_node, T_METADATA); + if (!UseCompactObjectHeaders) { + rawmem = make_store(control, rawmem, object, oopDesc::klass_offset_in_bytes(), klass_node, T_METADATA); + } + int header_size = 
alloc->minimum_header_size(); // conservatively small // Array length diff --git a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp index 37848a2793b..0efbe231427 100644 --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -1847,6 +1847,13 @@ Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) { const Type* LoadNode::load_array_final_field(const TypeKlassPtr *tkls, ciKlass* klass) const { + if (UseCompactObjectHeaders) { + if (tkls->offset() == in_bytes(Klass::prototype_header_offset())) { + // The field is Klass::_prototype_header. Return its (constant) value. + assert(this->Opcode() == Op_LoadX, "must load a proper type from _prototype_header"); + return TypeX::make(klass->prototype_header()); + } + } if (tkls->offset() == in_bytes(Klass::modifier_flags_offset())) { // The field is Klass::_modifier_flags. Return its (constant) value. // (Folds up the 2nd indirection in aClassConstant.getModifiers().) @@ -2017,6 +2024,13 @@ const Type* LoadNode::Value(PhaseGVN* phase) const { assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset"); return TypeInt::make(klass->super_check_offset()); } + if (UseCompactObjectHeaders) { + if (tkls->offset() == in_bytes(Klass::prototype_header_offset())) { + // The field is Klass::_prototype_header. Return its (constant) value. + assert(this->Opcode() == Op_LoadX, "must load a proper type from _prototype_header"); + return TypeX::make(klass->prototype_header()); + } + } // Compute index into primary_supers array juint depth = (tkls->offset() - in_bytes(Klass::primary_supers_offset())) / sizeof(Klass*); // Check for overflowing; use unsigned compare to handle the negative case. @@ -2101,7 +2115,7 @@ const Type* LoadNode::Value(PhaseGVN* phase) const { } Node* alloc = is_new_object_mark_load(phase); - if (alloc != NULL && !(alloc->Opcode() == Op_Allocate && UseBiasedLocking)) { + if (alloc != NULL && !(alloc->Opcode() == Op_Allocate && UseBiasedLocking) && !UseCompactObjectHeaders) { return TypeX::make(markWord::prototype().value()); } diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index 57d2fe05481..5f729f68f1a 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -225,79 +225,13 @@ class Scheduling { }; -volatile int C2SafepointPollStubTable::_stub_size = 0; - -Label& C2SafepointPollStubTable::add_safepoint(uintptr_t safepoint_offset) { - C2SafepointPollStub* entry = new (Compile::current()->comp_arena()) C2SafepointPollStub(safepoint_offset); - _safepoints.append(entry); - return entry->_stub_label; -} - -void C2SafepointPollStubTable::emit(CodeBuffer& cb) { - MacroAssembler masm(&cb); - for (int i = _safepoints.length() - 1; i >= 0; i--) { - // Make sure there is enough space in the code buffer - if (cb.insts()->maybe_expand_to_ensure_remaining(PhaseOutput::MAX_inst_size) && cb.blob() == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); - return; - } - - C2SafepointPollStub* entry = _safepoints.at(i); - emit_stub(masm, entry); - } -} - -int C2SafepointPollStubTable::stub_size_lazy() const { - int size = Atomic::load(&_stub_size); - - if (size != 0) { - return size; - } - - Compile* const C = Compile::current(); - BufferBlob* const blob = C->output()->scratch_buffer_blob(); - CodeBuffer cb(blob->content_begin(), C->output()->scratch_buffer_code_size()); - MacroAssembler masm(&cb); - C2SafepointPollStub* entry = _safepoints.at(0); - emit_stub(masm, entry); - size += cb.insts_size(); - - 
Atomic::store(&_stub_size, size); - - return size; -} - -int C2SafepointPollStubTable::estimate_stub_size() const { - if (_safepoints.length() == 0) { - return 0; - } - - int result = stub_size_lazy() * _safepoints.length(); - -#ifdef ASSERT - Compile* const C = Compile::current(); - BufferBlob* const blob = C->output()->scratch_buffer_blob(); - int size = 0; - - for (int i = _safepoints.length() - 1; i >= 0; i--) { - CodeBuffer cb(blob->content_begin(), C->output()->scratch_buffer_code_size()); - MacroAssembler masm(&cb); - C2SafepointPollStub* entry = _safepoints.at(i); - emit_stub(masm, entry); - size += cb.insts_size(); - } - assert(size == result, "stubs should not have variable size"); -#endif - - return result; -} - PhaseOutput::PhaseOutput() : Phase(Phase::Output), _code_buffer("Compile::Fill_buffer"), _first_block_size(0), _handler_table(), _inc_table(), + _stub_list(), _oop_map_set(NULL), _scratch_buffer_blob(NULL), _scratch_locs_memory(NULL), @@ -1313,7 +1247,6 @@ CodeBuffer* PhaseOutput::init_buffer() { BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); stub_req += bs->estimate_stub_size(); - stub_req += safepoint_poll_table()->estimate_stub_size(); // nmethod and CodeBuffer count stubs & constants as part of method's code. // class HandlerImpl is platform-specific and defined in the *.ad files. @@ -1820,8 +1753,8 @@ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) { bs->emit_stubs(*cb); if (C->failing()) return; - // Fill in stubs for calling the runtime from safepoint polls. - safepoint_poll_table()->emit(*cb); + // Fill in stubs. + _stub_list.emit(*cb); if (C->failing()) return; #ifndef PRODUCT diff --git a/src/hotspot/share/opto/output.hpp b/src/hotspot/share/opto/output.hpp index 4f296c64b8e..33893f4c344 100644 --- a/src/hotspot/share/opto/output.hpp +++ b/src/hotspot/share/opto/output.hpp @@ -29,6 +29,7 @@ #include "code/exceptionHandlerTable.hpp" #include "metaprogramming/enableIf.hpp" #include "opto/ad.hpp" +#include "opto/c2_CodeStubs.hpp" #include "opto/constantTable.hpp" #include "opto/phase.hpp" #include "runtime/vm_version.hpp" @@ -72,47 +73,6 @@ class BufferSizingData { { }; }; -class C2SafepointPollStubTable { -private: - struct C2SafepointPollStub: public ResourceObj { - uintptr_t _safepoint_offset; - Label _stub_label; - Label _trampoline_label; - C2SafepointPollStub(uintptr_t safepoint_offset) : - _safepoint_offset(safepoint_offset), - _stub_label(), - _trampoline_label() {} - }; - - GrowableArray _safepoints; - - static volatile int _stub_size; - - void emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const; - - // The selection logic below relieves the need to add dummy files to unsupported platforms. 
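The C2SafepointPollStubTable being deleted here (and in output.hpp below) is subsumed by the generic C2CodeStubList from c2_CodeStubs.hpp: stubs are only recorded during the main emission pass, and their out-of-line bodies are emitted in one batch afterwards, each guarded by a worst-case size check against the code buffer. A minimal standalone sketch of that collect-then-emit pattern, using invented CodeStub/StubList names rather than the HotSpot classes:

  #include <cstdio>
  #include <memory>
  #include <string>
  #include <vector>

  // Abstract out-of-line stub: the main pass only records it and branches to it.
  class CodeStub {
  public:
    virtual ~CodeStub() = default;
    virtual int max_size() const = 0;          // worst-case size for buffer checks
    virtual void emit(std::string& code) = 0;  // append the out-of-line body
  };

  class SafepointPollStub : public CodeStub {
    int _poll_offset;
  public:
    explicit SafepointPollStub(int poll_offset) : _poll_offset(poll_offset) {}
    int max_size() const override { return 16; }
    void emit(std::string& code) override {
      code += "  ; safepoint stub for poll at offset " + std::to_string(_poll_offset) + "\n";
    }
  };

  class StubList {
    std::vector<std::unique_ptr<CodeStub>> _stubs;
  public:
    void add_stub(std::unique_ptr<CodeStub> stub) { _stubs.push_back(std::move(stub)); }
    void emit(std::string& code, size_t buffer_capacity) {
      for (auto& stub : _stubs) {
        // Analogous to the "CodeCache is full" bail-out above.
        if (code.size() + (size_t)stub->max_size() > buffer_capacity) return;
        stub->emit(code);
      }
      _stubs.clear();
    }
  };

  int main() {
    StubList stubs;
    std::string code = "  ; ...method body...\n";
    stubs.add_stub(std::make_unique<SafepointPollStub>(0x40));
    stubs.add_stub(std::make_unique<SafepointPollStub>(0x7c));
    stubs.emit(code, 4096);
    std::printf("%s", code.c_str());
    return 0;
  }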
- template - typename EnableIf::type - select_emit_stub(MacroAssembler& masm, C2SafepointPollStub* entry) const { - emit_stub_impl(masm, entry); - } - - template - typename EnableIf::type - select_emit_stub(MacroAssembler& masm, C2SafepointPollStub* entry) const {} - - void emit_stub(MacroAssembler& masm, C2SafepointPollStub* entry) const { - select_emit_stub(masm, entry); - } - - int stub_size_lazy() const; - -public: - Label& add_safepoint(uintptr_t safepoint_offset); - int estimate_stub_size() const; - void emit(CodeBuffer& cb); -}; - class PhaseOutput : public Phase { private: // Instruction bits passed off to the VM @@ -121,7 +81,7 @@ class PhaseOutput : public Phase { int _first_block_size; // Size of unvalidated entry point code / OSR poison code ExceptionHandlerTable _handler_table; // Table of native-code exception handlers ImplicitExceptionTable _inc_table; // Table of implicit null checks in native code - C2SafepointPollStubTable _safepoint_poll_table;// Table for safepoint polls + C2CodeStubList _stub_list; // List of code stubs OopMapSet* _oop_map_set; // Table of oop maps (one for each safepoint location) BufferBlob* _scratch_buffer_blob; // For temporary code buffers. relocInfo* _scratch_locs_memory; // For temporary code buffers. @@ -169,8 +129,8 @@ class PhaseOutput : public Phase { // Constant table ConstantTable& constant_table() { return _constant_table; } - // Safepoint poll table - C2SafepointPollStubTable* safepoint_poll_table() { return &_safepoint_poll_table; } + // Code stubs list + void add_stub(C2CodeStub* stub) { _stub_list.add_stub(stub); } // Code emission iterator Block* block() { return _block; } diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp index b5cda23020a..4e91b148cf7 100644 --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -302,16 +302,19 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_nozero_C(Klass* array_type, int len if ((len > 0) && (result != NULL) && is_deoptimized_caller_frame(current)) { // Zero array here if the caller is deoptimized. - int size = ((typeArrayOop)result)->object_size(); + int size = TypeArrayKlass::cast(array_type)->oop_size(result); BasicType elem_type = TypeArrayKlass::cast(array_type)->element_type(); - const size_t hs = arrayOopDesc::header_size(elem_type); - // Align to next 8 bytes to avoid trashing arrays's length. - const size_t aligned_hs = align_object_offset(hs); + size_t hs_bytes = arrayOopDesc::base_offset_in_bytes(elem_type); + assert(is_aligned(hs_bytes, BytesPerInt), "must be 4 byte aligned"); HeapWord* obj = cast_from_oop(result); - if (aligned_hs > hs) { - Copy::zero_to_words(obj+hs, aligned_hs-hs); + if (!is_aligned(hs_bytes, BytesPerLong)) { + *reinterpret_cast(reinterpret_cast(obj) + hs_bytes) = 0; + hs_bytes += BytesPerInt; } + // Optimized zeroing. 
+ assert(is_aligned(hs_bytes, BytesPerLong), "must be 8-byte aligned"); + const size_t aligned_hs = hs_bytes / BytesPerLong; Copy::fill_to_aligned_words(obj+aligned_hs, size-aligned_hs); } diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp index 4c6fe91aee9..4a8f534b084 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -4561,7 +4561,8 @@ void TypeAryPtr::dump2( Dict &d, uint depth, outputStream *st ) const { } if( _offset != 0 ) { - int header_size = objArrayOopDesc::header_size() * wordSize; + BasicType basic_elem_type = elem()->basic_type(); + int header_size = arrayOopDesc::base_offset_in_bytes(basic_elem_type); if( _offset == OffsetTop ) st->print("+undefined"); else if( _offset == OffsetBot ) st->print("+any"); else if( _offset < header_size ) st->print("+%d", _offset); diff --git a/src/hotspot/share/precompiled/precompiled.hpp b/src/hotspot/share/precompiled/precompiled.hpp index d34304741cd..84337449f65 100644 --- a/src/hotspot/share/precompiled/precompiled.hpp +++ b/src/hotspot/share/precompiled/precompiled.hpp @@ -31,7 +31,6 @@ // measurements made in November 2018. This list excludes files named // *.include.hpp, since including them decreased build performance. -#include "classfile/classLoaderData.hpp" #include "classfile/javaClasses.hpp" #include "classfile/systemDictionary.hpp" #include "gc/shared/collectedHeap.hpp" diff --git a/src/hotspot/share/prims/jvmtiEnvBase.cpp b/src/hotspot/share/prims/jvmtiEnvBase.cpp index 4160c2c8765..d6ada3df8b7 100644 --- a/src/hotspot/share/prims/jvmtiEnvBase.cpp +++ b/src/hotspot/share/prims/jvmtiEnvBase.cpp @@ -51,6 +51,7 @@ #include "runtime/objectMonitor.inline.hpp" #include "runtime/osThread.hpp" #include "runtime/signature.hpp" +#include "runtime/synchronizer.inline.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" #include "runtime/vframe.inline.hpp" @@ -962,62 +963,32 @@ JvmtiEnvBase::get_object_monitor_usage(JavaThread* calling_thread, jobject objec ThreadsListHandle tlh(current_thread); JavaThread *owning_thread = NULL; - ObjectMonitor *mon = NULL; jvmtiMonitorUsage ret = { NULL, 0, 0, NULL, 0, NULL }; uint32_t debug_bits = 0; // first derive the object's owner and entry_count (if any) - { - // Revoke any biases before querying the mark word - BiasedLocking::revoke_at_safepoint(hobj); - - address owner = NULL; - { - markWord mark = hobj()->mark(); + owning_thread = ObjectSynchronizer::get_lock_owner(tlh.list(), hobj); + if (owning_thread != NULL) { // monitor is owned + Handle th(current_thread, owning_thread->threadObj()); + ret.owner = (jthread)jni_reference(calling_thread, th); - if (!mark.has_monitor()) { - // this object has a lightweight monitor - - if (mark.has_locker()) { - owner = (address)mark.locker(); // save the address of the Lock word - } - // implied else: no owner - } else { - // this object has a heavyweight monitor - mon = mark.monitor(); - - // The owner field of a heavyweight monitor may be NULL for no - // owner, a JavaThread * or it may still be the address of the - // Lock word in a JavaThread's stack. A monitor can be inflated - // by a non-owning JavaThread, but only the owning JavaThread - // can change the owner field from the Lock word to the - // JavaThread * and it may not have done that yet. - owner = (address)mon->owner(); - } - } - - if (owner != NULL) { - // This monitor is owned so we have to find the owning JavaThread. 
- owning_thread = Threads::owning_thread_from_monitor_owner(tlh.list(), owner); - assert(owning_thread != NULL, "owning JavaThread must not be NULL"); - Handle th(current_thread, owning_thread->threadObj()); - ret.owner = (jthread)jni_reference(calling_thread, th); - } - - if (owning_thread != NULL) { // monitor is owned - // The recursions field of a monitor does not reflect recursions - // as lightweight locks before inflating the monitor are not included. - // We have to count the number of recursive monitor entries the hard way. - // We pass a handle to survive any GCs along the way. - ret.entry_count = count_locked_objects(owning_thread, hobj); - } - // implied else: entry_count == 0 + // The recursions field of a monitor does not reflect recursions + // as lightweight locks before inflating the monitor are not included. + // We have to count the number of recursive monitor entries the hard way. + // We pass a handle to survive any GCs along the way. + ret.entry_count = count_locked_objects(owning_thread, hobj); } + // implied else: entry_count == 0 jint nWant = 0, nWait = 0; - if (mon != NULL) { + markWord mark = hobj->mark(); + ObjectMonitor* mon = mark.has_monitor() + ? ObjectSynchronizer::read_monitor(current_thread, hobj(), mark) + : nullptr; + + if (mon != nullptr) { // this object has a heavyweight monitor nWant = mon->contentions(); // # of threads contending for monitor nWait = mon->waiters(); // # of threads in Object.wait() diff --git a/src/hotspot/share/prims/jvmtiTagMap.cpp b/src/hotspot/share/prims/jvmtiTagMap.cpp index 68228f5141b..50b6d2e104e 100644 --- a/src/hotspot/share/prims/jvmtiTagMap.cpp +++ b/src/hotspot/share/prims/jvmtiTagMap.cpp @@ -66,8 +66,11 @@ #include "runtime/vframe.hpp" #include "runtime/vmThread.hpp" #include "runtime/vmOperations.hpp" +#include "utilities/objectBitSet.inline.hpp" #include "utilities/macros.hpp" +typedef ObjectBitSet JVMTIBitSet; + bool JvmtiTagMap::_has_object_free_events = false; // create a JvmtiTagMap @@ -1353,142 +1356,6 @@ jvmtiError JvmtiTagMap::get_objects_with_tags(const jlong* tags, return collector.result(count_ptr, object_result_ptr, tag_result_ptr); } - -// ObjectMarker is used to support the marking objects when walking the -// heap. -// -// This implementation uses the existing mark bits in an object for -// marking. Objects that are marked must later have their headers restored. -// As most objects are unlocked and don't have their identity hash computed -// we don't have to save their headers. Instead we save the headers that -// are "interesting". Later when the headers are restored this implementation -// restores all headers to their initial value and then restores the few -// objects that had interesting headers. -// -// Future work: This implementation currently uses growable arrays to save -// the oop and header of interesting objects. As an optimization we could -// use the same technique as the GC and make use of the unused area -// between top() and end(). -// - -// An ObjectClosure used to restore the mark bits of an object -class RestoreMarksClosure : public ObjectClosure { - public: - void do_object(oop o) { - if (o != NULL) { - markWord mark = o->mark(); - if (mark.is_marked()) { - o->init_mark(); - } - } - } -}; - -// ObjectMarker provides the mark and visited functions -class ObjectMarker : AllStatic { - private: - // saved headers - static GrowableArray* _saved_oop_stack; - static GrowableArray* _saved_mark_stack; - static bool _needs_reset; // do we need to reset mark bits? 
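The ObjectMarker being removed here marked visited objects by rewriting their mark words and then had to restore every interesting header afterwards; the replacement keeps the visited flag in a side ObjectBitSet, so object headers are never touched. A small standalone sketch of the side-table idea, with VisitedSet standing in for the real bitmap:

  #include <cstdint>
  #include <cstdio>
  #include <unordered_set>
  #include <vector>

  // Side-table "visited" marking: the mark lives in an external set keyed by the
  // object's address instead of in the object's header.
  class VisitedSet {
    std::unordered_set<uintptr_t> _marked;
  public:
    bool is_marked(const void* obj) const {
      return _marked.count(reinterpret_cast<uintptr_t>(obj)) != 0;
    }
    void mark(const void* obj) { _marked.insert(reinterpret_cast<uintptr_t>(obj)); }
  };

  struct Node { std::vector<Node*> refs; };

  static void walk(Node* root, VisitedSet& visited) {
    std::vector<Node*> stack{root};
    while (!stack.empty()) {
      Node* o = stack.back();
      stack.pop_back();
      if (visited.is_marked(o)) continue;  // the check check_for_visit() now performs
      visited.mark(o);
      for (Node* ref : o->refs) stack.push_back(ref);
    }
  }

  int main() {
    Node a, b, c;
    a.refs = {&b, &c};
    b.refs = {&a};                         // cycle: the set prevents revisiting
    VisitedSet visited;
    walk(&a, visited);
    std::printf("a visited: %d\n", visited.is_marked(&a));
    return 0;
  }

The key property is that no header is modified during the walk, so nothing has to be restored afterwards and the BiasedLocking::preserve_marks()/restore_marks() dance disappears with it.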
- - public: - static void init(); // initialize - static void done(); // clean-up - - static inline void mark(oop o); // mark an object - static inline bool visited(oop o); // check if object has been visited - - static inline bool needs_reset() { return _needs_reset; } - static inline void set_needs_reset(bool v) { _needs_reset = v; } -}; - -GrowableArray* ObjectMarker::_saved_oop_stack = NULL; -GrowableArray* ObjectMarker::_saved_mark_stack = NULL; -bool ObjectMarker::_needs_reset = true; // need to reset mark bits by default - -// initialize ObjectMarker - prepares for object marking -void ObjectMarker::init() { - assert(Thread::current()->is_VM_thread(), "must be VMThread"); - assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint"); - - // prepare heap for iteration - Universe::heap()->ensure_parsability(false); // no need to retire TLABs - - // create stacks for interesting headers - _saved_mark_stack = new (ResourceObj::C_HEAP, mtServiceability) GrowableArray(4000, mtServiceability); - _saved_oop_stack = new (ResourceObj::C_HEAP, mtServiceability) GrowableArray(4000, mtServiceability); - - if (UseBiasedLocking) { - BiasedLocking::preserve_marks(); - } -} - -// Object marking is done so restore object headers -void ObjectMarker::done() { - // iterate over all objects and restore the mark bits to - // their initial value - RestoreMarksClosure blk; - if (needs_reset()) { - Universe::heap()->object_iterate(&blk); - } else { - // We don't need to reset mark bits on this call, but reset the - // flag to the default for the next call. - set_needs_reset(true); - } - - // now restore the interesting headers - for (int i = 0; i < _saved_oop_stack->length(); i++) { - oop o = _saved_oop_stack->at(i); - markWord mark = _saved_mark_stack->at(i); - o->set_mark(mark); - } - - if (UseBiasedLocking) { - BiasedLocking::restore_marks(); - } - - // free the stacks - delete _saved_oop_stack; - delete _saved_mark_stack; -} - -// mark an object -inline void ObjectMarker::mark(oop o) { - assert(Universe::heap()->is_in(o), "sanity check"); - assert(!o->mark().is_marked(), "should only mark an object once"); - - // object's mark word - markWord mark = o->mark(); - - if (o->mark_must_be_preserved(mark)) { - _saved_mark_stack->push(mark); - _saved_oop_stack->push(o); - } - - // mark the object - o->set_mark(markWord::prototype().set_marked()); -} - -// return true if object is marked -inline bool ObjectMarker::visited(oop o) { - return o->mark().is_marked(); -} - -// Stack allocated class to help ensure that ObjectMarker is used -// correctly. Constructor initializes ObjectMarker, destructor calls -// ObjectMarker's done() function to restore object headers. 
-class ObjectMarkerController : public StackObj { - public: - ObjectMarkerController() { - ObjectMarker::init(); - } - ~ObjectMarkerController() { - ObjectMarker::done(); - } -}; - - // helper to map a jvmtiHeapReferenceKind to an old style jvmtiHeapRootKind // (not performance critical as only used for roots) static jvmtiHeapRootKind toJvmtiHeapRootKind(jvmtiHeapReferenceKind kind) { @@ -1621,6 +1488,7 @@ class CallbackInvoker : AllStatic { static JvmtiTagMap* _tag_map; static const void* _user_data; static GrowableArray* _visit_stack; + static JVMTIBitSet* _bitset; // accessors static JvmtiTagMap* tag_map() { return _tag_map; } @@ -1630,7 +1498,7 @@ class CallbackInvoker : AllStatic { // if the object hasn't been visited then push it onto the visit stack // so that it will be visited later static inline bool check_for_visit(oop obj) { - if (!ObjectMarker::visited(obj)) visit_stack()->push(obj); + if (!_bitset->is_marked(obj)) visit_stack()->push(obj); return true; } @@ -1661,13 +1529,15 @@ class CallbackInvoker : AllStatic { static void initialize_for_basic_heap_walk(JvmtiTagMap* tag_map, GrowableArray* visit_stack, const void* user_data, - BasicHeapWalkContext context); + BasicHeapWalkContext context, + JVMTIBitSet* bitset); // initialize for advanced mode static void initialize_for_advanced_heap_walk(JvmtiTagMap* tag_map, GrowableArray* visit_stack, const void* user_data, - AdvancedHeapWalkContext context); + AdvancedHeapWalkContext context, + JVMTIBitSet* bitset); // functions to report roots static inline bool report_simple_root(jvmtiHeapReferenceKind kind, oop o); @@ -1700,31 +1570,36 @@ AdvancedHeapWalkContext CallbackInvoker::_advanced_context; JvmtiTagMap* CallbackInvoker::_tag_map; const void* CallbackInvoker::_user_data; GrowableArray* CallbackInvoker::_visit_stack; +JVMTIBitSet* CallbackInvoker::_bitset; // initialize for basic heap walk (IterateOverReachableObjects et al) void CallbackInvoker::initialize_for_basic_heap_walk(JvmtiTagMap* tag_map, GrowableArray* visit_stack, const void* user_data, - BasicHeapWalkContext context) { + BasicHeapWalkContext context, + JVMTIBitSet* bitset) { _tag_map = tag_map; _visit_stack = visit_stack; _user_data = user_data; _basic_context = context; _advanced_context.invalidate(); // will trigger assertion if used _heap_walk_type = basic; + _bitset = bitset; } // initialize for advanced heap walk (FollowReferences) void CallbackInvoker::initialize_for_advanced_heap_walk(JvmtiTagMap* tag_map, GrowableArray* visit_stack, const void* user_data, - AdvancedHeapWalkContext context) { + AdvancedHeapWalkContext context, + JVMTIBitSet* bitset) { _tag_map = tag_map; _visit_stack = visit_stack; _user_data = user_data; _advanced_context = context; _basic_context.invalidate(); // will trigger assertion if used _heap_walk_type = advanced; + _bitset = bitset; } @@ -2396,6 +2271,8 @@ class VM_HeapWalkOperation: public VM_Operation { Handle _initial_object; GrowableArray* _visit_stack; // the visit stack + JVMTIBitSet _bitset; + // Dead object tags in JvmtiTagMap GrowableArray* _dead_objects; @@ -2472,7 +2349,7 @@ VM_HeapWalkOperation::VM_HeapWalkOperation(JvmtiTagMap* tag_map, _dead_objects = objects; - CallbackInvoker::initialize_for_basic_heap_walk(tag_map, _visit_stack, user_data, callbacks); + CallbackInvoker::initialize_for_basic_heap_walk(tag_map, _visit_stack, user_data, callbacks, &_bitset); } VM_HeapWalkOperation::VM_HeapWalkOperation(JvmtiTagMap* tag_map, @@ -2490,7 +2367,7 @@ VM_HeapWalkOperation::VM_HeapWalkOperation(JvmtiTagMap* tag_map, 
_visit_stack = create_visit_stack(); _dead_objects = objects; - CallbackInvoker::initialize_for_advanced_heap_walk(tag_map, _visit_stack, user_data, callbacks); + CallbackInvoker::initialize_for_advanced_heap_walk(tag_map, _visit_stack, user_data, callbacks, &_bitset); } VM_HeapWalkOperation::~VM_HeapWalkOperation() { @@ -2926,8 +2803,8 @@ inline bool VM_HeapWalkOperation::collect_stack_roots() { // bool VM_HeapWalkOperation::visit(oop o) { // mark object as visited - assert(!ObjectMarker::visited(o), "can't visit same object more than once"); - ObjectMarker::mark(o); + assert(!_bitset.is_marked(o), "can't visit same object more than once"); + _bitset.mark_obj(o); // instance if (o->is_instance()) { @@ -2956,7 +2833,6 @@ bool VM_HeapWalkOperation::visit(oop o) { void VM_HeapWalkOperation::doit() { ResourceMark rm; - ObjectMarkerController marker; ClassFieldMapCacheMark cm; JvmtiTagMap::check_hashmaps_for_heapwalk(_dead_objects); @@ -2965,20 +2841,11 @@ void VM_HeapWalkOperation::doit() { // the heap walk starts with an initial object or the heap roots if (initial_object().is_null()) { - // If either collect_stack_roots() or collect_simple_roots() - // returns false at this point, then there are no mark bits - // to reset. - ObjectMarker::set_needs_reset(false); - - // Calling collect_stack_roots() before collect_simple_roots() // can result in a big performance boost for an agent that is // focused on analyzing references in the thread stacks. if (!collect_stack_roots()) return; if (!collect_simple_roots()) return; - - // no early return so enable heap traversal to reset the mark bits - ObjectMarker::set_needs_reset(true); } else { visit_stack()->push(initial_object()()); } @@ -2990,7 +2857,7 @@ void VM_HeapWalkOperation::doit() { // visited or the callback asked to terminate the iteration. while (!visit_stack()->is_empty()) { oop o = visit_stack()->pop(); - if (!ObjectMarker::visited(o)) { + if (!_bitset.is_marked(o)) { if (!visit(o)) { break; } diff --git a/src/hotspot/share/prims/resolvedMethodTable.cpp b/src/hotspot/share/prims/resolvedMethodTable.cpp index 215cf8e31ef..c495209750b 100644 --- a/src/hotspot/share/prims/resolvedMethodTable.cpp +++ b/src/hotspot/share/prims/resolvedMethodTable.cpp @@ -126,11 +126,9 @@ class ResolvedMethodTableLookup : StackObj { uintx get_hash() const { return _hash; } - bool equals(WeakHandle* value, bool* is_dead) { + bool equals(WeakHandle* value) { oop val_oop = value->peek(); if (val_oop == NULL) { - // dead oop, mark this hash dead for cleaning - *is_dead = true; return false; } bool equals = _method == java_lang_invoke_ResolvedMethodName::vmtarget(val_oop); @@ -141,6 +139,10 @@ class ResolvedMethodTableLookup : StackObj { _found = Handle(_thread, value->resolve()); return true; } + bool is_dead(WeakHandle* value) { + oop val_oop = value->peek(); + return val_oop == NULL; + } }; diff --git a/src/hotspot/share/prims/unsafe.cpp b/src/hotspot/share/prims/unsafe.cpp index 1c1f5cd1e39..d2a53d88942 100644 --- a/src/hotspot/share/prims/unsafe.cpp +++ b/src/hotspot/share/prims/unsafe.cpp @@ -1,5 +1,3 @@ -// This project is a modified version of OpenJDK, licensed under GPL v2. -// Modifications Copyright (C) 2025 ByteDance Inc. /* * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
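The ResolvedMethodTableLookup change above splits key comparison from liveness: equals() no longer flags dead entries as a side effect, and a separate is_dead() predicate drives cleanup of stale weak handles. A standalone sketch of that split, using std::weak_ptr as a stand-in for the VM's WeakHandle:

  #include <cstdio>
  #include <memory>
  #include <string>
  #include <vector>

  struct Value { std::string name; };

  // equals() only answers "does this live entry match my key?"; dead entries
  // simply fail to match. is_dead() separately reports entries to purge.
  struct Lookup {
    std::string key;
    bool equals(const std::weak_ptr<Value>& entry) const {
      std::shared_ptr<Value> v = entry.lock();  // like WeakHandle::peek()
      return v != nullptr && v->name == key;
    }
    bool is_dead(const std::weak_ptr<Value>& entry) const {
      return entry.expired();
    }
  };

  int main() {
    auto live = std::make_shared<Value>(Value{"foo"});
    std::vector<std::weak_ptr<Value>> table;
    table.push_back(live);
    {
      auto dying = std::make_shared<Value>(Value{"bar"});
      table.push_back(dying);
    }                                           // "bar" is now dead
    Lookup lookup{"foo"};
    for (const auto& e : table) {
      std::printf("match=%d dead=%d\n", lookup.equals(e), lookup.is_dead(e));
    }
    return 0;
  }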
@@ -69,81 +67,6 @@ * Implementation of the jdk.internal.misc.Unsafe class */ -#if HOTSPOT_TARGET_CLASSLIB == 17 -#define MAX_OBJECT_SIZE \ - ( arrayOopDesc::header_size(T_DOUBLE) * HeapWordSize \ - + ((julong)max_jint * sizeof(double)) ) - - -#define UNSAFE_ENTRY(result_type, header) \ - JVM_ENTRY(static result_type, header) - -#define UNSAFE_LEAF(result_type, header) \ - JVM_LEAF(static result_type, header) - -#define UNSAFE_END JVM_END - - -static inline void* addr_from_java(jlong addr) { - // This assert fails in a variety of ways on 32-bit systems. - // It is impossible to predict whether native code that converts - // pointers to longs will sign-extend or zero-extend the addresses. - //assert(addr == (uintptr_t)addr, "must not be odd high bits"); - return (void*)(uintptr_t)addr; -} - -static inline jlong addr_to_java(void* p) { - assert(p == (void*)(uintptr_t)p, "must not be odd high bits"); - return (uintptr_t)p; -} - - -// Note: The VM's obj_field and related accessors use byte-scaled -// ("unscaled") offsets, just as the unsafe methods do. - -// However, the method Unsafe.fieldOffset explicitly declines to -// guarantee this. The field offset values manipulated by the Java user -// through the Unsafe API are opaque cookies that just happen to be byte -// offsets. We represent this state of affairs by passing the cookies -// through conversion functions when going between the VM and the Unsafe API. -// The conversion functions just happen to be no-ops at present. - -static inline jlong field_offset_to_byte_offset(jlong field_offset) { - return field_offset; -} - -static inline jlong field_offset_from_byte_offset(jlong byte_offset) { - return byte_offset; -} - -static inline void assert_field_offset_sane(oop p, jlong field_offset) { -#ifdef ASSERT - jlong byte_offset = field_offset_to_byte_offset(field_offset); - - if (p != NULL) { - assert(byte_offset >= 0 && byte_offset <= (jlong)MAX_OBJECT_SIZE, "sane offset"); - if (byte_offset == (jint)byte_offset) { - void* ptr_plus_disp = cast_from_oop
<address>(p) + byte_offset; - assert(p->field_addr((jint)byte_offset) == ptr_plus_disp, - "raw [ptr+disp] must be consistent with oop::field_addr"); - } - jlong p_size = HeapWordSize * (jlong)(p->size()); - assert(byte_offset < p_size, "Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, (int64_t)byte_offset, (int64_t)p_size); - } -#endif -} - -static inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) { - assert_field_offset_sane(p, field_offset); - jlong byte_offset = field_offset_to_byte_offset(field_offset); - - if (sizeof(char*) == sizeof(jint)) { // (this constant folds!) - return cast_from_oop<address>
(p) + (jint) byte_offset; - } else { - return cast_from_oop<address>
(p) + byte_offset; - } -} -#endif // HOTSPOT_TARGET_CLASSLIB // Externally callable versions: // (Use these in compiler intrinsics which emulate unsafe primitives.) @@ -156,157 +79,32 @@ jlong Unsafe_field_offset_from_byte_offset(jlong byte_offset) { ///// Data read/writes on the Java heap and in native (off-heap) memory -#if HOTSPOT_TARGET_CLASSLIB == 17 -/** - * Helper class to wrap memory accesses in JavaThread::doing_unsafe_access() - */ -class GuardUnsafeAccess { - JavaThread* _thread; - -public: - GuardUnsafeAccess(JavaThread* thread) : _thread(thread) { - // native/off-heap access which may raise SIGBUS if accessing - // memory mapped file data in a region of the file which has - // been truncated and is now invalid. - _thread->set_doing_unsafe_access(true); - } - - ~GuardUnsafeAccess() { - _thread->set_doing_unsafe_access(false); - } -}; - -/** - * Helper class for accessing memory. - * - * Normalizes values and wraps accesses in - * JavaThread::doing_unsafe_access() if needed. - */ -template -class MemoryAccess : StackObj { - JavaThread* _thread; - oop _obj; - ptrdiff_t _offset; - - // Resolves and returns the address of the memory access. - // This raw memory access may fault, so we make sure it happens within the - // guarded scope by making the access volatile at least. Since the store - // of Thread::set_doing_unsafe_access() is also volatile, these accesses - // can not be reordered by the compiler. Therefore, if the access triggers - // a fault, we will know that Thread::doing_unsafe_access() returns true. - volatile T* addr() { - void* addr = index_oop_from_field_offset_long(_obj, _offset); - return static_cast(addr); - } - - template - U normalize_for_write(U x) { - return x; - } - - jboolean normalize_for_write(jboolean x) { - return x & 1; - } - - template - U normalize_for_read(U x) { - return x; - } - - jboolean normalize_for_read(jboolean x) { - return x != 0; - } - -public: - MemoryAccess(JavaThread* thread, jobject obj, jlong offset) - : _thread(thread), _obj(JNIHandles::resolve(obj)), _offset((ptrdiff_t)offset) { - assert_field_offset_sane(_obj, offset); - } - - T get() { - if (_obj == NULL) { - GuardUnsafeAccess guard(_thread); - T ret = RawAccess<>::load(addr()); - return normalize_for_read(ret); - } else { - T ret = HeapAccess<>::load_at(_obj, _offset); - return normalize_for_read(ret); - } - } - - void put(T x) { - if (_obj == NULL) { - GuardUnsafeAccess guard(_thread); - RawAccess<>::store(addr(), normalize_for_write(x)); - } else { - HeapAccess<>::store_at(_obj, _offset, normalize_for_write(x)); - } - } - - - T get_volatile() { - if (_obj == NULL) { - GuardUnsafeAccess guard(_thread); - volatile T ret = RawAccess::load(addr()); - return normalize_for_read(ret); - } else { - T ret = HeapAccess::load_at(_obj, _offset); - return normalize_for_read(ret); - } - } - - void put_volatile(T x) { - if (_obj == NULL) { - GuardUnsafeAccess guard(_thread); - RawAccess::store(addr(), normalize_for_write(x)); - } else { - HeapAccess::store_at(_obj, _offset, normalize_for_write(x)); - } - } -}; -#endif // HOTSPOT_TARGET_CLASSLIB // These functions allow a null base pointer with an arbitrary address. // But if the base pointer is non-null, the offset should make some sense. // That is, it should be in the range [0, MAX_OBJECT_SIZE]. 
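The classlib-17 helpers removed above treat Unsafe field offsets as opaque cookies that happen to be byte offsets, and range-check them before forming base + offset (a null base with an arbitrary absolute address stays legal). A standalone sketch of that check; kMaxObjectSize is an assumed stand-in bound, not the VM's MAX_OBJECT_SIZE:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  static const int64_t kMaxObjectSize = int64_t(1) << 35;  // assumed bound

  // The cookie-to-byte-offset conversion is currently a no-op.
  static int64_t field_offset_to_byte_offset(int64_t field_offset) {
    return field_offset;
  }

  static void* index_from_field_offset(void* base, int64_t field_offset) {
    int64_t byte_offset = field_offset_to_byte_offset(field_offset);
    if (base != nullptr) {
      assert(byte_offset >= 0 && byte_offset <= kMaxObjectSize && "insane field offset");
      return static_cast<char*>(base) + byte_offset;
    }
    // Null base: the offset is really an absolute native address.
    return reinterpret_cast<void*>(static_cast<uintptr_t>(byte_offset));
  }

  int main() {
    long field = 42;
    void* p = index_from_field_offset(&field, 0);
    std::printf("same address: %d\n", p == &field);
    return 0;
  }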
-#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jobject, Unsafe_GetReference(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) { -#else -UNSAFE_ENTRY(jobject, Unsafe_GetReference(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) { -#endif oop p = JNIHandles::resolve(obj); assert_field_offset_sane(p, offset); oop v = HeapAccess::oop_load_at(p, offset); return JNIHandles::make_local(THREAD, v); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_PutReference(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) { -#else -UNSAFE_ENTRY(void, Unsafe_PutReference(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) { -#endif oop x = JNIHandles::resolve(x_h); oop p = JNIHandles::resolve(obj); assert_field_offset_sane(p, offset); HeapAccess::oop_store_at(p, offset, x); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jobject, Unsafe_GetReferenceVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) { -#else -UNSAFE_ENTRY(jobject, Unsafe_GetReferenceVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) { -#endif oop p = JNIHandles::resolve(obj); assert_field_offset_sane(p, offset); oop v = HeapAccess::oop_load_at(p, offset); return JNIHandles::make_local(THREAD, v); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_PutReferenceVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) { -#else -UNSAFE_ENTRY(void, Unsafe_PutReferenceVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) { -#endif oop x = JNIHandles::resolve(x_h); oop p = JNIHandles::resolve(obj); assert_field_offset_sane(p, offset); @@ -378,20 +176,12 @@ UNSAFE_LEAF(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe)) { ////// Allocation requests -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jobject, Unsafe_AllocateInstance(JNIEnv *env, jobject unsafe, jclass cls)) { -#else -UNSAFE_ENTRY(jobject, Unsafe_AllocateInstance(JNIEnv *env, jobject unsafe, jclass cls)) { -#endif instanceOop i = InstanceKlass::allocate_instance(JNIHandles::resolve_non_null(cls), CHECK_NULL); return JNIHandles::make_local(THREAD, i); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jlong, Unsafe_AllocateMemory0(JNIEnv *env, jobject unsafe, jlong size)) { -#else -UNSAFE_ENTRY(jlong, Unsafe_AllocateMemory0(JNIEnv *env, jobject unsafe, jlong size)) { -#endif size_t sz = (size_t)size; #if HOTSPOT_TARGET_CLASSLIB == 8 @@ -408,11 +198,7 @@ UNSAFE_ENTRY(jlong, Unsafe_AllocateMemory0(JNIEnv *env, jobject unsafe, jlong si return addr_to_java(x); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jlong, Unsafe_ReallocateMemory0(JNIEnv *env, jobject unsafe, jlong addr, jlong size)) { -#else -UNSAFE_ENTRY(jlong, Unsafe_ReallocateMemory0(JNIEnv *env, jobject unsafe, jlong addr, jlong size)) { -#endif void* p = addr_from_java(addr); size_t sz = (size_t)size; @@ -423,21 +209,13 @@ UNSAFE_ENTRY(jlong, Unsafe_ReallocateMemory0(JNIEnv *env, jobject unsafe, jlong return addr_to_java(x); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_FreeMemory0(JNIEnv *env, jobject unsafe, jlong addr)) { -#else -UNSAFE_ENTRY(void, Unsafe_FreeMemory0(JNIEnv *env, jobject unsafe, jlong addr)) { -#endif void* p = addr_from_java(addr); os::free(p); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_SetMemory0(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong size, jbyte value)) { -#else -UNSAFE_ENTRY(void, Unsafe_SetMemory0(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, 
jlong size, jbyte value)) { -#endif size_t sz = (size_t)size; oop base = JNIHandles::resolve(obj); @@ -446,11 +224,7 @@ UNSAFE_ENTRY(void, Unsafe_SetMemory0(JNIEnv *env, jobject unsafe, jobject obj, j Copy::fill_to_memory_atomic(p, sz, value); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_CopyMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size)) { -#else -UNSAFE_ENTRY(void, Unsafe_CopyMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size)) { -#endif size_t sz = (size_t)size; oop srcp = JNIHandles::resolve(srcObj); @@ -773,21 +547,15 @@ static jclass Unsafe_DefineClass_impl(JNIEnv *env, jstring name, jbyteArray data return result; } -#if HOTSPOT_TARGET_CLASSLIB == 8 + JVM_ENTRY(jclass, Unsafe_DefineClass0(JNIEnv *env, jobject unsafe, jstring name, jbyteArray data, int offset, int length, jobject loader, jobject pd)) { -#else -UNSAFE_ENTRY(jclass, Unsafe_DefineClass0(JNIEnv *env, jobject unsafe, jstring name, jbyteArray data, int offset, int length, jobject loader, jobject pd)) { -#endif ThreadToNativeFromVM ttnfv(thread); return Unsafe_DefineClass_impl(env, name, data, offset, length, loader, pd); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 + JVM_ENTRY(void, Unsafe_ThrowException(JNIEnv *env, jobject unsafe, jthrowable thr)) { -#else -UNSAFE_ENTRY(void, Unsafe_ThrowException(JNIEnv *env, jobject unsafe, jthrowable thr)) { -#endif ThreadToNativeFromVM ttnfv(thread); env->Throw(thr); } UNSAFE_END @@ -825,11 +593,7 @@ UNSAFE_ENTRY(jlong, Unsafe_CompareAndExchangeLong(JNIEnv *env, jobject unsafe, j } } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jboolean, Unsafe_CompareAndSetReference(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h)) { -#else -UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetReference(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h)) { -#endif oop x = JNIHandles::resolve(x_h); oop e = JNIHandles::resolve(e_h); oop p = JNIHandles::resolve(obj); @@ -838,11 +602,7 @@ UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetReference(JNIEnv *env, jobject unsafe return ret == e; } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jboolean, Unsafe_CompareAndSetInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) { -#else -UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) { -#endif oop p = JNIHandles::resolve(obj); if (p == NULL) { volatile jint* addr = (volatile jint*)index_oop_from_field_offset_long(p, offset); @@ -853,11 +613,7 @@ UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetInt(JNIEnv *env, jobject unsafe, jobj } } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jboolean, Unsafe_CompareAndSetLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x)) { -#else -UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSetLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x)) { -#endif oop p = JNIHandles::resolve(obj); if (p == NULL) { volatile jlong* addr = (volatile jlong*)index_oop_from_field_offset_long(p, offset); @@ -878,11 +634,7 @@ static void post_thread_park_event(EventThreadPark* event, const oop obj, jlong event->commit(); } -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, jlong time)) { -#else -UNSAFE_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, 
jlong time)) { -#endif HOTSPOT_THREAD_PARK_BEGIN((uintptr_t) thread->parker(), (int) isAbsolute, time); EventThreadPark event; @@ -903,11 +655,7 @@ UNSAFE_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, HOTSPOT_THREAD_PARK_END((uintptr_t) thread->parker()); } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread)) { -#else -UNSAFE_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread)) { -#endif if (jthread != NULL) { ThreadsListHandle tlh; JavaThread* thr = NULL; @@ -926,11 +674,7 @@ UNSAFE_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread)) } UNSAFE_END -#if HOTSPOT_TARGET_CLASSLIB == 8 JVM_ENTRY(jint, Unsafe_GetLoadAverage0(JNIEnv *env, jobject unsafe, jdoubleArray loadavg, jint nelem)) { -#else -UNSAFE_ENTRY(jint, Unsafe_GetLoadAverage0(JNIEnv *env, jobject unsafe, jdoubleArray loadavg, jint nelem)) { -#endif const int max_nelem = 3; double la[max_nelem]; jint ret; diff --git a/src/hotspot/share/prims/unsafe.inline.hpp b/src/hotspot/share/prims/unsafe.inline.hpp index 55419d43437..d6f34f3a335 100644 --- a/src/hotspot/share/prims/unsafe.inline.hpp +++ b/src/hotspot/share/prims/unsafe.inline.hpp @@ -23,7 +23,7 @@ #include "runtime/thread.hpp" #define MAX_OBJECT_SIZE \ - ( arrayOopDesc::header_size(T_DOUBLE) * HeapWordSize \ + ( arrayOopDesc::base_offset_in_bytes(T_DOUBLE) \ + ((julong)max_jint * sizeof(double)) ) diff --git a/src/hotspot/share/prims/unsafe8.cpp b/src/hotspot/share/prims/unsafe8.cpp index fb04f7307ed..470625c237d 100644 --- a/src/hotspot/share/prims/unsafe8.cpp +++ b/src/hotspot/share/prims/unsafe8.cpp @@ -552,7 +552,7 @@ extern "C" jclass Unsafe_DefineClass0(JNIEnv *env, jobject unsafe, jstring name, static Klass* Unsafe_DefineAnonymousClass_impl(JNIEnv *env, jclass host_class, jbyteArray data, jobjectArray cp_patches_jh, - HeapWord* *temp_alloc, + jbyte* *temp_alloc, TRAPS) { if (UsePerfData) { @@ -564,8 +564,7 @@ Unsafe_DefineAnonymousClass_impl(JNIEnv *env, } jint length = typeArrayOop(JNIHandles::resolve_non_null(data))->length(); - jint word_length = (length + sizeof(HeapWord)-1) / sizeof(HeapWord); - HeapWord* body = NEW_C_HEAP_ARRAY(HeapWord, word_length, mtInternal); + jbyte* body = NEW_C_HEAP_ARRAY(jbyte, length, mtInternal); if (body == NULL) { THROW_0(vmSymbols::java_lang_OutOfMemoryError()); } @@ -575,7 +574,7 @@ Unsafe_DefineAnonymousClass_impl(JNIEnv *env, { jbyte* array_base = typeArrayOop(JNIHandles::resolve_non_null(data))->byte_at_addr(0); - Copy::conjoint_words((HeapWord*) array_base, body, word_length); + Copy::conjoint_jbytes(array_base, body, length); } u1* class_bytes = (u1*) body; @@ -635,7 +634,7 @@ UNSAFE_ENTRY(jclass, Unsafe_DefineAnonymousClass(JNIEnv *env, jobject unsafe, jc UnsafeWrapper("Unsafe_DefineAnonymousClass"); ResourceMark rm(THREAD); - HeapWord* temp_alloc = NULL; + jbyte* temp_alloc = NULL; anon_klass = Unsafe_DefineAnonymousClass_impl(env, host_class, data, cp_patches_jh, @@ -646,7 +645,7 @@ UNSAFE_ENTRY(jclass, Unsafe_DefineAnonymousClass(JNIEnv *env, jobject unsafe, jc // try/finally clause: if (temp_alloc != NULL) { - FREE_C_HEAP_ARRAY(HeapWord, temp_alloc); + FREE_C_HEAP_ARRAY(jbyte, temp_alloc); } // The anonymous class loader data has been artificially been kept alive to diff --git a/src/hotspot/share/prims/whitebox.cpp b/src/hotspot/share/prims/whitebox.cpp index 88cd514a33d..2d4303d71b0 100644 --- a/src/hotspot/share/prims/whitebox.cpp +++ b/src/hotspot/share/prims/whitebox.cpp @@ -80,6 +80,7 
@@ #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaCalls.hpp" #include "runtime/jniHandles.inline.hpp" +#include "runtime/lockStack.hpp" #include "runtime/os.hpp" #include "runtime/stackFrameStream.inline.hpp" #include "runtime/sweeper.hpp" @@ -1755,6 +1756,14 @@ WB_ENTRY(jboolean, WB_IsMonitorInflated(JNIEnv* env, jobject wb, jobject obj)) return (jboolean) obj_oop->mark().has_monitor(); WB_END +WB_ENTRY(jint, WB_getLockStackCapacity(JNIEnv* env)) + return (jint) LockStack::CAPACITY; +WB_END + +WB_ENTRY(jboolean, WB_supportsRecursiveLightweightLocking(JNIEnv* env)) + return (jboolean) VM_Version::supports_recursive_lightweight_locking(); +WB_END + WB_ENTRY(jboolean, WB_DeflateIdleMonitors(JNIEnv* env, jobject wb)) log_info(monitorinflation)("WhiteBox initiated DeflateIdleMonitors"); return ObjectSynchronizer::request_deflate_idle_monitors(); @@ -2353,6 +2362,14 @@ WB_ENTRY(void, WB_UnlockCritical(JNIEnv* env, jobject wb)) GCLocker::unlock_critical(thread); WB_END +WB_ENTRY(void, WB_PreTouchMemory(JNIEnv* env, jobject wb, jlong addr, jlong size)) + void* const from = (void*)addr; + void* const to = (void*)(addr + size); + if (from > to) { + os::pretouch_memory(from, to, os::vm_page_size()); + } +WB_END + #define CC (char*) static JNINativeMethod methods[] = { @@ -2535,6 +2552,8 @@ static JNINativeMethod methods[] = { (void*)&WB_AddModuleExportsToAll }, {CC"deflateIdleMonitors", CC"()Z", (void*)&WB_DeflateIdleMonitors }, {CC"isMonitorInflated0", CC"(Ljava/lang/Object;)Z", (void*)&WB_IsMonitorInflated }, + {CC"getLockStackCapacity", CC"()I", (void*)&WB_getLockStackCapacity }, + {CC"supportsRecursiveLightweightLocking", CC"()Z", (void*)&WB_supportsRecursiveLightweightLocking }, {CC"forceSafepoint", CC"()V", (void*)&WB_ForceSafepoint }, {CC"getConstantPool0", CC"(Ljava/lang/Class;)J", (void*)&WB_GetConstantPool }, {CC"getConstantPoolCacheIndexTag0", CC"()I", (void*)&WB_GetConstantPoolCacheIndexTag}, @@ -2624,6 +2643,7 @@ static JNINativeMethod methods[] = { {CC"lockCritical", CC"()V", (void*)&WB_LockCritical}, {CC"unlockCritical", CC"()V", (void*)&WB_UnlockCritical}, + {CC"preTouchMemory", CC"(JJ)V", (void*)&WB_PreTouchMemory}, }; diff --git a/src/hotspot/share/runtime/abstract_vm_version.hpp b/src/hotspot/share/runtime/abstract_vm_version.hpp index 6bfa32398cb..86880a3c6f1 100644 --- a/src/hotspot/share/runtime/abstract_vm_version.hpp +++ b/src/hotspot/share/runtime/abstract_vm_version.hpp @@ -1,7 +1,7 @@ // This project is a modified version of OpenJDK, licensed under GPL v2. // Modifications Copyright (C) 2025 ByteDance Inc. /* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -196,6 +196,9 @@ class Abstract_VM_Version: AllStatic { // Does platform support stack watermark barriers for concurrent stack processing? constexpr static bool supports_stack_watermark_barrier() { return false; } + // Is recursive lightweight locking implemented for this platform? 
+ constexpr static bool supports_recursive_lightweight_locking() { return false; } + static bool print_matching_lines_from_file(const char* filename, outputStream* st, const char* keywords_to_match[]); }; diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 2ef1b20309f..5408d26f5ae 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -2046,6 +2046,22 @@ bool Arguments::check_vm_args_consistency() { } #endif +#if !defined(X86) && !defined(AARCH64) + if (LockingMode == LM_LIGHTWEIGHT) { + FLAG_SET_CMDLINE(LockingMode, LM_LEGACY); + warning("New lightweight locking not supported on this platform"); + } +#endif + + if (UseHeavyMonitors) { + if (FLAG_IS_CMDLINE(LockingMode) && LockingMode != LM_MONITOR) { + jio_fprintf(defaultStream::error_stream(), + "Conflicting -XX:+UseHeavyMonitors and -XX:LockingMode=%d flags", LockingMode); + return false; + } + FLAG_SET_CMDLINE(LockingMode, LM_MONITOR); + } + return status; } @@ -3239,6 +3255,61 @@ jint Arguments::finalize_vm_init_args(bool patch_mod_javabase) { UNSUPPORTED_OPTION(ShowRegistersOnAssert); #endif // CAN_SHOW_REGISTERS_ON_ASSERT +#ifdef _LP64 + if (UseCompactObjectHeaders && UseZGC) { + if (FLAG_IS_CMDLINE(UseCompactObjectHeaders)) { + warning("ZGC does not work with compact object headers, disabling UseCompactObjectHeaders"); + } + FLAG_SET_DEFAULT(UseCompactObjectHeaders, false); + } + + if (UseCompactObjectHeaders && FLAG_IS_CMDLINE(UseCompressedClassPointers) && !UseCompressedClassPointers) { + // If user specifies -UseCompressedClassPointers, disable compact headers with a warning. + warning("Compact object headers require compressed class pointers. Disabling compact object headers."); + FLAG_SET_DEFAULT(UseCompactObjectHeaders, false); + } + if (UseCompactObjectHeaders && LockingMode == LM_LEGACY) { + FLAG_SET_DEFAULT(LockingMode, LM_LIGHTWEIGHT); + } + if (UseCompactObjectHeaders && UseBiasedLocking) { + FLAG_SET_DEFAULT(UseBiasedLocking, false); + } + if (UseCompactObjectHeaders && !UseAltGCForwarding) { + FLAG_SET_DEFAULT(UseAltGCForwarding, true); + } +#endif + + if (UseObjectMonitorTable && LockingMode != LM_LIGHTWEIGHT) { + // ObjectMonitorTable requires lightweight locking. + FLAG_SET_DEFAULT(LockingMode, LM_LIGHTWEIGHT); + } + +#ifdef _LP64 + if (UseCompactObjectHeaders && FLAG_IS_CMDLINE(UseCompressedClassPointers) && !UseCompressedClassPointers) { + warning("Compact object headers require compressed class pointers. Disabling compact object headers."); + FLAG_SET_DEFAULT(UseCompactObjectHeaders, false); + } + if (UseCompactObjectHeaders && LockingMode != LM_LIGHTWEIGHT) { + FLAG_SET_DEFAULT(LockingMode, LM_LIGHTWEIGHT); + } + if (UseCompactObjectHeaders && !UseObjectMonitorTable) { + // If UseCompactObjectHeaders is on the command line, turn on UseObjectMonitorTable. + if (FLAG_IS_CMDLINE(UseCompactObjectHeaders)) { + FLAG_SET_DEFAULT(UseObjectMonitorTable, true); + + // If UseObjectMonitorTable is on the command line, turn off UseCompactObjectHeaders. + } else if (FLAG_IS_CMDLINE(UseObjectMonitorTable)) { + FLAG_SET_DEFAULT(UseCompactObjectHeaders, false); + // If neither on the command line, the defaults are incompatible, but turn on UseObjectMonitorTable. 
+ } else { + FLAG_SET_DEFAULT(UseObjectMonitorTable, true); + } + } + if (UseCompactObjectHeaders && !UseCompressedClassPointers) { + FLAG_SET_DEFAULT(UseCompressedClassPointers, true); + } +#endif + return JNI_OK; } @@ -4123,9 +4194,6 @@ jint Arguments::apply_ergo() { #ifdef COMPILER1 || !UseFastLocking #endif // COMPILER1 -#if INCLUDE_JVMCI - || !JVMCIUseFastLocking -#endif ) { if (!FLAG_IS_DEFAULT(UseBiasedLocking) && UseBiasedLocking) { // flag set to true on command line; warn the user that they diff --git a/src/hotspot/share/runtime/basicLock.cpp b/src/hotspot/share/runtime/basicLock.cpp index 623c3d1f7e8..d6a07cea349 100644 --- a/src/hotspot/share/runtime/basicLock.cpp +++ b/src/hotspot/share/runtime/basicLock.cpp @@ -24,16 +24,24 @@ #include "precompiled.hpp" #include "oops/oop.inline.hpp" -#include "runtime/basicLock.hpp" +#include "runtime/basicLock.inline.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/synchronizer.hpp" void BasicLock::print_on(outputStream* st, oop owner) const { st->print("monitor"); - markWord mark_word = displaced_header(); - if (mark_word.value() != 0) { - // Print monitor info if there's an owning oop and it refers to this BasicLock. - bool print_monitor_info = (owner != NULL) && (owner->mark() == markWord::from_pointer((void*)this)); - mark_word.print_on(st, print_monitor_info); + if (UseObjectMonitorTable) { + ObjectMonitor* mon = object_monitor_cache(); + if (mon != nullptr) { + mon->print_on(st); + } + } else if (LockingMode == LM_LEGACY) { + markWord mark_word = displaced_header(); + if (mark_word.value() != 0) { + // Print monitor info if there's an owning oop and it refers to this BasicLock. + bool print_monitor_info = (owner != nullptr) && (owner->mark() == markWord::from_pointer((void*)this)); + mark_word.print_on(st, print_monitor_info); + } } } @@ -66,19 +74,31 @@ void BasicLock::move_to(oop obj, BasicLock* dest) { // is small (given the support for inflated fast-path locking in the fast_lock, etc) // we'll leave that optimization for another time. - if (displaced_header().is_neutral()) { - // The object is locked and the resulting ObjectMonitor* will also be - // locked so it can't be async deflated until ownership is dropped. - ObjectSynchronizer::inflate_helper(obj); - // WARNING: We cannot put a check here, because the inflation - // will not update the displaced header. Once BasicLock is inflated, - // no one should ever look at its content. - } else { - // Typically the displaced header will be 0 (recursive stack lock) or - // unused_mark. Naively we'd like to assert that the displaced mark - // value is either 0, neutral, or 3. But with the advent of the - // store-before-CAS avoidance in fast_lock/compiler_lock_object - // we can find any flavor mark in the displaced mark. + if (LockingMode == LM_LEGACY) { + if (displaced_header().is_neutral()) { + // The object is locked and the resulting ObjectMonitor* will also be + // locked so it can't be async deflated until ownership is dropped. + ObjectSynchronizer::inflate_helper(obj); + // WARNING: We cannot put a check here, because the inflation + // will not update the displaced header. Once BasicLock is inflated, + // no one should ever look at its content. + } else { + // Typically the displaced header will be 0 (recursive stack lock) or + // unused_mark. Naively we'd like to assert that the displaced mark + // value is either 0, neutral, or 3. 
But with the advent of the + // store-before-CAS avoidance in fast_lock/compiler_lock_object + // we can find any flavor mark in the displaced mark. + } + dest->set_displaced_header(displaced_header()); + } else if (UseObjectMonitorTable) { + // Preserve the ObjectMonitor*, the cache is cleared when a box is reused + // and only read while the lock is held, so no stale ObjectMonitor* is + // encountered. + dest->set_object_monitor_cache(object_monitor_cache()); } - dest->set_displaced_header(displaced_header()); +#ifdef ASSERT + else { + dest->set_bad_metadata_deopt(); + } +#endif } diff --git a/src/hotspot/share/runtime/basicLock.hpp b/src/hotspot/share/runtime/basicLock.hpp index 18236b9e2a3..acf1b6c1404 100644 --- a/src/hotspot/share/runtime/basicLock.hpp +++ b/src/hotspot/share/runtime/basicLock.hpp @@ -28,27 +28,45 @@ #include "oops/markWord.hpp" #include "runtime/atomic.hpp" #include "runtime/handles.hpp" +#include "utilities/globalDefinitions.hpp" class BasicLock { friend class VMStructs; friend class JVMCIVMStructs; private: - volatile markWord _displaced_header; + // * For LM_MONITOR + // Unused. + // * For LM_LEGACY + // This is either the actual displaced header from a locked object, or + // a sentinel zero value indicating a recursive stack-lock. + // * For LM_LIGHTWEIGHT + // Used as a cache the ObjectMonitor* used when locking. Must either + // be nullptr or the ObjectMonitor* used when locking. + volatile uintptr_t _metadata; + + uintptr_t get_metadata() const { return Atomic::load(&_metadata); } + void set_metadata(uintptr_t value) { Atomic::store(&_metadata, value); } + static int metadata_offset_in_bytes() { return (int)offset_of(BasicLock, _metadata); } + public: - markWord displaced_header() const { - return Atomic::load(&_displaced_header); - } + // LM_MONITOR + void set_bad_metadata_deopt() { set_metadata(badDispHeaderDeopt); } + + // LM_LEGACY + inline markWord displaced_header() const; + void set_displaced_header(markWord header); + static int displaced_header_offset_in_bytes() { return metadata_offset_in_bytes(); } - void set_displaced_header(markWord header) { - Atomic::store(&_displaced_header, header); - } + // LM_LIGHTWEIGHT + inline ObjectMonitor* object_monitor_cache() const; + inline void clear_object_monitor_cache(); + inline void set_object_monitor_cache(ObjectMonitor* mon); + static int object_monitor_cache_offset_in_bytes() { return metadata_offset_in_bytes(); } void print_on(outputStream* st, oop owner) const; // move a basic lock (used during deoptimization void move_to(oop obj, BasicLock* dest); - - static int displaced_header_offset_in_bytes() { return offset_of(BasicLock, _displaced_header); } }; // A BasicObjectLock associates a specific Java object with a BasicLock. @@ -79,6 +97,8 @@ class BasicObjectLock { // GC support void oops_do(OopClosure* f) { f->do_oop(&_obj); } + static ByteSize lock_offset() { return byte_offset_of(BasicObjectLock, _lock); } + static int obj_offset_in_bytes() { return offset_of(BasicObjectLock, _obj); } static int lock_offset_in_bytes() { return offset_of(BasicObjectLock, _lock); } }; diff --git a/src/hotspot/share/runtime/basicLock.inline.hpp b/src/hotspot/share/runtime/basicLock.inline.hpp new file mode 100644 index 00000000000..66f3a175c96 --- /dev/null +++ b/src/hotspot/share/runtime/basicLock.inline.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_BASICLOCK_INLINE_HPP +#define SHARE_RUNTIME_BASICLOCK_INLINE_HPP + +#include "runtime/basicLock.hpp" + +inline markWord BasicLock::displaced_header() const { + assert(LockingMode == LM_LEGACY, "must be"); + return markWord(get_metadata()); +} + +inline void BasicLock::set_displaced_header(markWord header) { + assert(LockingMode == LM_LEGACY, "must be"); + Atomic::store(&_metadata, header.value()); +} + +inline ObjectMonitor* BasicLock::object_monitor_cache() const { + assert(UseObjectMonitorTable, "must be"); + return reinterpret_cast(get_metadata()); +} + +inline void BasicLock::clear_object_monitor_cache() { + assert(UseObjectMonitorTable, "must be"); + set_metadata(0); +} + +inline void BasicLock::set_object_monitor_cache(ObjectMonitor* mon) { + assert(UseObjectMonitorTable, "must be"); + set_metadata(reinterpret_cast(mon)); +} + +#endif // SHARE_RUNTIME_BASICLOCK_INLINE_HPP diff --git a/src/hotspot/share/runtime/biasedLocking.cpp b/src/hotspot/share/runtime/biasedLocking.cpp index af43770c613..0211ea8de15 100644 --- a/src/hotspot/share/runtime/biasedLocking.cpp +++ b/src/hotspot/share/runtime/biasedLocking.cpp @@ -33,6 +33,7 @@ #include "oops/oop.inline.hpp" #include "runtime/atomic.hpp" #include "runtime/basicLock.hpp" +#include "runtime/basicLock.inline.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/handles.inline.hpp" #include "runtime/handshake.hpp" diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp index 852186aa272..fbe44fa0307 100644 --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -56,6 +56,7 @@ #include "prims/vectorSupport.hpp" #include "prims/methodHandles.hpp" #include "runtime/atomic.hpp" +#include "runtime/basicLock.inline.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/deoptimization.hpp" #include "runtime/escapeBarrier.hpp" @@ -66,6 +67,8 @@ #include "runtime/interfaceSupport.inline.hpp" #include "runtime/jniHandles.inline.hpp" #include "runtime/keepStackGCProcessed.hpp" +#include "runtime/lightweightSynchronizer.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/objectMonitor.inline.hpp" #include "runtime/osThread.hpp" #include "runtime/safepointVerifiers.hpp" @@ -74,6 +77,7 @@ #include "runtime/stackFrameStream.inline.hpp" #include "runtime/stackWatermarkSet.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.inline.hpp" #include "runtime/thread.hpp" #include "runtime/threadSMR.hpp" #include "runtime/threadWXSetters.inline.hpp" @@ -254,7 +258,8 @@ static void 
restore_eliminated_locks(JavaThread* thread, GrowableArraylength(); i++) { + // Start locking from outermost/oldest frame + for (int i = (chunk->length() - 1); i >= 0; i--) { compiledVFrame* cvf = chunk->at(i); assert (cvf->scope() != NULL,"expect only compiled java frames"); GrowableArray* monitors = cvf->monitors(); @@ -1472,7 +1477,7 @@ bool Deoptimization::relock_objects(JavaThread* thread, GrowableArrayset_mark(unbiased_prototype); } else if (exec_mode == Unpack_none) { - if (mark.has_locker() && fr.sp() > (intptr_t*)mark.locker()) { + if (LockingMode == LM_LEGACY && mark.has_locker() && fr.sp() > (intptr_t*)mark.locker()) { // With exec_mode == Unpack_none obj may be thread local and locked in // a callee frame. In this case the bias was revoked before in revoke_for_object_deoptimization(). // Make the lock in the callee a recursive lock and restore the displaced header. @@ -1485,15 +1490,40 @@ bool Deoptimization::relock_objects(JavaThread* thread, GrowableArraycurrent_waiting_monitor(); if (waiting_monitor != NULL && waiting_monitor->object() == obj()) { assert(fr.is_deoptimized_frame(), "frame must be scheduled for deoptimization"); - mon_info->lock()->set_displaced_header(markWord::unused_mark()); + if (LockingMode == LM_LEGACY) { + mon_info->lock()->set_displaced_header(markWord::unused_mark()); + } else if (UseObjectMonitorTable) { + mon_info->lock()->clear_object_monitor_cache(); + } +#ifdef ASSERT + else { + assert(LockingMode == LM_MONITOR || !UseObjectMonitorTable, "must be"); + mon_info->lock()->set_bad_metadata_deopt(); + } +#endif JvmtiDeferredUpdates::inc_relock_count_after_wait(deoptee_thread); continue; } } } BasicLock* lock = mon_info->lock(); - ObjectSynchronizer::enter(obj, lock, deoptee_thread); - assert(mon_info->owner()->is_locked(), "object must be locked now"); + if (LockingMode == LM_LIGHTWEIGHT) { + // We have lost information about the correct state of the lock stack. + // Entering may create an invalid lock stack. Inflate the lock if it + // was fast_locked to restore the valid lock stack. + ObjectSynchronizer::enter_for(obj, lock, deoptee_thread); + if (deoptee_thread->lock_stack().contains(obj())) { + LightweightSynchronizer::inflate_fast_locked_object(obj(), deoptee_thread, thread, + ObjectSynchronizer::InflateCause::inflate_cause_vm_internal); + } + assert(mon_info->owner()->is_locked(), "object must be locked now"); + assert(obj->mark().has_monitor(), "must be"); + assert(!deoptee_thread->lock_stack().contains(obj()), "must be"); + assert(ObjectSynchronizer::read_monitor(thread, obj(), obj->mark())->owner() == deoptee_thread, "must be"); + } else { + ObjectSynchronizer::enter_for(obj, lock, deoptee_thread); + assert(mon_info->owner()->is_locked(), "object must be locked now"); + } } } } @@ -1600,7 +1630,8 @@ void Deoptimization::pop_frames_failed_reallocs(JavaThread* thread, vframeArray* for (int i = 0; i < array->frames(); i++) { MonitorChunk* monitors = array->element(i)->monitors(); if (monitors != NULL) { - for (int j = 0; j < monitors->number_of_monitors(); j++) { + // Unlock in reverse order starting from most nested monitor. 
+ for (int j = (monitors->number_of_monitors() - 1); j >= 0; j--) { BasicObjectLock* src = monitors->at(j); if (src->obj() != NULL) { ObjectSynchronizer::exit(src->obj(), src->lock(), thread); diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index a18c97b065b..e7549bc2352 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -131,6 +131,9 @@ const size_t minimumSymbolTableSize = 1024; "Use 32-bit class pointers in 64-bit VM. " \ "lp64_product means flag is always constant in 32 bit VM") \ \ + product(bool, UseCompactObjectHeaders, false, EXPERIMENTAL, \ + "Use 64-bit object headers instead of 96-bit headers") \ + \ product(intx, ObjectAlignmentInBytes, 8, \ "Default object alignment in bytes, 8 is minimum") \ range(8, 256) \ @@ -148,6 +151,7 @@ const size_t minimumSymbolTableSize = 1024; constraint) const bool UseCompressedOops = false; const bool UseCompressedClassPointers = false; +const bool UseCompactObjectHeaders = false; const intx ObjectAlignmentInBytes = 8; #endif // _LP64 @@ -2095,9 +2099,40 @@ const intx ObjectAlignmentInBytes = 8; "Mark all threads after a safepoint, and clear on a modify " \ "fence. Add cleanliness checks.") \ \ + product(bool, HeapObjectStats, false, DIAGNOSTIC, \ + "Enable gathering of heap object statistics") \ + \ + product(size_t, HeapObjectStatsSamplingInterval, 500, DIAGNOSTIC, \ + "Heap object statistics sampling interval (ms)") \ + \ + product(int, LockingMode, LM_LEGACY, EXPERIMENTAL, \ + "Select locking mode: " \ + "0: monitors only (LM_MONITOR), " \ + "1: monitors & legacy stack-locking (LM_LEGACY, default), " \ + "2: monitors & new lightweight locking (LM_LIGHTWEIGHT)") \ + range(0, 2) \ + \ + product(bool, UseObjectMonitorTable, false, DIAGNOSTIC, \ + "With Lightweight Locking mode, use a table to record inflated " \ + "monitors rather than the first word of the object.") \ + \ + product(int, LightweightFastLockingSpins, 13, DIAGNOSTIC, \ + "Specifies the number of time lightweight fast locking will " \ + "attempt to CAS the markWord before inflating. Between each " \ + "CAS it will spin for exponentially more time, resulting in " \ + "a total number of spins on the order of O(2^value)") \ + range(1, 30) \ + \ develop(bool, TraceOptimizedUpcallStubs, false, \ "Trace optimized upcall stub generation") \ \ + product(uint, TrimNativeHeapInterval, 0, EXPERIMENTAL, \ + "Interval, in ms, at which the JVM will trim the native heap if " \ + "the platform supports that. Lower values will reclaim memory " \ + "more eagerly at the cost of higher overhead. 
A value of 0 " \ + "(default) disables native heap trimming.") \ + range(0, UINT_MAX) \ + \ CLASSLIB8_ONLY(develop(bool, VerifyLatin1NamesOnly, false, \ "Ensure class/method/field names are encoded in Latin1")) \ \ diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp index 8f8b6bd6cfd..5dc47c7c245 100644 --- a/src/hotspot/share/runtime/java.cpp +++ b/src/hotspot/share/runtime/java.cpp @@ -71,6 +71,7 @@ #include "runtime/task.hpp" #include "runtime/thread.inline.hpp" #include "runtime/timer.hpp" +#include "runtime/trimNativeHeap.hpp" #include "runtime/vmOperations.hpp" #include "runtime/vmThread.hpp" #include "runtime/vm_version.hpp" @@ -467,6 +468,8 @@ void before_exit(JavaThread* thread, bool halt) { StringDedup::stop(); } + NativeHeapTrimmer::cleanup(); + // Stop concurrent GC threads Universe::heap()->stop(); diff --git a/src/hotspot/share/runtime/lightweightSynchronizer.cpp b/src/hotspot/share/runtime/lightweightSynchronizer.cpp new file mode 100644 index 00000000000..c1a9e366759 --- /dev/null +++ b/src/hotspot/share/runtime/lightweightSynchronizer.cpp @@ -0,0 +1,1211 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include "classfile/vmSymbols.hpp" +#include "thread.inline.hpp" +#include "jfrfiles/jfrEventClasses.hpp" +#include "logging/log.hpp" +#include "logging/logStream.hpp" +#include "memory/resourceArea.hpp" +#include "memory/allocation.hpp" +#include "oops/oop.inline.hpp" +#include "oops/weakHandle.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/basicLock.inline.hpp" +#include "runtime/globals_extension.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/thread.hpp" +#include "runtime/lightweightSynchronizer.hpp" +#include "runtime/lockStack.inline.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/objectMonitor.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/perfData.inline.hpp" +#include "runtime/safepointMechanism.inline.hpp" +#include "runtime/safepointVerifiers.hpp" +#include "runtime/synchronizer.inline.hpp" +#include "runtime/timerTrace.hpp" +#include "runtime/trimNativeHeap.hpp" +#include "utilities/concurrentHashTable.inline.hpp" +#include "utilities/concurrentHashTableTasks.inline.hpp" +#include "utilities/globalDefinitions.hpp" + + +// +// Lightweight synchronization. 
+// +// When the lightweight synchronization needs to use a monitor the link +// between the object and the monitor is stored in a concurrent hash table +// instead of in the mark word. This has the benefit that it further decouples +// the mark word from the synchronization code. +// + +// ConcurrentHashTable storing links from objects to ObjectMonitors +class ObjectMonitorWorld : public CHeapObj { + struct Config { + using Value = ObjectMonitor*; + static uintx get_hash(Value const& value, bool* is_dead) { + return (uintx)value->hash(); + } + static void* allocate_node(void* context, size_t size, Value const& value) { + reinterpret_cast(context)->inc_table_count(); + return AllocateHeap(size, MEMFLAGS::mtObjectMonitor); + }; + static void free_node(void* context, void* memory, Value const& value) { + reinterpret_cast(context)->dec_table_count(); + FreeHeap(memory); + } + }; + using ConcurrentTable = ConcurrentHashTable; + + ConcurrentTable* _table; + volatile size_t _table_count; + size_t _table_size; + volatile bool _resize; + + class Lookup : public StackObj { + oop _obj; + + public: + Lookup(oop obj) : _obj(obj) {} + + uintx get_hash() const { + uintx hash = _obj->mark().hash(); + assert(hash != 0, "should have a hash"); + return hash; + } + + bool equals(ObjectMonitor** value) { + // The entry is going to be removed soon. + assert(*value != nullptr, "must be"); + return (*value)->object_refers_to(_obj); + } + + bool is_dead(ObjectMonitor** value) { + assert(*value != nullptr, "must be"); + return (*value)->object_is_cleared(); + } + }; + + class LookupMonitor : public StackObj { + ObjectMonitor* _monitor; + + public: + LookupMonitor(ObjectMonitor* monitor) : _monitor(monitor) {} + + uintx get_hash() const { + return _monitor->hash(); + } + + bool equals(ObjectMonitor** value) { + return (*value) == _monitor; + } + + bool is_dead(ObjectMonitor** value) { + assert(*value != nullptr, "must be"); + return (*value)->object_is_dead(); + } + }; + + void inc_table_count() { + Atomic::inc(&_table_count); + } + + void dec_table_count() { + Atomic::dec(&_table_count); + } + + double get_load_factor() { + return (double)_table_count/(double)_table_size; + } + + size_t table_size(Thread* current = Thread::current()) { + return ((size_t)1) << _table->get_size_log2(current); + } + + static size_t max_log_size() { + // TODO[OMWorld]: Evaluate the max size. + // TODO[OMWorld]: Need to fix init order to use Universe::heap()->max_capacity(); + // Using MaxHeapSize directly this early may be wrong, and there + // are definitely rounding errors (alignment).
+ const size_t max_capacity = MaxHeapSize; + const size_t min_object_size = CollectedHeap::min_dummy_object_size() * HeapWordSize; + const size_t max_objects = max_capacity / MAX2(MinObjAlignmentInBytes, checked_cast(min_object_size)); + const size_t log_max_objects = log2i_graceful(max_objects); + + return MAX2(MIN2(SIZE_BIG_LOG2, log_max_objects), min_log_size()); + } + + static size_t min_log_size() { + // ~= log(AvgMonitorsPerThreadEstimate default) + return 10; + } + + template + static size_t clamp_log_size(V log_size) { + return MAX2(MIN2(log_size, checked_cast(max_log_size())), checked_cast(min_log_size())); + } + + static size_t initial_log_size() { + const size_t estimate = log2i(MAX2(os::processor_count(), 1)) + log2i(MAX2(AvgMonitorsPerThreadEstimate, size_t(1))); + return clamp_log_size(estimate); + } + + static size_t grow_hint () { + return ConcurrentTable::DEFAULT_GROW_HINT; + } + +public: + ObjectMonitorWorld() + : _table(new ConcurrentTable(initial_log_size(), + max_log_size(), + grow_hint(), + this)), + _table_count(0), + _table_size(table_size()), + _resize(false) {} + + void verify_monitor_get_result(oop obj, ObjectMonitor* monitor) { +#ifdef ASSERT + if (SafepointSynchronize::is_at_safepoint()) { + bool has_monitor = obj->mark().has_monitor(); + assert(has_monitor == (monitor != nullptr), + "Inconsistency between markWord and OMW table has_monitor: %s monitor: " PTR_FORMAT, + BOOL_TO_STR(has_monitor), p2i(monitor)); + } +#endif + } + + ObjectMonitor* monitor_get(Thread* current, oop obj) { + ObjectMonitor* result = nullptr; + Lookup lookup_f(obj); + auto found_f = [&](ObjectMonitor** found) { + assert((*found)->object_peek() == obj, "must be"); + result = *found; + }; + _table->get(current, lookup_f, found_f); + verify_monitor_get_result(obj, result); + return result; + } + + void try_notify_grow() { + if (!_table->is_max_size_reached() && !Atomic::load(&_resize)) { + Atomic::store(&_resize, true); + if (Service_lock->try_lock()) { + Service_lock->notify(); + Service_lock->unlock(); + } + } + } + + bool should_shrink() { + // No implemented; + return false; + } + + static constexpr double GROW_LOAD_FACTOR = 0.75; + + bool should_grow() { + return get_load_factor() > GROW_LOAD_FACTOR && !_table->is_max_size_reached(); + } + + bool should_resize() { + return should_grow() || should_shrink() || Atomic::load(&_resize); + } + + template + bool run_task(JavaThread* current, Task& task, const char* task_name, Args&... 
args) { + if (task.prepare(current)) { + log_trace(monitortable)("Started to %s", task_name); + TraceTime timer(task_name, TRACETIME_LOG(Debug, monitortable, perf)); + while (task.do_task(current, args...)) { + task.pause(current); + { + ThreadBlockInVM tbivm(current); + } + task.cont(current); + } + task.done(current); + return true; + } + return false; + } + + bool grow(JavaThread* current) { + ConcurrentTable::GrowTask grow_task(_table); + if (run_task(current, grow_task, "Grow")) { + _table_size = table_size(current); + log_info(monitortable)("Grown to size: %zu", _table_size); + return true; + } + return false; + } + + bool clean(JavaThread* current) { + ConcurrentTable::BulkDeleteTask clean_task(_table); + auto is_dead = [&](ObjectMonitor** monitor) { + return (*monitor)->object_is_dead(); + }; + auto do_nothing = [&](ObjectMonitor** monitor) {}; + NativeHeapTrimmer::SuspendMark sm("omworld"); + return run_task(current, clean_task, "Clean", is_dead, do_nothing); + } + + bool resize(JavaThread* current) { + LogTarget(Info, monitortable) lt; + bool success = false; + + if (should_grow()) { + lt.print("Start growing with load factor %f", get_load_factor()); + success = grow(current); + } else { + if (!_table->is_max_size_reached() && Atomic::load(&_resize)) { + lt.print("WARNING: Getting resize hints with load factor %f", get_load_factor()); + } + lt.print("Start cleaning with load factor %f", get_load_factor()); + success = clean(current); + } + + Atomic::store(&_resize, false); + + return success; + } + + ObjectMonitor* monitor_put_get(Thread* current, ObjectMonitor* monitor, oop obj) { + // Enter the monitor into the concurrent hashtable. + ObjectMonitor* result = monitor; + Lookup lookup_f(obj); + auto found_f = [&](ObjectMonitor** found) { + assert((*found)->object_peek() == obj, "must be"); + result = *found; + }; + bool grow; + _table->insert_get(current, lookup_f, monitor, found_f, &grow); + verify_monitor_get_result(obj, result); + if (grow) { + try_notify_grow(); + } + return result; + } + + bool remove_monitor_entry(Thread* current, ObjectMonitor* monitor) { + LookupMonitor lookup_f(monitor); + return _table->remove(current, lookup_f); + } + + bool contains_monitor(Thread* current, ObjectMonitor* monitor) { + LookupMonitor lookup_f(monitor); + bool result = false; + auto found_f = [&](ObjectMonitor** found) { + result = true; + }; + _table->get(current, lookup_f, found_f); + return result; + } + + void print_on(outputStream* st) const { + auto printer = [&] (ObjectMonitor** entry) { + ObjectMonitor* om = *entry; + oop obj = om->object_peek(); + st->print("monitor " PTR_FORMAT " ", p2i(om)); + st->print("object " PTR_FORMAT, p2i(obj)); + assert(obj->mark().hash() == om->hash(), "hash must match"); + st->cr(); + return true; + }; + if (SafepointSynchronize::is_at_safepoint()) { + _table->do_safepoint_scan(printer); + } else { + _table->do_scan(Thread::current(), printer); + } + } +}; + +ObjectMonitorWorld* LightweightSynchronizer::_omworld = nullptr; + +ObjectMonitor* LightweightSynchronizer::get_or_insert_monitor_from_table(oop object, JavaThread* current, bool try_read, bool* inserted) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + + if (try_read) { + ObjectMonitor* monitor = get_monitor_from_table(current, object); + if (monitor != nullptr) { + *inserted = false; + return monitor; + } + } + + ObjectMonitor* alloced_monitor = new ObjectMonitor(object); + alloced_monitor->set_owner_anonymous(); + + // Try insert monitor + ObjectMonitor* monitor = add_monitor(current, 
alloced_monitor, object); + + *inserted = alloced_monitor == monitor; + if (!*inserted) { + delete alloced_monitor; + } + + return monitor; +} + +static void log_inflate(Thread* current, oop object, const ObjectSynchronizer::InflateCause cause) { + if (log_is_enabled(Trace, monitorinflation)) { + ResourceMark rm(current); + log_info(monitorinflation)("inflate: object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s' cause %s", p2i(object), + object->mark().value(), object->klass()->external_name(), + ObjectSynchronizer::inflate_cause_name(cause)); + } +} + +static void post_monitor_inflate_event(EventJavaMonitorInflate* event, + const oop obj, + ObjectSynchronizer::InflateCause cause) { + assert(event != nullptr, "invariant"); + event->set_monitorClass(obj->klass()); + event->set_address((uintptr_t)(void*)obj); + event->set_cause((u1)cause); + event->commit(); +} + + +ObjectMonitor* LightweightSynchronizer::get_or_insert_monitor(oop object, JavaThread* current, const ObjectSynchronizer::InflateCause cause, bool try_read) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + + EventJavaMonitorInflate event; + + bool inserted; + ObjectMonitor* monitor = get_or_insert_monitor_from_table(object, current, try_read, &inserted); + + if (inserted) { + // Hopefully the performance counters are allocated on distinct + // cache lines to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + log_inflate(current, object, cause); + if (event.should_commit()) { + post_monitor_inflate_event(&event, object, cause); + } + + // The monitor has an anonymous owner so it is safe from async deflation. + ObjectSynchronizer::_in_use_list.add(monitor); + } + + return monitor; +} + +// Add the hashcode to the monitor to match the object and put it in the hashtable. 
+ObjectMonitor* LightweightSynchronizer::add_monitor(JavaThread* current, ObjectMonitor* monitor, oop obj) { + assert(UseObjectMonitorTable, "must be"); + assert(obj == monitor->object(), "must be"); + + intptr_t hash = obj->mark().hash(); + assert(hash != 0, "must be set when claiming the object monitor"); + monitor->set_hash(hash); + + return _omworld->monitor_put_get(current, monitor, obj); +} + +bool LightweightSynchronizer::remove_monitor(Thread* current, oop obj, ObjectMonitor* monitor) { + assert(UseObjectMonitorTable, "must be"); + assert(monitor->object_peek() == obj, "must be, cleared objects are removed by is_dead"); + + return _omworld->remove_monitor_entry(current, monitor); +} + +void LightweightSynchronizer::deflate_mark_word(oop obj) { + assert(UseObjectMonitorTable, "must be"); + + markWord mark = obj->mark_acquire(); + assert(!mark.has_no_hash(), "obj with inflated monitor must have had a hash"); + + while (mark.has_monitor()) { + const markWord new_mark = mark.clear_lock_bits().set_unlocked(); + mark = obj->cas_set_mark(new_mark, mark); + } +} + +void LightweightSynchronizer::initialize() { + if (!UseObjectMonitorTable) { + return; + } + _omworld = new ObjectMonitorWorld(); +} + +bool LightweightSynchronizer::needs_resize() { + if (!UseObjectMonitorTable) { + return false; + } + return _omworld->should_resize(); +} + +bool LightweightSynchronizer::resize_table(JavaThread* current) { + if (!UseObjectMonitorTable) { + return true; + } + return _omworld->resize(current); +} + +class LightweightSynchronizer::LockStackInflateContendedLocks : private OopClosure { + private: + oop _contended_oops[LockStack::CAPACITY]; + int _length; + + void do_oop(oop* o) final { + oop obj = *o; + if (obj->mark_acquire().has_monitor()) { + if (_length > 0 && _contended_oops[_length-1] == obj) { + // assert(VM_Version::supports_recursive_lightweight_locking(), "must be"); + // Recursive + return; + } + _contended_oops[_length++] = obj; + } + } + + void do_oop(narrowOop* o) final { + ShouldNotReachHere(); + } + + public: + LockStackInflateContendedLocks() : + _contended_oops(), + _length(0) {}; + + void inflate(JavaThread* current) { + assert(current == JavaThread::current(), "must be"); + current->lock_stack().oops_do(this); + for (int i = 0; i < _length; i++) { + LightweightSynchronizer:: + inflate_fast_locked_object(_contended_oops[i], current, current, ObjectSynchronizer::inflate_cause_vm_internal); + } + } +}; + +void LightweightSynchronizer::ensure_lock_stack_space(JavaThread* current) { + assert(current == JavaThread::current(), "must be"); + LockStack& lock_stack = current->lock_stack(); + + // Make room on lock_stack + if (lock_stack.is_full()) { + // Inflate contented objects + LockStackInflateContendedLocks().inflate(current); + if (lock_stack.is_full()) { + // Inflate the oldest object + inflate_fast_locked_object(lock_stack.bottom(), current, current, ObjectSynchronizer::inflate_cause_vm_internal); + } + } +} + +class LightweightSynchronizer::CacheSetter : StackObj { + JavaThread* const _thread; + BasicLock* const _lock; + ObjectMonitor* _monitor; + + NONCOPYABLE(CacheSetter); + +public: + CacheSetter(JavaThread* thread, BasicLock* lock) : + _thread(thread), + _lock(lock), + _monitor(nullptr) {} + + ~CacheSetter() { + // Only use the cache if using the table. 
+ if (UseObjectMonitorTable) { + if (_monitor != nullptr) { + _thread->om_set_monitor_cache(_monitor); + _lock->set_object_monitor_cache(_monitor); + } else { + _lock->clear_object_monitor_cache(); + } + } + } + + void set_monitor(ObjectMonitor* monitor) { + assert(_monitor == nullptr, "only set once"); + _monitor = monitor; + } + +}; + +class LightweightSynchronizer::VerifyThreadState { + bool _no_safepoint; + union { + struct {} _dummy; + NoSafepointVerifier _nsv; + }; + +public: + VerifyThreadState(JavaThread* locking_thread, JavaThread* current) : _no_safepoint(locking_thread != current) { + assert(current == Thread::current(), "must be"); + assert(locking_thread == current || locking_thread->is_obj_deopt_suspend(), "locking_thread may not run concurrently"); + if (_no_safepoint) { + ::new (&_nsv) NoSafepointVerifier(); + } + } + ~VerifyThreadState() { + if (_no_safepoint){ + _nsv.~NoSafepointVerifier(); + } + } +}; + +bool LightweightSynchronizer::fast_lock_spin_enter(oop obj, JavaThread* current, bool observed_deflation) { + // Will spin with exponential backoff with an accumulative O(2^spin_limit) spins. + const int log_spin_limit = os::is_MP() || !UseObjectMonitorTable ? LightweightFastLockingSpins : 1; + const int log_min_safepoint_check_interval = 10; + + LockStack& lock_stack = current->lock_stack(); + + markWord mark = obj->mark(); + const auto should_spin = [&]() { + if (!mark.has_monitor()) { + // Spin while not inflated. + return true; + } else if (observed_deflation) { + // Spin while monitor is being deflated. + ObjectMonitor* monitor = ObjectSynchronizer::read_monitor(current, obj, mark); + return monitor == nullptr || monitor->is_being_async_deflated(); + } + // Else stop spinning. + return false; + }; + // Always attempt to lock once even when safepoint synchronizing. + bool should_process = false; + for (int i = 0; should_spin() && !should_process && i < log_spin_limit; i++) { + // Spin with exponential backoff. + const int total_spin_count = 1 << i; + const int inner_spin_count = MIN2(1 << log_min_safepoint_check_interval, total_spin_count); + const int outer_spin_count = total_spin_count / inner_spin_count; + for (int outer = 0; outer < outer_spin_count; outer++) { + should_process = SafepointMechanism::should_process(current); + if (should_process) { + // Stop spinning for safepoint. + break; + } + for (int inner = 1; inner < inner_spin_count; inner++) { + SpinPause(); + } + } + + mark = obj->mark(); + while (mark.is_unlocked()) { + ensure_lock_stack_space(current); + assert(!lock_stack.is_full(), "must have made room on the lock stack"); + assert(!lock_stack.contains(obj), "thread must not already hold the lock"); + // Try to swing into 'fast-locked' state. + markWord locked_mark = mark.set_fast_locked(); + markWord old_mark = mark; + mark = obj->cas_set_mark(locked_mark, old_mark); + if (old_mark == mark) { + // Successfully fast-locked, push object to lock-stack and return. 
+ lock_stack.push(obj); + return true; + } + } + } + return false; +} + +void LightweightSynchronizer::enter_for(Handle obj, BasicLock* lock, JavaThread* locking_thread) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + JavaThread* current = JavaThread::current(); + VerifyThreadState vts(locking_thread, current); + + if (obj->klass()->is_value_based()) { + ObjectSynchronizer::handle_sync_on_value_based_class(obj, locking_thread); + } + + CacheSetter cache_setter(locking_thread, lock); + + LockStack& lock_stack = locking_thread->lock_stack(); + + ObjectMonitor* monitor = nullptr; + if (lock_stack.contains(obj())) { + monitor = inflate_fast_locked_object(obj(), locking_thread, current, ObjectSynchronizer::inflate_cause_monitor_enter); + bool entered = monitor->enter_for(locking_thread); + assert(entered, "recursive ObjectMonitor::enter_for must succeed"); + } else { + // It is assumed that enter_for must enter on an object without contention. + monitor = inflate_and_enter(obj(), locking_thread, current, ObjectSynchronizer::inflate_cause_monitor_enter); + } + + assert(monitor != nullptr, "LightweightSynchronizer::enter_for must succeed"); + cache_setter.set_monitor(monitor); +} + +void LightweightSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + assert(current == JavaThread::current(), "must be"); + + if (obj->klass()->is_value_based()) { + ObjectSynchronizer::handle_sync_on_value_based_class(obj, current); + } + + CacheSetter cache_setter(current, lock); + + // Used when deflation is observed. Progress here requires progress + // from the deflator. After observing the that the deflator is not + // making progress (after two yields), switch to sleeping. + SpinYield spin_yield(0, 2); + bool observed_deflation = false; + + LockStack& lock_stack = current->lock_stack(); + + if (!lock_stack.is_full() && lock_stack.try_recursive_enter(obj())) { + // Recursively fast locked + return; + } + + if (lock_stack.contains(obj())) { + ObjectMonitor* monitor = inflate_fast_locked_object(obj(), current, current, ObjectSynchronizer::inflate_cause_monitor_enter); + bool entered = monitor->enter(current); + assert(entered, "recursive ObjectMonitor::enter must succeed"); + cache_setter.set_monitor(monitor); + return; + } + + while (true) { + // Fast-locking does not use the 'lock' argument. + // Fast-lock spinning to avoid inflating for short critical sections. + // The goal is to only inflate when the extra cost of using ObjectMonitors + // is worth it. + // If deflation has been observed we also spin while deflation is onging. + if (fast_lock_spin_enter(obj(), current, observed_deflation)) { + return; + } + + if (observed_deflation) { + spin_yield.wait(); + } + + ObjectMonitor* monitor = inflate_and_enter(obj(), current, current, ObjectSynchronizer::inflate_cause_monitor_enter); + if (monitor != nullptr) { + cache_setter.set_monitor(monitor); + return; + } + + // If inflate_and_enter returns nullptr it is because a deflated monitor + // was encountered. Fallback to fast locking. The deflater is responsible + // for clearing out the monitor and transitioning the markWord back to + // fast locking. 
+ observed_deflation = true; + } +} + +void LightweightSynchronizer::exit(oop object, JavaThread* current) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + assert(current == Thread::current(), "must be"); + + markWord mark = object->mark(); + assert(!mark.is_unlocked(), "must be unlocked"); + + LockStack& lock_stack = current->lock_stack(); + if (mark.is_fast_locked()) { + if (lock_stack.try_recursive_exit(object)) { + // This is a recursive exit which succeeded + return; + } + if (lock_stack.is_recursive(object)) { + // Must inflate recursive locks if try_recursive_exit fails + // This happens for un-structured unlocks, could potentially + // fix try_recursive_exit to handle these. + inflate_fast_locked_object(object, current, current, ObjectSynchronizer::inflate_cause_vm_internal); + } + } + + // Fast-locking does not use the 'lock' argument. + while (mark.is_fast_locked()) { + markWord unlocked_mark = mark.set_unlocked(); + markWord old_mark = mark; + mark = object->cas_set_mark(unlocked_mark, old_mark); + if (old_mark == mark) { + // CAS successful, remove from lock_stack + size_t recursion = lock_stack.remove(object) - 1; + assert(recursion == 0, "Should not have unlocked here"); + return; + } + } + + assert(mark.has_monitor(), "must be"); + // The monitor exists + ObjectMonitor* monitor = ObjectSynchronizer::read_monitor(current, object, mark); + if (monitor->is_owner_anonymous()) { + assert(current->lock_stack().contains(object), "current must have object on its lock stack"); + monitor->set_owner_from_anonymous(current); + monitor->set_recursions(current->lock_stack().remove(object) - 1); + } + + monitor->exit(current); +} + +ObjectMonitor* LightweightSynchronizer::inflate_locked_or_imse(oop obj, const ObjectSynchronizer::InflateCause cause, TRAPS) { + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + JavaThread* current = THREAD; + + for(;;) { + markWord mark = obj->mark_acquire(); + if (mark.is_unlocked()) { + // No lock, IMSE. + THROW_MSG_(vmSymbols::java_lang_IllegalMonitorStateException(), + "current thread is not owner", nullptr); + } + + if (mark.is_fast_locked()) { + if (!current->lock_stack().contains(obj)) { + // Fast locked by other thread, IMSE. + THROW_MSG_(vmSymbols::java_lang_IllegalMonitorStateException(), + "current thread is not owner", nullptr); + } else { + // Current thread owns the lock, must inflate + return inflate_fast_locked_object(obj, current, current, cause); + } + } + + assert(mark.has_monitor(), "must be"); + ObjectMonitor* monitor = ObjectSynchronizer::read_monitor(current, obj, mark); + if (monitor != nullptr) { + if (monitor->is_owner_anonymous()) { + LockStack& lock_stack = current->lock_stack(); + if (lock_stack.contains(obj)) { + // Current thread owns the lock but someone else inflated + // fix owner and pop lock stack + monitor->set_owner_from_anonymous(current); + monitor->set_recursions(lock_stack.remove(obj) - 1); + } else { + // Fast locked (and inflated) by other thread, or deflation in progress, IMSE. + THROW_MSG_(vmSymbols::java_lang_IllegalMonitorStateException(), + "current thread is not owner", nullptr); + } + } + return monitor; + } + } +} + +ObjectMonitor* LightweightSynchronizer::inflate_into_object_header(Thread* current, JavaThread* inflating_thread, oop object, const ObjectSynchronizer::InflateCause cause) { + + // The JavaThread* inflating_thread parameter is only used by LM_LIGHTWEIGHT and requires + // that the inflating_thread == Thread::current() or is suspended throughout the call by + // some other mechanism. 
+ // Even with LM_LIGHTWEIGHT the thread might be nullptr when called from a non + // JavaThread. (As may still be the case from FastHashCode). However it is only + // important for the correctness of the LM_LIGHTWEIGHT algorithm that the thread + // is set when called from ObjectSynchronizer::enter from the owning thread, + // ObjectSynchronizer::enter_for from any thread, or ObjectSynchronizer::exit. + EventJavaMonitorInflate event; + + for (;;) { + const markWord mark = object->mark_acquire(); + + // The mark can be in one of the following states: + // * inflated - Just return if using stack-locking. + // If using fast-locking and the ObjectMonitor owner + // is anonymous and the inflating_thread owns the + // object lock, then we make the inflating_thread + // the ObjectMonitor owner and remove the lock from + // the inflating_thread's lock stack. + // * fast-locked - Coerce it to inflated from fast-locked. + // * unlocked - Aggressively inflate the object. + + // CASE: inflated + if (mark.has_monitor()) { + ObjectMonitor* inf = mark.monitor(); + markWord dmw = inf->header(); + assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); + if (inf->is_owner_anonymous() && + inflating_thread != nullptr && inflating_thread->lock_stack().contains(object)) { + inf->set_owner_from_anonymous(inflating_thread); + size_t removed = inflating_thread->lock_stack().remove(object); + inf->set_recursions(removed - 1); + } + return inf; + } + + // CASE: fast-locked + // Could be fast-locked either by the inflating_thread or by some other thread. + // + // Note that we allocate the ObjectMonitor speculatively, _before_ + // attempting to set the object's mark to the new ObjectMonitor. If + // the inflating_thread owns the monitor, then we set the ObjectMonitor's + // owner to the inflating_thread. Otherwise, we set the ObjectMonitor's owner + // to anonymous. If we lose the race to set the object's mark to the + // new ObjectMonitor, then we just delete it and loop around again. + // + if (mark.is_fast_locked()) { + ObjectMonitor* monitor = new ObjectMonitor(object); + monitor->set_header(mark.set_unlocked()); + bool own = inflating_thread != nullptr && inflating_thread->lock_stack().contains(object); + if (own) { + // Owned by inflating_thread. + monitor->set_owner_from(nullptr, inflating_thread); + } else { + // Owned by somebody else. + monitor->set_owner_anonymous(); + } + markWord monitor_mark = markWord::encode(monitor); + markWord old_mark = object->cas_set_mark(monitor_mark, mark); + if (old_mark == mark) { + // Success! Return inflated monitor. + if (own) { + size_t removed = inflating_thread->lock_stack().remove(object); + monitor->set_recursions(removed - 1); + } + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + ObjectSynchronizer::_in_use_list.add(monitor); + + // Hopefully the performance counters are allocated on distinct + // cache lines to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + log_inflate(current, object, cause); + if (event.should_commit()) { + post_monitor_inflate_event(&event, object, cause); + } + return monitor; + } else { + delete monitor; + continue; // Interference -- just retry + } + } + + // CASE: unlocked + // TODO-FIXME: for entry we currently inflate and then try to CAS _owner. + // If we know we're inflating for entry it's better to inflate by swinging a + // pre-locked ObjectMonitor pointer into the object header. 
A successful + // CAS inflates the object *and* confers ownership to the inflating thread. + // In the current implementation we use a 2-step mechanism where we CAS() + // to inflate and then CAS() again to try to swing _owner from null to current. + // An inflateTry() method that we could call from enter() would be useful. + + assert(mark.is_unlocked(), "invariant: header=" INTPTR_FORMAT, mark.value()); + ObjectMonitor* m = new ObjectMonitor(object); + // prepare m for installation - set monitor to initial state + m->set_header(mark); + + if (object->cas_set_mark(markWord::encode(m), mark) != mark) { + delete m; + m = nullptr; + continue; + // interference - the markword changed - just retry. + // The state-transitions are one-way, so there's no chance of + // live-lock -- "Inflated" is an absorbing state. + } + + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + ObjectSynchronizer::_in_use_list.add(m); + + // Hopefully the performance counters are allocated on distinct + // cache lines to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + log_inflate(current, object, cause); + if (event.should_commit()) { + post_monitor_inflate_event(&event, object, cause); + } + return m; + } +} + +ObjectMonitor* LightweightSynchronizer::inflate_fast_locked_object(oop object, JavaThread* locking_thread, JavaThread* current, const ObjectSynchronizer::InflateCause cause) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used for lightweight"); + VerifyThreadState vts(locking_thread, current); + assert(locking_thread->lock_stack().contains(object), "locking_thread must have object on its lock stack"); + + ObjectMonitor* monitor; + + if (!UseObjectMonitorTable) { + return inflate_into_object_header(current, locking_thread, object, cause); + } + + // Inflating requires a hash code + ObjectSynchronizer::FastHashCode(current, object); + + markWord mark = object->mark_acquire(); + assert(!mark.is_unlocked(), "Cannot be unlocked"); + + for (;;) { + // Fetch the monitor from the table + monitor = get_or_insert_monitor(object, current, cause, true /* try_read */); + + if (monitor->is_owner_anonymous()) { + assert(monitor == get_monitor_from_table(current, object), "The monitor must be found"); + // New fresh monitor + break; + } + + os::naked_yield(); + assert(monitor->is_being_async_deflated(), "Should be the reason"); + } + + // Set the mark word; loop to handle concurrent updates to other parts of the mark word + while (mark.is_fast_locked()) { + mark = object->cas_set_mark(mark.set_has_monitor(), mark); + } + + // Indicate that the monitor now has a known owner + monitor->set_owner_from_anonymous(locking_thread); + + // Remove the entry from the thread's lock stack + monitor->set_recursions(locking_thread->lock_stack().remove(object) - 1); + + if (locking_thread == current) { + locking_thread->om_set_monitor_cache(monitor); + } + + return monitor; +} + +ObjectMonitor* LightweightSynchronizer::inflate_and_enter(oop object, JavaThread* locking_thread, JavaThread* current, const ObjectSynchronizer::InflateCause cause) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used for lightweight"); + VerifyThreadState vts(locking_thread, current); + + // Note: In some paths (deoptimization) the 'current' thread inflates and + // enters the lock on behalf of the 'locking_thread' thread. + + ObjectMonitor* monitor = nullptr; + + if (!UseObjectMonitorTable) { + // Do the old inflate and enter. 
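// Rough conceptual model (assumed for illustration, not the ObjectMonitorWorld
// implementation): without UseObjectMonitorTable the ObjectMonitor* is published
// directly in the object's mark word, while with the table the mark word keeps
// the hash bits and the monitor is found in a side table keyed by the object:
//
//   #include <unordered_map>
//   struct Obj; struct Monitor;
//   struct MonitorTable {
//     std::unordered_map<const Obj*, Monitor*> _map;   // keyed by object identity
//     Monitor* get(const Obj* o) const {
//       auto it = _map.find(o);
//       return it == _map.end() ? nullptr : it->second;
//     }
//   };
//
// The header path below therefore inflates and publishes the monitor with a
// single CAS on the mark word; the table path further down installs it via
// get_or_insert_monitor and only flips the mark word's monitor bit.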
+ monitor = inflate_into_object_header(current, locking_thread, object, cause); + + bool entered; + if (locking_thread == current) { + entered = monitor->enter(locking_thread); + } else { + entered = monitor->enter_for(locking_thread); + } + + // enter returns false for deflation found. + return entered ? monitor : nullptr; + } + + NoSafepointVerifier nsv; + + // Lightweight monitors require that hash codes are installed first + ObjectSynchronizer::FastHashCode(locking_thread, object); + + // Try to get the monitor from the thread-local cache. + // There's no need to use the cache if we are locking + // on behalf of another thread. + if (current == locking_thread) { + monitor = current->om_get_from_monitor_cache(object); + } + + // Get or create the monitor + if (monitor == nullptr) { + monitor = get_or_insert_monitor(object, current, cause, true /* try_read */); + } + + if (monitor->try_enter(locking_thread)) { + return monitor; + } + + // Holds is_being_async_deflated() stable throughout this function. + ObjectMonitorContentionMark contention_mark(monitor); + + /// First handle the case where the monitor from the table is deflated + if (monitor->is_being_async_deflated()) { + // The MonitorDeflation thread is deflating the monitor. The locking thread + // can either help transition the mark word or yield / spin until further + // progress have been made. + + const markWord mark = object->mark_acquire(); + + if (mark.has_monitor()) { + // Waiting on the deflation thread to remove the deflated monitor from the table. + os::naked_yield(); + + } else if (mark.is_fast_locked()) { + // Some other thread managed to fast-lock the lock, or this is a + // recursive lock from the same thread; yield for the deflation + // thread to remove the deflated monitor from the table. + os::naked_yield(); + + } else { + assert(mark.is_unlocked(), "Implied"); + // Retry immediately + } + + // Retry + return nullptr; + } + + for (;;) { + const markWord mark = object->mark_acquire(); + // The mark can be in one of the following states: + // * inflated - If the ObjectMonitor owner is anonymous + // and the locking_thread thread owns the object + // lock, then we make the locking_thread thread + // the ObjectMonitor owner and remove the + // lock from the locking_thread thread's lock stack. + // * fast-locked - Coerce it to inflated from fast-locked. + // * neutral - Inflate the object. Successful CAS is locked + + // CASE: inflated + if (mark.has_monitor()) { + LockStack& lock_stack = locking_thread->lock_stack(); + if (monitor->is_owner_anonymous() && lock_stack.contains(object)) { + // The lock is fast-locked by the locking thread, + // convert it to a held monitor with a known owner. + monitor->set_owner_from_anonymous(locking_thread); + monitor->set_recursions(lock_stack.remove(object) - 1); + } + + break; // Success + } + + // CASE: fast-locked + // Could be fast-locked either by locking_thread or by some other thread. + // + if (mark.is_fast_locked()) { + markWord old_mark = object->cas_set_mark(mark.set_has_monitor(), mark); + if (old_mark != mark) { + // CAS failed + continue; + } + + // Success! Return inflated monitor. + LockStack& lock_stack = locking_thread->lock_stack(); + if (lock_stack.contains(object)) { + // The lock is fast-locked by the locking thread, + // convert it to a held monitor with a known owner. 
+ monitor->set_owner_from_anonymous(locking_thread); + monitor->set_recursions(lock_stack.remove(object) - 1); + } + + break; // Success + } + + // CASE: neutral (unlocked) + + // Catch if the object's header is not neutral (not locked and + // not marked is what we care about here). + assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); + markWord old_mark = object->cas_set_mark(mark.set_has_monitor(), mark); + if (old_mark != mark) { + // CAS failed + continue; + } + + // Transitioned from unlocked to monitor means locking_thread owns the lock. + monitor->set_owner_from_anonymous(locking_thread); + + return monitor; + } + + if (current == locking_thread) { + // One round of spinning + if (monitor->spin_enter(locking_thread)) { + return monitor; + } + + // Monitor is contended, take the time before entering to fix the lock stack. + LockStackInflateContendedLocks().inflate(current); + } + + // enter can block for safepoints; clear the unhandled object oop + PauseNoSafepointVerifier pnsv(&nsv); + object = nullptr; + + if (current == locking_thread) { + monitor->enter_with_contention_mark(locking_thread, contention_mark); + } else { + monitor->enter_for_with_contention_mark(locking_thread, contention_mark); + } + + return monitor; +} + +void LightweightSynchronizer::deflate_monitor(Thread* current, oop obj, ObjectMonitor* monitor) { + if (obj != nullptr) { + deflate_mark_word(obj); + } + bool removed = remove_monitor(current, obj, monitor); + if (obj != nullptr) { + assert(removed, "Should have removed the entry if obj was alive"); + } +} + +ObjectMonitor* LightweightSynchronizer::get_monitor_from_table(Thread* current, oop obj) { + assert(UseObjectMonitorTable, "must be"); + return _omworld->monitor_get(current, obj); +} + +bool LightweightSynchronizer::contains_monitor(Thread* current, ObjectMonitor* monitor) { + assert(UseObjectMonitorTable, "must be"); + return _omworld->contains_monitor(current, monitor); +} + +bool LightweightSynchronizer::quick_enter(oop obj, JavaThread* current, BasicLock* lock) { + assert(current->thread_state() == _thread_in_Java, "must be"); + assert(obj != nullptr, "must be"); + NoSafepointVerifier nsv; + + CacheSetter cache_setter(current, lock); + + LockStack& lock_stack = current->lock_stack(); + if (lock_stack.is_full()) { + // Always go into runtime if the lock stack is full. + return false; + } + + if (lock_stack.try_recursive_enter(obj)) { + // Recursively fast locked + return true; + } + + const markWord mark = obj->mark(); + + if (mark.is_unlocked()) { + markWord locked_mark = mark.set_fast_locked(); + if (obj->cas_set_mark(locked_mark, mark) == mark) { + // Successfully fast-locked, push object to lock-stack and return. + lock_stack.push(obj); + return true; + } + } + + if (mark.has_monitor()) { + ObjectMonitor* const monitor = UseObjectMonitorTable ? current->om_get_from_monitor_cache(obj) : + ObjectSynchronizer::read_monitor(mark); + + if (monitor == nullptr) { + // Take the slow-path on a cache miss. + return false; + } + + if (monitor->try_enter(current)) { + // ObjectMonitor enter successful. + cache_setter.set_monitor(monitor); + return true; + } + } + + // Slow-path. + return false; +} diff --git a/src/hotspot/share/runtime/lightweightSynchronizer.hpp b/src/hotspot/share/runtime/lightweightSynchronizer.hpp new file mode 100644 index 00000000000..c98195c9300 --- /dev/null +++ b/src/hotspot/share/runtime/lightweightSynchronizer.hpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates.
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_LIGHTWEIGHTSYNCHRONIZER_HPP +#define SHARE_RUNTIME_LIGHTWEIGHTSYNCHRONIZER_HPP + +#include "memory/allStatic.hpp" +#include "runtime/thread.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/synchronizer.hpp" + +class ObjectMonitorWorld; + +class LightweightSynchronizer : AllStatic { +private: + static ObjectMonitorWorld* _omworld; + + static ObjectMonitor* get_or_insert_monitor_from_table(oop object, JavaThread* current, bool try_read, bool* inserted); + static ObjectMonitor* get_or_insert_monitor(oop object, JavaThread* current, const ObjectSynchronizer::InflateCause cause, bool try_read); + + static ObjectMonitor* add_monitor(JavaThread* current, ObjectMonitor* monitor, oop obj); + static bool remove_monitor(Thread* current, oop obj, ObjectMonitor* monitor); + + static void deflate_mark_word(oop object); + + static void ensure_lock_stack_space(JavaThread* current); + + class CacheSetter; + class LockStackInflateContendedLocks; + class VerifyThreadState; + + public: + static void initialize(); + + static bool needs_resize(); + static bool resize_table(JavaThread* current); + +private: + static bool fast_lock_spin_enter(oop obj, JavaThread* current, bool observed_deflation); + +public: + static void enter_for(Handle obj, BasicLock* lock, JavaThread* locking_thread); + static void enter(Handle obj, BasicLock* lock, JavaThread* current); + static void exit(oop object, JavaThread* current); + + static ObjectMonitor* inflate_into_object_header(Thread* current, JavaThread* inflating_thread, oop object, const ObjectSynchronizer::InflateCause cause); + static ObjectMonitor* inflate_locked_or_imse(oop object, const ObjectSynchronizer::InflateCause cause, TRAPS); + static ObjectMonitor* inflate_fast_locked_object(oop object, JavaThread* locking_thread, JavaThread* current, const ObjectSynchronizer::InflateCause cause); + static ObjectMonitor* inflate_and_enter(oop object, JavaThread* locking_thread, JavaThread* current, const ObjectSynchronizer::InflateCause cause); + + static void deflate_monitor(Thread* current, oop obj, ObjectMonitor* monitor); + + static ObjectMonitor* get_monitor_from_table(Thread* current, oop obj); + + static bool contains_monitor(Thread* current, ObjectMonitor* monitor); + + static bool quick_enter(oop obj, JavaThread* current, BasicLock* Lock); +}; + +#endif // SHARE_RUNTIME_LIGHTWEIGHTSYNCHRONIZER_HPP diff --git a/src/hotspot/share/runtime/lockStack.cpp b/src/hotspot/share/runtime/lockStack.cpp new file mode 100644 index 
00000000000..541eda01df7 --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "runtime/globals.hpp" +#include "runtime/lockStack.inline.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/stackWatermark.hpp" +#include "runtime/stackWatermarkSet.inline.hpp" +#include "runtime/synchronizer.inline.hpp" +#include "runtime/thread.hpp" +#include "utilities/copy.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" +#include "utilities/sizes.hpp" + +#include <type_traits> + +const int LockStack::lock_stack_offset = in_bytes(JavaThread::lock_stack_offset()); +const int LockStack::lock_stack_top_offset = in_bytes(JavaThread::lock_stack_top_offset()); +const int LockStack::lock_stack_base_offset = in_bytes(JavaThread::lock_stack_base_offset()); + +LockStack::LockStack(JavaThread* jt) : + _top(lock_stack_base_offset), _base() { + // Make sure the layout of the object is compatible with the emitted code's assumptions.
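// In particular (as the asserts below spell out): each entry is exactly one
// oop-sized slot, the type is standard-layout so the offsets used from
// generated code are well defined, and _bad_oop_sentinel sits immediately
// below _base[0], so a read one slot below the first entry can never compare
// equal to a real oop and the emitted code can skip an explicit underflow check.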
+ STATIC_ASSERT(sizeof(_bad_oop_sentinel) == oopSize); + STATIC_ASSERT(sizeof(_base[0]) == oopSize); + STATIC_ASSERT(std::is_standard_layout<LockStack>::value); + STATIC_ASSERT(offsetof(LockStack, _bad_oop_sentinel) == offsetof(LockStack, _base) - oopSize); +#ifdef ASSERT + for (int i = 0; i < CAPACITY; i++) { + _base[i] = NULL; + } +#endif +} + +uint32_t LockStack::start_offset() { + int offset = lock_stack_base_offset; + assert(offset > 0, "must be positive offset"); + return static_cast<uint32_t>(offset); +} + +uint32_t LockStack::end_offset() { + int offset = lock_stack_base_offset + CAPACITY * oopSize; + assert(offset > 0, "must be positive offset"); + return static_cast<uint32_t>(offset); +} + +#ifndef PRODUCT +void LockStack::verify(const char* msg) const { + assert(LockingMode == LM_LIGHTWEIGHT, "never use lock-stack when light weight locking is disabled"); + assert((_top <= end_offset()), "lockstack overflow: _top %d end_offset %d", _top, end_offset()); + assert((_top >= start_offset()), "lockstack underflow: _top %d start_offset %d", _top, start_offset()); + if (SafepointSynchronize::is_at_safepoint() || (Thread::current()->is_Java_thread() && is_owning_thread())) { + int top = to_index(_top); + for (int i = 0; i < top; i++) { + assert(_base[i] != NULL, "no zapped before top"); + if (VM_Version::supports_recursive_lightweight_locking()) { + oop o = _base[i]; + for (; i < top - 1; i++) { + // Consecutive entries may be the same + if (_base[i + 1] != o) { + break; + } + } + } + + for (int j = i + 1; j < top; j++) { + assert(_base[i] != _base[j], "entries must be unique: %s", msg); + } + } + for (int i = top; i < CAPACITY; i++) { + assert(_base[i] == NULL, "only zapped entries after top: i: %d, top: %d, entry: " PTR_FORMAT, i, top, p2i(_base[i])); + } + } +} +#endif + +void LockStack::print_on(outputStream* st) { + for (int i = to_index(_top); (--i) >= 0;) { + st->print("LockStack[%d]: ", i); + oop o = _base[i]; + if (oopDesc::is_oop(o)) { + o->print_on(st); + } else { + st->print_cr("not an oop: " PTR_FORMAT, p2i(o)); + } + } +} + +OMCache::OMCache(JavaThread* jt) : _entries() { + STATIC_ASSERT(std::is_standard_layout<OMCache>::value); + STATIC_ASSERT(std::is_standard_layout<OMCacheEntry>::value); + STATIC_ASSERT(offsetof(OMCache, _null_sentinel) == offsetof(OMCache, _entries) + + offsetof(OMCache::OMCacheEntry, _oop) + + OMCache::CAPACITY * in_bytes(oop_to_oop_difference())); +} diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp new file mode 100644 index 00000000000..9085f1628cc --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.hpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_LOCKSTACK_HPP +#define SHARE_RUNTIME_LOCKSTACK_HPP + +#include "oops/oopsHierarchy.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/sizes.hpp" + +class JavaThread; +class ObjectMonitor; +class OopClosure; +class outputStream; +class Thread; +template +class GrowableArray; + +class LockStack { + friend class LockStackTest; + friend class VMStructs; + JVMCI_ONLY(friend class JVMCIVMStructs;) +public: + static const int CAPACITY = 8; +private: + + // TODO: It would be very useful if JavaThread::lock_stack_offset() and friends were constexpr, + // but this is currently not the case because we're using offset_of() which is non-constexpr, + // GCC would warn about non-standard-layout types if we were using offsetof() (which *is* constexpr). + static const int lock_stack_offset; + static const int lock_stack_top_offset; + static const int lock_stack_base_offset; + + // The offset of the next element, in bytes, relative to the JavaThread structure. + // We do this instead of a simple index into the array because this allows for + // efficient addressing in generated code. + uint32_t _top; + // The _bad_oop_sentinel acts as a sentinel value to elide underflow checks in generated code. + // The correct layout is statically asserted in the constructor. + const uintptr_t _bad_oop_sentinel = badOopVal; + oop _base[CAPACITY]; + + // Get the owning thread of this lock-stack. + inline JavaThread* get_thread() const; + + // Tests if the calling thread is the thread that owns this lock-stack. + bool is_owning_thread() const; + + // Verifies consistency of the lock-stack. + void verify(const char* msg) const PRODUCT_RETURN; + + // Given an offset (in bytes) calculate the index into the lock-stack. + static inline int to_index(uint32_t offset); + +public: + static ByteSize top_offset() { return byte_offset_of(LockStack, _top); } + static ByteSize base_offset() { return byte_offset_of(LockStack, _base); } + + LockStack(JavaThread* jt); + + // The boundary indicies of the lock-stack. + static uint32_t start_offset(); + static uint32_t end_offset(); + + // Returns true if the lock-stack is full. False otherwise. + inline bool is_full() const; + + // Pushes an oop on this lock-stack. + inline void push(oop o); + + // Get the oldest oop from this lock-stack. + // Precondition: This lock-stack must not be empty. + inline oop bottom() const; + + // Is the lock-stack empty. + inline bool is_empty() const; + + // Check if object is recursive. + // Precondition: This lock-stack must contain the oop. + inline bool is_recursive(oop o) const; + + // Try recursive enter. + // Precondition: This lock-stack must not be full. + inline bool try_recursive_enter(oop o); + + // Try recursive exit. + // Precondition: This lock-stack must contain the oop. + inline bool try_recursive_exit(oop o); + + // Removes an oop from an arbitrary location of this lock-stack. + // Precondition: This lock-stack must contain the oop. + // Returns the number of oops removed. + inline size_t remove(oop o); + + // Tests whether the oop is on this lock-stack. 
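// (A linear scan over at most CAPACITY == 8 entries; the inline implementation
// additionally asserts that stack watermark processing has started for the
// owning thread before it reads the entries.)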
+ inline bool contains(oop o) const; + + // GC support + inline void oops_do(OopClosure* cl); + + // Printing + void print_on(outputStream* st); +}; + +class OMCache { + friend class VMStructs; +public: + static constexpr int CAPACITY = 8; + +private: + struct OMCacheEntry { + oop _oop = nullptr; + ObjectMonitor* _monitor = nullptr; + } _entries[CAPACITY]; + const oop _null_sentinel = nullptr; + +public: + static ByteSize entries_offset() { return byte_offset_of(OMCache, _entries); } + static constexpr ByteSize oop_to_oop_difference() { return in_ByteSize(sizeof(OMCacheEntry)); } + static constexpr ByteSize oop_to_monitor_difference() { return in_ByteSize(sizeof(oop)); } + + explicit OMCache(JavaThread* jt); + + inline ObjectMonitor* get_monitor(oop o); + inline void set_monitor(ObjectMonitor* monitor); + inline void clear(); + +}; + +#endif // SHARE_RUNTIME_LOCKSTACK_HPP diff --git a/src/hotspot/share/runtime/lockStack.inline.hpp b/src/hotspot/share/runtime/lockStack.inline.hpp new file mode 100644 index 00000000000..edfa07b3451 --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.inline.hpp @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_RUNTIME_LOCKSTACK_INLINE_HPP +#define SHARE_RUNTIME_LOCKSTACK_INLINE_HPP + +#include "runtime/lockStack.hpp" + +#include "memory/iterator.hpp" +#include "runtime/lightweightSynchronizer.hpp" +#include "runtime/objectMonitor.inline.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/stackWatermark.hpp" +#include "runtime/stackWatermarkSet.inline.hpp" +#include "runtime/thread.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/align.hpp" +#include "utilities/globalDefinitions.hpp" + +inline int LockStack::to_index(uint32_t offset) { + assert(is_aligned(offset, oopSize), "Bad alignment: %u", offset); + assert((offset <= end_offset()), "lockstack overflow: offset %d end_offset %d", offset, end_offset()); + assert((offset >= start_offset()), "lockstack underflow: offset %d start_offset %d", offset, start_offset()); + return (offset - lock_stack_base_offset) / oopSize; +} + +JavaThread* LockStack::get_thread() const { + char* addr = reinterpret_cast(const_cast(this)); + return reinterpret_cast(addr - lock_stack_offset); +} + +inline bool LockStack::is_full() const { + return to_index(_top) == CAPACITY; +} + +inline bool LockStack::is_owning_thread() const { + Thread* current = Thread::current(); + if (current->is_Java_thread()) { + JavaThread* thread = current->as_Java_thread(); + bool is_owning = &thread->lock_stack() == this; + assert(is_owning == (get_thread() == thread), "is_owning sanity"); + return is_owning; + } + return false; +} + +inline void LockStack::push(oop o) { + verify("pre-push"); + assert(oopDesc::is_oop(o), "must be"); + assert(!contains(o), "entries must be unique"); + assert(!is_full(), "must have room"); + assert(_base[to_index(_top)] == NULL, "expect zapped entry"); + _base[to_index(_top)] = o; + _top += oopSize; + verify("post-push"); +} + +inline oop LockStack::bottom() const { + assert(to_index(_top) > 0, "must contain an oop"); + return _base[0]; +} + +inline bool LockStack::is_empty() const { + return to_index(_top) == 0; +} + +inline bool LockStack::is_recursive(oop o) const { + if (!VM_Version::supports_recursive_lightweight_locking()) { + return false; + } + verify("pre-is_recursive"); + + // This will succeed iff there is a consecutive run of oops on the + // lock-stack with a length of at least 2. + + assert(contains(o), "at least one entry must exist"); + int end = to_index(_top); + // Start iterating from the top because the runtime code is more + // interested in the balanced locking case when the top oop on the + // lock-stack matches o. This will cause the for loop to break out + // in the first loop iteration if it is non-recursive. + for (int i = end - 1; i > 0; i--) { + if (_base[i - 1] == o && _base[i] == o) { + verify("post-is_recursive"); + return true; + } + if (_base[i] == o) { + // o can only occur in one consecutive run on the lock-stack. + // Only one of the two oops checked matched o, so this run + // must be of length 1 and thus not be recursive. Stop the search. + break; + } + } + + verify("post-is_recursive"); + return false; +} + +inline bool LockStack::try_recursive_enter(oop o) { + if (!VM_Version::supports_recursive_lightweight_locking()) { + return false; + } + verify("pre-try_recursive_enter"); + + // This will succeed iff the top oop on the stack matches o. + // When successful o will be pushed to the lock-stack creating + // a consecutive run at least 2 oops that matches o on top of + // the lock-stack. 
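// Worked example (illustrative only), with oops A and B and an initially
// empty lock-stack:
//
//   push(A)                 -> [A]
//   try_recursive_enter(A)  -> true,  [A, A]      (top matched A)
//   push(B)                 -> [A, A, B]
//   try_recursive_enter(A)  -> false, [A, A, B]   (top is B, not A)
//   try_recursive_exit(B)   -> false              (only a single B on top)
//   is_recursive(A)         -> true               (the A, A run still exists)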
+ + assert(!is_full(), "precond"); + + int end = to_index(_top); + if (end == 0 || _base[end - 1] != o) { + // Topmost oop does not match o. + verify("post-try_recursive_enter"); + return false; + } + + _base[end] = o; + _top += oopSize; + verify("post-try_recursive_enter"); + return true; +} + +inline bool LockStack::try_recursive_exit(oop o) { + if (!VM_Version::supports_recursive_lightweight_locking()) { + return false; + } + verify("pre-try_recursive_exit"); + + // This will succeed iff the top two oops on the stack matches o. + // When successful the top oop will be popped of the lock-stack. + // When unsuccessful the lock may still be recursive, in which + // case the locking is unbalanced. This case is handled externally. + + assert(contains(o), "entries must exist"); + + int end = to_index(_top); + if (end <= 1 || _base[end - 1] != o || _base[end - 2] != o) { + // The two topmost oops do not match o. + verify("post-try_recursive_exit"); + return false; + } + + _top -= oopSize; + DEBUG_ONLY(_base[to_index(_top)] = nullptr;) + verify("post-try_recursive_exit"); + return true; +} + +inline size_t LockStack::remove(oop o) { + verify("pre-remove"); + assert(contains(o), "entry must be present: " PTR_FORMAT, p2i(o)); + + int end = to_index(_top); + int inserted = 0; + for (int i = 0; i < end; i++) { + if (_base[i] != o) { + if (inserted != i) { + _base[inserted] = _base[i]; + } + inserted++; + } + } + +#ifdef ASSERT + for (int i = inserted; i < end; i++) { + _base[i] = nullptr; + } +#endif + + uint32_t removed = end - inserted; + _top -= removed * oopSize; + assert(!contains(o), "entry must have been removed: " PTR_FORMAT, p2i(o)); + verify("post-remove"); + return removed; +} + +inline bool LockStack::contains(oop o) const { + verify("pre-contains"); + + // Can't poke around in thread oops without having started stack watermark processing. + assert(StackWatermarkSet::processing_started(get_thread()), "Processing must have started!"); + + int end = to_index(_top); + for (int i = end - 1; i >= 0; i--) { + if (_base[i] == o) { + verify("post-contains"); + return true; + } + } + verify("post-contains"); + return false; +} + +inline void LockStack::oops_do(OopClosure* cl) { + verify("pre-oops-do"); + int end = to_index(_top); + for (int i = 0; i < end; i++) { + cl->do_oop(&_base[i]); + } + verify("post-oops-do"); +} + +inline void OMCache::set_monitor(ObjectMonitor *monitor) { + const int end = OMCache::CAPACITY - 1; + + oop obj = monitor->object_peek(); + assert(obj != nullptr, "must be alive"); + assert(monitor == LightweightSynchronizer::get_monitor_from_table(JavaThread::current(), obj), "must be exist in table"); + + OMCacheEntry to_insert = {obj, monitor}; + + for (int i = 0; i < end; ++i) { + if (_entries[i]._oop == obj || + _entries[i]._monitor == nullptr || + _entries[i]._monitor->is_being_async_deflated()) { + // Use stale slot. + _entries[i] = to_insert; + return; + } + // Swap with the most recent value. 
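// (Net effect when no reusable slot is found: the new entry ends up in slot 0,
// the existing entries shift one slot toward the end, and the entry that was in
// the last slot is evicted, i.e. the cache keeps a small most-recently-set order.)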
+ ::swap(to_insert, _entries[i]); + } + _entries[end] = to_insert; +} + +inline ObjectMonitor* OMCache::get_monitor(oop o) { + for (int i = 0; i < CAPACITY; ++i) { + if (_entries[i]._oop == o) { + assert(_entries[i]._monitor != nullptr, "monitor must exist"); + if (_entries[i]._monitor->is_being_async_deflated()) { + // Bad monitor + // Shift down rest + for (; i < CAPACITY - 1; ++i) { + _entries[i] = _entries[i + 1]; + } + // Clear end + _entries[i] = {}; + return nullptr; + } + return _entries[i]._monitor; + } + } + return nullptr; +} + +inline void OMCache::clear() { + for (size_t i = 0 , r = 0; i < CAPACITY; ++i) { + // Clear + _entries[i] = {}; + } +} + +#endif // SHARE_RUNTIME_LOCKSTACK_INLINE_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index 08dc4033471..36820c745ae 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -1,7 +1,7 @@ // This project is a modified version of OpenJDK, licensed under GPL v2. // Modifications Copyright (C) 2025 ByteDance Inc. /* - * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,8 +41,10 @@ #include "prims/jvmtiDeferredUpdates.hpp" #include "prims/jvmtiExport.hpp" #include "runtime/atomic.hpp" +#include "runtime/globals.hpp" #include "runtime/handles.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" +#include "runtime/lightweightSynchronizer.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/objectMonitor.inline.hpp" @@ -52,9 +54,12 @@ #include "runtime/safefetch.inline.hpp" #include "runtime/safepointMechanism.inline.hpp" #include "runtime/sharedRuntime.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "services/threadService.hpp" +#include "utilities/debug.hpp" #include "utilities/dtrace.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/macros.hpp" #include "utilities/preserveException.hpp" #if INCLUDE_JFR @@ -259,7 +264,7 @@ static void check_object_context() { } ObjectMonitor::ObjectMonitor(oop object) : - _header(markWord::zero()), + _metadata(0), _object(_oop_storage, object), _owner(NULL), _previous_owner_tid(0), @@ -289,13 +294,6 @@ oop ObjectMonitor::object() const { return _object.resolve(); } -oop ObjectMonitor::object_peek() const { - if (_object.is_null()) { - return NULL; - } - return _object.peek(); -} - void ObjectMonitor::ExitOnSuspend::operator()(JavaThread* current) { if (current->is_suspended()) { _om->_recursions = 0; @@ -317,33 +315,91 @@ void ObjectMonitor::ClearSuccOnSuspend::operator()(JavaThread* current) { } } +#define assert_mark_word_consistency() \ + assert(UseObjectMonitorTable || object()->mark() == markWord::encode(this), \ + "object mark must match encoded this: mark=" INTPTR_FORMAT \ + ", encoded this=" INTPTR_FORMAT, object()->mark().value(), \ + markWord::encode(this).value()); + // ----------------------------------------------------------------------------- // Enter support #if HOTSPOT_TARGET_CLASSLIB == 8 -bool ObjectMonitor::try_enter(Thread* current) { - if (current != _owner) { - if (current->is_lock_owned ((address)_owner)) { - assert(_recursions == 0, "internal state error"); - _owner = current ; - _recursions = 1 ; - return true; - } - 
if (Atomic::cmpxchg (&_owner, (void*)(NULL), (void*)(current)) != NULL) { - return false; +bool ObjectMonitor::enter_is_async_deflating() { + if (is_being_async_deflated()) { + if (!UseObjectMonitorTable) { + const oop l_object = object(); + if (l_object != nullptr) { + // Attempt to restore the header/dmw to the object's header so that + // we only retry once if the deflater thread happens to be slow. + install_displaced_markword_in_object(l_object); + } } return true; - } else { + } + + return false; +} + +void ObjectMonitor::enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark) { + // Used by ObjectSynchronizer::enter_for to enter for another thread. + // The monitor is private to or already owned by locking_thread which must be suspended. + // So this code may only contend with deflation. + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); + assert(contention_mark._monitor == this, "must be"); + assert(!is_being_async_deflated(), "must be"); + + void* prev_owner = try_set_owner_from(nullptr, locking_thread); + + bool success = false; + if (prev_owner == nullptr) { + assert(_recursions == 0, "invariant"); + success = true; + } else if (prev_owner == locking_thread) { _recursions++; - return true; + success = true; + } else if (prev_owner == DEFLATER_MARKER) { + // Racing with deflation. + prev_owner = try_set_owner_from(DEFLATER_MARKER, locking_thread); + if (prev_owner == DEFLATER_MARKER) { + // Cancelled deflation. Increment contentions as part of the deflation protocol. + add_to_contentions(1); + success = true; + } else if (prev_owner == nullptr) { + // At this point we cannot race with deflation as we have both incremented + // contentions, seen contention > 0 and seen a DEFLATER_MARKER. + // success will only be false if this races with something other than + // deflation. + prev_owner = try_set_owner_from(nullptr, locking_thread); + success = prev_owner == nullptr; + } + } else if (LockingMode == LM_LEGACY && locking_thread->is_lock_owned((address)prev_owner)) { + assert(_recursions == 0, "must be"); + _recursions = 1; + set_owner_from_BasicLock(prev_owner, locking_thread); + success = true; } + assert(success, "Failed to enter_for: locking_thread=" INTPTR_FORMAT + ", this=" INTPTR_FORMAT "{owner=" INTPTR_FORMAT "}, observed owner: " INTPTR_FORMAT, + p2i(locking_thread), p2i(this), p2i(owner_raw()), p2i(prev_owner)); } -#endif -bool ObjectMonitor::enter(JavaThread* current) { - // The following code is ordered to check the most common cases first - // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors. +bool ObjectMonitor::enter_for(JavaThread* locking_thread) { + + // Block out deflation as soon as possible. + ObjectMonitorContentionMark contention_mark(this); + // Check for deflation. + if (enter_is_async_deflating()) { + return false; + } + + enter_for_with_contention_mark(locking_thread, contention_mark); + assert(owner_raw() == locking_thread, "must be"); + return true; +} + +bool ObjectMonitor::try_enter(JavaThread* current) { void* cur = try_set_owner_from(NULL, current); if (cur == NULL) { assert(_recursions == 0, "invariant"); @@ -351,35 +407,56 @@ bool ObjectMonitor::enter(JavaThread* current) { } if (cur == current) { - // TODO-FIXME: check for integer overflow! BUGID 6557169. 
_recursions++; return true; } - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { assert(_recursions == 0, "internal state error"); _recursions = 1; set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. return true; } + return false; +} + +bool ObjectMonitor::spin_enter(JavaThread* current) { + assert(current == JavaThread::current(), "must be"); + + // Check for recursion. + if (try_enter(current)) { + return true; + } + + // Check for deflation. + if (enter_is_async_deflating()) { + return false; + } + // We've encountered genuine contention. assert(current->_Stalled == 0, "invariant"); current->_Stalled = intptr_t(this); - // Try one round of spinning *before* enqueueing current - // and before going through the awkward and expensive state - // transitions. The following spin is strictly optional ... + // Do one round of spinning. // Note that if we acquire the monitor from an initial spin // we forgo posting JVMTI events and firing DTRACE probes. if (TrySpin(current) > 0) { assert(owner_raw() == current, "must be current: owner=" INTPTR_FORMAT, p2i(owner_raw())); assert(_recursions == 0, "must be 0: recursions=" INTX_FORMAT, _recursions); - assert(object()->mark() == markWord::encode(this), - "object mark must match encoded this: mark=" INTPTR_FORMAT - ", encoded this=" INTPTR_FORMAT, object()->mark().value(), - markWord::encode(this).value()); current->_Stalled = 0; + assert_mark_word_consistency(); + return true; + } + + return false; +} +#endif + +bool ObjectMonitor::enter(JavaThread* current) { + assert(current == JavaThread::current(), "must be"); + + if (spin_enter(current)) { return true; } @@ -388,23 +465,25 @@ bool ObjectMonitor::enter(JavaThread* current) { assert(!SafepointSynchronize::is_at_safepoint(), "invariant"); assert(current->thread_state() != _thread_blocked, "invariant"); - // Keep track of contention for JVM/TI and M&M queries. - add_to_contentions(1); - if (is_being_async_deflated()) { - // Async deflation is in progress and our contentions increment - // above lost the race to async deflation. Undo the work and - // force the caller to retry. - const oop l_object = object(); - if (l_object != NULL) { - // Attempt to restore the header/dmw to the object's header so that - // we only retry once if the deflater thread happens to be slow. - install_displaced_markword_in_object(l_object); - } - current->_Stalled = 0; - add_to_contentions(-1); + // Keep is_being_async_deflated stable across the rest of enter + ObjectMonitorContentionMark contention_mark(this); + + // Check for deflation. + if (enter_is_async_deflating()) { return false; } + // At this point this ObjectMonitor cannot be deflated, finish contended enter + enter_with_contention_mark(current, contention_mark); + return true; +} + +void ObjectMonitor::enter_with_contention_mark(JavaThread *current, ObjectMonitorContentionMark &cm) { + assert(current == JavaThread::current(), "must be"); + assert(owner_raw() != current, "must be"); + assert(cm._monitor == this, "must be"); + assert(!is_being_async_deflated(), "must be"); + JFR_ONLY(JfrConditionalFlushWithStacktrace flush(current);) EventJavaMonitorEnter event; if (event.is_started()) { @@ -461,7 +540,6 @@ bool ObjectMonitor::enter(JavaThread* current) { // the monitor free and clear. 
} - add_to_contentions(-1); assert(contentions() >= 0, "must not be negative: contentions=%d", contentions()); current->_Stalled = 0; @@ -469,7 +547,7 @@ bool ObjectMonitor::enter(JavaThread* current) { assert(_recursions == 0, "invariant"); assert(owner_raw() == current, "invariant"); assert(_succ != current, "invariant"); - assert(object()->mark() == markWord::encode(this), "invariant"); + assert_mark_word_consistency(); // The thread -- now the owner -- is back in vm mode. // Report the glorious news via TI,DTrace and jvmstat. @@ -498,7 +576,6 @@ bool ObjectMonitor::enter(JavaThread* current) { event.commit(); } OM_PERFDATA_OP(ContendedLockAttempts, inc()); - return true; } // Caveat: TryLock() is not necessarily serializing if it returns failure. @@ -531,22 +608,12 @@ int ObjectMonitor::TryLock(JavaThread* current) { // (contentions < 0) // Contending threads that see that condition know to retry their operation. // -bool ObjectMonitor::deflate_monitor() { +bool ObjectMonitor::deflate_monitor(Thread* current) { if (is_busy()) { // Easy checks are first - the ObjectMonitor is busy so no deflation. return false; } - if (ObjectSynchronizer::is_final_audit() && owner_is_DEFLATER_MARKER()) { - // The final audit can see an already deflated ObjectMonitor on the - // in-use list because MonitorList::unlink_deflated() might have - // blocked for the final safepoint before unlinking all the deflated - // monitors. - assert(contentions() < 0, "must be negative: contentions=%d", contentions()); - // Already returned 'true' when it was originally deflated. - return false; - } - const oop obj = object_peek(); if (obj == NULL) { @@ -612,9 +679,15 @@ bool ObjectMonitor::deflate_monitor() { p2i(obj), obj->mark().value(), obj->klass()->external_name()); } + } - // Install the old mark word if nobody else has already done it. - install_displaced_markword_in_object(obj); + if (UseObjectMonitorTable) { + LightweightSynchronizer::deflate_monitor(current, obj, this); + } else { + if (obj != nullptr) { + // Install the old mark word if nobody else has already done it. + install_displaced_markword_in_object(obj); + } } // We leave owner == DEFLATER_MARKER and contentions < 0 @@ -628,6 +701,7 @@ bool ObjectMonitor::deflate_monitor() { // monitor and by other threads that have detected a race with the // deflation process. void ObjectMonitor::install_displaced_markword_in_object(const oop obj) { + assert(!UseObjectMonitorTable, "Lightweight has no dmw"); // This function must only be called when (owner == DEFLATER_MARKER // && contentions <= 0), but we can't guarantee that here because // those values could change when the ObjectMonitor gets moved from @@ -961,12 +1035,11 @@ void ObjectMonitor::EnterI(JavaThread* current) { void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) { assert(current != NULL, "invariant"); + assert(current->thread_state() != _thread_blocked, "invariant"); assert(currentNode != NULL, "invariant"); assert(currentNode->_thread == current, "invariant"); assert(_waiters > 0, "invariant"); - assert(object()->mark() == markWord::encode(this), "invariant"); - - assert(current->thread_state() != _thread_blocked, "invariant"); + assert_mark_word_consistency(); int nWakeups = 0; for (;;) { @@ -1023,7 +1096,7 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) { // In addition, current.TState is stable. 
assert(owner_raw() == current, "invariant"); - assert(object()->mark() == markWord::encode(this), "invariant"); + assert_mark_word_consistency(); UnlinkAfterAcquire(current, currentNode); if (_succ == current) _succ = NULL; assert(_succ != current, "invariant"); @@ -1157,7 +1230,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { void* cur = owner_raw(); if (current != cur) { - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { assert(_recursions == 0, "invariant"); set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. _recursions = 0; @@ -1377,7 +1450,7 @@ intx ObjectMonitor::complete_exit(JavaThread* current) { void* cur = owner_raw(); if (current != cur) { - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { assert(_recursions == 0, "internal state error"); set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. _recursions = 0; @@ -1426,10 +1499,11 @@ bool ObjectMonitor::reenter(intx recursions, JavaThread* current) { bool ObjectMonitor::check_owner(TRAPS) { JavaThread* current = THREAD; void* cur = owner_raw(); + assert(cur != anon_owner_ptr(), "no anon owner here"); if (cur == current) { return true; } - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. _recursions = 0; return true; @@ -1670,7 +1744,7 @@ void ObjectMonitor::wait(jlong millis, bool interruptible, TRAPS) { // Verify a few postconditions assert(owner_raw() == current, "invariant"); assert(_succ != current, "invariant"); - assert(object()->mark() == markWord::encode(this), "invariant"); + assert_mark_word_consistency(); // check if the notification happened if (!WasNotified) { @@ -1899,10 +1973,6 @@ int ObjectMonitor::TrySpin(JavaThread* current) { ctr = _SpinDuration; if (ctr <= 0) return 0; - if (NotRunnable(current, (JavaThread*) owner_raw())) { - return 0; - } - // We're good to spin ... spin ingress. // CONSIDER: use Prefetch::write() to avoid RTS->RTO upgrades // when preparing to LD...CAS _owner, etc and the CAS is likely @@ -1985,13 +2055,6 @@ int ObjectMonitor::TrySpin(JavaThread* current) { } prv = ox; - // Abort the spin if the owner is not executing. - // The owner must be executing in order to drop the lock. - // Spinning while the owner is OFFPROC is idiocy. - // Consider: ctr -= RunnablePenalty ; - if (NotRunnable(current, ox)) { - goto Abort; - } if (_succ == NULL) { _succ = current; } @@ -2024,62 +2087,6 @@ int ObjectMonitor::TrySpin(JavaThread* current) { return 0; } -// NotRunnable() -- informed spinning -// -// Don't bother spinning if the owner is not eligible to drop the lock. -// Spin only if the owner thread is _thread_in_Java or _thread_in_vm. -// The thread must be runnable in order to drop the lock in timely fashion. -// If the _owner is not runnable then spinning will not likely be -// successful (profitable). -// -// Beware -- the thread referenced by _owner could have died -// so a simply fetch from _owner->_thread_state might trap. -// Instead, we use SafeFetchXX() to safely LD _owner->_thread_state. -// Because of the lifecycle issues, the _thread_state values -// observed by NotRunnable() might be garbage. 
NotRunnable must -// tolerate this and consider the observed _thread_state value -// as advisory. -// -// Beware too, that _owner is sometimes a BasicLock address and sometimes -// a thread pointer. -// Alternately, we might tag the type (thread pointer vs basiclock pointer) -// with the LSB of _owner. Another option would be to probabilistically probe -// the putative _owner->TypeTag value. -// -// Checking _thread_state isn't perfect. Even if the thread is -// in_java it might be blocked on a page-fault or have been preempted -// and sitting on a ready/dispatch queue. -// -// The return value from NotRunnable() is *advisory* -- the -// result is based on sampling and is not necessarily coherent. -// The caller must tolerate false-negative and false-positive errors. -// Spinning, in general, is probabilistic anyway. - - -int ObjectMonitor::NotRunnable(JavaThread* current, JavaThread* ox) { - // Check ox->TypeTag == 2BAD. - if (ox == NULL) return 0; - - // Avoid transitive spinning ... - // Say T1 spins or blocks trying to acquire L. T1._Stalled is set to L. - // Immediately after T1 acquires L it's possible that T2, also - // spinning on L, will see L.Owner=T1 and T1._Stalled=L. - // This occurs transiently after T1 acquired L but before - // T1 managed to clear T1.Stalled. T2 does not need to abort - // its spin in this circumstance. - intptr_t BlockedOn = SafeFetchN((intptr_t *) &ox->_Stalled, intptr_t(1)); - - if (BlockedOn == 1) return 1; - if (BlockedOn != 0) { - return BlockedOn != intptr_t(this) && owner_raw() == ox; - } - - assert(sizeof(ox->_thread_state == sizeof(int)), "invariant"); - int jst = SafeFetch32((int *) &ox->_thread_state, -1);; - // consider also: jst != _thread_in_Java -- but that's overspecific. - return jst == _thread_blocked || jst == _thread_in_native; -} - // ----------------------------------------------------------------------------- // WaitSet management ... @@ -2224,7 +2231,7 @@ void ObjectMonitor::print() const { print_on(tty); } // Print the ObjectMonitor like a debugger would: // // (ObjectMonitor) 0x00007fdfb6012e40 = { -// _header = 0x0000000000000001 +// _metadata = 0x0000000000000001 // _object = 0x000000070ff45fd0 // _pad_buf0 = { // [0] = '\0' @@ -2254,7 +2261,7 @@ void ObjectMonitor::print() const { print_on(tty); } // void ObjectMonitor::print_debug_style_on(outputStream* st) const { st->print_cr("(ObjectMonitor*) " INTPTR_FORMAT " = {", p2i(this)); - st->print_cr(" _header = " INTPTR_FORMAT, header().value()); + st->print_cr(" _metadata = " INTPTR_FORMAT, _metadata); st->print_cr(" _object = " INTPTR_FORMAT, p2i(object_peek())); st->print_cr(" _pad_buf0 = {"); st->print_cr(" [0] = '\\0'"); diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index fd0fa12581a..1ebc7f47ff9 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -1,7 +1,7 @@ // This project is a modified version of OpenJDK, licensed under GPL v2. // Modifications Copyright (C) 2025 ByteDance Inc. /* - * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,7 @@ #include "runtime/perfDataTypes.hpp" class ObjectMonitor; +class ObjectMonitorContentionMark; // ObjectWaiter serves as a "proxy" or surrogate thread. 
// TODO-FIXME: Eliminate ObjectWaiter and use the thread-specific @@ -71,20 +72,21 @@ class ObjectWaiter : public StackObj { // // ObjectMonitor Layout Overview/Highlights/Restrictions: // -// - The _header field must be at offset 0 because the displaced header +// - The _metadata field must be at offset 0 because the displaced header // from markWord is stored there. We do not want markWord.hpp to include // ObjectMonitor.hpp to avoid exposing ObjectMonitor everywhere. This // means that ObjectMonitor cannot inherit from any other class nor can // it use any virtual member functions. This restriction is critical to // the proper functioning of the VM. -// - The _header and _owner fields should be separated by enough space +// - The _metadata and _owner fields should be separated by enough space // to avoid false sharing due to parallel access by different threads. // This is an advisory recommendation. // - The general layout of the fields in ObjectMonitor is: -// _header +// _metadata // // // _owner +// // // - The VM assumes write ordering and machine word alignment with // respect to the _owner field and the that can @@ -108,20 +110,19 @@ class ObjectWaiter : public StackObj { // in synchronizer.cpp. Also see TEST_VM(SynchronizerTest, sanity) gtest. // // Futures notes: -// - Separating _owner from the by enough space to -// avoid false sharing might be profitable. Given -// http://blogs.oracle.com/dave/entry/cas_and_cache_trivia_invalidate -// we know that the CAS in monitorenter will invalidate the line -// underlying _owner. We want to avoid an L1 data cache miss on that -// same line for monitorexit. Putting these : -// _recursions, _EntryList, _cxq, and _succ, all of which may be -// fetched in the inflated unlock path, on a different cache line -// would make them immune to CAS-based invalidation from the _owner -// field. +// - Separating _owner from the by enough space to +// avoid false sharing might be profitable. Given that the CAS in +// monitorenter will invalidate the line underlying _owner. We want +// to avoid an L1 data cache miss on that same line for monitorexit. +// Putting these : +// _recursions, _EntryList, _cxq, and _succ, all of which may be +// fetched in the inflated unlock path, on a different cache line +// would make them immune to CAS-based invalidation from the _owner +// field. // -// - The _recursions field should be of type int, or int32_t but not -// intptr_t. There's no reason to use a 64-bit type for this field -// in a 64-bit JVM. +// - The _recursions field should be of type int, or int32_t but not +// intptr_t. There's no reason to use a 64-bit type for this field +// in a 64-bit JVM. #ifndef OM_CACHE_LINE_SIZE // Use DEFAULT_CACHE_LINE_SIZE if not already specified for @@ -137,18 +138,37 @@ class ObjectMonitor : public CHeapObj { static OopStorage* _oop_storage; - // The sync code expects the header field to be at offset zero (0). - // Enforced by the assert() in header_addr(). - volatile markWord _header; // displaced object header word - mark + // The sync code expects the metadata field to be at offset zero (0). + // Enforced by the assert() in metadata_addr(). + // * LM_LIGHTWEIGHT with UseObjectMonitorTable: + // Contains the _object's hashCode. 
+ // * LM_LEGACY, LM_MONITOR, LM_LIGHTWEIGHT without UseObjectMonitorTable: + // Contains the displaced object header word - mark + volatile uintptr_t _metadata; // metadata WeakHandle _object; // backward object pointer - // Separate _header and _owner on different cache lines since both can - // have busy multi-threaded access. _header and _object are set at initial + // Separate _metadata and _owner on different cache lines since both can + // have busy multi-threaded access. _metadata and _object are set at initial // inflation. The _object does not change, so it is a good choice to share - // its cache line with _header. - DEFINE_PAD_MINUS_SIZE(0, OM_CACHE_LINE_SIZE, sizeof(volatile markWord) + + // its cache line with _metadata. + DEFINE_PAD_MINUS_SIZE(0, OM_CACHE_LINE_SIZE, sizeof(_metadata) + sizeof(WeakHandle)); - // Used by async deflation as a marker in the _owner field: - #define DEFLATER_MARKER reinterpret_cast<void*>(-1) + // Used by async deflation as a marker in the _owner field. + // Note that the choice of the two markers is peculiar: + // - They need to represent values that cannot be pointers. In particular, + // we achieve this by using the lowest two bits. + // - ANONYMOUS_OWNER should be a small value, it is used in generated code + // and small values encode much better. + // - We test for anonymous owner by testing for the lowest bit, therefore + // DEFLATER_MARKER must *not* have that bit set. + static const uintptr_t DEFLATER_MARKER_VALUE = 2; + #define DEFLATER_MARKER reinterpret_cast<void*>(DEFLATER_MARKER_VALUE) +public: + // NOTE: Typed as uintptr_t so that we can pick it up in SA, via vmStructs. + static const uintptr_t ANONYMOUS_OWNER = 1; + +private: + static void* anon_owner_ptr() { return reinterpret_cast<void*>(ANONYMOUS_OWNER); } + void* volatile _owner; // pointer to owning thread OR BasicLock volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor // Separate _owner and _next_om on different cache lines since @@ -174,7 +194,7 @@ class ObjectMonitor : public CHeapObj { // along with other fields to determine if an ObjectMonitor can be // deflated. It is also used by the async deflation protocol. See // ObjectMonitor::deflate_monitor(). - protected: + ObjectWaiter* volatile _WaitSet; // LL of threads wait()ing on the monitor volatile jint _waiters; // number of waiting threads private: @@ -207,7 +227,14 @@ class ObjectMonitor : public CHeapObj { // TODO-FIXME: the "offset" routines should return a type of off_t instead of int ... // ByteSize would also be an appropriate type.
- static int header_offset_in_bytes() { return offset_of(ObjectMonitor, _header); } + static ByteSize metadata_offset() { return byte_offset_of(ObjectMonitor, _metadata); } + static ByteSize object_offset() { return byte_offset_of(ObjectMonitor, _object); } + static ByteSize owner_offset() { return byte_offset_of(ObjectMonitor, _owner); } + static ByteSize recursions_offset() { return byte_offset_of(ObjectMonitor, _recursions); } + static ByteSize cxq_offset() { return byte_offset_of(ObjectMonitor, _cxq); } + static ByteSize succ_offset() { return byte_offset_of(ObjectMonitor, _succ); } + static ByteSize EntryList_offset() { return byte_offset_of(ObjectMonitor, _EntryList); } + static int object_offset_in_bytes() { return offset_of(ObjectMonitor, _object); } static int owner_offset_in_bytes() { return offset_of(ObjectMonitor, _owner); } static int recursions_offset_in_bytes() { return offset_of(ObjectMonitor, _recursions); } @@ -229,9 +256,15 @@ class ObjectMonitor : public CHeapObj { #define OM_OFFSET_NO_MONITOR_VALUE_TAG(f) \ ((ObjectMonitor::f ## _offset_in_bytes()) - markWord::monitor_value) - markWord header() const; - volatile markWord* header_addr(); - void set_header(markWord hdr); + uintptr_t metadata() const; + void set_metadata(uintptr_t value); + volatile uintptr_t* metadata_addr(); + + markWord header() const; + void set_header(markWord hdr); + + intptr_t hash() const; + void set_hash(intptr_t hash); bool is_busy() const { // TODO-FIXME: assert _owner == null implies _recursions = 0 @@ -246,8 +279,9 @@ class ObjectMonitor : public CHeapObj { } const char* is_busy_to_string(stringStream* ss); - intptr_t is_entered(JavaThread* current) const; + bool is_entered(JavaThread* current) const; + bool has_owner() const; void* owner() const; // Returns NULL if DEFLATER_MARKER is observed. void* owner_raw() const; // Returns true if owner field == DEFLATER_MARKER and false otherwise. @@ -265,6 +299,18 @@ class ObjectMonitor : public CHeapObj { // _owner field. Returns the prior value of the _owner field. void* try_set_owner_from(void* old_value, void* new_value); + void set_owner_anonymous() { + set_owner_from(NULL, anon_owner_ptr()); + } + + bool is_owner_anonymous() const { + return owner_raw() == anon_owner_ptr(); + } + + void set_owner_from_anonymous(Thread* owner) { + set_owner_from(anon_owner_ptr(), owner); + } + // Simply get _next_om field. ObjectMonitor* next_om() const; // Get _next_om field with acquire semantics. @@ -283,6 +329,7 @@ class ObjectMonitor : public CHeapObj { jint contentions() const; void add_to_contentions(jint value); intx recursions() const { return _recursions; } + void set_recursions(size_t recursions); // JVM/TI GetObjectMonitorUsage() needs this: ObjectWaiter* first_waiter() { return _WaitSet; } @@ -294,6 +341,9 @@ class ObjectMonitor : public CHeapObj { oop object() const; oop object_peek() const; + bool object_is_cleared() const; + bool object_is_dead() const; + bool object_refers_to(oop obj) const; // Returns true if the specified thread owns the ObjectMonitor. Otherwise // returns false and throws IllegalMonitorStateException (IMSE). 
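(Aside: the owner-marker encoding introduced above lends itself to a tiny standalone illustration. The sketch below is not HotSpot code and the helper name is invented; it only mirrors the stated rule that ANONYMOUS_OWNER occupies the lowest bit while DEFLATER_MARKER must keep that bit clear, so a single bit test distinguishes an anonymous owner from both the deflater marker and any real, aligned owner pointer.)

// Standalone sketch only; the values mirror the comments above.
#include <cassert>
#include <cstdint>

static const uintptr_t ANONYMOUS_OWNER_VALUE = 1;  // lowest bit set
static const uintptr_t DEFLATER_MARKER_VALUE = 2;  // lowest bit clear

static bool is_anonymous_owner(void* owner) {
  // The "is the owner anonymous?" test is a single lowest-bit check.
  return (reinterpret_cast<uintptr_t>(owner) & 1) != 0;
}

int main() {
  void* anon      = reinterpret_cast<void*>(ANONYMOUS_OWNER_VALUE);
  void* deflater  = reinterpret_cast<void*>(DEFLATER_MARKER_VALUE);
  long  slot      = 0;
  void* real_like = &slot;  // real thread/BasicLock pointers are word-aligned,
                            // so their two low bits are always clear
  assert(is_anonymous_owner(anon));
  assert(!is_anonymous_owner(deflater));
  assert(!is_anonymous_owner(real_like));
  return 0;
}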
@@ -316,9 +366,17 @@ class ObjectMonitor : public CHeapObj { ClearSuccOnSuspend(ObjectMonitor* om) : _om(om) {} void operator()(JavaThread* current); }; + +#if HOTSPOT_TARGET_CLASSLIB == 8 + bool enter_is_async_deflating(); +#endif public: #if HOTSPOT_TARGET_CLASSLIB == 8 - bool try_enter(Thread* current); + bool try_enter(JavaThread* current); + void enter_for_with_contention_mark(JavaThread* locking_thread, ObjectMonitorContentionMark& contention_mark); + bool enter_for(JavaThread* locking_thread); + bool spin_enter(JavaThread* current); + void enter_with_contention_mark(JavaThread* current, ObjectMonitorContentionMark& contention_mark); #endif bool enter(JavaThread* current); void exit(JavaThread* current, bool not_suspended = true); @@ -345,13 +403,27 @@ class ObjectMonitor : public CHeapObj { void ReenterI(JavaThread* current, ObjectWaiter* current_node); void UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* current_node); int TryLock(JavaThread* current); - int NotRunnable(JavaThread* current, JavaThread* Owner); int TrySpin(JavaThread* current); void ExitEpilog(JavaThread* current, ObjectWaiter* Wakee); // Deflation support - bool deflate_monitor(); + bool deflate_monitor(Thread* current); +private: void install_displaced_markword_in_object(const oop obj); }; +// RAII object to ensure that ObjectMonitor::is_being_async_deflated() is +// stable within the context of this mark. +class ObjectMonitorContentionMark : StackObj { + DEBUG_ONLY(friend class ObjectMonitor;) + + ObjectMonitor* _monitor; + + NONCOPYABLE(ObjectMonitorContentionMark); + +public: + explicit ObjectMonitorContentionMark(ObjectMonitor* monitor); + ~ObjectMonitorContentionMark(); +}; + #endif // SHARE_RUNTIME_OBJECTMONITOR_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp index 09389425733..c8475fd44f3 100644 --- a/src/hotspot/share/runtime/objectMonitor.inline.hpp +++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -29,33 +29,74 @@ #include "logging/log.hpp" #include "oops/access.inline.hpp" +#include "oops/markWord.hpp" #include "runtime/atomic.hpp" +#include "runtime/globals.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/synchronizer.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" -inline intptr_t ObjectMonitor::is_entered(JavaThread* current) const { - void* owner = owner_raw(); - if (current == owner || current->is_lock_owned((address)owner)) { - return 1; +inline bool ObjectMonitor::is_entered(JavaThread* current) const { + if (LockingMode == LM_LIGHTWEIGHT) { + if (is_owner_anonymous()) { + return current->lock_stack().contains(object()); + } else { + return current == owner_raw(); + } + } else { + void* owner = owner_raw(); + if (current == owner || current->is_lock_owned((address)owner)) { + return true; + } } - return 0; + return false; } -inline markWord ObjectMonitor::header() const { - return Atomic::load(&_header); +inline uintptr_t ObjectMonitor::metadata() const { + return Atomic::load(&_metadata); +} + +inline void ObjectMonitor::set_metadata(uintptr_t value) { + Atomic::store(&_metadata, value); } -inline volatile markWord* ObjectMonitor::header_addr() { - return &_header; +inline volatile uintptr_t* ObjectMonitor::metadata_addr() { + // TODO: Re-add the asserts + //STATIC_ASSERT(std::is_standard_layout::value); + //STATIC_ASSERT(offsetof(ObjectMonitor, _metadata) == 0); + return &_metadata; +} + +inline markWord ObjectMonitor::header() const { + assert(!UseObjectMonitorTable, "Lightweight locking with OM table does not use header"); + return markWord(metadata()); } inline void ObjectMonitor::set_header(markWord hdr) { - Atomic::store(&_header, hdr); + assert(!UseObjectMonitorTable, "Lightweight locking with OM table does not use header"); + set_metadata(hdr.value()); +} + +inline intptr_t ObjectMonitor::hash() const { + assert(UseObjectMonitorTable, "Only used by lightweight locking with OM table"); + return metadata(); +} + +inline void ObjectMonitor::set_hash(intptr_t hash) { + assert(UseObjectMonitorTable, "Only used by lightweight locking with OM table"); + set_metadata(hash); } inline jint ObjectMonitor::waiters() const { return _waiters; } +inline bool ObjectMonitor::has_owner() const { + void* owner = owner_raw(); + return owner != NULL && owner != DEFLATER_MARKER; +} + // Returns NULL if DEFLATER_MARKER is observed. inline void* ObjectMonitor::owner() const { void* owner = owner_raw(); @@ -88,6 +129,12 @@ inline void ObjectMonitor::add_to_contentions(jint value) { Atomic::add(&_contentions, value); } +inline void ObjectMonitor::set_recursions(size_t recursions) { + assert(_recursions == 0, "must be"); + assert(has_owner(), "must be owned"); + _recursions = checked_cast(recursions); +} + // Clear _owner field; current value must match old_value. 
inline void ObjectMonitor::release_clear_owner(void* old_value) { #ifdef ASSERT @@ -177,4 +224,35 @@ inline ObjectMonitor* ObjectMonitor::try_set_next_om(ObjectMonitor* old_value, O return Atomic::cmpxchg(&_next_om, old_value, new_value); } +inline ObjectMonitorContentionMark::ObjectMonitorContentionMark(ObjectMonitor* monitor) + : _monitor(monitor) { + _monitor->add_to_contentions(1); +} + +inline ObjectMonitorContentionMark::~ObjectMonitorContentionMark() { + _monitor->add_to_contentions(-1); +} + +inline oop ObjectMonitor::object_peek() const { + if (_object.is_null()) { + return nullptr; + } + return _object.peek(); +} + +inline bool ObjectMonitor::object_is_dead() const { + return object_peek() == nullptr; +} + +inline bool ObjectMonitor::object_is_cleared() const { + return _object.is_null(); +} + +inline bool ObjectMonitor::object_refers_to(oop obj) const { + if (_object.is_null()) { + return false; + } + return _object.peek() == obj; +} + #endif // SHARE_RUNTIME_OBJECTMONITOR_INLINE_HPP diff --git a/src/hotspot/share/runtime/os.hpp b/src/hotspot/share/runtime/os.hpp index 6ac92b2d28b..d8a87a80300 100644 --- a/src/hotspot/share/runtime/os.hpp +++ b/src/hotspot/share/runtime/os.hpp @@ -368,6 +368,15 @@ class os: AllStatic { static bool uncommit_memory(char* addr, size_t bytes, bool executable = false); static bool release_memory(char* addr, size_t bytes); + // Does the platform support trimming the native heap? + static bool can_trim_native_heap(); + + // Trim the C-heap. Optionally returns working set size change (RSS+Swap) in *rss_change. + // Note: If trimming succeeded but no size change information could be obtained, + // rss_change.after will contain SIZE_MAX upon return. + struct size_change_t { size_t before; size_t after; }; + static bool trim_native_heap(size_change_t* rss_change = nullptr); + // A diagnostic function to print memory mappings in the given range. static void print_memory_mappings(char* addr, size_t bytes, outputStream* st); // Prints all mappings diff --git a/src/hotspot/share/runtime/safepoint.cpp b/src/hotspot/share/runtime/safepoint.cpp index 3191275884c..f21082e767d 100644 --- a/src/hotspot/share/runtime/safepoint.cpp +++ b/src/hotspot/share/runtime/safepoint.cpp @@ -879,6 +879,11 @@ void ThreadSafepointState::account_safe_thread() { DEBUG_ONLY(_thread->set_visited_for_critical_count(SafepointSynchronize::safepoint_counter());) assert(!_safepoint_safe, "Must be unsafe before safe"); _safepoint_safe = true; + + // The oops in the monitor cache are cleared to prevent stale cache entries + // from keeping dead objects alive. Because these oops are always cleared + // before safepoint operations they are not visited in JavaThread::oops_do. 
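(Aside: a hedged usage sketch for the os::can_trim_native_heap()/os::trim_native_heap()/os::size_change_t API declared in the os.hpp hunk above. The wrapper name and the log tag are invented for illustration; only the declarations shown above are assumed, including the documented convention that rss_change.after is SIZE_MAX when the trim succeeded but no size information could be obtained.)

// Illustrative caller only -- not part of the patch.
static void try_trim_native_heap_and_report() {
  if (!os::can_trim_native_heap()) {
    return;  // the platform offers no way to trim the C-heap
  }
  os::size_change_t sc;
  if (os::trim_native_heap(&sc)) {
    if (sc.after != SIZE_MAX) {
      // RSS+swap went from sc.before to sc.after bytes.
      log_info(os)("trim-native: " SIZE_FORMAT " -> " SIZE_FORMAT " bytes", sc.before, sc.after);
    } else {
      log_info(os)("trim-native: succeeded, but no size information was available");
    }
  }
}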
+ _thread->om_clear_monitor_cache(); } void ThreadSafepointState::restart() { diff --git a/src/hotspot/share/runtime/serviceThread.cpp b/src/hotspot/share/runtime/serviceThread.cpp index 1c6a7b7874e..cab5a7e29bb 100644 --- a/src/hotspot/share/runtime/serviceThread.cpp +++ b/src/hotspot/share/runtime/serviceThread.cpp @@ -39,6 +39,7 @@ #include "runtime/java.hpp" #include "runtime/javaCalls.hpp" #include "runtime/jniHandles.hpp" +#include "runtime/lightweightSynchronizer.hpp" #include "runtime/serviceThread.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" @@ -149,6 +150,7 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) { bool oop_handles_to_release = false; bool cldg_cleanup_work = false; bool jvmti_tagmap_work = false; + bool omworldtable_work = false; { // Need state transition ThreadBlockInVM so that this thread // will be handled by safepoint correctly when this thread is @@ -177,7 +179,8 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) { (oopstorage_work = OopStorage::has_cleanup_work_and_reset()) | (oop_handles_to_release = (_oop_handle_list != NULL)) | (cldg_cleanup_work = ClassLoaderDataGraph::should_clean_metaspaces_and_reset()) | - (jvmti_tagmap_work = JvmtiTagMap::has_object_free_events_and_reset()) + (jvmti_tagmap_work = JvmtiTagMap::has_object_free_events_and_reset()) | + (omworldtable_work = LightweightSynchronizer::needs_resize()) ) == 0) { // Wait until notified that there is some work to do. ml.wait(); @@ -244,6 +247,10 @@ void ServiceThread::service_thread_entry(JavaThread* jt, TRAPS) { if (jvmti_tagmap_work) { JvmtiTagMap::flush_all_object_free_events(); } + + if (omworldtable_work) { + LightweightSynchronizer::resize_table(jt); + } } } diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp index 66613e5d8c6..5a6795d3132 100644 --- a/src/hotspot/share/runtime/sharedRuntime.cpp +++ b/src/hotspot/share/runtime/sharedRuntime.cpp @@ -56,6 +56,7 @@ #include "prims/methodHandles.hpp" #include "prims/nativeLookup.hpp" #include "runtime/atomic.hpp" +#include "runtime/basicLock.inline.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" @@ -73,6 +74,7 @@ #include "utilities/copy.hpp" #include "utilities/dtrace.hpp" #include "utilities/events.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/hashtable.inline.hpp" #include "utilities/macros.hpp" #include "utilities/xmlstream.hpp" @@ -3235,16 +3237,26 @@ JRT_LEAF(intptr_t*, SharedRuntime::OSR_migration_begin( JavaThread *current) ) kptr2 = fr.next_monitor_in_interpreter_frame(kptr2) ) { if (kptr2->obj() != NULL) { // Avoid 'holes' in the monitor array BasicLock *lock = kptr2->lock(); - // Inflate so the object's header no longer refers to the BasicLock. - if (lock->displaced_header().is_unlocked()) { - // The object is locked and the resulting ObjectMonitor* will also be - // locked so it can't be async deflated until ownership is dropped. - // See the big comment in basicLock.cpp: BasicLock::move_to(). - ObjectSynchronizer::inflate_helper(kptr2->obj()); + if (LockingMode == LM_LEGACY) { + // Inflate so the object's header no longer refers to the BasicLock. + if (lock->displaced_header().is_unlocked()) { + // The object is locked and the resulting ObjectMonitor* will also be + // locked so it can't be async deflated until ownership is dropped. + // See the big comment in basicLock.cpp: BasicLock::move_to(). 
+ ObjectSynchronizer::inflate_helper(kptr2->obj()); + } + // Now the displaced header is free to move because the + // object's header no longer refers to it. + buf[i] = (intptr_t)lock->displaced_header().value(); + } else if (UseObjectMonitorTable) { + buf[i] = (intptr_t)lock->object_monitor_cache(); } - // Now the displaced header is free to move because the - // object's header no longer refers to it. - buf[i++] = (intptr_t)lock->displaced_header().value(); +#ifdef ASSERT + else { + buf[i] = badDispHeaderOSR; + } +#endif + i++; buf[i++] = cast_from_oop(kptr2->obj()); } } diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index ef4d9f717da..b4a31f691b8 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -1,7 +1,7 @@ // This project is a modified version of OpenJDK, licensed under GPL v2. // Modifications Copyright (C) 2025 ByteDance Inc. /* - * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #include "precompiled.hpp" #include "classfile/vmSymbols.hpp" #include "jfr/jfrEvents.hpp" +#include "gc/shared/suspendibleThreadSet.hpp" #include "logging/log.hpp" #include "logging/logStream.hpp" #include "memory/allocation.inline.hpp" @@ -35,11 +36,16 @@ #include "memory/universe.hpp" #include "oops/markWord.hpp" #include "oops/oop.inline.hpp" +#include "oops/weakHandle.inline.hpp" #include "runtime/atomic.hpp" +#include "runtime/basicLock.inline.hpp" #include "runtime/biasedLocking.hpp" +#include "runtime/globals.hpp" #include "runtime/handles.inline.hpp" #include "runtime/handshake.hpp" #include "runtime/interfaceSupport.inline.hpp" +#include "runtime/lightweightSynchronizer.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/objectMonitor.inline.hpp" @@ -51,13 +57,17 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" #include "runtime/synchronizer.hpp" +#include "runtime/synchronizer.inline.hpp" +#include "runtime/thread.hpp" #include "runtime/thread.inline.hpp" #include "runtime/timer.hpp" +#include "runtime/trimNativeHeap.hpp" #include "runtime/vframe.hpp" #include "runtime/vmThread.hpp" #include "utilities/align.hpp" #include "utilities/dtrace.hpp" #include "utilities/events.hpp" +#include "utilities/globalDefinitions.hpp" #include "utilities/preserveException.hpp" void MonitorList::add(ObjectMonitor* m) { @@ -218,6 +228,10 @@ void ObjectSynchronizer::initialize() { // Start the timer for deflations, so it does not trigger immediately. _last_async_deflation_time_ns = os::javaTimeNanos(); + + if (LockingMode == LM_LIGHTWEIGHT) { + LightweightSynchronizer::initialize(); + } } MonitorList ObjectSynchronizer::_in_use_list; @@ -276,14 +290,26 @@ bool ObjectSynchronizer::quick_notify(oopDesc* obj, JavaThread* current, bool al if (obj == NULL) return false; // slow-path for invalid obj const markWord mark = obj->mark(); - if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { - // Degenerate notify - // stack-locked by caller so by definition the implied waitset is empty. 
- return true; + if (LockingMode == LM_LIGHTWEIGHT) { + if (mark.is_fast_locked() && current->lock_stack().contains(cast_to_oop(obj))) { + // Degenerate notify + // fast-locked by caller so by definition the implied waitset is empty. + return true; + } + } else if (LockingMode == LM_LEGACY) { + if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // Degenerate notify + // stack-locked by caller so by definition the implied waitset is empty. + return true; + } } if (mark.has_monitor()) { - ObjectMonitor* const mon = mark.monitor(); + ObjectMonitor* const mon = read_monitor(current, obj, mark); + if (LockingMode == LM_LIGHTWEIGHT && mon == nullptr) { + // Racing with inflation/deflation go slow path + return false; + } assert(mon->object() == oop(obj), "invariant"); if (mon->owner() != current) return false; // slow-path for IMS exception @@ -310,6 +336,13 @@ bool ObjectSynchronizer::quick_notify(oopDesc* obj, JavaThread* current, bool al return false; } +static bool useHeavyMonitors() { +#if defined(X86) || defined(AARCH64) || defined(PPC64) || defined(RISCV64) || defined(S390) + return LockingMode == LM_MONITOR; +#else + return false; +#endif +} // The LockNode emitted directly at the synchronization site would have // been too big if it were to have included support for the cases of inflated @@ -327,10 +360,21 @@ bool ObjectSynchronizer::quick_enter(oop obj, JavaThread* current, return false; } + if (useHeavyMonitors()) { + return false; // Slow path + } + + if (LockingMode == LM_LIGHTWEIGHT) { + return LightweightSynchronizer::quick_enter(obj, current, lock); + } + + assert(LockingMode == LM_LEGACY, "legacy mode below"); + const markWord mark = obj->mark(); if (mark.has_monitor()) { - ObjectMonitor* const m = mark.monitor(); + ObjectMonitor* const m = read_monitor(mark); + // An async deflation or GC can race us before we manage to make // the ObjectMonitor busy by setting the owner below. If we detect // that race we just bail out to the slow-path here. @@ -351,12 +395,11 @@ bool ObjectSynchronizer::quick_enter(oop obj, JavaThread* current, // This Java Monitor is inflated so obj's header will never be // displaced to this thread's BasicLock. Make the displaced header - // non-NULL so this BasicLock is not seen as recursive nor as + // non-null so this BasicLock is not seen as recursive nor as // being locked. We do this unconditionally so that this thread's // BasicLock cannot be mis-interpreted by any stack walkers. For // performance reasons, stack walkers generally first check for - // Biased Locking in the object's header, the second check is for - // stack-locking in the object's header, the third check is for + // stack-locking in the object's header, the second check is for // recursive stack-locking in the displaced header in the BasicLock, // and last are the inflated Java Monitor (ObjectMonitor) checks. 
lock->set_displaced_header(markWord::unused_mark()); @@ -378,8 +421,9 @@ bool ObjectSynchronizer::quick_enter(oop obj, JavaThread* current, } // Handle notifications when synchronizing on value based classes -void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread* current) { - frame last_frame = current->last_frame(); +void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread* locking_thread) { + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); + frame last_frame = locking_thread->last_frame(); bool bcp_was_adjusted = false; // Don't decrement bcp if it points to the frame's first instruction. This happens when // handle_sync_on_value_based_class() is called because of a synchronized method. There @@ -392,9 +436,9 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread } if (DiagnoseSyncOnValueBasedClasses == FATAL_EXIT) { - ResourceMark rm(current); + ResourceMark rm; stringStream ss; - current->print_stack_on(&ss); + locking_thread->print_stack_on(&ss); char* base = (char*)strstr(ss.base(), "at"); char* newline = (char*)strchr(ss.base(), '\n'); if (newline != NULL) { @@ -403,13 +447,13 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread fatal("Synchronizing on object " INTPTR_FORMAT " of klass %s %s", p2i(obj()), obj->klass()->external_name(), base); } else { assert(DiagnoseSyncOnValueBasedClasses == LOG_WARNING, "invalid value for DiagnoseSyncOnValueBasedClasses"); - ResourceMark rm(current); + ResourceMark rm; Log(valuebasedclasses) vblog; vblog.info("Synchronizing on object " INTPTR_FORMAT " of klass %s", p2i(obj()), obj->klass()->external_name()); - if (current->has_last_Java_frame()) { + if (locking_thread->has_last_Java_frame()) { LogStream info_stream(vblog.info()); - current->print_stack_on(&info_stream); + locking_thread->print_stack_on(&info_stream); } else { vblog.info("Cannot find the last Java frame"); } @@ -428,96 +472,147 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread // ----------------------------------------------------------------------------- // Monitor Enter/Exit + +void ObjectSynchronizer::enter_for(Handle obj, BasicLock* lock, JavaThread* locking_thread) { + // When called with locking_thread != Thread::current() some mechanism must synchronize + // the locking_thread with respect to the current thread. Currently only used when + // deoptimizing and re-locking locks. See Deoptimization::relock_objects + assert(locking_thread == Thread::current() || locking_thread->is_obj_deopt_suspend(), "must be"); + + if (LockingMode == LM_LIGHTWEIGHT) { + return LightweightSynchronizer::enter_for(obj, lock, locking_thread); + } + + if (!enter_fast_impl(obj, lock, locking_thread)) { + // Inflated ObjectMonitor::enter_for is required + + // An async deflation can race after the inflate_for() call and before + // enter_for() can make the ObjectMonitor busy. enter_for() returns false + // if we have lost the race to async deflation and we simply try again. 
+ while (true) { + ObjectMonitor* monitor = inflate_for(locking_thread, obj(), inflate_cause_monitor_enter); + if (monitor->enter_for(locking_thread)) { + return; + } + assert(monitor->is_being_async_deflated(), "must be"); + } + } +} + +void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) { + assert(current == Thread::current(), "must be"); + + if (LockingMode == LM_LIGHTWEIGHT) { + return LightweightSynchronizer::enter(obj, lock, current); + } + + if (!enter_fast_impl(obj, lock, current)) { + // Inflated ObjectMonitor::enter is required + + // An async deflation can race after the inflate() call and before + // enter() can make the ObjectMonitor busy. enter() returns false if + // we have lost the race to async deflation and we simply try again. + while (true) { + ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_monitor_enter); + if (monitor->enter(current)) { + return; + } + } + } +} + // The interpreter and compiler assembly code tries to lock using the fast path // of this algorithm. Make sure to update that code if the following function is // changed. The implementation is extremely sensitive to race condition. Be careful. +bool ObjectSynchronizer::enter_fast_impl(Handle obj, BasicLock* lock, JavaThread* locking_thread) { + assert(LockingMode != LM_LIGHTWEIGHT, "Use LightweightSynchronizer"); -void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) { if (obj->klass()->is_value_based()) { - handle_sync_on_value_based_class(obj, current); + handle_sync_on_value_based_class(obj, locking_thread); } - if (UseBiasedLocking) { - BiasedLocking::revoke(current, obj); - } + if (!useHeavyMonitors()) { + if (LockingMode == LM_LEGACY) { + if (UseBiasedLocking) { + BiasedLocking::revoke(locking_thread, obj); + } - markWord mark = obj->mark(); - assert(!mark.has_bias_pattern(), "should not see bias pattern here"); + markWord mark = obj->mark(); + if (mark.is_neutral()) { + // Anticipate successful CAS -- the ST of the displaced mark must + // be visible <= the ST performed by the CAS. + lock->set_displaced_header(mark); + if (mark == obj()->cas_set_mark(markWord::from_pointer(lock), mark)) { + return true; + } + } else if (mark.has_locker() && + locking_thread->is_lock_owned((address) mark.locker())) { + assert(lock != mark.locker(), "must not re-lock the same lock"); + assert(lock != (BasicLock*) obj->mark().value(), "don't relock with same BasicLock"); + lock->set_displaced_header(markWord::from_pointer(NULL)); + return true; + } - if (mark.is_neutral()) { - // Anticipate successful CAS -- the ST of the displaced mark must - // be visible <= the ST performed by the CAS. - lock->set_displaced_header(mark); - if (mark == obj()->cas_set_mark(markWord::from_pointer(lock), mark)) { - return; - } - // Fall through to inflate() ... - } else if (mark.has_locker() && - current->is_lock_owned((address)mark.locker())) { - assert(lock != mark.locker(), "must not re-lock the same lock"); - assert(lock != (BasicLock*)obj->mark().value(), "don't relock with same BasicLock"); - lock->set_displaced_header(markWord::from_pointer(NULL)); - return; - } + // The object header will never be displaced to this lock, + // so it does not matter what the value is, except that it + // must be non-zero to avoid looking like a re-entrant lock, + // and must not look locked either. 
+ lock->set_displaced_header(markWord::unused_mark()); - // The object header will never be displaced to this lock, - // so it does not matter what the value is, except that it - // must be non-zero to avoid looking like a re-entrant lock, - // and must not look locked either. - lock->set_displaced_header(markWord::unused_mark()); - // An async deflation can race after the inflate() call and before - // enter() can make the ObjectMonitor busy. enter() returns false if - // we have lost the race to async deflation and we simply try again. - while (true) { - ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_monitor_enter); - if (monitor->enter(current)) { - return; + // Failed to fast lock. + return false; } } + + return false; } void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) { - markWord mark = object->mark(); - // We cannot check for Biased Locking if we are racing an inflation. - assert(mark == markWord::INFLATING() || - !mark.has_bias_pattern(), "should not see bias pattern here"); - - markWord dhw = lock->displaced_header(); - if (dhw.value() == 0) { - // If the displaced header is NULL, then this exit matches up with - // a recursive enter. No real work to do here except for diagnostics. + if (LockingMode == LM_LIGHTWEIGHT) { + return LightweightSynchronizer::exit(object, current); + } + + if (!useHeavyMonitors()) { + markWord mark = object->mark(); + if (LockingMode == LM_LEGACY) { + markWord dhw = lock->displaced_header(); + if (dhw.value() == 0) { + // If the displaced header is NULL, then this exit matches up with + // a recursive enter. No real work to do here except for diagnostics. #ifndef PRODUCT - if (mark != markWord::INFLATING()) { - // Only do diagnostics if we are not racing an inflation. Simply - // exiting a recursive enter of a Java Monitor that is being - // inflated is safe; see the has_monitor() comment below. - assert(!mark.is_neutral(), "invariant"); - assert(!mark.has_locker() || - current->is_lock_owned((address)mark.locker()), "invariant"); - if (mark.has_monitor()) { - // The BasicLock's displaced_header is marked as a recursive - // enter and we have an inflated Java Monitor (ObjectMonitor). - // This is a special case where the Java Monitor was inflated - // after this thread entered the stack-lock recursively. When a - // Java Monitor is inflated, we cannot safely walk the Java - // Monitor owner's stack and update the BasicLocks because a - // Java Monitor can be asynchronously inflated by a thread that - // does not own the Java Monitor. - ObjectMonitor* m = mark.monitor(); - assert(m->object()->mark() == mark, "invariant"); - assert(m->is_entered(current), "invariant"); - } - } + if (mark != markWord::INFLATING()) { + // Only do diagnostics if we are not racing an inflation. Simply + // exiting a recursive enter of a Java Monitor that is being + // inflated is safe; see the has_monitor() comment below. + assert(!mark.is_neutral(), "invariant"); + assert(!mark.has_locker() || + current->is_lock_owned((address)mark.locker()), "invariant"); + if (mark.has_monitor()) { + // The BasicLock's displaced_header is marked as a recursive + // enter and we have an inflated Java Monitor (ObjectMonitor). + // This is a special case where the Java Monitor was inflated + // after this thread entered the stack-lock recursively. 
When a + // Java Monitor is inflated, we cannot safely walk the Java + // Monitor owner's stack and update the BasicLocks because a + // Java Monitor can be asynchronously inflated by a thread that + // does not own the Java Monitor. + ObjectMonitor* m = read_monitor(mark); + assert(m->object()->mark() == mark, "invariant"); + assert(m->is_entered(current), "invariant"); + } + } #endif - return; - } + return; + } - if (mark == markWord::from_pointer(lock)) { - // If the object is stack-locked by the current thread, try to - // swing the displaced header from the BasicLock back to the mark. - assert(dhw.is_neutral(), "invariant"); - if (object->cas_set_mark(dhw, mark) == mark) { - return; + if (mark == markWord::from_pointer(lock)) { + // If the object is stack-locked by the current thread, try to + // swing the displaced header from the BasicLock back to the mark. + assert(dhw.is_neutral(), "invariant"); + if (object->cas_set_mark(dhw, mark) == mark) { + return; + } + } } } @@ -525,6 +620,7 @@ void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) // The ObjectMonitor* can't be async deflated until ownership is // dropped inside exit() and the ObjectMonitor* must be !is_busy(). ObjectMonitor* monitor = inflate(current, object, inflate_cause_vm_internal); + assert(!monitor->is_owner_anonymous(), "must not be"); monitor->exit(current); } @@ -541,6 +637,8 @@ void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) // 5) lock lock2 // NOTE: must use heavy weight monitor to handle complete_exit/reenter() intx ObjectSynchronizer::complete_exit(Handle obj, JavaThread* current) { + // TODO: FIXME + assert(LockingMode != LM_LIGHTWEIGHT, "LM_LIGHTWEIGHT cannot use this"); if (UseBiasedLocking) { BiasedLocking::revoke(current, obj); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); @@ -555,6 +653,8 @@ intx ObjectSynchronizer::complete_exit(Handle obj, JavaThread* current) { // NOTE: must use heavy weight monitor to handle complete_exit/reenter() void ObjectSynchronizer::reenter(Handle obj, intx recursions, JavaThread* current) { + // TODO: FIXME + assert(LockingMode != LM_LIGHTWEIGHT, "LM_LIGHTWEIGHT cannot use this"); if (UseBiasedLocking) { BiasedLocking::revoke(current, obj); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); @@ -590,8 +690,16 @@ void ObjectSynchronizer::jni_enter(Handle obj, JavaThread* current) { // enter() can make the ObjectMonitor busy. enter() returns false if // we have lost the race to async deflation and we simply try again. while (true) { - ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_jni_enter); - if (monitor->enter(current)) { + ObjectMonitor* monitor; + bool entered; + if (LockingMode == LM_LIGHTWEIGHT) { + entered = LightweightSynchronizer::inflate_and_enter(obj(), current, current, inflate_cause_jni_enter) != nullptr; + } else { + monitor = inflate(current, obj(), inflate_cause_jni_enter); + entered = monitor->enter(current); + } + + if (entered) { break; } } @@ -621,9 +729,14 @@ void ObjectSynchronizer::jni_exit(oop obj, TRAPS) { } assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); - // The ObjectMonitor* can't be async deflated until ownership is - // dropped inside exit() and the ObjectMonitor* must be !is_busy(). 
- ObjectMonitor* monitor = inflate(current, obj, inflate_cause_jni_exit); + ObjectMonitor* monitor; + if (LockingMode == LM_LIGHTWEIGHT) { + monitor = LightweightSynchronizer::inflate_locked_or_imse(obj, inflate_cause_jni_exit, CHECK); + } else { + // The ObjectMonitor* can't be async deflated until ownership is + // dropped inside exit() and the ObjectMonitor* must be !is_busy(). + monitor = inflate(current, obj, inflate_cause_jni_exit); + } // If this thread has locked the object, exit the monitor. We // intentionally do not use CHECK on check_owner because we must exit the // monitor even if an exception was already pending. @@ -655,6 +768,7 @@ ObjectLocker::~ObjectLocker() { // ----------------------------------------------------------------------------- // Wait/Notify/NotifyAll // NOTE: must use heavy weight monitor to handle wait() + int ObjectSynchronizer::wait(Handle obj, jlong millis, TRAPS) { JavaThread* current = THREAD; if (UseBiasedLocking) { @@ -664,10 +778,16 @@ int ObjectSynchronizer::wait(Handle obj, jlong millis, TRAPS) { if (millis < 0) { THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - // The ObjectMonitor* can't be async deflated because the _waiters - // field is incremented before ownership is dropped and decremented - // after ownership is regained. - ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_wait); + + ObjectMonitor* monitor; + if (LockingMode == LM_LIGHTWEIGHT) { + monitor = LightweightSynchronizer::inflate_locked_or_imse(obj(), inflate_cause_wait, CHECK_0); + } else { + // The ObjectMonitor* can't be async deflated because the _waiters + // field is incremented before ownership is dropped and decremented + // after ownership is regained. + monitor = inflate(current, obj(), inflate_cause_wait); + } DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), current, millis); monitor->wait(millis, true, THREAD); // Not CHECK as we need following code @@ -682,16 +802,25 @@ int ObjectSynchronizer::wait(Handle obj, jlong millis, TRAPS) { // No exception are possible in this case as we only use this internally when locking is // correct and we have to wait until notified - so no interrupts or timeouts. -void ObjectSynchronizer::wait_uninterruptibly(Handle obj, JavaThread* current) { +void ObjectSynchronizer::wait_uninterruptibly(Handle obj, TRAPS) { + JavaThread* current = THREAD; if (UseBiasedLocking) { BiasedLocking::revoke(current, obj); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - // The ObjectMonitor* can't be async deflated because the _waiters - // field is incremented before ownership is dropped and decremented - // after ownership is regained. - ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_wait); - monitor->wait(0 /* wait-forever */, false /* not interruptible */, current); + + ObjectMonitor* monitor; + if (LockingMode == LM_LIGHTWEIGHT) { + monitor = LightweightSynchronizer::inflate_locked_or_imse(obj(), inflate_cause_wait, CHECK); + } else { + // The ObjectMonitor* can't be async deflated because the _waiters + // field is incremented before ownership is dropped and decremented + // after ownership is regained. 
+ monitor = inflate(current, obj(), inflate_cause_wait); + } + + DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), current, 0); + monitor->wait(0 /* wait-forever */, false /* not interruptible */, THREAD); } void ObjectSynchronizer::notify(Handle obj, TRAPS) { @@ -702,13 +831,26 @@ void ObjectSynchronizer::notify(Handle obj, TRAPS) { } markWord mark = obj->mark(); - if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { - // Not inflated so there can't be any waiters to notify. - return; + if (LockingMode == LM_LIGHTWEIGHT) { + if ((mark.is_fast_locked() && current->lock_stack().contains(obj()))) { + // Not inflated so there can't be any waiters to notify. + return; + } + } else if (LockingMode == LM_LEGACY) { + if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // Not inflated so there can't be any waiters to notify. + return; + } + } + + ObjectMonitor* monitor; + if (LockingMode == LM_LIGHTWEIGHT) { + monitor = LightweightSynchronizer::inflate_locked_or_imse(obj(), inflate_cause_notify, CHECK); + } else { + // The ObjectMonitor* can't be async deflated until ownership is + // dropped by the calling thread. + monitor = inflate(current, obj(), inflate_cause_notify); } - // The ObjectMonitor* can't be async deflated until ownership is - // dropped by the calling thread. - ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_notify); monitor->notify(CHECK); } @@ -721,13 +863,26 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) { } markWord mark = obj->mark(); - if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { - // Not inflated so there can't be any waiters to notify. - return; + if (LockingMode == LM_LIGHTWEIGHT) { + if ((mark.is_fast_locked() && current->lock_stack().contains(obj()))) { + // Not inflated so there can't be any waiters to notify. + return; + } + } else if (LockingMode == LM_LEGACY) { + if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // Not inflated so there can't be any waiters to notify. + return; + } + } + + ObjectMonitor* monitor; + if (LockingMode == LM_LIGHTWEIGHT) { + monitor = LightweightSynchronizer::inflate_locked_or_imse(obj(), inflate_cause_notify, CHECK); + } else { + // The ObjectMonitor* can't be async deflated until ownership is + // dropped by the calling thread. + monitor = inflate(current, obj(), inflate_cause_notify); } - // The ObjectMonitor* can't be async deflated until ownership is - // dropped by the calling thread. - ObjectMonitor* monitor = inflate(current, obj(), inflate_cause_notify); monitor->notifyAll(CHECK); } @@ -749,7 +904,8 @@ static SharedGlobals GVars; static markWord read_stable_mark(oop obj) { markWord mark = obj->mark(); - if (!mark.is_being_inflated()) { + if (!mark.is_being_inflated() || LockingMode == LM_LIGHTWEIGHT) { + // New lightweight locking does not use the markWord::INFLATING() protocol. return mark; // normal fast-path return } @@ -824,7 +980,7 @@ static markWord read_stable_mark(oop obj) { // There are simple ways to "diffuse" the middle address bits over the // generated hashCode values: -static inline intptr_t get_next_hash(Thread* current, oop obj) { +static intptr_t get_next_hash(Thread* current, oop obj) { intptr_t value = 0; if (hashCode == 0) { // This form uses global Park-Miller RNG. @@ -858,13 +1014,39 @@ static inline intptr_t get_next_hash(Thread* current, oop obj) { value = v; } - value &= markWord::hash_mask; + value &= UseCompactObjectHeaders ? 
markWord::hash_mask_compact : markWord::hash_mask; if (value == 0) value = 0xBAD; assert(value != markWord::no_hash, "invariant"); return value; } +static intptr_t install_hash_code(Thread* current, oop obj) { + assert(UseObjectMonitorTable && LockingMode == LM_LIGHTWEIGHT, "must be"); + + markWord mark = obj->mark_acquire(); + for(;;) { + intptr_t hash = mark.hash(); + if (hash != 0) { + return hash; + } + + hash = get_next_hash(current, obj); + const markWord old_mark = mark; + const markWord new_mark = old_mark.copy_set_hash(hash); + + mark = obj->cas_set_mark(new_mark, old_mark); + if (old_mark == mark) { + return hash; + } + } +} + intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { + // Since the monitor isn't in the object header, it can simply be installed. + if (UseObjectMonitorTable) { + return install_hash_code(current, obj); + } + if (UseBiasedLocking) { // NOTE: many places throughout the JVM do not expect a safepoint // to be taken here. However, we only ever bias Java instances and all @@ -894,7 +1076,7 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { // object should remain ineligible for biased locking assert(!mark.has_bias_pattern(), "invariant"); - if (mark.is_neutral()) { // if this is a normal header + if (mark.is_neutral() || (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked())) { hash = mark.hash(); if (hash != 0) { // if it has a hash, just return it return hash; @@ -906,6 +1088,10 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { if (test == mark) { // if the hash was installed, return it return hash; } + if (LockingMode == LM_LIGHTWEIGHT) { + // CAS failed, retry + continue; + } // Failed to install the hash. It could be that another thread // installed the hash just before our attempt or inflation has // occurred or... so we fall thru to inflate the monitor for @@ -937,7 +1123,7 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { } // Fall thru so we only have one place that installs the hash in // the ObjectMonitor. - } else if (current->is_lock_owned((address)mark.locker())) { + } else if (LockingMode == LM_LEGACY && mark.has_locker() && current->is_lock_owned((address)mark.locker())) { // This is a stack lock owned by the calling thread so fetch the // displaced markWord from the BasicLock on the stack. temp = mark.displaced_mark_helper(); @@ -958,9 +1144,11 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { // Inflate the monitor to set the hash. - // An async deflation can race after the inflate() call and before we - // can update the ObjectMonitor's header with the hash value below. - monitor = inflate(current, obj, inflate_cause_hash_code); + // There's no need to inflate if the mark has already got a monitor. + // NOTE: an async deflation can race after we get the monitor and + // before we can update the ObjectMonitor's header with the hash + // value below. + monitor = mark.has_monitor() ? mark.monitor() : inflate(current, obj, inflate_cause_hash_code); // Load ObjectMonitor's header/dmw field and see if it has a hash. 
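(Aside: the masking step above, markWord::hash_mask versus markWord::hash_mask_compact, and the 0 -> 0xBAD remapping can be shown in isolation. The bit widths in this sketch are assumptions chosen for illustration, not the real constants from markWord.hpp; the only point is that compact object headers leave fewer hash bits in the mark word and that 0 stays reserved as the "no hash installed" sentinel.)

// Standalone sketch only; bit widths are assumed, not HotSpot's.
#include <cassert>
#include <cstdint>

static const uintptr_t kHashMask        = (uintptr_t(1) << 31) - 1;  // assumed full-width mask
static const uintptr_t kHashMaskCompact = (uintptr_t(1) << 25) - 1;  // assumed narrower mask

static uintptr_t finish_hash(uintptr_t raw, bool compact_headers) {
  uintptr_t value = raw & (compact_headers ? kHashMaskCompact : kHashMask);
  if (value == 0) value = 0xBAD;  // 0 means "no hash installed", so never return it
  return value;
}

int main() {
  assert(finish_hash(0, false) == 0xBAD);                     // zero is remapped
  assert(finish_hash(0xFFFFFFFF, true) == kHashMaskCompact);  // high bits dropped
  assert(finish_hash(0x12345678, true) <= kHashMaskCompact);
  return 0;
}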
mark = monitor->header(); assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); @@ -969,7 +1157,7 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { hash = get_next_hash(current, obj); // get a new hash temp = mark.copy_set_hash(hash) ; // merge the hash into header assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - uintptr_t v = Atomic::cmpxchg((volatile uintptr_t*)monitor->header_addr(), mark.value(), temp.value()); + uintptr_t v = Atomic::cmpxchg(monitor->metadata_addr(), mark.value(), temp.value()); test = markWord(v); if (test != mark) { // The attempt to update the ObjectMonitor's header/dmw field @@ -981,7 +1169,7 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value()); assert(hash != 0, "should only have lost the race to a thread that set a non-zero hash"); } - if (monitor->is_being_async_deflated()) { + if (monitor->is_being_async_deflated() && !UseObjectMonitorTable) { // If we detect that async deflation has occurred, then we // attempt to restore the header/dmw to the object's header // so that we only retry once if the deflater thread happens @@ -1014,15 +1202,35 @@ bool ObjectSynchronizer::current_thread_holds_lock(JavaThread* current, markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { + if (LockingMode == LM_LEGACY && mark.has_locker()) { + // stack-locked case, header points into owner's stack return current->is_lock_owned((address)mark.locker()); } + + if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked()) { + // fast-locking case, see if lock is in current's lock stack + return current->lock_stack().contains(h_obj()); + } + // Contended case, header points to ObjectMonitor (tagged pointer) - if (mark.has_monitor()) { + while (LockingMode == LM_LIGHTWEIGHT && mark.has_monitor()) { + ObjectMonitor* monitor = read_monitor(current, obj, mark); + if (monitor != nullptr) { + return monitor->is_entered(current) != 0; + } + // Racing with inflation/deflation, retry + mark = obj->mark_acquire(); + + if (mark.is_fast_locked()) { + // Some other thread fast_locked, current could not have held the lock + return false; + } + } + + if (LockingMode != LM_LIGHTWEIGHT && mark.has_monitor()) { // The first stage of async deflation does not affect any field // used by this comparison so the ObjectMonitor* is usable here. - ObjectMonitor* monitor = mark.monitor(); + ObjectMonitor* monitor = read_monitor(mark); return monitor->is_entered(current) != 0; } // Unlocked case, header in place @@ -1042,27 +1250,42 @@ JavaThread* ObjectSynchronizer::get_lock_owner(ThreadsList * t_list, Handle h_ob } oop obj = h_obj(); - address owner = NULL; - markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - owner = (address) mark.locker(); + if (LockingMode == LM_LEGACY && mark.has_locker()) { + // stack-locked so header points into owner's stack. + // owning_thread_from_monitor_owner() may also return null here: + return Threads::owning_thread_from_monitor_owner(t_list, (address) mark.locker()); + } + + if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked()) { + // fast-locked so get owner from the object. 
+ // owning_thread_from_object() may also return null here: + return Threads::owning_thread_from_object(t_list, h_obj()); } // Contended case, header points to ObjectMonitor (tagged pointer) - else if (mark.has_monitor()) { + while (LockingMode == LM_LIGHTWEIGHT && mark.has_monitor()) { + ObjectMonitor* monitor = read_monitor(Thread::current(), obj, mark); + if (monitor != nullptr) { + return Threads::owning_thread_from_monitor(t_list, monitor); + } + // Racing with inflation/deflation, retry + mark = obj->mark_acquire(); + + if (mark.is_fast_locked()) { + // Some other thread fast_locked + return Threads::owning_thread_from_object(t_list, h_obj()); + } + } + + if (LockingMode != LM_LIGHTWEIGHT && mark.has_monitor()) { // The first stage of async deflation does not affect any field // used by this comparison so the ObjectMonitor* is usable here. - ObjectMonitor* monitor = mark.monitor(); + ObjectMonitor* monitor = read_monitor(mark); assert(monitor != NULL, "monitor should be non-null"); - owner = (address) monitor->owner(); - } - - if (owner != NULL) { - // owning_thread_from_monitor_owner() may also return NULL here - return Threads::owning_thread_from_monitor_owner(t_list, owner); + // owning_thread_from_monitor() may also return null here: + return Threads::owning_thread_from_monitor(t_list, monitor); } // Unlocked case, header in place @@ -1114,7 +1337,7 @@ static bool monitors_used_above_threshold(MonitorList* list) { if (NoAsyncDeflationProgressMax != 0 && _no_progress_cnt >= NoAsyncDeflationProgressMax) { float remainder = (100.0 - MonitorUsedDeflationThreshold) / 100.0; - size_t new_ceiling = ceiling + (ceiling * remainder) + 1; + size_t new_ceiling = ceiling / remainder + 1; ObjectSynchronizer::set_in_use_list_ceiling(new_ceiling); log_info(monitorinflation)("Too many deflations without progress; " "bumping in_use_list_ceiling from " SIZE_FORMAT @@ -1247,9 +1470,10 @@ static void post_monitor_inflate_event(EventJavaMonitorInflate* event, // Fast path code shared by multiple functions void ObjectSynchronizer::inflate_helper(oop obj) { - markWord mark = obj->mark(); + assert(LockingMode != LM_LIGHTWEIGHT, "only inflate through enter"); + markWord mark = obj->mark_acquire(); if (mark.has_monitor()) { - ObjectMonitor* monitor = mark.monitor(); + ObjectMonitor* monitor = read_monitor(mark); markWord dmw = monitor->header(); assert(dmw.is_neutral(), "sanity check: header=" INTPTR_FORMAT, dmw.value()); return; @@ -1257,8 +1481,20 @@ void ObjectSynchronizer::inflate_helper(oop obj) { (void)inflate(Thread::current(), obj, inflate_cause_vm_internal); } -ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, - const InflateCause cause) { +ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop obj, const InflateCause cause) { + assert(current == Thread::current(), "must be"); + assert(LockingMode != LM_LIGHTWEIGHT, "only inflate through enter"); + return inflate_impl(obj, cause); +} + +ObjectMonitor* ObjectSynchronizer::inflate_for(JavaThread* thread, oop obj, const InflateCause cause) { + assert(thread == Thread::current() || thread->is_obj_deopt_suspend(), "must be"); + assert(LockingMode != LM_LIGHTWEIGHT, "LM_LIGHTWEIGHT cannot use inflate_for"); + return inflate_impl(obj, cause); +} + +ObjectMonitor* ObjectSynchronizer::inflate_impl(oop object, const InflateCause cause) { + assert(LockingMode != LM_LIGHTWEIGHT, "LM_LIGHTWEIGHT cannot use inflate_impl"); EventJavaMonitorInflate event; for (;;) { @@ -1266,8 +1502,8 @@ ObjectMonitor* 
ObjectSynchronizer::inflate(Thread* current, oop object, assert(!mark.has_bias_pattern(), "invariant"); // The mark can be in one of the following states: - // * Inflated - just return - // * Stack-locked - coerce it to inflated + // * inflated - Just return it. + // * stack-locked - Coerce it to inflated from stack-locked. // * INFLATING - busy wait for conversion to complete // * Neutral - aggressively inflate the object. // * BIASED - Illegal. We should never see this @@ -1302,8 +1538,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, // the odds of inflation contention. LogStreamHandle(Trace, monitorinflation) lsh; - - if (mark.has_locker()) { + if (LockingMode == LM_LEGACY && mark.has_locker()) { ObjectMonitor* m = new ObjectMonitor(object); // Optimistically prepare the ObjectMonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time @@ -1375,7 +1610,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, // to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); if (log_is_enabled(Trace, monitorinflation)) { - ResourceMark rm(current); + ResourceMark rm; lsh.print_cr("inflate(has_locker): object=" INTPTR_FORMAT ", mark=" INTPTR_FORMAT ", type='%s'", p2i(object), object->mark().value(), object->klass()->external_name()); @@ -1419,7 +1654,7 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, // cache lines to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); if (log_is_enabled(Trace, monitorinflation)) { - ResourceMark rm(current); + ResourceMark rm; lsh.print_cr("inflate(neutral): object=" INTPTR_FORMAT ", mark=" INTPTR_FORMAT ", type='%s'", p2i(object), object->mark().value(), object->klass()->external_name()); @@ -1464,6 +1699,7 @@ void ObjectSynchronizer::chk_for_block_req(JavaThread* current, const char* op_n // ObjectMonitors. Returns the number of deflated ObjectMonitors. size_t ObjectSynchronizer::deflate_monitor_list(Thread* current, LogStream* ls, elapsedTimer* timer_p) { + assert(current == JavaThread::current(), "must be current Java thread"); MonitorList::Iterator iter = _in_use_list.iterator(); size_t deflated_count = 0; @@ -1472,7 +1708,7 @@ size_t ObjectSynchronizer::deflate_monitor_list(Thread* current, LogStream* ls, break; } ObjectMonitor* mid = iter.next(); - if (mid->deflate_monitor()) { + if (mid->deflate_monitor(current)) { deflated_count++; } @@ -1493,12 +1729,26 @@ class HandshakeForDeflation : public HandshakeClosure { void do_thread(Thread* thread) { log_trace(monitorinflation)("HandshakeForDeflation::do_thread: thread=" INTPTR_FORMAT, p2i(thread)); + if (thread->is_Java_thread()) { + // Clear OM cache + JavaThread* jt = thread->as_Java_thread(); + jt->om_clear_monitor_cache(); + } } }; +class VM_RendezvousGCThreads : public VM_Operation { +public: + bool evaluate_at_safepoint() const override { return false; } + VMOp_Type type() const override { return VMOp_RendezvousGCThreads; } + void doit() override { + SuspendibleThreadSet::synchronize(); + SuspendibleThreadSet::desynchronize(); + }; +}; + // This function is called by the MonitorDeflationThread to deflate -// ObjectMonitors. It is also called via do_final_audit_and_print_stats() -// by the VMThread. +// ObjectMonitors. 
size_t ObjectSynchronizer::deflate_idle_monitors() { Thread* current = Thread::current(); if (current->is_Java_thread()) { @@ -1525,17 +1775,22 @@ size_t ObjectSynchronizer::deflate_idle_monitors() { // Deflate some idle ObjectMonitors. size_t deflated_count = deflate_monitor_list(current, ls, &timer); - if (deflated_count > 0 || is_final_audit()) { - // There are ObjectMonitors that have been deflated or this is the - // final audit and all the remaining ObjectMonitors have been - // deflated, BUT the MonitorDeflationThread blocked for the final - // safepoint during unlinking. - + if (deflated_count > 0) { + // There are ObjectMonitors that have been deflated. // Unlink deflated ObjectMonitors from the in-use list. ResourceMark rm; GrowableArray delete_list((int)deflated_count); size_t unlinked_count = _in_use_list.unlink_deflated(current, ls, &timer, &delete_list); + +#ifdef ASSERT + if (UseObjectMonitorTable) { + for (ObjectMonitor* monitor : delete_list) { + assert(!LightweightSynchronizer::contains_monitor(current, monitor), "Should have been removed"); + } + } +#endif + if (current->is_Java_thread()) { if (ls != NULL) { timer.stop(); @@ -1548,8 +1803,13 @@ size_t ObjectSynchronizer::deflate_idle_monitors() { // A JavaThread needs to handshake in order to safely free the // ObjectMonitors that were deflated in this cycle. + // Also, we sync and desync GC threads around the handshake, so that they can + // safely read the mark-word and look-through to the object-monitor, without + // being afraid that the object-monitor is going away. HandshakeForDeflation hfd_hc; Handshake::execute(&hfd_hc); + VM_RendezvousGCThreads sync_gc; + VMThread::execute(&sync_gc); if (ls != NULL) { ls->print_cr("after handshaking: in_use_list stats: ceiling=" @@ -1559,6 +1819,8 @@ size_t ObjectSynchronizer::deflate_idle_monitors() { } } + NativeHeapTrimmer::SuspendMark sm("monitor deletion"); + // After the handshake, safely free the ObjectMonitors that were // deflated in this cycle. size_t deleted_count = 0; @@ -1684,12 +1946,6 @@ void ObjectSynchronizer::do_final_audit_and_print_stats() { log_info(monitorinflation)("Starting the final audit."); if (log_is_enabled(Info, monitorinflation)) { - // Do a deflation in order to reduce the in-use monitor population - // that is reported by ObjectSynchronizer::log_in_use_monitor_details() - // which is called by ObjectSynchronizer::audit_and_print_stats(). - while (ObjectSynchronizer::deflate_idle_monitors() != 0) { - ; // empty - } // The other audit_and_print_stats() call is done at the Debug // level at a safepoint in ObjectSynchronizer::do_safepoint_work(). ObjectSynchronizer::audit_and_print_stats(true /* on_exit */); @@ -1738,7 +1994,7 @@ void ObjectSynchronizer::audit_and_print_stats(bool on_exit) { // When exiting this log output is at the Info level. When called // at a safepoint, this log output is at the Trace level since // there can be a lot of it. 
- log_in_use_monitor_details(ls); + log_in_use_monitor_details(ls, !on_exit /* log_all */); } ls->flush(); @@ -1789,38 +2045,44 @@ void ObjectSynchronizer::chk_in_use_entry(ObjectMonitor* n, outputStream* out, "deflated.", p2i(n)); return; } - if (n->header().value() == 0) { + + if (n->metadata() == 0) { out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": in-use monitor must " - "have non-NULL _header field.", p2i(n)); + "have non-NULL _metadata (header/hash) field.", p2i(n)); *error_cnt_p = *error_cnt_p + 1; } + const oop obj = n->object_peek(); - if (obj != NULL) { - const markWord mark = obj->mark(); - if (!mark.has_monitor()) { - out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": in-use monitor's " - "object does not think it has a monitor: obj=" - INTPTR_FORMAT ", mark=" INTPTR_FORMAT, p2i(n), - p2i(obj), mark.value()); - *error_cnt_p = *error_cnt_p + 1; - } - ObjectMonitor* const obj_mon = mark.monitor(); - if (n != obj_mon) { - out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": in-use monitor's " - "object does not refer to the same monitor: obj=" - INTPTR_FORMAT ", mark=" INTPTR_FORMAT ", obj_mon=" - INTPTR_FORMAT, p2i(n), p2i(obj), mark.value(), p2i(obj_mon)); - *error_cnt_p = *error_cnt_p + 1; - } + if (obj == nullptr) { + return; + } + + const markWord mark = obj->mark(); + if (!mark.has_monitor()) { + out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": in-use monitor's " + "object does not think it has a monitor: obj=" + INTPTR_FORMAT ", mark=" INTPTR_FORMAT, p2i(n), + p2i(obj), mark.value()); + *error_cnt_p = *error_cnt_p + 1; + return; + } + + ObjectMonitor* const obj_mon = read_monitor(Thread::current(), obj, mark); + if (n != obj_mon) { + out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": in-use monitor's " + "object does not refer to the same monitor: obj=" + INTPTR_FORMAT ", mark=" INTPTR_FORMAT ", obj_mon=" + INTPTR_FORMAT, p2i(n), p2i(obj), mark.value(), p2i(obj_mon)); + *error_cnt_p = *error_cnt_p + 1; } } // Log details about ObjectMonitors on the in_use_list. The 'BHL' // flags indicate why the entry is in-use, 'object' and 'object type' // indicate the associated object and its type. -void ObjectSynchronizer::log_in_use_monitor_details(outputStream* out) { - stringStream ss; +void ObjectSynchronizer::log_in_use_monitor_details(outputStream* out, bool log_all) { if (_in_use_list.count() > 0) { + stringStream ss; out->print_cr("In-use monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); out->print_cr("%18s %s %18s %18s", @@ -1828,15 +2090,19 @@ void ObjectSynchronizer::log_in_use_monitor_details(outputStream* out) { out->print_cr("================== === ================== =================="); MonitorList::Iterator iter = _in_use_list.iterator(); while (iter.has_next()) { - ObjectMonitor* mid = iter.next(); - const oop obj = mid->object_peek(); - const markWord mark = mid->header(); + ObjectMonitor* monitor = iter.next(); + if (!log_all && !monitor->has_owner() && !monitor->is_busy()) { + continue; + } + + const oop obj = monitor->object_peek(); + const intptr_t hash = UseObjectMonitorTable ? monitor->hash() : monitor->header().hash(); ResourceMark rm; - out->print(INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT " %s", p2i(mid), - mid->is_busy(), mark.hash() != 0, mid->owner() != NULL, + out->print(INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT " %s", p2i(monitor), + monitor->is_busy(), hash != 0, monitor->owner() != NULL, p2i(obj), obj == NULL ? 
"" : obj->klass()->external_name()); - if (mid->is_busy()) { - out->print(" (%s)", mid->is_busy_to_string(&ss)); + if (monitor->is_busy()) { + out->print(" (%s)", monitor->is_busy_to_string(&ss)); ss.reset(); } out->cr(); diff --git a/src/hotspot/share/runtime/synchronizer.hpp b/src/hotspot/share/runtime/synchronizer.hpp index d11fa7903e9..061529c89b7 100644 --- a/src/hotspot/share/runtime/synchronizer.hpp +++ b/src/hotspot/share/runtime/synchronizer.hpp @@ -1,7 +1,7 @@ // This project is a modified version of OpenJDK, licensed under GPL v2. // Modifications Copyright (C) 2025 ByteDance Inc. /* - * Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,6 +36,8 @@ class LogStream; class ObjectMonitor; class ThreadsList; +class Thread; +class JavaThread; class MonitorList { friend class VMStructs; @@ -92,7 +94,18 @@ class ObjectSynchronizer : AllStatic { // This is the "slow path" version of monitor enter and exit. static void enter(Handle obj, BasicLock* lock, JavaThread* current); static void exit(oop obj, BasicLock* lock, JavaThread* current); + // Used to enter a monitor for another thread. This requires that the + // locking_thread is suspended, and that entering on a potential + // inflated monitor may only contend with deflation. That is the obj being + // locked on is either already locked by the locking_thread or cannot + // escape the locking_thread. + static void enter_for(Handle obj, BasicLock* lock, JavaThread* locking_thread); +private: + // Shared implementation for enter and enter_for. Performs all but + // inflated monitor enter. + static bool enter_fast_impl(Handle obj, BasicLock* lock, JavaThread* locking_thread); +public: // Used only to handle jni locks or other unmatched monitor enter/exit // Internally they will use heavy weight monitor. static void jni_enter(Handle obj, JavaThread* current); @@ -122,10 +135,21 @@ class ObjectSynchronizer : AllStatic { // Inflate light weight monitor to heavy weight monitor static ObjectMonitor* inflate(Thread* current, oop obj, const InflateCause cause); + // Used to inflate a monitor as if it was done from the thread JavaThread. + static ObjectMonitor* inflate_for(JavaThread* thread, oop obj, const InflateCause cause); + +private: + // Shared implementation between the different LockingMode. 
+ static ObjectMonitor* inflate_impl(oop obj, const InflateCause cause); + +public: // This version is only for internal use static void inflate_helper(oop obj); static const char* inflate_cause_name(const InflateCause cause); + inline static ObjectMonitor* read_monitor(markWord mark); + inline static ObjectMonitor* read_monitor(Thread* current, oop obj, markWord mark); + // Returns the identity hash value for an oop // NOTE: It may cause monitor inflation static intptr_t identity_hash_value_for(Handle obj); @@ -172,10 +196,11 @@ class ObjectSynchronizer : AllStatic { static void chk_in_use_entry(ObjectMonitor* n, outputStream* out, int* error_cnt_p); static void do_final_audit_and_print_stats(); - static void log_in_use_monitor_details(outputStream* out); + static void log_in_use_monitor_details(outputStream* out, bool log_all); private: friend class SynchronizerTest; + friend class LightweightSynchronizer; static MonitorList _in_use_list; static volatile bool _is_async_deflation_requested; @@ -188,7 +213,7 @@ class ObjectSynchronizer : AllStatic { static size_t get_gvars_size(); static u_char* get_gvars_stw_random_addr(); - static void handle_sync_on_value_based_class(Handle obj, JavaThread* current); + static void handle_sync_on_value_based_class(Handle obj, JavaThread* locking_thread); }; // ObjectLocker enforces balanced locking and can never throw an diff --git a/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp b/src/hotspot/share/runtime/synchronizer.inline.hpp similarity index 53% rename from src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp rename to src/hotspot/share/runtime/synchronizer.inline.hpp index fb36406fbde..df61909ad61 100644 --- a/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp +++ b/src/hotspot/share/runtime/synchronizer.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,25 +22,23 @@ * */ -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "opto/compile.hpp" -#include "opto/node.hpp" -#include "opto/output.hpp" -#include "runtime/sharedRuntime.hpp" +#ifndef SHARE_RUNTIME_SYNCHRONIZER_INLINE_HPP +#define SHARE_RUNTIME_SYNCHRONIZER_INLINE_HPP -#define __ masm. 
-void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { - assert(SharedRuntime::polling_page_return_handler_blob() != NULL, - "polling page return stub not created yet"); - address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); +#include "runtime/synchronizer.hpp" - RuntimeAddress callback_addr(stub); +#include "runtime/lightweightSynchronizer.hpp" - __ bind(entry->_stub_label); - InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); - __ adr(rscratch1, safepoint_pc); - __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset())); - __ far_jump(callback_addr); +ObjectMonitor* ObjectSynchronizer::read_monitor(markWord mark) { + return mark.monitor(); } -#undef __ + +ObjectMonitor* ObjectSynchronizer::read_monitor(Thread* current, oop obj, markWord mark) { + if (!UseObjectMonitorTable) { + return read_monitor(mark); + } else { + return LightweightSynchronizer::get_monitor_from_table(current, obj); + } +} + +#endif // SHARE_RUNTIME_SYNCHRONIZER_INLINE_HPP diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp index 09ad54c273a..f8210dc9f37 100644 --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -91,10 +91,11 @@ #include "runtime/javaCalls.hpp" #include "runtime/jniHandles.inline.hpp" #include "runtime/jniPeriodicChecker.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/monitorDeflationThread.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/nonJavaThread.hpp" -#include "runtime/objectMonitor.hpp" +#include "runtime/objectMonitor.inline.hpp" #include "runtime/orderAccess.hpp" #include "runtime/osThread.hpp" #include "runtime/prefetch.inline.hpp" @@ -104,7 +105,7 @@ #include "runtime/serviceThread.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stackFrameStream.inline.hpp" -#include "runtime/stackWatermarkSet.hpp" +#include "runtime/stackWatermarkSet.inline.hpp" #include "runtime/statSampler.hpp" #include "runtime/task.hpp" #include "runtime/thread.inline.hpp" @@ -114,6 +115,7 @@ #include "runtime/threadWXSetters.inline.hpp" #include "runtime/timer.hpp" #include "runtime/timerTrace.hpp" +#include "runtime/trimNativeHeap.hpp" #include "runtime/vframe.inline.hpp" #include "runtime/vframeArray.hpp" #include "runtime/vframe_hp.hpp" @@ -150,6 +152,9 @@ #if INCLUDE_JFR #include "jfr/jfr.hpp" #endif +#if INCLUDE_VM_STRUCTS +#include "runtime/vmStructs.hpp" +#endif // Initialization after module runtime initialization void universe_post_module_init(); // must happen after call_initPhase2 @@ -704,6 +709,7 @@ void Thread::print_owned_locks_on(outputStream* st) const { // should be revisited, and they should be removed if possible. 
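// Illustrative sketch, not part of this patch: call-site shape for the
// read_monitor() helpers defined in synchronizer.inline.hpp above. When
// UseObjectMonitorTable is enabled the ObjectMonitor is found through the side
// table rather than through the mark word, so callers use this helper instead
// of mark.monitor(); compare the vframe.cpp hunk further below. The wrapper
// function is illustrative and assumes runtime/synchronizer.inline.hpp and
// oops/oop.inline.hpp are included.
static ObjectMonitor* monitor_for_sketch(Thread* current, oop obj) {
  markWord mark = obj->mark();
  if (!mark.has_monitor()) {
    return nullptr;  // not inflated
  }
  // Dispatches on UseObjectMonitorTable; may return NULL while the monitor is
  // still being installed in the table.
  return ObjectSynchronizer::read_monitor(current, obj, mark);
}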
bool Thread::is_lock_owned(address adr) const { + assert(LockingMode != LM_LIGHTWEIGHT, "should not be called with new lightweight locking"); return is_in_full_stack(adr); } @@ -1099,8 +1105,10 @@ JavaThread::JavaThread() : _class_to_be_initialized(nullptr), - _SleepEvent(ParkEvent::Allocate(this)) -{ + _SleepEvent(ParkEvent::Allocate(this)), + + _lock_stack(this), + _om_cache(this) { set_jni_functions(jni_functions()); #if INCLUDE_JVMCI @@ -1366,6 +1374,8 @@ void JavaThread::exit(bool destroy_vm, ExitType exit_type) { elapsedTimer _timer_exit_phase3; elapsedTimer _timer_exit_phase4; + om_clear_monitor_cache(); + if (log_is_enabled(Debug, os, thread, timer)) { _timer_exit_phase1.start(); } @@ -1577,7 +1587,8 @@ JavaThread* JavaThread::active() { } bool JavaThread::is_lock_owned(address adr) const { - if (Thread::is_lock_owned(adr)) return true; + assert(LockingMode != LM_LIGHTWEIGHT, "should not be called with new lightweight locking"); + if (Thread::is_lock_owned(adr)) return true; for (MonitorChunk* chunk = monitor_chunks(); chunk != NULL; chunk = chunk->next()) { if (chunk->contains(adr)) return true; @@ -2027,6 +2038,10 @@ void JavaThread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) { if (jvmti_thread_state() != NULL) { jvmti_thread_state()->oops_do(f, cf); } + + if (LockingMode == LM_LIGHTWEIGHT) { + lock_stack().oops_do(f); + } } void JavaThread::oops_do_frames(OopClosure* f, CodeBlobClosure* cf) { @@ -2867,6 +2882,13 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { convert_vm_init_libraries_to_agents(); } + // Should happen before any agent attaches and pokes into vmStructs +#if INCLUDE_VM_STRUCTS + if (UseCompactObjectHeaders) { + VMStructs::compact_headers_overrides(); + } +#endif + // Launch -agentlib/-agentpath and converted -Xrun agents if (Arguments::init_agents_at_startup()) { create_vm_init_agents(); @@ -3099,6 +3121,10 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { } #endif + if (NativeHeapTrimmer::enabled()) { + NativeHeapTrimmer::initialize(); + } + // Always call even when there are not JVMTI environments yet, since environments // may be attached late and JVMTI must track phases of VM execution JvmtiExport::enter_live_phase(); @@ -3752,6 +3778,7 @@ GrowableArray* Threads::get_pending_threads(ThreadsList * t_list, JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list, address owner) { + assert(LockingMode != LM_LIGHTWEIGHT, "Not with new lightweight locking"); // NULL owner means not locked so we can skip the search if (owner == NULL) return NULL; @@ -3781,6 +3808,37 @@ JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list, return the_owner; } +JavaThread* Threads::owning_thread_from_object(ThreadsList * t_list, oop obj) { + assert(LockingMode == LM_LIGHTWEIGHT, "Only with new lightweight locking"); + DO_JAVA_THREADS(t_list, q) { + // Need to start processing before accessing oops in the thread. 
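// Illustrative sketch, not part of this patch: the LM_LIGHTWEIGHT lock stack
// holds naked oops (see the lock_stack().oops_do(f) root added in
// oops_do_no_frames() above), so reading it from another thread, as done here,
// first requires the target's stack watermark processing to have started.
// Any ordinary OopClosure can walk it, e.g.:
//   class CountLockedOops : public OopClosure {
//     int _count = 0;
//   public:
//     void do_oop(oop* p)       override { if (*p != nullptr) _count++; }
//     void do_oop(narrowOop* p) override { ShouldNotReachHere(); } // full oops only
//     int count() const { return _count; }
//   };
//   // usage: CountLockedOops cl; q->lock_stack().oops_do(&cl);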
+ StackWatermark* watermark = StackWatermarkSet::get(q, StackWatermarkKind::gc); + if (watermark != nullptr) { + watermark->start_processing(); + } + + if (q->lock_stack().contains(obj)) { + return q; + } + } + return NULL; +} + +JavaThread* Threads::owning_thread_from_monitor(ThreadsList* t_list, ObjectMonitor* monitor) { + if (LockingMode == LM_LIGHTWEIGHT) { + if (monitor->is_owner_anonymous()) { + return owning_thread_from_object(t_list, monitor->object()); + } else { + Thread* owner = reinterpret_cast(monitor->owner()); + assert(owner == NULL || owner->is_Java_thread(), "only JavaThreads own monitors"); + return reinterpret_cast(owner); + } + } else { + address owner = (address)monitor->owner(); + return owning_thread_from_monitor_owner(t_list, owner); + } +} + class PrintOnClosure : public ThreadClosure { private: outputStream* _st; diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp index 741e8e45143..8e731064a4f 100644 --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp @@ -36,6 +36,7 @@ #include "runtime/globals.hpp" #include "runtime/handshake.hpp" #include "runtime/javaFrameAnchor.hpp" +#include "runtime/lockStack.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" #include "runtime/park.hpp" @@ -69,6 +70,7 @@ class OSThread; class ThreadStatistics; class ConcurrentLocksDump; class MonitorInfo; +class MonitorChunk; class vframeArray; class vframe; @@ -82,9 +84,12 @@ class ICRefillVerifier; class Metadata; class ResourceArea; +class RegisterMap; class OopStorage; +class CompiledMethod; + DEBUG_ONLY(class ResourceMark;) class WorkerThread; @@ -1615,6 +1620,27 @@ class JavaThread: public Thread { void interrupt(); bool is_interrupted(bool clear_interrupted); +private: + LockStack _lock_stack; + OMCache _om_cache; + +public: + LockStack& lock_stack() { return _lock_stack; } + + static ByteSize lock_stack_offset() { return byte_offset_of(JavaThread, _lock_stack); } + // Those offsets are used in code generators to access the LockStack that is embedded in this + // JavaThread structure. Those accesses are relative to the current thread, which + // is typically in a dedicated register. 
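// Illustrative sketch, not part of this patch: the offsets declared just below
// let code generators address the embedded LockStack relative to the current
// thread register, e.g. (aarch64-flavoured, rthread = current JavaThread):
//   __ ldrw(rscratch1, Address(rthread, JavaThread::lock_stack_top_offset()));
// A fast-lock sequence then stores the locked oop into the _base array and
// updates _top; the exact sequence is platform code and is not shown here.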
+ static ByteSize lock_stack_top_offset() { return lock_stack_offset() + LockStack::top_offset(); } + static ByteSize lock_stack_base_offset() { return lock_stack_offset() + LockStack::base_offset(); } + + static ByteSize om_cache_offset() { return byte_offset_of(JavaThread, _om_cache); } + static ByteSize om_cache_oops_offset() { return om_cache_offset() + OMCache::entries_offset(); } + + void om_set_monitor_cache(ObjectMonitor* monitor); + void om_clear_monitor_cache(); + ObjectMonitor* om_get_from_monitor_cache(oop obj); + static OopStorage* thread_oop_storage(); static void verify_cross_modify_fence_failure(JavaThread *thread) PRODUCT_RETURN; @@ -1738,6 +1764,9 @@ class Threads: AllStatic { static JavaThread *owning_thread_from_monitor_owner(ThreadsList * t_list, address owner); + static JavaThread* owning_thread_from_object(ThreadsList* t_list, oop obj); + static JavaThread* owning_thread_from_monitor(ThreadsList* t_list, ObjectMonitor* owner); + // Number of threads on the active threads list static int number_of_threads() { return _number_of_threads; } // Number of non-daemon threads on the active threads list diff --git a/src/hotspot/share/runtime/thread.inline.hpp b/src/hotspot/share/runtime/thread.inline.hpp index 63101e77855..abf47132f25 100644 --- a/src/hotspot/share/runtime/thread.inline.hpp +++ b/src/hotspot/share/runtime/thread.inline.hpp @@ -30,7 +30,9 @@ #include "gc/shared/tlab_globals.hpp" #include "runtime/atomic.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/nonJavaThread.hpp" +#include "runtime/objectMonitor.inline.hpp" #include "runtime/orderAccess.hpp" #include "runtime/safepoint.hpp" @@ -214,4 +216,27 @@ inline InstanceKlass* JavaThread::class_to_be_initialized() const { return _class_to_be_initialized; } +inline void JavaThread::om_set_monitor_cache(ObjectMonitor* monitor) { + assert(UseObjectMonitorTable, "must be"); + assert(monitor != nullptr, "use om_clear_monitor_cache to clear"); + assert(this == current() || monitor->owner_raw() == this, "only add owned monitors for other threads"); + assert(this == current() || is_obj_deopt_suspend(), "thread must not run concurrently"); + + _om_cache.set_monitor(monitor); +} + +inline void JavaThread::om_clear_monitor_cache() { + if (!UseObjectMonitorTable) { + return; + } + + _om_cache.clear(); +} + +inline ObjectMonitor* JavaThread::om_get_from_monitor_cache(oop obj) { + assert(obj != nullptr, "do not look for null objects"); + assert(this == current(), "only get own thread locals"); + return _om_cache.get_monitor(obj); +} + #endif // SHARE_RUNTIME_THREAD_INLINE_HPP diff --git a/src/hotspot/share/runtime/trimNativeHeap.cpp b/src/hotspot/share/runtime/trimNativeHeap.cpp new file mode 100644 index 00000000000..0ec2b93761f --- /dev/null +++ b/src/hotspot/share/runtime/trimNativeHeap.cpp @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2023 Red Hat Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "logging/log.hpp" +#include "runtime/globals.hpp" +#include "runtime/globals_extension.hpp" +#include "runtime/mutex.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/nonJavaThread.hpp" +#include "runtime/os.inline.hpp" +#include "runtime/safepoint.hpp" +#include "runtime/trimNativeHeap.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" +#include "utilities/vmError.hpp" + +class NativeHeapTrimmerThread : public NamedThread { + + // Upper limit for the backoff during pending/in-progress safepoint. + // Chosen as reasonable value to balance the overheads of waking up + // during the safepoint, which might have undesired effects on latencies, + // and the accuracy in tracking the trimming interval. + static constexpr int64_t safepoint_poll_ms = 250; + + Monitor* const _lock; + bool _stop; + uint16_t _suspend_count; + + // Statistics + uint64_t _num_trims_performed; + + bool is_suspended() const { + assert(_lock->is_locked(), "Must be"); + return _suspend_count > 0; + } + + uint16_t inc_suspend_count() { + assert(_lock->is_locked(), "Must be"); + assert(_suspend_count < UINT16_MAX, "Sanity"); + return ++_suspend_count; + } + + uint16_t dec_suspend_count() { + assert(_lock->is_locked(), "Must be"); + assert(_suspend_count != 0, "Sanity"); + return --_suspend_count; + } + + bool is_stopped() const { + assert(_lock->is_locked(), "Must be"); + return _stop; + } + + bool at_or_nearing_safepoint() const { + return SafepointSynchronize::is_at_safepoint() || + SafepointSynchronize::is_synchronizing(); + } + + // in seconds + static double now() { return os::elapsedTime(); } + static double to_ms(double seconds) { return seconds * 1000.0; } + + struct LogStartStopMark { + void log(const char* s) { log_info(trimnative)("Native heap trimmer %s", s); } + LogStartStopMark() { log("start"); } + ~LogStartStopMark() { log("stop"); } + }; + + void run() override { + assert(NativeHeapTrimmer::enabled(), "Only call if enabled"); + + LogStartStopMark lssm; + + const double interval_secs = (double)TrimNativeHeapInterval / 1000; + + while (true) { + double tnow = now(); + double next_trim_time = tnow + interval_secs; + + unsigned times_suspended = 0; + unsigned times_waited = 0; + unsigned times_safepoint = 0; + + { + MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag); + if (_stop) return; + + while (at_or_nearing_safepoint() || is_suspended() || next_trim_time > tnow) { + if (is_suspended()) { + times_suspended ++; + ml.wait(0); // infinite + } else if (next_trim_time > tnow) { + times_waited ++; + const int64_t wait_ms = MAX2(1.0, to_ms(next_trim_time - tnow)); + ml.wait(wait_ms); + } else if (at_or_nearing_safepoint()) { + times_safepoint ++; + const int64_t wait_ms = MIN2(TrimNativeHeapInterval, safepoint_poll_ms); + ml.wait(wait_ms); + } + + if (_stop) return; + + tnow = now(); + } + } + + log_trace(trimnative)("Times: %u suspended, %u timed, %u safepoint", + 
times_suspended, times_waited, times_safepoint); + + execute_trim_and_log(tnow); + } + } + + // Execute the native trim, log results. + void execute_trim_and_log(double t1) { + assert(os::can_trim_native_heap(), "Unexpected"); + + os::size_change_t sc = { 0, 0 }; + LogTarget(Info, trimnative) lt; + const bool logging_enabled = lt.is_enabled(); + + // We only collect size change information if we are logging; save the access to procfs otherwise. + if (os::trim_native_heap(logging_enabled ? &sc : nullptr)) { + _num_trims_performed++; + if (logging_enabled) { + double t2 = now(); + if (sc.after != SIZE_MAX) { + const size_t delta = sc.after < sc.before ? (sc.before - sc.after) : (sc.after - sc.before); + const char sign = sc.after < sc.before ? '-' : '+'; + log_info(trimnative)("Periodic Trim (" UINT64_FORMAT "): " PROPERFMT "->" PROPERFMT " (%c" PROPERFMT ") %.3fms", + _num_trims_performed, + PROPERFMTARGS(sc.before), PROPERFMTARGS(sc.after), sign, PROPERFMTARGS(delta), + to_ms(t2 - t1)); + } else { + log_info(trimnative)("Periodic Trim (" UINT64_FORMAT "): complete (no details) %.3fms", + _num_trims_performed, + to_ms(t2 - t1)); + } + } + } + } + +public: + + NativeHeapTrimmerThread() : + _lock(new (std::nothrow) PaddedMonitor(Mutex::leaf, "NativeHeapTrimmer_lock", true, Mutex::_safepoint_check_never)), + _stop(false), + _suspend_count(0), + _num_trims_performed(0) + { + set_name("Native Heap Trimmer"); + if (os::create_thread(this, os::vm_thread)) { + os::start_thread(this); + } + } + + void suspend(const char* reason) { + assert(NativeHeapTrimmer::enabled(), "Only call if enabled"); + uint16_t n = 0; + { + MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag); + n = inc_suspend_count(); + // No need to wakeup trimmer + } + log_debug(trimnative)("Trim suspended for %s (%u suspend requests)", reason, n); + } + + void resume(const char* reason) { + assert(NativeHeapTrimmer::enabled(), "Only call if enabled"); + uint16_t n = 0; + { + MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag); + n = dec_suspend_count(); + if (n == 0) { + ml.notify_all(); // pause end + } + } + if (n == 0) { + log_debug(trimnative)("Trim resumed after %s", reason); + } else { + log_debug(trimnative)("Trim still suspended after %s (%u suspend requests)", reason, n); + } + } + + void stop() { + MonitorLocker ml(_lock, Mutex::_no_safepoint_check_flag); + _stop = true; + ml.notify_all(); + } + + void print_state(outputStream* st) const { + // Don't pull lock during error reporting + Mutex* const lock = VMError::is_error_reported() ? 
nullptr : _lock; + int64_t num_trims = 0; + bool stopped = false; + uint16_t suspenders = 0; + { + MutexLocker ml(lock, Mutex::_no_safepoint_check_flag); + num_trims = _num_trims_performed; + stopped = _stop; + suspenders = _suspend_count; + } + st->print_cr("Trims performed: " UINT64_FORMAT ", current suspend count: %d, stopped: %d", + num_trims, suspenders, stopped); + } + +}; // NativeHeapTrimmer + +static NativeHeapTrimmerThread* g_trimmer_thread = nullptr; + +void NativeHeapTrimmer::initialize() { + assert(g_trimmer_thread == nullptr, "Only once"); + if (TrimNativeHeapInterval > 0) { + if (!os::can_trim_native_heap()) { + FLAG_SET_ERGO(TrimNativeHeapInterval, 0); + log_warning(trimnative)("Native heap trim is not supported on this platform"); + return; + } + g_trimmer_thread = new NativeHeapTrimmerThread(); + log_info(trimnative)("Periodic native trim enabled (interval: %u ms)", TrimNativeHeapInterval); + } +} + +void NativeHeapTrimmer::cleanup() { + if (g_trimmer_thread != nullptr) { + g_trimmer_thread->stop(); + } +} + +void NativeHeapTrimmer::suspend_periodic_trim(const char* reason) { + if (g_trimmer_thread != nullptr) { + g_trimmer_thread->suspend(reason); + } +} + +void NativeHeapTrimmer::resume_periodic_trim(const char* reason) { + if (g_trimmer_thread != nullptr) { + g_trimmer_thread->resume(reason); + } +} + +void NativeHeapTrimmer::print_state(outputStream* st) { + if (g_trimmer_thread != nullptr) { + st->print_cr("Periodic native trim enabled (interval: %u ms)", TrimNativeHeapInterval); + g_trimmer_thread->print_state(st); + } else { + st->print_cr("Periodic native trim disabled"); + } +} diff --git a/src/hotspot/share/runtime/trimNativeHeap.hpp b/src/hotspot/share/runtime/trimNativeHeap.hpp new file mode 100644 index 00000000000..06dc88ebb08 --- /dev/null +++ b/src/hotspot/share/runtime/trimNativeHeap.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2023 Red Hat Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_TRIMNATIVEHEAP_HPP +#define SHARE_RUNTIME_TRIMNATIVEHEAP_HPP + +#include "memory/allStatic.hpp" +#include "runtime/globals.hpp" + +class outputStream; + +class NativeHeapTrimmer : public AllStatic { + + // Pause periodic trim (if enabled). + static void suspend_periodic_trim(const char* reason); + + // Unpause periodic trim (if enabled). 
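// Illustrative sketch, not part of this patch: the usual way to pause trimming
// is the RAII SuspendMark declared further below, rather than calling the
// private suspend/resume pair directly, e.g.
//   {
//     NativeHeapTrimmer::SuspendMark sm("bulk native free");  // label is illustrative
//     // ... free or allocate many C-heap blocks; trimming resumes at scope exit
//   }
// (deflate_idle_monitors() above uses exactly this pattern around monitor deletion.)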
+ static void resume_periodic_trim(const char* reason); + +public: + + static void initialize(); + static void cleanup(); + + static inline bool enabled() { return TrimNativeHeapInterval > 0; } + + static void print_state(outputStream* st); + + // Pause periodic trimming while in scope; when leaving scope, + // resume periodic trimming. + struct SuspendMark { + const char* const _reason; + SuspendMark(const char* reason = "unknown") : _reason(reason) { + if (NativeHeapTrimmer::enabled()) { + suspend_periodic_trim(_reason); + } + } + ~SuspendMark() { + if (NativeHeapTrimmer::enabled()) { + resume_periodic_trim(_reason); + } + } + }; +}; + +#endif // SHARE_RUNTIME_TRIMNATIVEHEAP_HPP diff --git a/src/hotspot/share/runtime/vframe.cpp b/src/hotspot/share/runtime/vframe.cpp index 9087e6a95b8..73c1ac7a0a4 100644 --- a/src/hotspot/share/runtime/vframe.cpp +++ b/src/hotspot/share/runtime/vframe.cpp @@ -49,6 +49,7 @@ #include "runtime/stackFrameStream.inline.hpp" #include "runtime/stubRoutines.hpp" #include "runtime/synchronizer.hpp" +#include "runtime/synchronizer.inline.hpp" #include "runtime/thread.inline.hpp" #include "runtime/vframe.inline.hpp" #include "runtime/vframeArray.hpp" @@ -259,13 +260,16 @@ void javaVFrame::print_lock_info_on(outputStream* st, int frame_count) { markWord mark = monitor->owner()->mark(); // The first stage of async deflation does not affect any field // used by this comparison so the ObjectMonitor* is usable here. - if (mark.has_monitor() && - ( // we have marked ourself as pending on this monitor - mark.monitor() == thread()->current_pending_monitor() || + if (mark.has_monitor()) { + ObjectMonitor* mon = ObjectSynchronizer::read_monitor(current, monitor->owner(), mark); + if (// if the monitor is null we must be in the process of locking + mon == nullptr || + // we have marked ourself as pending on this monitor + mon == thread()->current_pending_monitor() || // we are not the owner of this monitor - !mark.monitor()->is_entered(thread()) - )) { - lock_state = "waiting to lock"; + !mon->is_entered(thread())) { + lock_state = "waiting to lock"; + } } } print_locked_object_class_name(st, Handle(current, monitor->owner()), lock_state); diff --git a/src/hotspot/share/runtime/vmOperation.hpp b/src/hotspot/share/runtime/vmOperation.hpp index d331a0a0539..5328613b448 100644 --- a/src/hotspot/share/runtime/vmOperation.hpp +++ b/src/hotspot/share/runtime/vmOperation.hpp @@ -82,6 +82,7 @@ template(ChangeSingleStep) \ template(HeapWalkOperation) \ template(HeapIterateOperation) \ + template(HeapObjectStatistics) \ template(ReportJavaOutOfMemory) \ template(JFRCheckpoint) \ template(ShenandoahFullGC) \ @@ -91,6 +92,7 @@ template(ShenandoahFinalUpdateRefs) \ template(ShenandoahFinalRoots) \ template(ShenandoahDegeneratedGC) \ + template(RendezvousGCThreads) \ template(Exit) \ template(LinuxDllLoad) \ template(RotateGCLog) \ diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 7bf40685cf6..dc4061e53d7 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -142,6 +142,16 @@ #include "opto/vectornode.hpp" #endif // COMPILER2 +// Used by VMStructs when CompactObjectHeaders are enabled. +// Must match the relevant parts from the real oopDesc. +class fakeOopDesc { +private: + union _metadata { + Klass* _klass; + narrowKlass _compressed_klass; + } _metadata; +}; + // Note: the cross-product of (c1, c2, product, nonproduct, ...), // (nonstatic, static), and (unchecked, checked) has not been taken. 
// Only the macros currently needed have been defined. @@ -734,6 +744,9 @@ typedef HashtableEntry KlassHashtableEntry; nonstatic_field(Thread, _active_handles, JNIHandleBlock*) \ nonstatic_field(Thread, _tlab, ThreadLocalAllocBuffer) \ nonstatic_field(Thread, _allocated_bytes, jlong) \ + nonstatic_field(JavaThread, _lock_stack, LockStack) \ + nonstatic_field(LockStack, _top, uint32_t) \ + nonstatic_field(LockStack, _base[0], oop) \ nonstatic_field(NamedThread, _name, char*) \ nonstatic_field(NamedThread, _processed_thread, Thread*) \ nonstatic_field(JavaThread, _threadObj, OopHandle) \ @@ -876,11 +889,11 @@ typedef HashtableEntry KlassHashtableEntry; /* Monitors */ \ /************/ \ \ - volatile_nonstatic_field(ObjectMonitor, _header, markWord) \ + volatile_nonstatic_field(ObjectMonitor, _metadata, uintptr_t) \ unchecked_nonstatic_field(ObjectMonitor, _object, sizeof(void *)) /* NOTE: no type */ \ unchecked_nonstatic_field(ObjectMonitor, _owner, sizeof(void *)) /* NOTE: no type */ \ volatile_nonstatic_field(ObjectMonitor, _next_om, ObjectMonitor*) \ - volatile_nonstatic_field(BasicLock, _displaced_header, markWord) \ + volatile_nonstatic_field(BasicLock, _metadata, uintptr_t) \ nonstatic_field(ObjectMonitor, _contentions, jint) \ volatile_nonstatic_field(ObjectMonitor, _waiters, jint) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intx) \ @@ -1239,6 +1252,8 @@ typedef HashtableEntry KlassHashtableEntry; declare_type(objArrayOopDesc, arrayOopDesc) \ declare_type(instanceOopDesc, oopDesc) \ \ + declare_toplevel_type(fakeOopDesc) \ + \ /**************************************************/ \ /* MetadataOopDesc hierarchy (NOTE: some missing) */ \ /**************************************************/ \ @@ -1350,6 +1365,7 @@ typedef HashtableEntry KlassHashtableEntry; \ declare_toplevel_type(ThreadsSMRSupport) \ declare_toplevel_type(ThreadsList) \ + declare_toplevel_type(LockStack) \ \ /***************/ \ /* Interpreter */ \ @@ -2471,6 +2487,14 @@ typedef HashtableEntry KlassHashtableEntry; declare_constant(T_NARROWKLASS_size) \ declare_constant(T_VOID_size) \ \ + /**********************************************/ \ + /* LockingMode enum (globalDefinitions.hpp) */ \ + /**********************************************/ \ + \ + declare_constant(LM_MONITOR) \ + declare_constant(LM_LEGACY) \ + declare_constant(LM_LIGHTWEIGHT) \ + \ /*********************/ \ /* Matcher (C2 only) */ \ /*********************/ \ @@ -2633,11 +2657,14 @@ typedef HashtableEntry KlassHashtableEntry; declare_constant(markWord::biased_lock_bits) \ declare_constant(markWord::max_hash_bits) \ declare_constant(markWord::hash_bits) \ + declare_constant(markWord::hash_bits_compact) \ \ declare_constant(markWord::lock_shift) \ declare_constant(markWord::biased_lock_shift) \ declare_constant(markWord::age_shift) \ declare_constant(markWord::hash_shift) \ + declare_constant(markWord::hash_shift_compact) \ + LP64_ONLY(declare_constant(markWord::klass_shift)) \ \ declare_constant(markWord::lock_mask) \ declare_constant(markWord::lock_mask_in_place) \ @@ -2650,6 +2677,8 @@ typedef HashtableEntry KlassHashtableEntry; declare_constant(markWord::epoch_mask_in_place) \ declare_constant(markWord::hash_mask) \ declare_constant(markWord::hash_mask_in_place) \ + declare_constant(markWord::hash_mask_compact) \ + declare_constant(markWord::hash_mask_compact_in_place) \ declare_constant(markWord::biased_lock_alignment) \ \ declare_constant(markWord::locked_value) \ @@ -2665,8 +2694,10 @@ typedef HashtableEntry KlassHashtableEntry; \ /* 
InvocationCounter constants */ \ declare_constant(InvocationCounter::count_increment) \ - declare_constant(InvocationCounter::count_shift) - + declare_constant(InvocationCounter::count_shift) \ + \ + /* ObjectMonitor constants */ \ + declare_constant(ObjectMonitor::ANONYMOUS_OWNER) \ //-------------------------------------------------------------------------------- // @@ -3186,3 +3217,29 @@ void vmStructs_init() { VMStructs::init(); } #endif // ASSERT + +void VMStructs::compact_headers_overrides() { + assert(UseCompactObjectHeaders, "Should have been checked before"); + + // We cannot allow SA and other facilities to poke into VM internal fields + // expecting the class pointers there. This will crash in the best case, + // or yield incorrect execution in the worst case. This code hides the + // risky fields from external code by replacing their original container + // type to a fake one. The fake type should exist for VMStructs verification + // code to work. + + size_t len = localHotSpotVMStructsLength(); + for (size_t off = 0; off < len; off++) { + VMStructEntry* e = &localHotSpotVMStructs[off]; + if (e == nullptr) continue; + if (e->typeName == nullptr) continue; + if (e->fieldName == nullptr) continue; + + if (strcmp(e->typeName, "oopDesc") == 0) { + if ((strcmp(e->fieldName, "_metadata._klass") == 0) || + (strcmp(e->fieldName, "_metadata._compressed_klass") == 0)) { + e->typeName = "fakeOopDesc"; + } + } + } +} diff --git a/src/hotspot/share/runtime/vmStructs.hpp b/src/hotspot/share/runtime/vmStructs.hpp index 7b0425b17c9..83106e43814 100644 --- a/src/hotspot/share/runtime/vmStructs.hpp +++ b/src/hotspot/share/runtime/vmStructs.hpp @@ -146,6 +146,9 @@ class VMStructs { // Returns 1 if found, 0 if not. static int findType(const char* typeName) NOT_VM_STRUCTS_RETURN_(0); #endif // ASSERT + +public: + static void compact_headers_overrides() NOT_VM_STRUCTS_RETURN; }; // This utility macro quotes the passed string diff --git a/src/hotspot/share/services/heapObjectStatistics.cpp b/src/hotspot/share/services/heapObjectStatistics.cpp new file mode 100644 index 00000000000..866c7855b9f --- /dev/null +++ b/src/hotspot/share/services/heapObjectStatistics.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2021, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "logging/logStream.hpp" +#include "logging/logTag.hpp" +#include "memory/allocation.hpp" +#include "memory/iterator.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/vmThread.hpp" +#include "services/heapObjectStatistics.hpp" +#include "utilities/copy.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" + +HeapObjectStatistics* HeapObjectStatistics::_instance = NULL; + +class HeapObjectStatsObjectClosure : public ObjectClosure { +private: + HeapObjectStatistics* const _stats; +public: + HeapObjectStatsObjectClosure() : _stats(HeapObjectStatistics::instance()) {} + void do_object(oop obj) { + _stats->visit_object(obj); + } +}; + +class VM_HeapObjectStatistics : public VM_Operation { +public: + VMOp_Type type() const { return VMOp_HeapObjectStatistics; } + bool doit_prologue() { + Heap_lock->lock(); + return true; + } + + void doit_epilogue() { + Heap_lock->unlock(); + } + + void doit() { + assert(SafepointSynchronize::is_at_safepoint(), "all threads are stopped"); + assert(Heap_lock->is_locked(), "should have the Heap_lock"); + + CollectedHeap* heap = Universe::heap(); + heap->ensure_parsability(false); + + HeapObjectStatistics* stats = HeapObjectStatistics::instance(); + stats->begin_sample(); + + HeapObjectStatsObjectClosure cl; + heap->object_iterate(&cl); + } +}; + +HeapObjectStatisticsTask::HeapObjectStatisticsTask() : PeriodicTask(HeapObjectStatsSamplingInterval) {} + +void HeapObjectStatisticsTask::task() { + VM_HeapObjectStatistics vmop; + VMThread::execute(&vmop); +} + +void HeapObjectStatistics::initialize() { + assert(_instance == NULL, "Don't init twice"); + if (HeapObjectStats) { + _instance = new HeapObjectStatistics(); + _instance->start(); + } +} + +void HeapObjectStatistics::shutdown() { + if (HeapObjectStats) { + assert(_instance != NULL, "Must be initialized"); + LogTarget(Info, heap, stats) lt; + if (lt.is_enabled()) { + LogStream ls(lt); + ResourceMark rm; + _instance->print(&ls); + } + _instance->stop(); + delete _instance; + _instance = NULL; + } +} + +HeapObjectStatistics* HeapObjectStatistics::instance() { + assert(_instance != NULL, "Must be initialized"); + return _instance; +} + +void HeapObjectStatistics::increase_counter(uint64_t& counter, uint64_t val) { + uint64_t oldval = counter; + uint64_t newval = counter + val; + if (newval < oldval) { + log_warning(heap, stats)("HeapObjectStats counter overflow: resulting statistics will be useless"); + } + counter = newval; +} + +HeapObjectStatistics::HeapObjectStatistics() : + _task(), _num_samples(0), _num_objects(0), _num_ihashed(0), _num_locked(0), _lds(0) { } + +void HeapObjectStatistics::start() { + _task.enroll(); +} + +void HeapObjectStatistics::stop() { + _task.disenroll(); +} + +void HeapObjectStatistics::begin_sample() { + _num_samples++; +} + +void HeapObjectStatistics::visit_object(oop obj) { + increase_counter(_num_objects); + markWord mark = obj->mark(); + if (!mark.has_no_hash()) { + increase_counter(_num_ihashed); + if (mark.age() > 0) { + increase_counter(_num_ihashed_moved); + } + } + if (mark.is_locked()) { + increase_counter(_num_locked); + } +#ifdef ASSERT +#ifdef _LP64 + if (!mark.has_displaced_mark_helper()) { + assert(mark.narrow_klass() == CompressedKlassPointers::encode(obj->klass_or_null()), "upper 32 mark bits must be narrow klass: mark: " INTPTR_FORMAT ", 
compressed-klass: " INTPTR_FORMAT, (intptr_t)mark.narrow_klass(), (intptr_t)CompressedKlassPointers::encode(obj->klass_or_null())); + } +#endif +#endif + increase_counter(_lds, obj->size()); +} + +void HeapObjectStatistics::print(outputStream* out) const { + if (!HeapObjectStats) { + return; + } + if (_num_samples == 0 || _num_objects == 0) { + return; + } + + out->print_cr("Number of samples: " UINT64_FORMAT, _num_samples); + out->print_cr("Average number of objects: " UINT64_FORMAT, _num_objects / _num_samples); + out->print_cr("Average object size: " UINT64_FORMAT " bytes, %.1f words", (_lds * HeapWordSize) / _num_objects, (float) _lds / _num_objects); + out->print_cr("Average number of hashed objects: " UINT64_FORMAT " (%.2f%%)", _num_ihashed / _num_samples, (float) (_num_ihashed * 100.0) / _num_objects); + out->print_cr("Average number of moved hashed objects: " UINT64_FORMAT " (%.2f%%)", _num_ihashed_moved / _num_samples, (float) (_num_ihashed_moved * 100.0) / _num_objects); + out->print_cr("Average number of locked objects: " UINT64_FORMAT " (%.2f%%)", _num_locked / _num_samples, (float) (_num_locked * 100) / _num_objects); + out->print_cr("Average LDS: " UINT64_FORMAT " bytes", _lds * HeapWordSize / _num_samples); + out->print_cr("Avg LDS with (assumed) 64bit header: " UINT64_FORMAT " bytes (%.1f%%)", (_lds - _num_objects) * HeapWordSize / _num_samples, ((float) _lds - _num_objects) * 100.0 / _lds); +} diff --git a/src/hotspot/share/services/heapObjectStatistics.hpp b/src/hotspot/share/services/heapObjectStatistics.hpp new file mode 100644 index 00000000000..04fdf824263 --- /dev/null +++ b/src/hotspot/share/services/heapObjectStatistics.hpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2021, Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_SERVICES_HEAPOBJECTSTATISTICS_HPP +#define SHARE_SERVICES_HEAPOBJECTSTATISTICS_HPP + +#include "memory/allocation.hpp" +#include "oops/oopsHierarchy.hpp" +#include "runtime/task.hpp" +#include "runtime/vmOperation.hpp" + +class outputStream; + +class HeapObjectStatisticsTask : public PeriodicTask { +public: + HeapObjectStatisticsTask(); + void task(); +}; + +class HeapObjectStatistics : public CHeapObj { +private: + static HeapObjectStatistics* _instance; + + HeapObjectStatisticsTask _task; + uint64_t _num_samples; + uint64_t _num_objects; + uint64_t _num_ihashed; + uint64_t _num_ihashed_moved; + uint64_t _num_locked; + uint64_t _lds; + + static void increase_counter(uint64_t& counter, uint64_t val = 1); + + void print(outputStream* out) const; + +public: + static void initialize(); + static void shutdown(); + + static HeapObjectStatistics* instance(); + + HeapObjectStatistics(); + void start(); + void stop(); + + void begin_sample(); + void visit_object(oop object); +}; + +#endif // SHARE_SERVICES_HEAPOBJECTSTATISTICS_HPP diff --git a/src/hotspot/share/services/threadIdTable.cpp b/src/hotspot/share/services/threadIdTable.cpp index c108907854e..ffb29a81e70 100644 --- a/src/hotspot/share/services/threadIdTable.cpp +++ b/src/hotspot/share/services/threadIdTable.cpp @@ -192,13 +192,16 @@ class ThreadIdTableLookup : public StackObj { uintx get_hash() const { return _hash; } - bool equals(ThreadIdTableEntry** value, bool* is_dead) { + bool equals(ThreadIdTableEntry** value) { bool equals = primitive_equals(_tid, (*value)->tid()); if (!equals) { return false; } return true; } + bool is_dead(ThreadIdTableEntry** value) { + return false; + } }; class ThreadGet : public StackObj { diff --git a/src/hotspot/share/services/threadService.cpp b/src/hotspot/share/services/threadService.cpp index 7a143dbc73e..bdcacf04626 100644 --- a/src/hotspot/share/services/threadService.cpp +++ b/src/hotspot/share/services/threadService.cpp @@ -430,10 +430,8 @@ DeadlockCycle* ThreadService::find_deadlocks_at_safepoint(ThreadsList * t_list, currentThread = owner->as_Java_thread(); } } else if (waitingToLockMonitor != NULL) { - address currentOwner = (address)waitingToLockMonitor->owner(); - if (currentOwner != NULL) { - currentThread = Threads::owning_thread_from_monitor_owner(t_list, - currentOwner); + if (waitingToLockMonitor->has_owner()) { + currentThread = Threads::owning_thread_from_monitor(t_list, waitingToLockMonitor); if (currentThread == NULL) { // This function is called at a safepoint so the JavaThread // that owns waitingToLockMonitor should be findable, but @@ -1009,8 +1007,7 @@ void DeadlockCycle::print_on_with(ThreadsList * t_list, outputStream* st) const if (!currentThread->current_pending_monitor_is_from_java()) { owner_desc = "\n in JNI, which is held by"; } - currentThread = Threads::owning_thread_from_monitor_owner(t_list, - (address)waitingToLockMonitor->owner()); + currentThread = Threads::owning_thread_from_monitor(t_list, waitingToLockMonitor); if (currentThread == NULL) { // The deadlock was detected at a safepoint so the JavaThread // that owns waitingToLockMonitor should be findable, but diff --git a/src/hotspot/share/utilities/concurrentHashTable.hpp b/src/hotspot/share/utilities/concurrentHashTable.hpp index a94d3ed9ac8..dc71085c245 100644 --- a/src/hotspot/share/utilities/concurrentHashTable.hpp +++ b/src/hotspot/share/utilities/concurrentHashTable.hpp @@ -205,11 +205,6 @@ class ConcurrentHashTable : public CHeapObj { InternalTable* _table; // Active table. 
InternalTable* _new_table; // Table we are resizing to. - // Default sizes - static const size_t DEFAULT_MAX_SIZE_LOG2 = 21; - static const size_t DEFAULT_START_SIZE_LOG2 = 13; - static const size_t DEFAULT_GROW_HINT = 4; // Chain length - const size_t _log2_size_limit; // The biggest size. const size_t _log2_start_size; // Start size. const size_t _grow_hint; // Number of linked items @@ -372,6 +367,11 @@ class ConcurrentHashTable : public CHeapObj { void delete_in_bucket(Thread* thread, Bucket* bucket, LOOKUP_FUNC& lookup_f); public: + // Default sizes + static const size_t DEFAULT_MAX_SIZE_LOG2 = 21; + static const size_t DEFAULT_START_SIZE_LOG2 = 13; + static const size_t DEFAULT_GROW_HINT = 4; // Chain length + ConcurrentHashTable(size_t log2size = DEFAULT_START_SIZE_LOG2, size_t log2size_limit = DEFAULT_MAX_SIZE_LOG2, size_t grow_hint = DEFAULT_GROW_HINT, diff --git a/src/hotspot/share/utilities/concurrentHashTable.inline.hpp b/src/hotspot/share/utilities/concurrentHashTable.inline.hpp index 660cdaa9d1b..8e8d3a7f268 100644 --- a/src/hotspot/share/utilities/concurrentHashTable.inline.hpp +++ b/src/hotspot/share/utilities/concurrentHashTable.inline.hpp @@ -451,9 +451,8 @@ inline bool ConcurrentHashTable:: assert(bucket->is_locked(), "Must be locked."); Node* const volatile * rem_n_prev = bucket->first_ptr(); Node* rem_n = bucket->first(); - bool have_dead = false; while (rem_n != NULL) { - if (lookup_f.equals(rem_n->value(), &have_dead)) { + if (lookup_f.equals(rem_n->value())) { bucket->release_assign_node_ptr(rem_n_prev, rem_n->next()); break; } else { @@ -540,9 +539,7 @@ inline void ConcurrentHashTable:: Node* const volatile * rem_n_prev = bucket->first_ptr(); Node* rem_n = bucket->first(); while (rem_n != NULL) { - bool is_dead = false; - lookup_f.equals(rem_n->value(), &is_dead); - if (is_dead) { + if (lookup_f.is_dead(rem_n->value())) { ndel[dels++] = rem_n; Node* next_node = rem_n->next(); bucket->release_assign_node_ptr(rem_n_prev, next_node); @@ -620,12 +617,11 @@ ConcurrentHashTable:: size_t loop_count = 0; Node* node = bucket->first(); while (node != NULL) { - bool is_dead = false; ++loop_count; - if (lookup_f.equals(node->value(), &is_dead)) { + if (lookup_f.equals(node->value())) { break; } - if (is_dead && !(*have_dead)) { + if (!(*have_dead) && lookup_f.is_dead(node->value())) { *have_dead = true; } node = node->next(); diff --git a/src/hotspot/share/utilities/debug.hpp b/src/hotspot/share/utilities/debug.hpp index 11b79f3cb6c..7e3f71a9dff 100644 --- a/src/hotspot/share/utilities/debug.hpp +++ b/src/hotspot/share/utilities/debug.hpp @@ -174,22 +174,7 @@ void report_untested(const char* file, int line, const char* message); void warning(const char* format, ...) ATTRIBUTE_PRINTF(1, 2); -// Compile-time asserts. Cond must be a compile-time constant expression that -// is convertible to bool. STATIC_ASSERT() can be used anywhere a declaration -// may appear. -// -// Implementation Note: STATIC_ASSERT_FAILURE provides a value member -// rather than type member that could be used directly in the typedef, because -// a type member would require conditional use of "typename", depending on -// whether Cond is dependent or not. The use of a value member leads to the -// use of an array type. 
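// Illustrative sketch, not part of this patch: shape of a ConcurrentHashTable
// lookup functor under the contract used in the hunks above. equals() no
// longer reports liveness through an out parameter; the table calls a separate
// is_dead() when it scrubs dead entries. Key and value types are illustrative
// (a table of uintptr_t values).
class ExampleLookup : public StackObj {
  const uintptr_t _key;
 public:
  explicit ExampleLookup(uintptr_t key) : _key(key) {}
  uintx get_hash() const { return (uintx)(_key ^ (_key >> 17)); } // any stable hash
  bool equals(uintptr_t* value)  { return *value == _key; }
  bool is_dead(uintptr_t* value) { return false; } // entries here are never dead
};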
- -template struct STATIC_ASSERT_FAILURE; -template<> struct STATIC_ASSERT_FAILURE { enum { value = 1 }; }; - -#define STATIC_ASSERT(Cond) \ - typedef char PASTE_TOKENS(STATIC_ASSERT_DUMMY_TYPE_, __LINE__)[ \ - STATIC_ASSERT_FAILURE< (Cond) >::value ] +#define STATIC_ASSERT(Cond) static_assert((Cond), #Cond) // out of memory reporting void report_java_out_of_memory(const char* message); diff --git a/src/hotspot/share/utilities/fastHash.hpp b/src/hotspot/share/utilities/fastHash.hpp new file mode 100644 index 00000000000..86b1dcf2b5e --- /dev/null +++ b/src/hotspot/share/utilities/fastHash.hpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_UTILITIES_FASTHASH_HPP +#define SHARE_UTILITIES_FASTHASH_HPP + +#include "memory/allStatic.hpp" + +class FastHash : public AllStatic { +private: + static void fullmul64(uint64_t& hi, uint64_t& lo, uint64_t op1, uint64_t op2) { +#if defined(__SIZEOF_INT128__) + __uint128_t prod = static_cast<__uint128_t>(op1) * static_cast<__uint128_t>(op2); + hi = static_cast(prod >> 64); + lo = static_cast(prod >> 0); +#else + /* First calculate all of the cross products. */ + uint64_t lo_lo = (op1 & 0xFFFFFFFF) * (op2 & 0xFFFFFFFF); + uint64_t hi_lo = (op1 >> 32) * (op2 & 0xFFFFFFFF); + uint64_t lo_hi = (op1 & 0xFFFFFFFF) * (op2 >> 32); + uint64_t hi_hi = (op1 >> 32) * (op2 >> 32); + + /* Now add the products together. These will never overflow. 
*/ + uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + hi = upper; + lo = (cross << 32) | (lo_lo & 0xFFFFFFFF); +#endif + } + + static void fullmul32(uint32_t& hi, uint32_t& lo, uint32_t op1, uint32_t op2) { + uint64_t x64 = op1, y64 = op2, xy64 = x64 * y64; + hi = (uint32_t)(xy64 >> 32); + lo = (uint32_t)(xy64 >> 0); + } + + static uint64_t ror(uint64_t x, uint64_t distance) { + distance = distance & 0x3F; + return (x >> distance) | (x << (64 - distance)); + } + +public: + static uint64_t get_hash64(uint64_t x, uint64_t y) { + const uint64_t M = 0x8ADAE89C337954D5; + const uint64_t A = 0xAAAAAAAAAAAAAAAA; // REPAA + const uint64_t H0 = (x ^ y), L0 = (x ^ A); + + uint64_t U0, V0; fullmul64(U0, V0, L0, M); + const uint64_t Q0 = (H0 * M); + const uint64_t L1 = (Q0 ^ U0); + + uint64_t U1, V1; fullmul64(U1, V1, L1, M); + const uint64_t P1 = (V0 ^ M); + const uint64_t Q1 = ror(P1, L1); + const uint64_t L2 = (Q1 ^ U1); + return V1 ^ L2; + } + + static uint32_t get_hash32(uint32_t x, uint32_t y) { + const uint32_t M = 0x337954D5; + const uint32_t A = 0xAAAAAAAA; // REPAA + const uint32_t H0 = (x ^ y), L0 = (x ^ A); + + uint32_t U0, V0; fullmul32(U0, V0, L0, M); + const uint32_t Q0 = (H0 * M); + const uint32_t L1 = (Q0 ^ U0); + + uint32_t U1, V1; fullmul32(U1, V1, L1, M); + const uint32_t P1 = (V0 ^ M); + const uint32_t Q1 = ror(P1, L1); + const uint32_t L2 = (Q1 ^ U1); + return V1 ^ L2; + } +}; + +#endif// SHARE_UTILITIES_FASTHASH_HPP diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index 9b70dc25570..09ed3f4b658 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -324,6 +324,9 @@ inline T byte_size_in_proper_unit(T s) { } } +#define PROPERFMT SIZE_FORMAT "%s" +#define PROPERFMTARGS(s) byte_size_in_proper_unit(s), proper_unit_for_byte_size(s) + inline const char* exact_unit_for_byte_size(size_t s) { #ifdef _LP64 if (s >= G && (s % G) == 0) { @@ -957,6 +960,15 @@ enum JavaThreadState { _thread_max_state = 12 // maximum thread state+1 - used for statistics allocation }; +enum LockingMode { + // Use only heavy monitors for locking + LM_MONITOR = 0, + // Legacy stack-locking, with monitors as 2nd tier + LM_LEGACY = 1, + // New lightweight locking, with monitors as 2nd tier + LM_LIGHTWEIGHT = 2 +}; + //---------------------------------------------------------------------------------------------------- // Special constants for debugging @@ -974,7 +986,8 @@ const juint badHeapWordVal = 0xBAADBABE; // value used to zap const juint badMetaWordVal = 0xBAADFADE; // value used to zap metadata heap after GC const int badCodeHeapNewVal= 0xCC; // value used to zap Code heap at allocation const int badCodeHeapFreeVal = 0xDD; // value used to zap Code heap at deallocation - +const intptr_t badDispHeaderDeopt = 0xDE0BD000; // value to fill unused displaced header during deoptimization +const intptr_t badDispHeaderOSR = 0xDEAD05A0; // value to fill unused displaced header during OSR // (These must be implemented as #defines because C++ compilers are // not obligated to inline non-integral constants!) 
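// Illustrative sketch, not part of this patch: PROPERFMT/PROPERFMTARGS are
// meant to be used as a pair, the macro supplying the format and the helper
// the scaled value plus unit string, e.g. (tag and variable are illustrative):
//   log_info(trimnative)("freed " PROPERFMT " of native heap", PROPERFMTARGS(freed_bytes));
// which prints something like "freed 3M of native heap" for a size_t freed_bytes.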
diff --git a/src/hotspot/share/jfr/leakprofiler/chains/bitset.hpp b/src/hotspot/share/utilities/objectBitSet.hpp
similarity index 73%
rename from src/hotspot/share/jfr/leakprofiler/chains/bitset.hpp
rename to src/hotspot/share/utilities/objectBitSet.hpp
index 7a21c07eee9..aa884683ea1 100644
--- a/src/hotspot/share/jfr/leakprofiler/chains/bitset.hpp
+++ b/src/hotspot/share/utilities/objectBitSet.hpp
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef SHARE_JFR_LEAKPROFILER_CHAINS_BITSET_HPP
-#define SHARE_JFR_LEAKPROFILER_CHAINS_BITSET_HPP
+#ifndef SHARE_UTILITIES_OBJECTBITSET_HPP
+#define SHARE_UTILITIES_OBJECTBITSET_HPP
 
 #include "memory/allocation.hpp"
 #include "oops/oop.hpp"
@@ -31,24 +31,30 @@
 #include "utilities/bitMap.hpp"
 #include "utilities/hashtable.hpp"
 
-class JfrVirtualMemory;
 class MemRegion;
 
-class BitSet : public CHeapObj<mtTracing> {
+/*
+ * ObjectBitSet is a sparse bitmap for marking objects in the Java heap.
+ * It holds one bit per ObjAlignmentInBytes-aligned address. Its underlying backing memory is
+ * allocated on-demand only, in fragments covering 64M heap ranges. Fragments are never deleted
+ * during the lifetime of the ObjectBitSet. The underlying memory is allocated from C-Heap.
+ */
+template<MEMFLAGS F>
+class ObjectBitSet : public CHeapObj<F> {
   const static size_t _bitmap_granularity_shift = 26; // 64M
   const static size_t _bitmap_granularity_size = (size_t)1 << _bitmap_granularity_shift;
   const static size_t _bitmap_granularity_mask = _bitmap_granularity_size - 1;
 
   class BitMapFragment;
 
-  class BitMapFragmentTable : public BasicHashtable<mtTracing> {
-    class Entry : public BasicHashtableEntry<mtTracing> {
+  class BitMapFragmentTable : public BasicHashtable<F> {
+    class Entry : public BasicHashtableEntry<F> {
     public:
       uintptr_t _key;
       CHeapBitMap* _value;
 
       Entry* next() {
-        return (Entry*)BasicHashtableEntry<mtTracing>::next();
+        return (Entry*)BasicHashtableEntry<F>::next();
       }
     };
 
@@ -63,11 +69,11 @@ class BitSet : public CHeapObj<mtTracing> {
     }
 
     unsigned hash_to_index(unsigned hash) {
-      return hash & (BasicHashtable<mtTracing>::table_size() - 1);
+      return hash & (BasicHashtable<F>::table_size() - 1);
     }
 
   public:
-    BitMapFragmentTable(int table_size) : BasicHashtable<mtTracing>(table_size, sizeof(Entry)) {}
+    BitMapFragmentTable(int table_size) : BasicHashtable<F>(table_size, sizeof(Entry)) {}
     ~BitMapFragmentTable();
     void add(uintptr_t key, CHeapBitMap* value);
     CHeapBitMap** lookup(uintptr_t key);
@@ -81,8 +87,8 @@ class BitSet : public CHeapObj<mtTracing> {
   uintptr_t _last_fragment_granule;
 
 public:
-  BitSet();
-  ~BitSet();
+  ObjectBitSet();
+  ~ObjectBitSet();
 
   BitMap::idx_t addr_to_bit(uintptr_t addr) const;
 
@@ -99,7 +105,8 @@ class BitSet : public CHeapObj<mtTracing> {
   }
 };
 
-class BitSet::BitMapFragment : public CHeapObj<mtTracing> {
+template<MEMFLAGS F>
+class ObjectBitSet<F>::BitMapFragment : public CHeapObj<F> {
   CHeapBitMap _bits;
   BitMapFragment* _next;
 
@@ -115,4 +122,4 @@ class BitSet::BitMapFragment : public CHeapObj<mtTracing> {
   }
 };
 
-#endif // SHARE_JFR_LEAKPROFILER_CHAINS_BITSET_HPP
+#endif // SHARE_UTILITIES_OBJECTBITSET_HPP
diff --git a/src/hotspot/share/jfr/leakprofiler/chains/bitset.inline.hpp b/src/hotspot/share/utilities/objectBitSet.inline.hpp
similarity index 52%
rename from src/hotspot/share/jfr/leakprofiler/chains/bitset.inline.hpp
rename to src/hotspot/share/utilities/objectBitSet.inline.hpp
index e2c92d71385..e03f243c722 100644
--- a/src/hotspot/share/jfr/leakprofiler/chains/bitset.inline.hpp
+++ b/src/hotspot/share/utilities/objectBitSet.inline.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@@ -22,36 +22,75 @@
  *
  */
 
-#ifndef SHARE_JFR_LEAKPROFILER_CHAINS_BITSET_INLINE_HPP
-#define SHARE_JFR_LEAKPROFILER_CHAINS_BITSET_INLINE_HPP
+#ifndef SHARE_UTILITIES_OBJECTBITSET_INLINE_HPP
+#define SHARE_UTILITIES_OBJECTBITSET_INLINE_HPP
 
-#include "jfr/leakprofiler/chains/bitset.hpp"
+#include "utilities/objectBitSet.hpp"
 
-#include "jfr/recorder/storage/jfrVirtualMemory.hpp"
 #include "memory/memRegion.hpp"
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/hashtable.inline.hpp"
 
-inline BitSet::BitMapFragmentTable::Entry* BitSet::BitMapFragmentTable::bucket(int i) const {
-  return (Entry*)BasicHashtable<mtTracing>::bucket(i);
+template<MEMFLAGS F>
+ObjectBitSet<F>::BitMapFragment::BitMapFragment(uintptr_t granule, BitMapFragment* next) :
+  _bits(_bitmap_granularity_size >> LogMinObjAlignmentInBytes, F, true /* clear */),
+  _next(next) {
 }
 
-inline BitSet::BitMapFragmentTable::Entry* BitSet::BitMapFragmentTable::new_entry(unsigned int hash,
-                                                                                  uintptr_t key,
-                                                                                  CHeapBitMap* value) {
-  Entry* entry = (Entry*)BasicHashtable<mtTracing>::new_entry(hash);
+template<MEMFLAGS F>
+ObjectBitSet<F>::ObjectBitSet() :
+    _bitmap_fragments(32),
+    _fragment_list(NULL),
+    _last_fragment_bits(NULL),
+    _last_fragment_granule(UINTPTR_MAX) {
+}
+
+template<MEMFLAGS F>
+ObjectBitSet<F>::~ObjectBitSet() {
+  BitMapFragment* current = _fragment_list;
+  while (current != NULL) {
+    BitMapFragment* next = current->next();
+    delete current;
+    current = next;
+  }
+}
+
+template<MEMFLAGS F>
+ObjectBitSet<F>::BitMapFragmentTable::~BitMapFragmentTable() {
+  for (int index = 0; index < BasicHashtable<F>::table_size(); index ++) {
+    Entry* e = bucket(index);
+    while (e != nullptr) {
+      Entry* tmp = e;
+      e = e->next();
+      BasicHashtable<F>::free_entry(tmp);
+    }
+  }
+}
+
+template<MEMFLAGS F>
+inline typename ObjectBitSet<F>::BitMapFragmentTable::Entry* ObjectBitSet<F>::BitMapFragmentTable::bucket(int i) const {
+  return (Entry*)BasicHashtable<F>::bucket(i);
+}
+
+template<MEMFLAGS F>
+inline typename ObjectBitSet<F>::BitMapFragmentTable::Entry*
+  ObjectBitSet<F>::BitMapFragmentTable::new_entry(unsigned int hash, uintptr_t key, CHeapBitMap* value) {
+
+  Entry* entry = (Entry*)BasicHashtable<F>::new_entry(hash);
   entry->_key = key;
   entry->_value = value;
   return entry;
 }
 
-inline void BitSet::BitMapFragmentTable::add(uintptr_t key, CHeapBitMap* value) {
+template<MEMFLAGS F>
+inline void ObjectBitSet<F>::BitMapFragmentTable::add(uintptr_t key, CHeapBitMap* value) {
   unsigned hash = hash_segment(key);
   Entry* entry = new_entry(hash, key, value);
-  BasicHashtable<mtTracing>::add_entry(hash_to_index(hash), entry);
+  BasicHashtable<F>::add_entry(hash_to_index(hash), entry);
 }
 
-inline CHeapBitMap** BitSet::BitMapFragmentTable::lookup(uintptr_t key) {
+template<MEMFLAGS F>
+inline CHeapBitMap** ObjectBitSet<F>::BitMapFragmentTable::lookup(uintptr_t key) {
   unsigned hash = hash_segment(key);
   int index = hash_to_index(hash);
   for (Entry* e = bucket(index); e != NULL; e = e->next()) {
@@ -62,11 +101,13 @@ inline CHeapBitMap** BitSet::BitMapFragmentTable::lookup(uintptr_t key) {
   return NULL;
 }
 
-inline BitMap::idx_t BitSet::addr_to_bit(uintptr_t addr) const {
+template<MEMFLAGS F>
+inline BitMap::idx_t ObjectBitSet<F>::addr_to_bit(uintptr_t addr) const {
   return (addr & _bitmap_granularity_mask) >> LogMinObjAlignmentInBytes;
 }
 
-inline CHeapBitMap* BitSet::get_fragment_bits(uintptr_t addr) {
+template<MEMFLAGS F>
+inline CHeapBitMap* ObjectBitSet<F>::get_fragment_bits(uintptr_t addr) {
   uintptr_t granule = addr >> _bitmap_granularity_shift;
   if (granule == _last_fragment_granule) {
     return _last_fragment_bits;
@@
-92,16 +133,18 @@ inline CHeapBitMap* BitSet::get_fragment_bits(uintptr_t addr) { return bits; } -inline void BitSet::mark_obj(uintptr_t addr) { +template +inline void ObjectBitSet::mark_obj(uintptr_t addr) { CHeapBitMap* bits = get_fragment_bits(addr); const BitMap::idx_t bit = addr_to_bit(addr); bits->set_bit(bit); } -inline bool BitSet::is_marked(uintptr_t addr) { +template +inline bool ObjectBitSet::is_marked(uintptr_t addr) { CHeapBitMap* bits = get_fragment_bits(addr); const BitMap::idx_t bit = addr_to_bit(addr); return bits->at(bit); } -#endif // SHARE_JFR_LEAKPROFILER_CHAINS_BITSET_INLINE_HPP +#endif // SHARE_UTILITIES_OBJECTBITSET_INLINE_HPP diff --git a/src/hotspot/share/utilities/sizes.hpp b/src/hotspot/share/utilities/sizes.hpp index 97e28cd6082..8578fbac2d9 100644 --- a/src/hotspot/share/utilities/sizes.hpp +++ b/src/hotspot/share/utilities/sizes.hpp @@ -52,6 +52,7 @@ constexpr int in_bytes(ByteSize x) { return static_cast(x); } constexpr ByteSize operator + (ByteSize x, ByteSize y) { return in_ByteSize(in_bytes(x) + in_bytes(y)); } constexpr ByteSize operator - (ByteSize x, ByteSize y) { return in_ByteSize(in_bytes(x) - in_bytes(y)); } +constexpr ByteSize operator - (ByteSize x, int y) { return in_ByteSize(in_bytes(x) - y ); } constexpr ByteSize operator * (ByteSize x, int y) { return in_ByteSize(in_bytes(x) * y ); } // Use the following #define to get C++ field member offsets diff --git a/src/hotspot/share/utilities/vmError.cpp b/src/hotspot/share/utilities/vmError.cpp index e9e82ac740e..69b5b4dae45 100644 --- a/src/hotspot/share/utilities/vmError.cpp +++ b/src/hotspot/share/utilities/vmError.cpp @@ -50,6 +50,7 @@ #include "runtime/stackFrameStream.inline.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" +#include "runtime/trimNativeHeap.hpp" #include "runtime/vmThread.hpp" #include "runtime/vmOperations.hpp" #include "runtime/vm_version.hpp" @@ -820,7 +821,15 @@ void VMError::report(outputStream* st, bool _verbose) { st->cr(); } - STEP("printing code blob if possible") + STEP("printing lock stack") + + if (_verbose && _thread != nullptr && _thread->is_Java_thread() && LockingMode == LM_LIGHTWEIGHT) { + st->print_cr("Lock stack of current Java thread (top to bottom):"); + _thread->as_Java_thread()->lock_stack().print_on(st); + st->cr(); + } + + STEP("printing code blobs if possible") if (_verbose && _context) { CodeBlob* cb = CodeCache::find_blob(_pc); @@ -1052,6 +1061,14 @@ void VMError::report(outputStream* st, bool _verbose) { STEP("Native Memory Tracking") if (_verbose) { MemTracker::error_report(st); + st->cr(); + } + + STEP("printing periodic trim state") + + if (_verbose) { + NativeHeapTrimmer::print_state(st); + st->cr(); } STEP("printing system") @@ -1237,10 +1254,14 @@ void VMError::print_vm_info(outputStream* st) { // STEP("Native Memory Tracking") MemTracker::error_report(st); + st->cr(); - // STEP("printing system") - + // STEP("printing periodic trim state") + NativeHeapTrimmer::print_state(st); st->cr(); + + + // STEP("printing system") st->print_cr("--------------- S Y S T E M ---------------"); st->cr(); diff --git a/src/java.base/share/native/libjava/Thread.c b/src/java.base/share/native/libjava/Thread.c index 3f176f814e4..e08c6a10358 100644 --- a/src/java.base/share/native/libjava/Thread.c +++ b/src/java.base/share/native/libjava/Thread.c @@ -47,6 +47,7 @@ static JNINativeMethod methods[] = { {"suspend0", "()V", (void *)&JVM_SuspendThread}, {"resume0", "()V", (void *)&JVM_ResumeThread}, {"setPriority0", "(I)V", (void 
*)&JVM_SetThreadPriority}, + {"isAlive", "()Z", (void *)&JVM_IsThreadAlive}, {"yield", "()V", (void *)&JVM_Yield}, {"sleep", "(J)V", (void *)&JVM_Sleep}, {"currentThread", "()" THD, (void *)&JVM_CurrentThread}, diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java index ca5548ea187..7ee8c941010 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/DebuggerBase.java @@ -24,6 +24,9 @@ package sun.jvm.hotspot.debugger; +import sun.jvm.hotspot.oops.Mark; +import sun.jvm.hotspot.runtime.VM; + /**

DebuggerBase is a recommended base class for debugger implementations. It can use a PageCache to cache data from the target process. Note that this class would not be suitable if the @@ -474,7 +477,15 @@ protected long readCompOopAddressValue(long address) protected long readCompKlassAddressValue(long address) throws UnmappedAddressException, UnalignedAddressException { - long value = readCInteger(address, getKlassPtrSize(), true); + long value; + if (VM.getVM().isCompactObjectHeadersEnabled()) { + // On 64 bit systems, the compressed Klass* is currently read from the mark + // word. We need to load the whole mark, and shift the upper parts. + value = readCInteger(address, machDesc.getAddressSize(), true); + value = value >>> Mark.getKlassShift(); + } else { + value = readCInteger(address, getKlassPtrSize(), true); + } if (value != 0) { value = (long)(narrowKlassBase + (long)(value << narrowKlassShift)); } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/memory/Universe.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/memory/Universe.java index 428ae4f789a..a6237b68890 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/memory/Universe.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/memory/Universe.java @@ -114,13 +114,6 @@ public void printOn(PrintStream tty) { heap().printOn(tty); } - // Check whether an element of a typeArrayOop with the given type must be - // aligned 0 mod 8. The typeArrayOop itself must be aligned at least this - // strongly. - public static boolean elementTypeShouldBeAligned(BasicType type) { - return type == BasicType.T_DOUBLE || type == BasicType.T_LONG; - } - // Check whether an object field (static/non-static) of the given type must be // aligned 0 mod 8. public static boolean fieldTypeShouldBeAligned(BasicType type) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Array.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Array.java index 6ba23c9ea40..8046233125b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Array.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Array.java @@ -57,35 +57,55 @@ private static void initialize(TypeDataBase db) throws WrongTypeException { private static long lengthOffsetInBytes=0; private static long typeSize; + // Check whether an element of a typeArrayOop with the given type must be + // aligned 0 mod 8. The typeArrayOop itself must be aligned at least this + // strongly. 
+ public static boolean elementTypeShouldBeAligned(BasicType type) { + if (VM.getVM().isLP64()) { + if (type == BasicType.T_OBJECT || type == BasicType.T_ARRAY) { + return !VM.getVM().isCompressedOopsEnabled(); + } + } + return type == BasicType.T_DOUBLE || type == BasicType.T_LONG; + } + private static long headerSizeInBytes() { if (headerSize != 0) { return headerSize; } - if (VM.getVM().isCompressedKlassPointersEnabled()) { - headerSize = typeSize; + if (VM.getVM().isCompactObjectHeadersEnabled()) { + headerSize = lengthOffsetInBytes() + VM.getVM().getIntSize(); } else { - headerSize = VM.getVM().alignUp(typeSize + VM.getVM().getIntSize(), - VM.getVM().getHeapWordSize()); + if (VM.getVM().isCompressedKlassPointersEnabled()) { + headerSize = typeSize; + } else { + headerSize = VM.getVM().alignUp(typeSize + VM.getVM().getIntSize(), + VM.getVM().getHeapWordSize()); + } } return headerSize; } private static long headerSize(BasicType type) { - if (Universe.elementTypeShouldBeAligned(type)) { - return alignObjectSize(headerSizeInBytes())/VM.getVM().getHeapWordSize(); - } else { - return headerSizeInBytes()/VM.getVM().getHeapWordSize(); - } - } + if (elementTypeShouldBeAligned(type)) { + return alignObjectSize(headerSizeInBytes())/VM.getVM().getHeapWordSize(); + } else { + return headerSizeInBytes()/VM.getVM().getHeapWordSize(); + } + } - private long lengthOffsetInBytes() { + private static long lengthOffsetInBytes() { if (lengthOffsetInBytes != 0) { return lengthOffsetInBytes; } - if (VM.getVM().isCompressedKlassPointersEnabled()) { - lengthOffsetInBytes = typeSize - VM.getVM().getIntSize(); + if (VM.getVM().isCompactObjectHeadersEnabled()) { + lengthOffsetInBytes = Oop.getHeaderSize(); } else { - lengthOffsetInBytes = typeSize; + if (VM.getVM().isCompressedKlassPointersEnabled()) { + lengthOffsetInBytes = typeSize - VM.getVM().getIntSize(); + } else { + lengthOffsetInBytes = typeSize; + } } return lengthOffsetInBytes; } @@ -108,7 +128,17 @@ public long getObjectSize() { } public static long baseOffsetInBytes(BasicType type) { - return headerSize(type) * VM.getVM().getHeapWordSize(); + if (VM.getVM().isCompactObjectHeadersEnabled()) { + long typeSizeInBytes = headerSizeInBytes(); + if (elementTypeShouldBeAligned(type)) { + VM vm = VM.getVM(); + return vm.alignUp(typeSizeInBytes, vm.getVM().getHeapWordSize()); + } else { + return typeSizeInBytes; + } + } else { + return headerSize(type) * VM.getVM().getHeapWordSize(); + } } public boolean isArray() { return true; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Instance.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Instance.java index b837d869ea0..9e414db3743 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Instance.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Instance.java @@ -55,6 +55,9 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc // Returns header size in bytes. 
public static long getHeaderSize() { + if (VM.getVM().isCompactObjectHeadersEnabled()) { + return Oop.getHeaderSize(); + } if (VM.getVM().isCompressedKlassPointersEnabled()) { return typeSize - VM.getVM().getIntSize(); } else { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Mark.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Mark.java index bb3dc4fac90..3cf9b6a7e6f 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Mark.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Mark.java @@ -52,10 +52,15 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc biasedLockBits = db.lookupLongConstant("markWord::biased_lock_bits").longValue(); maxHashBits = db.lookupLongConstant("markWord::max_hash_bits").longValue(); hashBits = db.lookupLongConstant("markWord::hash_bits").longValue(); + hashBitsCompact = db.lookupLongConstant("markWord::hash_bits_compact").longValue(); lockShift = db.lookupLongConstant("markWord::lock_shift").longValue(); biasedLockShift = db.lookupLongConstant("markWord::biased_lock_shift").longValue(); ageShift = db.lookupLongConstant("markWord::age_shift").longValue(); hashShift = db.lookupLongConstant("markWord::hash_shift").longValue(); + hashShiftCompact = db.lookupLongConstant("markWord::hash_shift_compact").longValue(); + if (VM.getVM().isLP64()) { + klassShift = db.lookupLongConstant("markWord::klass_shift").longValue(); + } lockMask = db.lookupLongConstant("markWord::lock_mask").longValue(); lockMaskInPlace = db.lookupLongConstant("markWord::lock_mask_in_place").longValue(); biasedLockMask = db.lookupLongConstant("markWord::biased_lock_mask").longValue(); @@ -65,6 +70,8 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc ageMaskInPlace = db.lookupLongConstant("markWord::age_mask_in_place").longValue(); hashMask = db.lookupLongConstant("markWord::hash_mask").longValue(); hashMaskInPlace = db.lookupLongConstant("markWord::hash_mask_in_place").longValue(); + hashMaskCompact = db.lookupLongConstant("markWord::hash_mask_compact").longValue(); + hashMaskCompactInPlace = db.lookupLongConstant("markWord::hash_mask_compact_in_place").longValue(); biasedLockAlignment = db.lookupLongConstant("markWord::biased_lock_alignment").longValue(); lockedValue = db.lookupLongConstant("markWord::locked_value").longValue(); unlockedValue = db.lookupLongConstant("markWord::unlocked_value").longValue(); @@ -86,11 +93,14 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc private static long biasedLockBits; private static long maxHashBits; private static long hashBits; + private static long hashBitsCompact; private static long lockShift; private static long biasedLockShift; private static long ageShift; private static long hashShift; + private static long hashShiftCompact; + private static long klassShift; private static long lockMask; private static long lockMaskInPlace; @@ -101,6 +111,8 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc private static long ageMaskInPlace; private static long hashMask; private static long hashMaskInPlace; + private static long hashMaskCompact; + private static long hashMaskCompactInPlace; private static long biasedLockAlignment; private static long lockedValue; @@ -121,6 +133,10 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc private static long cmsMask; private static long sizeShift; + public static long getKlassShift() { + return 
klassShift; + } + public Mark(Address addr) { super(addr); } @@ -197,6 +213,16 @@ public ObjectMonitor monitor() { if (Assert.ASSERTS_ENABLED) { Assert.that(hasMonitor(), "check"); } + if (VM.getVM().getCommandLineFlag("UseObjectMonitorTable").getBool()) { + Iterator it = ObjectSynchronizer.objectMonitorIterator(); + while (it != null && it.hasNext()) { + ObjectMonitor mon = (ObjectMonitor)it.next(); + if (getAddress().equals(mon.object())) { + return mon; + } + } + return null; + } // Use xor instead of &~ to provide one extra tag-bit check. Address monAddr = valueAsAddress().xorWithMask(monitorValue); return new ObjectMonitor(monAddr); @@ -215,13 +241,23 @@ public Mark displacedMarkHelper() { // hash operations public long hash() { - return Bits.maskBitsLong(value() >> hashShift, hashMask); + if (VM.getVM().isCompactObjectHeadersEnabled()) { + return Bits.maskBitsLong(value() >> hashShiftCompact, hashMaskCompact); + } else { + return Bits.maskBitsLong(value() >> hashShift, hashMask); + } } public boolean hasNoHash() { return hash() == noHash; } + public Klass getKlass() { + assert(VM.getVM().isCompactObjectHeadersEnabled()); + assert(!hasMonitor()); + return (Klass)Metadata.instantiateWrapperFor(addr.getCompKlassAddressAt(0)); + } + // Debugging public void printOn(PrintStream tty) { if (isLocked()) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Oop.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Oop.java index 2d5162ea474..6ef9e8e053a 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Oop.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/oops/Oop.java @@ -48,9 +48,14 @@ public void update(Observable o, Object data) { private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { Type type = db.lookupType("oopDesc"); mark = new CIntField(type.getCIntegerField("_mark"), 0); - klass = new MetadataField(type.getAddressField("_metadata._klass"), 0); - compressedKlass = new NarrowKlassField(type.getAddressField("_metadata._compressed_klass"), 0); - headerSize = type.getSize(); + if (VM.getVM().isCompactObjectHeadersEnabled()) { + Type markType = db.lookupType("markWord"); + headerSize = markType.getSize(); + } else { + headerSize = type.getSize(); + klass = new MetadataField(type.getAddressField("_metadata._klass"), 0); + compressedKlass = new NarrowKlassField(type.getAddressField("_metadata._compressed_klass"), 0); + } } private OopHandle handle; @@ -77,8 +82,17 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc // Accessors for declared fields public Mark getMark() { return new Mark(getHandle()); } + + private static Klass getKlass(Mark mark) { + assert(VM.getVM().isCompactObjectHeadersEnabled()); + return mark.getKlass(); + } + public Klass getKlass() { - if (VM.getVM().isCompressedKlassPointersEnabled()) { + if (VM.getVM().isCompactObjectHeadersEnabled()) { + assert(VM.getVM().isCompressedKlassPointersEnabled()); + return getKlass(getMark()); + } else if (VM.getVM().isCompressedKlassPointersEnabled()) { return (Klass)compressedKlass.getValue(getHandle()); } else { return (Klass)klass.getValue(getHandle()); @@ -149,10 +163,12 @@ public void iterate(OopVisitor visitor, boolean doVMFields) { void iterateFields(OopVisitor visitor, boolean doVMFields) { if (doVMFields) { visitor.doCInt(mark, true); - if (VM.getVM().isCompressedKlassPointersEnabled()) { - visitor.doMetadata(compressedKlass, true); - } else { - visitor.doMetadata(klass, true); + if 
(!VM.getVM().isCompactObjectHeadersEnabled()) { + if (VM.getVM().isCompressedKlassPointersEnabled()) { + visitor.doMetadata(compressedKlass, true); + } else { + visitor.doMetadata(klass, true); + } } } } @@ -208,7 +224,10 @@ public static Klass getKlassForOopHandle(OopHandle handle) { if (handle == null) { return null; } - if (VM.getVM().isCompressedKlassPointersEnabled()) { + if (VM.getVM().isCompactObjectHeadersEnabled()) { + Mark mark = new Mark(handle); + return getKlass(mark); + } else if (VM.getVM().isCompressedKlassPointersEnabled()) { return (Klass)Metadata.instantiateWrapperFor(handle.getCompKlassAddressAt(compressedKlass.getOffset())); } else { return (Klass)Metadata.instantiateWrapperFor(handle.getAddressAt(klass.getOffset())); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/BasicLock.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/BasicLock.java index 55e5d0e4598..4028bae3f5b 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/BasicLock.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/BasicLock.java @@ -43,7 +43,7 @@ public void update(Observable o, Object data) { private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { Type type = db.lookupType("BasicLock"); - displacedHeaderField = type.getCIntegerField("_displaced_header"); + displacedHeaderField = type.getCIntegerField("_metadata"); } private static CIntegerField displacedHeaderField; diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java index c830ac8d384..b1b23f5e43e 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java @@ -44,6 +44,8 @@ public class JavaThread extends Thread { private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.JavaThread.DEBUG") != null; private static long threadObjFieldOffset; + private static long lockStackTopOffset; + private static long lockStackBaseOffset; private static AddressField anchorField; private static AddressField lastJavaSPField; private static AddressField lastJavaPCField; @@ -52,6 +54,7 @@ public class JavaThread extends Thread { private static AddressField stackBaseField; private static CIntegerField stackSizeField; private static CIntegerField terminatedField; + private static long oopPtrSize; private static JavaThreadPDAccess access; @@ -84,6 +87,7 @@ public void update(Observable o, Object data) { private static synchronized void initialize(TypeDataBase db) { Type type = db.lookupType("JavaThread"); Type anchorType = db.lookupType("JavaFrameAnchor"); + Type typeLockStack = db.lookupType("LockStack"); threadObjFieldOffset = type.getField("_threadObj").getOffset(); @@ -96,6 +100,10 @@ private static synchronized void initialize(TypeDataBase db) { stackSizeField = type.getCIntegerField("_stack_size"); terminatedField = type.getCIntegerField("_terminated"); + lockStackTopOffset = type.getField("_lock_stack").getOffset() + typeLockStack.getField("_top").getOffset(); + lockStackBaseOffset = type.getField("_lock_stack").getOffset() + typeLockStack.getField("_base[0]").getOffset(); + oopPtrSize = VM.getVM().getAddressSize(); + UNINITIALIZED = db.lookupIntConstant("_thread_uninitialized").intValue(); NEW = db.lookupIntConstant("_thread_new").intValue(); NEW_TRANS = db.lookupIntConstant("_thread_new_trans").intValue(); 
@@ -392,6 +400,23 @@ public boolean isInStack(Address a) { return stackBase.greaterThan(a) && sp.lessThanOrEqual(a); } + public boolean isLockOwned(OopHandle obj) { + long current = lockStackBaseOffset; + long end = addr.getJIntAt(lockStackTopOffset); + if (Assert.ASSERTS_ENABLED) { + Assert.that(current <= end, "current stack offset must be above base offset"); + } + + while (current < end) { + Address oop = addr.getAddressAt(current); + if (oop.equals(obj)) { + return true; + } + current += oopPtrSize; + } + return false; + } + public boolean isLockOwned(Address a) { Address stackBase = getStackBase(); Address stackLimit = stackBase.addOffsetTo(-getStackSize()); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java index 38b604d2de8..6f56a22e511 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -77,6 +77,10 @@ private String identifyLockState(MonitorInfo monitor, String waitingState) { if (mark.hasMonitor() && ( // we have marked ourself as pending on this monitor mark.monitor().equals(thread.getCurrentPendingMonitor()) || + // Owned anonymously means that we are not the owner of + // the monitor and must be waiting for the owner to + // exit it. + mark.monitor().isOwnedAnonymous() || // we are not the owner of this monitor !mark.monitor().isEntered(thread) )) { diff --git a/src/hotspot/share/jfr/leakprofiler/chains/bitset.cpp b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/LockingMode.java similarity index 52% rename from src/hotspot/share/jfr/leakprofiler/chains/bitset.cpp rename to src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/LockingMode.java index 9785116f0e9..2046fd075ad 100644 --- a/src/hotspot/share/jfr/leakprofiler/chains/bitset.cpp +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/LockingMode.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2022, Oracle and/or its affiliates. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -21,37 +21,40 @@ * questions. * */ -#include "precompiled.hpp" -#include "jfr/leakprofiler/chains/bitset.inline.hpp" -BitSet::BitMapFragment::BitMapFragment(uintptr_t granule, BitMapFragment* next) : - _bits(_bitmap_granularity_size >> LogMinObjAlignmentInBytes, mtTracing, true /* clear */), - _next(next) { -} +package sun.jvm.hotspot.runtime; + +import sun.jvm.hotspot.types.TypeDataBase; + -BitSet::BitMapFragmentTable::~BitMapFragmentTable() { - for (int index = 0; index < table_size(); index ++) { - Entry* e = bucket(index); - while (e != nullptr) { - Entry* tmp = e; - e = e->next(); - free_entry(tmp); - } +/** Encapsulates the LockingMode enum in globalDefinitions.hpp in + the VM. 
*/ + +public class LockingMode { + private static int monitor; + private static int legacy; + private static int lightweight; + + static { + VM.registerVMInitializedObserver( + (o, d) -> initialize(VM.getVM().getTypeDataBase())); } -} -BitSet::BitSet() : - _bitmap_fragments(32), - _fragment_list(NULL), - _last_fragment_bits(NULL), - _last_fragment_granule(UINTPTR_MAX) { -} + private static synchronized void initialize(TypeDataBase db) { + monitor = db.lookupIntConstant("LM_MONITOR").intValue(); + legacy = db.lookupIntConstant("LM_LEGACY").intValue(); + lightweight = db.lookupIntConstant("LM_LIGHTWEIGHT").intValue(); + } + + public static int getMonitor() { + return monitor; + } + + public static int getLegacy() { + return legacy; + } -BitSet::~BitSet() { - BitMapFragment* current = _fragment_list; - while (current != NULL) { - BitMapFragment* next = current->next(); - delete current; - current = next; + public static int getLightweight() { + return lightweight; } } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java index 43ddf257d04..4300bf361dc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java @@ -44,8 +44,8 @@ public void update(Observable o, Object data) { private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { heap = VM.getVM().getObjectHeap(); Type type = db.lookupType("ObjectMonitor"); - sun.jvm.hotspot.types.Field f = type.getField("_header"); - headerFieldOffset = f.getOffset(); + sun.jvm.hotspot.types.Field f = type.getField("_metadata"); + metadataFieldOffset = f.getOffset(); f = type.getField("_object"); objectFieldOffset = f.getOffset(); f = type.getField("_owner"); @@ -55,6 +55,8 @@ private static synchronized void initialize(TypeDataBase db) throws WrongTypeExc contentionsField = type.getJIntField("_contentions"); waitersField = type.getJIntField("_waiters"); recursionsField = type.getCIntegerField("_recursions"); + + ANONYMOUS_OWNER = db.lookupLongConstant("ObjectMonitor::ANONYMOUS_OWNER").longValue(); } public ObjectMonitor(Address addr) { @@ -62,7 +64,7 @@ public ObjectMonitor(Address addr) { } public Mark header() { - return new Mark(addr.addOffsetTo(headerFieldOffset)); + return new Mark(addr.addOffsetTo(metadataFieldOffset)); } // FIXME @@ -79,6 +81,10 @@ public boolean isEntered(sun.jvm.hotspot.runtime.Thread current) { return false; } + public boolean isOwnedAnonymous() { + return addr.getAddressAt(ownerFieldOffset).asLongValue() == ANONYMOUS_OWNER; + } + public Address owner() { return addr.getAddressAt(ownerFieldOffset); } // FIXME // void set_owner(void* owner); @@ -107,12 +113,14 @@ public int contentions() { // vmStructs.cpp because they aren't strongly typed in the VM, or // would confuse the SA's type system. 
private static ObjectHeap heap; - private static long headerFieldOffset; + private static long metadataFieldOffset; private static long objectFieldOffset; private static long ownerFieldOffset; private static long nextOMFieldOffset; private static JIntField contentionsField; private static JIntField waitersField; private static CIntegerField recursionsField; + private static long ANONYMOUS_OWNER; + // FIXME: expose platform-dependent stuff } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java index d0fcb48c013..3bbe06709dc 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectSynchronizer.java @@ -43,8 +43,10 @@ public void update(Observable o, Object data) { } private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { - Type type = db.lookupType("ObjectSynchronizer"); - inUseList = type.getAddressField("_in_use_list").getValue(); + Type objectSynchronizerType = db.lookupType("ObjectSynchronizer"); + Type monitorListType = db.lookupType("MonitorList"); + Address monitorListAddr = objectSynchronizerType.getField("_in_use_list").getStaticFieldAddress(); + inUseListHead = monitorListType.getAddressField("_head").getAddress(monitorListAddr); } public long identityHashValueFor(Oop obj) { @@ -53,6 +55,9 @@ public long identityHashValueFor(Oop obj) { // FIXME: can not generate marks in debugging system return mark.hash(); } else if (mark.hasMonitor()) { + if (VM.getVM().getCommandLineFlag("UseObjectMonitorTable").getBool()) { + return mark.hash(); + } ObjectMonitor monitor = mark.monitor(); Mark temp = monitor.header(); return temp.hash(); @@ -70,11 +75,7 @@ public long identityHashValueFor(Oop obj) { } public static Iterator objectMonitorIterator() { - if (inUseList != null) { - return new ObjectMonitorIterator(); - } else { - return null; - } + return new ObjectMonitorIterator(); } private static class ObjectMonitorIterator implements Iterator { @@ -83,21 +84,23 @@ private static class ObjectMonitorIterator implements Iterator { // are not returned by this Iterator. ObjectMonitorIterator() { - mon = new ObjectMonitor(inUseList); + mon = inUseListHead == null ? null : new ObjectMonitor(inUseListHead); } public boolean hasNext() { - return (mon.nextOM() != null); + return (mon != null); } public Object next() { - // advance to next entry - Address monAddr = mon.nextOM(); - if (monAddr == null) { + ObjectMonitor ret = mon; + if (ret == null) { throw new NoSuchElementException(); } - mon = new ObjectMonitor(monAddr); - return mon; + // advance to next entry + Address nextMon = mon.nextOM(); + mon = nextMon == null ? 
null : new ObjectMonitor(nextMon); + + return ret; } public void remove() { @@ -107,6 +110,5 @@ public void remove() { private ObjectMonitor mon; } - private static Address inUseList; - + private static Address inUseListHead; } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index 4a2fa691d3f..7b3b76d97ca 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -208,6 +208,7 @@ public void oopsDo(AddressVisitor oopVisitor) { // refer to Threads::owning_thread_from_monitor_owner public JavaThread owningThreadFromMonitor(Address o) { + assert(VM.getVM().getCommandLineFlag("LockingMode").getInt() != LockingMode.getLightweight()); if (o == null) return null; for (int i = 0; i < getNumberOfThreads(); i++) { JavaThread thread = getJavaThreadAt(i); @@ -225,7 +226,28 @@ public JavaThread owningThreadFromMonitor(Address o) { } public JavaThread owningThreadFromMonitor(ObjectMonitor monitor) { - return owningThreadFromMonitor(monitor.owner()); + if (VM.getVM().getCommandLineFlag("LockingMode").getInt() == LockingMode.getLightweight()) { + if (monitor.isOwnedAnonymous()) { + OopHandle object = monitor.object(); + for (int i = 0; i < getNumberOfThreads(); i++) { + JavaThread thread = getJavaThreadAt(i); + if (thread.isLockOwned(object)) { + return thread; + } + } + // We should have found the owner, however, as the VM could be in any state, including the middle + // of performing GC, it is not always possible to do so. Just return null if we can't locate it. + System.out.println("Warning: We failed to find a thread that owns an anonymous lock. This is likely"); + System.out.println("due to the JVM currently running a GC. Locking information may not be accurate."); + return null; + } + // Owner can only be threads at this point. + Address o = monitor.owner(); + if (o == null) return null; + return new JavaThread(o); + } else { + return owningThreadFromMonitor(monitor.owner()); + } } // refer to Threads::get_pending_threads diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java index f6b812d6271..e2ff196c5c9 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java @@ -147,6 +147,7 @@ public class VM { private Boolean sharingEnabled; private Boolean compressedOopsEnabled; private Boolean compressedKlassPointersEnabled; + private Boolean compactObjectHeadersEnabled; // command line flags supplied to VM - see struct JVMFlag in jvmFlag.hpp public static final class Flag { @@ -962,6 +963,15 @@ public boolean isCompressedKlassPointersEnabled() { return compressedKlassPointersEnabled.booleanValue(); } + public boolean isCompactObjectHeadersEnabled() { + if (compactObjectHeadersEnabled == null) { + Flag flag = getCommandLineFlag("UseCompactObjectHeaders"); + compactObjectHeadersEnabled = (flag == null) ? Boolean.FALSE: + (flag.getBool()? 
Boolean.TRUE: Boolean.FALSE); + } + return compactObjectHeadersEnabled.booleanValue(); + } + public int getObjectAlignmentInBytes() { if (objectAlignmentInBytes == 0) { Flag flag = getCommandLineFlag("ObjectAlignmentInBytes"); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/MonitorCacheDumpPanel.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/MonitorCacheDumpPanel.java index 9f6ec170d86..3bd0a5f74e8 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/MonitorCacheDumpPanel.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/ui/MonitorCacheDumpPanel.java @@ -90,11 +90,6 @@ private static void dumpMonitor(PrintStream tty, ObjectMonitor mon, boolean raw) private void dumpOn(PrintStream tty) { Iterator i = ObjectSynchronizer.objectMonitorIterator(); - if (i == null) { - tty.println("This version of HotSpot VM doesn't support monitor cache dump."); - tty.println("You need 1.4.0_04, 1.4.1_01 or later versions"); - return; - } ObjectMonitor mon; while (i.hasNext()) { mon = (ObjectMonitor)i.next(); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/RobustOopDeterminator.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/RobustOopDeterminator.java index 1b7d4c74c27..1b6aaee0767 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/RobustOopDeterminator.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/RobustOopDeterminator.java @@ -29,6 +29,7 @@ import sun.jvm.hotspot.memory.*; import sun.jvm.hotspot.oops.Metadata; import sun.jvm.hotspot.oops.Klass; +import sun.jvm.hotspot.oops.Oop; import sun.jvm.hotspot.runtime.*; import sun.jvm.hotspot.types.*; import sun.jvm.hotspot.utilities.Observable; @@ -42,25 +43,6 @@ states than the ObjectHeap code. */ public class RobustOopDeterminator { - private static AddressField klassField; - - static { - VM.registerVMInitializedObserver(new Observer() { - public void update(Observable o, Object data) { - initialize(VM.getVM().getTypeDataBase()); - } - }); - } - - private static void initialize(TypeDataBase db) { - Type type = db.lookupType("oopDesc"); - - if (VM.getVM().isCompressedKlassPointersEnabled()) { - klassField = type.getAddressField("_metadata._compressed_klass"); - } else { - klassField = type.getAddressField("_metadata._klass"); - } - } public static boolean oopLooksValid(OopHandle oop) { if (oop == null) { @@ -71,11 +53,7 @@ public static boolean oopLooksValid(OopHandle oop) { } try { // Try to instantiate the Klass - if (VM.getVM().isCompressedKlassPointersEnabled()) { - Metadata.instantiateWrapperFor(oop.getCompKlassAddressAt(klassField.getOffset())); - } else { - Metadata.instantiateWrapperFor(klassField.getValue(oop)); - } + Oop.getKlassForOopHandle(oop); return true; } catch (AddressException e) { return false; diff --git a/test/hotspot/gtest/classfile/test_symbolTable.cpp b/test/hotspot/gtest/classfile/test_symbolTable.cpp index 0f598447441..85be30ff479 100644 --- a/test/hotspot/gtest/classfile/test_symbolTable.cpp +++ b/test/hotspot/gtest/classfile/test_symbolTable.cpp @@ -152,3 +152,17 @@ TEST_VM_FATAL_ERROR_MSG(SymbolTable, test_symbol_underflow, ".*refcount has gone my_symbol->decrement_refcount(); my_symbol->increment_refcount(); // Should crash even in PRODUCT mode } + +TEST_VM(SymbolTable, test_cleanup_leak) { + // Check that dead entry cleanup doesn't increment refcount of live entry in same bucket. + + // Create symbol and release ref, marking it available for cleanup. 
+ Symbol* entry1 = SymbolTable::new_symbol("hash_collision_123"); + entry1->decrement_refcount(); + + // Create a new symbol in the same bucket, which will notice the dead entry and trigger cleanup. + // Note: relies on SymbolTable's use of String::hashCode which collides for these two values. + Symbol* entry2 = SymbolTable::new_symbol("hash_collision_397476851"); + + ASSERT_EQ(entry2->refcount(), 1) << "Symbol refcount just created is 1"; +} diff --git a/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp b/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp index c5fd1680f5a..ca9e9d9f1c7 100644 --- a/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp +++ b/test/hotspot/gtest/gc/shared/test_preservedMarks.cpp @@ -22,6 +22,7 @@ */ #include "precompiled.hpp" +#include "gc/shared/gc_globals.hpp" #include "gc/shared/preservedMarks.inline.hpp" #include "oops/oop.inline.hpp" #include "unittest.hpp" @@ -66,6 +67,8 @@ TEST_VM(PreservedMarks, iterate_and_restore) { FakeOop o3; FakeOop o4; + FlagSetting fs(UseAltGCForwarding, false); + // Make sure initial marks are correct. ASSERT_MARK_WORD_EQ(o1.mark(), FakeOop::originalMark()); ASSERT_MARK_WORD_EQ(o2.mark(), FakeOop::originalMark()); diff --git a/test/hotspot/gtest/gc/shared/test_slidingForwarding.cpp b/test/hotspot/gtest/gc/shared/test_slidingForwarding.cpp new file mode 100644 index 00000000000..418b27bf499 --- /dev/null +++ b/test/hotspot/gtest/gc/shared/test_slidingForwarding.cpp @@ -0,0 +1,124 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "gc/shared/gc_globals.hpp" +#include "gc/shared/slidingForwarding.inline.hpp" +#include "oops/markWord.hpp" +#include "oops/oop.inline.hpp" +#include "utilities/align.hpp" +#include "unittest.hpp" + +#ifdef _LP64 +#ifndef PRODUCT + +static uintptr_t make_mark(uintptr_t target_region, uintptr_t offset) { + return (target_region) << 3 | (offset << 4) | 3 /* forwarded */; +} + +static uintptr_t make_fallback() { + return ((uintptr_t(1) << 2) /* fallback */ | 3 /* forwarded */); +} + +// Test simple forwarding within the same region. 
+TEST_VM(SlidingForwarding, simple) { + FlagSetting fs(UseAltGCForwarding, true); + HeapWord fakeheap[32] = { nullptr }; + HeapWord* heap = align_up(fakeheap, 8 * sizeof(HeapWord)); + oop obj1 = cast_to_oop(&heap[2]); + oop obj2 = cast_to_oop(&heap[0]); + SlidingForwarding::initialize(MemRegion(&heap[0], &heap[16]), 8); + obj1->set_mark(markWord::prototype()); + SlidingForwarding::begin(); + + SlidingForwarding::forward_to(obj1, obj2); + ASSERT_EQ(obj1->mark().value(), make_mark(0 /* target_region */, 0 /* offset */)); + ASSERT_EQ(SlidingForwarding::forwardee(obj1), obj2); + + SlidingForwarding::end(); +} + +// Test forwardings crossing 2 regions. +TEST_VM(SlidingForwarding, tworegions) { + FlagSetting fs(UseAltGCForwarding, true); + HeapWord fakeheap[32] = { nullptr }; + HeapWord* heap = align_up(fakeheap, 8 * sizeof(HeapWord)); + oop obj1 = cast_to_oop(&heap[14]); + oop obj2 = cast_to_oop(&heap[2]); + oop obj3 = cast_to_oop(&heap[10]); + SlidingForwarding::initialize(MemRegion(&heap[0], &heap[16]), 8); + obj1->set_mark(markWord::prototype()); + SlidingForwarding::begin(); + + SlidingForwarding::forward_to(obj1, obj2); + ASSERT_EQ(obj1->mark().value(), make_mark(0 /* target_region */, 2 /* offset */)); + ASSERT_EQ(SlidingForwarding::forwardee(obj1), obj2); + + SlidingForwarding::forward_to(obj1, obj3); + ASSERT_EQ(obj1->mark().value(), make_mark(1 /* target_region */, 2 /* offset */)); + ASSERT_EQ(SlidingForwarding::forwardee(obj1), obj3); + + SlidingForwarding::end(); +} + +// Test fallback forwardings crossing 4 regions. +TEST_VM(SlidingForwarding, fallback) { + FlagSetting fs(UseAltGCForwarding, true); + HeapWord fakeheap[32] = { nullptr }; + HeapWord* heap = align_up(fakeheap, 8 * sizeof(HeapWord)); + oop s_obj1 = cast_to_oop(&heap[12]); + oop s_obj2 = cast_to_oop(&heap[13]); + oop s_obj3 = cast_to_oop(&heap[14]); + oop s_obj4 = cast_to_oop(&heap[15]); + oop t_obj1 = cast_to_oop(&heap[2]); + oop t_obj2 = cast_to_oop(&heap[4]); + oop t_obj3 = cast_to_oop(&heap[10]); + oop t_obj4 = cast_to_oop(&heap[12]); + SlidingForwarding::initialize(MemRegion(&heap[0], &heap[16]), 4); + s_obj1->set_mark(markWord::prototype()); + s_obj2->set_mark(markWord::prototype()); + s_obj3->set_mark(markWord::prototype()); + s_obj4->set_mark(markWord::prototype()); + SlidingForwarding::begin(); + + SlidingForwarding::forward_to(s_obj1, t_obj1); + ASSERT_EQ(s_obj1->mark().value(), make_mark(0 /* target_region */, 2 /* offset */)); + ASSERT_EQ(SlidingForwarding::forwardee(s_obj1), t_obj1); + + SlidingForwarding::forward_to(s_obj2, t_obj2); + ASSERT_EQ(s_obj2->mark().value(), make_mark(1 /* target_region */, 0 /* offset */)); + ASSERT_EQ(SlidingForwarding::forwardee(s_obj2), t_obj2); + + SlidingForwarding::forward_to(s_obj3, t_obj3); + ASSERT_EQ(s_obj3->mark().value(), make_fallback()); + ASSERT_EQ(SlidingForwarding::forwardee(s_obj3), t_obj3); + + SlidingForwarding::forward_to(s_obj4, t_obj4); + ASSERT_EQ(s_obj4->mark().value(), make_fallback()); + ASSERT_EQ(SlidingForwarding::forwardee(s_obj4), t_obj4); + + SlidingForwarding::end(); +} + +#endif // PRODUCT +#endif // _LP64 diff --git a/test/hotspot/gtest/oops/test_arrayOop.cpp b/test/hotspot/gtest/oops/test_arrayOop.cpp index 84063813be3..030a47568e5 100644 --- a/test/hotspot/gtest/oops/test_arrayOop.cpp +++ b/test/hotspot/gtest/oops/test_arrayOop.cpp @@ -87,3 +87,58 @@ TEST_VM(arrayOopDesc, narrowOop) { ASSERT_PRED1(check_max_length_overflow, T_NARROWOOP); } // T_VOID and T_ADDRESS are not supported by max_array_length() + +TEST_VM(arrayOopDesc, base_offset) { 
+#ifdef _LP64 + if (UseCompactObjectHeaders) { + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BOOLEAN), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BYTE), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_SHORT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_CHAR), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_INT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_LONG), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_OBJECT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_ARRAY), 12); + } else if (UseCompressedClassPointers) { + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BOOLEAN), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BYTE), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_SHORT), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_CHAR), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_INT), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_LONG), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_OBJECT), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_ARRAY), 16); + } else { + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BOOLEAN), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BYTE), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_SHORT), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_CHAR), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_INT), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_LONG), 24); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 24); + if (UseCompressedOops) { + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_OBJECT), 20); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_ARRAY), 20); + } else { + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_OBJECT), 24); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_ARRAY), 24); + } + } +#else + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BOOLEAN), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_BYTE), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_SHORT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_CHAR), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_INT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_LONG), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 16); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_OBJECT), 12); + EXPECT_EQ(arrayOopDesc::base_offset_in_bytes(T_ARRAY), 12); +#endif +} diff --git a/test/hotspot/gtest/oops/test_objArrayOop.cpp b/test/hotspot/gtest/oops/test_objArrayOop.cpp new file mode 100644 index 00000000000..09f37bf5aae --- /dev/null +++ b/test/hotspot/gtest/oops/test_objArrayOop.cpp @@ -0,0 +1,69 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "oops/objArrayOop.hpp" +#include "unittest.hpp" +#include "utilities/globalDefinitions.hpp" + +TEST_VM(objArrayOop, osize) { + static const struct { + int objal; bool ccp; bool coops; bool coh; int result; + } x[] = { +// ObjAligInB, UseCCP, UseCoops, UseCOH, object size in heap words +#ifdef _LP64 + { 8, false, false, false, 4 }, // 20 byte header, 8 byte oops + { 8, false, true, false, 3 }, // 20 byte header, 4 byte oops + { 8, true, false, false, 3 }, // 16 byte header, 8 byte oops + { 8, true, true, false, 3 }, // 16 byte header, 4 byte oops + { 16, false, false, false, 4 }, // 20 byte header, 8 byte oops, 16-byte align + { 16, false, true, false, 4 }, // 20 byte header, 4 byte oops, 16-byte align + { 16, true, false, false, 4 }, // 16 byte header, 8 byte oops, 16-byte align + { 16, true, true, false, 4 }, // 16 byte header, 4 byte oops, 16-byte align + { 256, false, false, false, 32 }, // 20 byte header, 8 byte oops, 256-byte align + { 256, false, true, false, 32 }, // 20 byte header, 4 byte oops, 256-byte align + { 256, true, false, false, 32 }, // 16 byte header, 8 byte oops, 256-byte align + { 256, true, true, false, 32 }, // 16 byte header, 4 byte oops, 256-byte align + { 8, false, false, true, 3 }, // 16 byte header, 8 byte oops + { 8, false, true, true, 2 }, // 12 byte header, 4 byte oops + { 8, true, false, true, 3 }, // 16 byte header, 8 byte oops + { 8, true, true, true, 2 }, // 12 byte header, 4 byte oops + { 16, false, false, true, 4 }, // 16 byte header, 8 byte oops, 16-byte align + { 16, false, true, true, 2 }, // 12 byte header, 4 byte oops, 16-byte align + { 16, true, false, true, 4 }, // 16 byte header, 8 byte oops, 16-byte align + { 16, true, true, true, 2 }, // 12 byte header, 4 byte oops, 16-byte align + { 256, false, false, true, 32 }, // 16 byte header, 8 byte oops, 256-byte align + { 256, false, true, true, 32 }, // 12 byte header, 4 byte oops, 256-byte align + { 256, true, false, true, 32 }, // 16 byte header, 8 byte oops, 256-byte align + { 256, true, true, true, 32 }, // 12 byte header, 4 byte oops, 256-byte align +#else + { 8, false, false, false, 4 }, // 12 byte header, 4 byte oops, wordsize 4 +#endif + { -1, false, false, false, -1 } + }; + for (int i = 0; x[i].result != -1; i++) { + if (x[i].objal == (int)ObjectAlignmentInBytes && x[i].ccp == UseCompressedClassPointers && x[i].coops == UseCompressedOops && x[i].coh == UseCompactObjectHeaders) { + EXPECT_EQ(objArrayOopDesc::object_size(1), x[i].result); + } + } +} diff --git a/test/hotspot/gtest/oops/test_typeArrayOop.cpp b/test/hotspot/gtest/oops/test_typeArrayOop.cpp index 2d9c8cfd990..dbdce5e3708 100644 --- a/test/hotspot/gtest/oops/test_typeArrayOop.cpp +++ b/test/hotspot/gtest/oops/test_typeArrayOop.cpp @@ -36,7 +36,14 @@ TEST_VM(typeArrayOopDesc, bool_at_put) { char* addr = align_up(mem, 16); typeArrayOop o = (typeArrayOop) cast_to_oop(addr); - o->set_klass(Universe::boolArrayKlassObj()); +#ifdef _LP64 + if (UseCompactObjectHeaders) { 
+ o->set_mark(Universe::boolArrayKlassObj()->prototype_header()); + } else +#endif + { + o->set_klass(Universe::boolArrayKlassObj()); + } o->set_length(10); diff --git a/test/hotspot/gtest/runtime/test_lockStack.cpp b/test/hotspot/gtest/runtime/test_lockStack.cpp new file mode 100644 index 00000000000..157f291a4b7 --- /dev/null +++ b/test/hotspot/gtest/runtime/test_lockStack.cpp @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "memory/universe.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/lockStack.inline.hpp" +#include "runtime/os.hpp" +#include "unittest.hpp" +#include "utilities/globalDefinitions.hpp" + +class LockStackTest : public ::testing::Test { +public: + static void push_raw(LockStack& ls, oop obj) { + ls._base[ls.to_index(ls._top)] = obj; + ls._top += oopSize; + } + + static void pop_raw(LockStack& ls) { + ls._top -= oopSize; +#ifdef ASSERT + ls._base[ls.to_index(ls._top)] = nullptr; +#endif + } + + static oop at(LockStack& ls, int index) { + return ls._base[index]; + } + + static size_t size(LockStack& ls) { + return ls.to_index(ls._top); + } +}; + +#define recursive_enter(ls, obj) \ + do { \ + bool ret = ls.try_recursive_enter(obj); \ + EXPECT_TRUE(ret); \ + } while (false) + +#define recursive_exit(ls, obj) \ + do { \ + bool ret = ls.try_recursive_exit(obj); \ + EXPECT_TRUE(ret); \ + } while (false) + +TEST_VM_F(LockStackTest, is_recursive) { + if (LockingMode != LM_LIGHTWEIGHT || !VM_Version::supports_recursive_lightweight_locking()) { + return; + } + + JavaThread* THREAD = JavaThread::current(); + // the thread should be in vm to use locks + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + + LockStack& ls = THREAD->lock_stack(); + + EXPECT_TRUE(ls.is_empty()); + + oop obj0 = Universe::int_mirror(); + oop obj1 = Universe::float_mirror(); + + push_raw(ls, obj0); + + // 0 + EXPECT_FALSE(ls.is_recursive(obj0)); + + push_raw(ls, obj1); + + // 0, 1 + EXPECT_FALSE(ls.is_recursive(obj0)); + EXPECT_FALSE(ls.is_recursive(obj1)); + + push_raw(ls, obj1); + + // 0, 1, 1 + EXPECT_FALSE(ls.is_recursive(obj0)); + EXPECT_TRUE(ls.is_recursive(obj1)); + + pop_raw(ls); + pop_raw(ls); + push_raw(ls, obj0); + + // 0, 0 + EXPECT_TRUE(ls.is_recursive(obj0)); + + push_raw(ls, obj0); + + // 0, 0, 0 + EXPECT_TRUE(ls.is_recursive(obj0)); + + pop_raw(ls); + push_raw(ls, obj1); + + // 0, 0, 1 + EXPECT_TRUE(ls.is_recursive(obj0)); + EXPECT_FALSE(ls.is_recursive(obj1)); + + push_raw(ls, obj1); + + // 0, 0, 1, 
1 + EXPECT_TRUE(ls.is_recursive(obj0)); + EXPECT_TRUE(ls.is_recursive(obj1)); + + // Clear stack + pop_raw(ls); + pop_raw(ls); + pop_raw(ls); + pop_raw(ls); + + EXPECT_TRUE(ls.is_empty()); +} + +TEST_VM_F(LockStackTest, try_recursive_enter) { + if (LockingMode != LM_LIGHTWEIGHT || !VM_Version::supports_recursive_lightweight_locking()) { + return; + } + + JavaThread* THREAD = JavaThread::current(); + // the thread should be in vm to use locks + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + + LockStack& ls = THREAD->lock_stack(); + + EXPECT_TRUE(ls.is_empty()); + + oop obj0 = Universe::int_mirror(); + oop obj1 = Universe::float_mirror(); + + ls.push(obj0); + + // 0 + EXPECT_FALSE(ls.is_recursive(obj0)); + + ls.push(obj1); + + // 0, 1 + EXPECT_FALSE(ls.is_recursive(obj0)); + EXPECT_FALSE(ls.is_recursive(obj1)); + + recursive_enter(ls, obj1); + + // 0, 1, 1 + EXPECT_FALSE(ls.is_recursive(obj0)); + EXPECT_TRUE(ls.is_recursive(obj1)); + + recursive_exit(ls, obj1); + pop_raw(ls); + recursive_enter(ls, obj0); + + // 0, 0 + EXPECT_TRUE(ls.is_recursive(obj0)); + + recursive_enter(ls, obj0); + + // 0, 0, 0 + EXPECT_TRUE(ls.is_recursive(obj0)); + + recursive_exit(ls, obj0); + push_raw(ls, obj1); + + // 0, 0, 1 + EXPECT_TRUE(ls.is_recursive(obj0)); + EXPECT_FALSE(ls.is_recursive(obj1)); + + recursive_enter(ls, obj1); + + // 0, 0, 1, 1 + EXPECT_TRUE(ls.is_recursive(obj0)); + EXPECT_TRUE(ls.is_recursive(obj1)); + + // Clear stack + pop_raw(ls); + pop_raw(ls); + pop_raw(ls); + pop_raw(ls); + + EXPECT_TRUE(ls.is_empty()); +} + +TEST_VM_F(LockStackTest, contains) { + if (LockingMode != LM_LIGHTWEIGHT) { + return; + } + + const bool test_recursive = VM_Version::supports_recursive_lightweight_locking(); + + JavaThread* THREAD = JavaThread::current(); + // the thread should be in vm to use locks + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + + LockStack& ls = THREAD->lock_stack(); + + EXPECT_TRUE(ls.is_empty()); + + oop obj0 = Universe::int_mirror(); + oop obj1 = Universe::float_mirror(); + + EXPECT_FALSE(ls.contains(obj0)); + + ls.push(obj0); + + // 0 + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_FALSE(ls.contains(obj1)); + + if (test_recursive) { + push_raw(ls, obj0); + + // 0, 0 + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_FALSE(ls.contains(obj1)); + } + + push_raw(ls, obj1); + + // 0, 0, 1 + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_TRUE(ls.contains(obj1)); + + if (test_recursive) { + push_raw(ls, obj1); + + // 0, 0, 1, 1 + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_TRUE(ls.contains(obj1)); + } + + pop_raw(ls); + if (test_recursive) { + pop_raw(ls); + pop_raw(ls); + } + push_raw(ls, obj1); + + // 0, 1 + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_TRUE(ls.contains(obj1)); + + // Clear stack + pop_raw(ls); + pop_raw(ls); + + EXPECT_TRUE(ls.is_empty()); +} + +TEST_VM_F(LockStackTest, remove) { + if (LockingMode != LM_LIGHTWEIGHT) { + return; + } + + const bool test_recursive = VM_Version::supports_recursive_lightweight_locking(); + + JavaThread* THREAD = JavaThread::current(); + // the thread should be in vm to use locks + ThreadInVMfromNative ThreadInVMfromNative(THREAD); + + LockStack& ls = THREAD->lock_stack(); + + EXPECT_TRUE(ls.is_empty()); + + oop obj0 = Universe::int_mirror(); + oop obj1 = Universe::float_mirror(); + oop obj2 = Universe::short_mirror(); + oop obj3 = Universe::long_mirror(); + + push_raw(ls, obj0); + + // 0 + { + size_t removed = ls.remove(obj0); + EXPECT_EQ(removed, 1u); + EXPECT_FALSE(ls.contains(obj0)); + } + + if (test_recursive) { + push_raw(ls, obj0); + push_raw(ls, obj0); + + 
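+    // remove() is expected to drop every entry for the given oop, including recursive entries, and to return the number of slots removed.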
// 0, 0 + { + size_t removed = ls.remove(obj0); + EXPECT_EQ(removed, 2u); + EXPECT_FALSE(ls.contains(obj0)); + } + } + + push_raw(ls, obj0); + push_raw(ls, obj1); + + // 0, 1 + { + size_t removed = ls.remove(obj0); + EXPECT_EQ(removed, 1u); + EXPECT_FALSE(ls.contains(obj0)); + EXPECT_TRUE(ls.contains(obj1)); + + ls.remove(obj1); + EXPECT_TRUE(ls.is_empty()); + } + + push_raw(ls, obj0); + push_raw(ls, obj1); + + // 0, 1 + { + size_t removed = ls.remove(obj1); + EXPECT_EQ(removed, 1u); + EXPECT_FALSE(ls.contains(obj1)); + EXPECT_TRUE(ls.contains(obj0)); + + ls.remove(obj0); + EXPECT_TRUE(ls.is_empty()); + } + + if (test_recursive) { + push_raw(ls, obj0); + push_raw(ls, obj0); + push_raw(ls, obj1); + + // 0, 0, 1 + { + size_t removed = ls.remove(obj0); + EXPECT_EQ(removed, 2u); + EXPECT_FALSE(ls.contains(obj0)); + EXPECT_TRUE(ls.contains(obj1)); + + ls.remove(obj1); + EXPECT_TRUE(ls.is_empty()); + } + + push_raw(ls, obj0); + push_raw(ls, obj1); + push_raw(ls, obj1); + + // 0, 1, 1 + { + size_t removed = ls.remove(obj1); + EXPECT_EQ(removed, 2u); + EXPECT_FALSE(ls.contains(obj1)); + EXPECT_TRUE(ls.contains(obj0)); + + ls.remove(obj0); + EXPECT_TRUE(ls.is_empty()); + } + + push_raw(ls, obj0); + push_raw(ls, obj1); + push_raw(ls, obj1); + push_raw(ls, obj2); + push_raw(ls, obj2); + push_raw(ls, obj2); + push_raw(ls, obj2); + push_raw(ls, obj3); + + // 0, 1, 1, 2, 2, 2, 2, 3 + { + EXPECT_EQ(size(ls), 8u); + + size_t removed = ls.remove(obj1); + EXPECT_EQ(removed, 2u); + + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_FALSE(ls.contains(obj1)); + EXPECT_TRUE(ls.contains(obj2)); + EXPECT_TRUE(ls.contains(obj3)); + + EXPECT_EQ(at(ls, 0), obj0); + EXPECT_EQ(at(ls, 1), obj2); + EXPECT_EQ(at(ls, 2), obj2); + EXPECT_EQ(at(ls, 3), obj2); + EXPECT_EQ(at(ls, 4), obj2); + EXPECT_EQ(at(ls, 5), obj3); + EXPECT_EQ(size(ls), 6u); + + removed = ls.remove(obj2); + EXPECT_EQ(removed, 4u); + + EXPECT_TRUE(ls.contains(obj0)); + EXPECT_FALSE(ls.contains(obj1)); + EXPECT_FALSE(ls.contains(obj2)); + EXPECT_TRUE(ls.contains(obj3)); + + EXPECT_EQ(at(ls, 0), obj0); + EXPECT_EQ(at(ls, 1), obj3); + EXPECT_EQ(size(ls), 2u); + + removed = ls.remove(obj0); + EXPECT_EQ(removed, 1u); + + EXPECT_FALSE(ls.contains(obj0)); + EXPECT_FALSE(ls.contains(obj1)); + EXPECT_FALSE(ls.contains(obj2)); + EXPECT_TRUE(ls.contains(obj3)); + + EXPECT_EQ(at(ls, 0), obj3); + EXPECT_EQ(size(ls), 1u); + + removed = ls.remove(obj3); + EXPECT_EQ(removed, 1u); + + EXPECT_TRUE(ls.is_empty()); + EXPECT_EQ(size(ls), 0u); + } + } + + EXPECT_TRUE(ls.is_empty()); +} diff --git a/test/hotspot/gtest/runtime/test_objectMonitor.cpp b/test/hotspot/gtest/runtime/test_objectMonitor.cpp index 33bec3253d1..783a610eb48 100644 --- a/test/hotspot/gtest/runtime/test_objectMonitor.cpp +++ b/test/hotspot/gtest/runtime/test_objectMonitor.cpp @@ -27,20 +27,20 @@ #include "unittest.hpp" TEST_VM(ObjectMonitor, sanity) { - uint cache_line_size = VM_Version::L1_data_cache_line_size(); + uint cache_line_size = VM_Version::L1_data_cache_line_size(); - if (cache_line_size != 0) { - // We were able to determine the L1 data cache line size so - // do some cache line specific sanity checks - EXPECT_EQ((size_t) 0, sizeof (PaddedEnd) % cache_line_size) - << "PaddedEnd size is not a " - << "multiple of a cache line which permits false sharing. 
" - << "sizeof(PaddedEnd) = " - << sizeof (PaddedEnd) - << "; cache_line_size = " << cache_line_size; + if (cache_line_size != 0) { - EXPECT_GE((size_t) ObjectMonitor::owner_offset_in_bytes(), cache_line_size) - << "the _header and _owner fields are closer " - << "than a cache line which permits false sharing."; + EXPECT_EQ(in_bytes(ObjectMonitor::metadata_offset()), 0) + << "_metadata at a non 0 offset. metadata_offset = " + << in_bytes(ObjectMonitor::metadata_offset()); + + EXPECT_GE((size_t) in_bytes(ObjectMonitor::owner_offset()), cache_line_size) + << "the _metadata and _owner fields are closer " + << "than a cache line which permits false sharing."; + + EXPECT_GE((size_t) in_bytes(ObjectMonitor::recursions_offset() - ObjectMonitor::owner_offset()), cache_line_size) + << "the _owner and _recursions fields are closer " + << "than a cache line which permits false sharing."; } } diff --git a/test/hotspot/gtest/runtime/test_os.cpp b/test/hotspot/gtest/runtime/test_os.cpp index dc59ec90343..8332889fb66 100644 --- a/test/hotspot/gtest/runtime/test_os.cpp +++ b/test/hotspot/gtest/runtime/test_os.cpp @@ -24,7 +24,7 @@ #include "precompiled.hpp" #include "memory/allocation.hpp" #include "memory/resourceArea.hpp" -#include "runtime/os.hpp" +#include "runtime/os.inline.hpp" #include "runtime/thread.hpp" #include "services/memTracker.hpp" #include "utilities/globalDefinitions.hpp" @@ -852,3 +852,27 @@ TEST_VM(os, is_first_C_frame) { EXPECT_FALSE(os::is_first_C_frame(&cur_frame)); #endif // _WIN32 } + +#ifdef __GLIBC__ +TEST_VM(os, trim_native_heap) { + EXPECT_TRUE(os::can_trim_native_heap()); + os::size_change_t sc; + sc.before = sc.after = (size_t)-1; + EXPECT_TRUE(os::trim_native_heap(&sc)); + tty->print_cr(SIZE_FORMAT "->" SIZE_FORMAT, sc.before, sc.after); + // Regardless of whether we freed memory, both before and after + // should be somewhat believable numbers (RSS). + const size_t min = 5 * M; + const size_t max = LP64_ONLY(20 * G) NOT_LP64(3 * G); + ASSERT_LE(min, sc.before); + ASSERT_GT(max, sc.before); + ASSERT_LE(min, sc.after); + ASSERT_GT(max, sc.after); + // Should also work + EXPECT_TRUE(os::trim_native_heap()); +} +#else +TEST_VM(os, trim_native_heap) { + EXPECT_FALSE(os::can_trim_native_heap()); +} +#endif // __GLIBC__ diff --git a/test/hotspot/gtest/runtime/test_trim_native.cpp b/test/hotspot/gtest/runtime/test_trim_native.cpp new file mode 100644 index 00000000000..327680e5b4f --- /dev/null +++ b/test/hotspot/gtest/runtime/test_trim_native.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2023 Red Hat Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "runtime/trimNativeHeap.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/ostream.hpp" +#include "testutils.hpp" +#include "unittest.hpp" + +using ::testing::HasSubstr; + +// Check the state of the trimmer via print_state; returns the suspend count +static int check_trim_state() { + char buf [1024]; + stringStream ss(buf, sizeof(buf)); + NativeHeapTrimmer::print_state(&ss); + if (NativeHeapTrimmer::enabled()) { + assert(TrimNativeHeapInterval > 0, "Sanity"); + EXPECT_THAT(buf, HasSubstr("Periodic native trim enabled")); + + const char* s = ::strstr(buf, "Trims performed"); + EXPECT_NE(p2i(s), 0); + + uint64_t num_trims = 0; + int suspend_count = 0; + int stopped = 0; + EXPECT_EQ(::sscanf(s, "Trims performed: " UINT64_FORMAT ", current suspend count: %d, stopped: %d", + &num_trims, &suspend_count, &stopped), 3); + + // Number of trims we can reasonably expect should be limited + const double fudge_factor = 1.5; + const uint64_t elapsed_ms = (uint64_t)(os::elapsedTime() * fudge_factor * 1000.0); + const uint64_t max_num_trims = (elapsed_ms / TrimNativeHeapInterval) + 1; + EXPECT_LE(num_trims, max_num_trims); + + // We should not be stopped + EXPECT_EQ(stopped, 0); + + // Suspend count must not underflow + EXPECT_GE(suspend_count, 0); + return suspend_count; + + } else { + EXPECT_THAT(buf, HasSubstr("Periodic native trim disabled")); + EXPECT_THAT(buf, Not(HasSubstr("Trims performed"))); + return 0; + } +} + +TEST_VM(os, TrimNative) { + + if (!NativeHeapTrimmer::enabled()) { + return; + } + + // Try recursive pausing. This tests that we are able to pause, that pauses stack, + // and that stacking works within the same thread. + int c1 = 0, c2 = 0, c3 = 0; + { + NativeHeapTrimmer::SuspendMark sm1("Test1"); + c1 = check_trim_state(); + { + NativeHeapTrimmer::SuspendMark sm2("Test2"); + c2 = check_trim_state(); + { + NativeHeapTrimmer::SuspendMark sm3("Test3"); + c3 = check_trim_state(); + } + } + } + // We also check the state: the suspend count should go up. But since we don't know + // whether concurrent code will have increased the suspend count too, this is fuzzy and + // we must avoid intermittent false positives. 
+ EXPECT_GT(c2, c1); + EXPECT_GT(c3, c2); +} diff --git a/test/hotspot/gtest/utilities/test_concurrentHashtable.cpp b/test/hotspot/gtest/utilities/test_concurrentHashtable.cpp index cfc445a4add..6e42bf98488 100644 --- a/test/hotspot/gtest/utilities/test_concurrentHashtable.cpp +++ b/test/hotspot/gtest/utilities/test_concurrentHashtable.cpp @@ -105,9 +105,12 @@ struct SimpleTestLookup { uintx get_hash() { return Pointer::get_hash(_val, NULL); } - bool equals(const uintptr_t* value, bool* is_dead) { + bool equals(const uintptr_t* value) { return _val == *value; } + bool is_dead(const uintptr_t* value) { + return false; + } }; struct ValueGet { @@ -533,9 +536,12 @@ struct TestLookup { uintx get_hash() { return TestInterface::get_hash(_val, NULL); } - bool equals(const uintptr_t* value, bool* is_dead) { + bool equals(const uintptr_t* value) { return _val == *value; } + bool is_dead(const uintptr_t* value) { + return false; + } }; static uintptr_t cht_get_copy(TestTable* cht, Thread* thr, TestLookup tl) { diff --git a/test/hotspot/gtest/utilities/test_objectBitSet.cpp b/test/hotspot/gtest/utilities/test_objectBitSet.cpp new file mode 100644 index 00000000000..a3425e4bfda --- /dev/null +++ b/test/hotspot/gtest/utilities/test_objectBitSet.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "precompiled.hpp" +#include "memory/allocation.hpp" +#include "utilities/objectBitSet.inline.hpp" +#include "unittest.hpp" + +TEST_VM(ObjectBitSet, empty) { + ObjectBitSet obs; + oopDesc obj1; + ASSERT_FALSE(obs.is_marked(&obj1)); +} + +// NOTE: This is a little weird. NULL is not treated specially: ObjectBitSet will happily +// allocate a fragment for the memory range starting at 0 and mark the first bit when passing NULL. +// In the absence of any error handling, I am not sure what a reasonably better +// way to do it would be, though.
+TEST_VM(ObjectBitSet, null) { + ObjectBitSet obs; + ASSERT_FALSE(obs.is_marked((oop)NULL)); + obs.mark_obj((oop) NULL); + ASSERT_TRUE(obs.is_marked((oop)NULL)); +} + +TEST_VM(ObjectBitSet, mark_single) { + ObjectBitSet obs; + oopDesc obj1; + ASSERT_FALSE(obs.is_marked(&obj1)); + obs.mark_obj(&obj1); + ASSERT_TRUE(obs.is_marked(&obj1)); +} + +TEST_VM(ObjectBitSet, mark_multi) { + ObjectBitSet obs; + oopDesc obj1; + oopDesc obj2; + ASSERT_FALSE(obs.is_marked(&obj1)); + ASSERT_FALSE(obs.is_marked(&obj2)); + obs.mark_obj(&obj1); + ASSERT_TRUE(obs.is_marked(&obj1)); + ASSERT_FALSE(obs.is_marked(&obj2)); + obs.mark_obj(&obj2); + ASSERT_TRUE(obs.is_marked(&obj1)); + ASSERT_TRUE(obs.is_marked(&obj2)); +} diff --git a/test/hotspot/jtreg/ProblemList-lilliput.txt b/test/hotspot/jtreg/ProblemList-lilliput.txt new file mode 100644 index 00000000000..3c2724ed0ab --- /dev/null +++ b/test/hotspot/jtreg/ProblemList-lilliput.txt @@ -0,0 +1,52 @@ +# +# Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + +# +# These tests are problematic when +UseCompactObjectHeaders is enabled. +# The test exclusions are for the cases when we are sure the tests would fail +# for the known and innocuous implementation reasons. +# + + +# +# Tests require specific UseCompressedClassPointers mode +# +gc/arguments/TestCompressedClassFlags.java 1234567 generic-all + + +# +# Tests require specific UseBiasedLocking mode +# +runtime/logging/BiasedLockingTest.java 8256425 generic-all +compiler/rtm/cli/TestUseRTMLockingOptionWithBiasedLocking.java 8256425 generic-x64,generic-i586 +jdk/jfr/event/runtime/TestBiasedLockRevocationEvents.java 8256425 generic-all + + +# +# Test library tests do not like non-whitelisted +UCOH flag +# +testlibrary_tests/ir_framework/tests/TestCheckedTests.java 1234567 generic-all +testlibrary_tests/ir_framework/tests/TestBadFormat.java 1234567 generic-all +testlibrary_tests/ir_framework/tests/TestRunTests.java 1234567 generic-all +testlibrary_tests/ir_framework/tests/TestScenarios.java 1234567 generic-all +testlibrary_tests/ir_framework/tests/TestIRMatching.java 1234567 generic-all diff --git a/test/hotspot/jtreg/TEST.groups b/test/hotspot/jtreg/TEST.groups index 74fb2266c70..a3610cc9c4e 100644 --- a/test/hotspot/jtreg/TEST.groups +++ b/test/hotspot/jtreg/TEST.groups @@ -1,5 +1,5 @@ # -# Copyright (c) 2013, 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2013, 2024, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -100,6 +100,7 @@ hotspot_compiler_arraycopy = \ tier1_common = \ sanity/BasicVMTest.java \ gtest/GTestWrapper.java \ + gtest/LockStackGtests.java \ gtest/MetaspaceGtests.java \ gtest/LargePageGtests.java \ gtest/NMTGtests.java \ diff --git a/test/hotspot/jtreg/compiler/c2/TestLWLockingCodeGen.java b/test/hotspot/jtreg/compiler/c2/TestLWLockingCodeGen.java new file mode 100644 index 00000000000..ad91ba0b02e --- /dev/null +++ b/test/hotspot/jtreg/compiler/c2/TestLWLockingCodeGen.java @@ -0,0 +1,38 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @summary Tests correct code generation of lightweight locking when using -XX:+ShowMessageBoxOnError; times-out on failure + * @bug 8329726 + * @run main/othervm -XX:+ShowMessageBoxOnError -Xcomp -XX:-TieredCompilation -XX:CompileOnly=TestLWLockingCodeGen::sync TestLWLockingCodeGen + */ +public class TestLWLockingCodeGen { + private static int val = 0; + public static void main(String[] args) { + sync(); + } + private static synchronized void sync() { + val = val + (int)(Math.random() * 42); + } +} diff --git a/test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java b/test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java new file mode 100644 index 00000000000..483c2c49962 --- /dev/null +++ b/test/hotspot/jtreg/compiler/escapeAnalysis/Test8329757.java @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +/* + * @test + * @bug 8329757 + * @summary Deoptimization with nested eliminated and not eliminated locks + * caused reordered lock stacks. This can be handled by the interpreter + * but when a frame is migrated back to compiled code via OSR the C2 + * assumption about balanced monitorenter-monitorexit is broken. + * + * @requires vm.compMode != "Xint" + * + * @run main/othervm compiler.escapeAnalysis.Test8329757 + */ + +package compiler.escapeAnalysis; + +public class Test8329757 { + + int a = 400; + Double ddd; + + void q() { + int e; + synchronized (new Double(1.1f)) { + int[] f = new int[a]; + synchronized (Test8329757.class) { + for (int d = 4; d < 127; d++) { + e = 13; + do switch (d * 5) { + case 0: + case 42: + case 29: + e = d; + default: + f[1] = e; + } while (--e > 0); + } + } + } + } + + void n() { + for (int j = 6; j < 274; ++j) q(); + } + + public static void main(String[] args) { + Test8329757 r = new Test8329757(); + for (int i = 0; i < 1000; i++) r.n(); + } +} diff --git a/test/hotspot/jtreg/compiler/escapeAnalysis/TestNestedRelockAtDeopt.java b/test/hotspot/jtreg/compiler/escapeAnalysis/TestNestedRelockAtDeopt.java new file mode 100644 index 00000000000..45bd97b7bc6 --- /dev/null +++ b/test/hotspot/jtreg/compiler/escapeAnalysis/TestNestedRelockAtDeopt.java @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8324174 + * @summary During deoptimization locking and unlocking for nested locks are executed in incorrect order. 
+ * @requires vm.compMode != "Xint" + * @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation -Xmx128M + * -XX:CompileCommand=exclude,TestNestedRelockAtDeopt::main TestNestedRelockAtDeopt + */ + +import java.util.ArrayList; +public class TestNestedRelockAtDeopt { + + static final int CHUNK = 1000; + static ArrayList arr = null; + + public static void main(String[] args) { + arr = new ArrayList<>(); + try { + while (true) { + test1(); + } + } catch (OutOfMemoryError oom) { + arr = null; // Free memory + System.out.println("OOM caught in test1"); + } + arr = new ArrayList<>(); + try { + while (true) { + test2(); + } + } catch (OutOfMemoryError oom) { + arr = null; // Free memory + System.out.println("OOM caught in test2"); + } + arr = new ArrayList<>(); + TestNestedRelockAtDeopt obj = new TestNestedRelockAtDeopt(); + try { + while (true) { + test3(obj); + } + } catch (OutOfMemoryError oom) { + arr = null; // Free memory + System.out.println("OOM caught in test3"); + } + arr = new ArrayList<>(); + try { + while (true) { + test4(obj); + } + } catch (OutOfMemoryError oom) { + arr = null; // Free memory + System.out.println("OOM caught in test4"); + } + } + + // Nested locks in one method + static void test1() { // Nested lock in one method + synchronized (TestNestedRelockAtDeopt.class) { + synchronized (new TestNestedRelockAtDeopt()) { // lock eliminated - not escaped allocation + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated + synchronized (new TestNestedRelockAtDeopt()) { // lock eliminated - not escaped allocation + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated + arr.add(new byte[CHUNK]); + } + } + } + } + } + } + + // Nested locks in inlined method + static void foo() { + synchronized (new TestNestedRelockAtDeopt()) { // lock eliminated - not escaped allocation + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated when inlined + arr.add(new byte[CHUNK]); + } + } + } + + static void test2() { + synchronized (TestNestedRelockAtDeopt.class) { + synchronized (new TestNestedRelockAtDeopt()) { // lock eliminated - not escaped allocation + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated + foo(); // Inline + } + } + } + } + + // Nested locks in one method + static void test3(TestNestedRelockAtDeopt obj) { + synchronized (TestNestedRelockAtDeopt.class) { + synchronized (obj) { // lock not eliminated - external object + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated + synchronized (obj) { // nested lock eliminated + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated + arr.add(new byte[CHUNK]); + } + } + } + } + } + } + + // Nested locks with different objects in inlined method + static void bar(TestNestedRelockAtDeopt obj) { + synchronized (obj) { // nested lock eliminated when inlined + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated when inlined + arr.add(new byte[CHUNK]); + } + } + } + + static void test4(TestNestedRelockAtDeopt obj) { + synchronized (TestNestedRelockAtDeopt.class) { + synchronized (obj) { // lock not eliminated - external object + synchronized (TestNestedRelockAtDeopt.class) { // nested lock eliminated + bar(obj); // Inline + } + } + } + } +} diff --git a/test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java b/test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java new file mode 100644 index 00000000000..f2133b49658 --- /dev/null +++ b/test/hotspot/jtreg/compiler/locks/TestUnlockOSR.java @@ -0,0 
+1,56 @@ +/* + * Copyright (c) 2023 SAP SE. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test + * @bug 8316746 + * @summary During OSR, locks get transferred from the interpreter frame. + * Check that unlocking 2 such locks works in the OSR compiled nmethod. + * Some platforms verify that the unlocking happens in the correct order. + * + * @run main/othervm -Xbatch TestUnlockOSR + */ + +public class TestUnlockOSR { + static void test_method(Object a, Object b, int limit) { + synchronized(a) { // allocate space for monitors + synchronized(b) { + } + } // free space to test allocation in reused space + synchronized(a) { // reuse the space + synchronized(b) { + for (int i = 0; i < limit; i++) {} + } + } + } + + public static void main(String[] args) { + Object a = new TestUnlockOSR(), + b = new TestUnlockOSR(); + // avoid uncommon trap before last unlocks + for (int i = 0; i < 100; i++) { test_method(a, b, 0); } + // trigger OSR + test_method(a, b, 100000); + } +} diff --git a/test/hotspot/jtreg/gc/g1/plab/TestPLABPromotion.java b/test/hotspot/jtreg/gc/g1/plab/TestPLABPromotion.java index bb04b25f736..ebeabc14a95 100644 --- a/test/hotspot/jtreg/gc/g1/plab/TestPLABPromotion.java +++ b/test/hotspot/jtreg/gc/g1/plab/TestPLABPromotion.java @@ -45,6 +45,7 @@ import gc.g1.plab.lib.PLABUtils; import gc.g1.plab.lib.PlabInfo; +import jdk.test.lib.Platform; import jdk.test.lib.process.OutputAnalyzer; import jdk.test.lib.process.ProcessTools; @@ -72,7 +73,7 @@ public class TestPLABPromotion { private static final int PLAB_SIZE_HIGH = 65536; private static final int OBJECT_SIZE_SMALL = 10; private static final int OBJECT_SIZE_MEDIUM = 100; - private static final int OBJECT_SIZE_HIGH = 3250; + private static final int OBJECT_SIZE_HIGH = Platform.is64bit() ? 3266 : 3258; private static final int GC_NUM_SMALL = 1; private static final int GC_NUM_MEDIUM = 3; private static final int GC_NUM_HIGH = 7; diff --git a/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithG1.java b/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithG1.java index 64a090025ac..eeb6bfac5f0 100644 --- a/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithG1.java +++ b/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithG1.java @@ -25,7 +25,7 @@ package gc.stress.systemgc; /* - * @test TestSystemGCWithG1 + * @test id=default * @key stress * @bug 8190703 * @library / @@ -33,6 +33,17 @@ * @summary Stress the G1 GC full GC by allocating objects of different lifetimes concurrently with System.gc().
* @run main/othervm/timeout=300 -Xlog:gc*=info -Xmx512m -XX:+UseG1GC gc.stress.systemgc.TestSystemGCWithG1 270 */ + +/* + * @test id=alt-forwarding + * @key stress + * @bug 8190703 + * @library / + * @requires vm.gc.G1 + * @requires (vm.bits == "64") + * @summary Stress the G1 GC full GC by allocating objects of different lifetimes concurrently with System.gc(). + * @run main/othervm/timeout=300 -XX:+UnlockExperimentalVMOptions -XX:+UseAltGCForwarding -Xlog:gc*=info -Xmx512m -XX:+UseG1GC gc.stress.systemgc.TestSystemGCWithG1 270 + */ public class TestSystemGCWithG1 { public static void main(String[] args) throws Exception { TestSystemGC.main(args); diff --git a/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithSerial.java b/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithSerial.java index 1db15b76e18..c64459e159b 100644 --- a/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithSerial.java +++ b/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithSerial.java @@ -25,7 +25,7 @@ package gc.stress.systemgc; /* - * @test TestSystemGCWithSerial + * @test id=default * @key stress * @bug 8190703 * @library / @@ -33,6 +33,37 @@ * @summary Stress the Serial GC full GC by allocating objects of different lifetimes concurrently with System.gc(). * @run main/othervm/timeout=300 -Xlog:gc*=info -Xmx512m -XX:+UseSerialGC gc.stress.systemgc.TestSystemGCWithSerial 270 */ + +/* + * @test id=alt-forwarding + * @key stress + * @bug 8190703 + * @library / + * @requires vm.gc.Serial + * @summary Stress the Serial GC full GC by allocating objects of different lifetimes concurrently with System.gc(). + * @run main/othervm/timeout=300 -XX:+UnlockExperimentalVMOptions -XX:+UseAltGCForwarding -Xlog:gc*=info -Xmx512m -XX:+UseSerialGC gc.stress.systemgc.TestSystemGCWithSerial 270 + */ + +/* + * @test id=alt-forwarding-unaligned + * @key stress + * @bug 8190703 + * @library / + * @requires vm.gc.Serial + * @summary Stress the Serial GC full GC by allocating objects of different lifetimes concurrently with System.gc(). + * @run main/othervm/timeout=300 -XX:+UnlockExperimentalVMOptions -XX:+UseAltGCForwarding -Xlog:gc*=info -Xmx700m -XX:+UseSerialGC gc.stress.systemgc.TestSystemGCWithSerial 270 + */ + +/* + * @test id=alt-forwarding-large-heap + * @key stress + * @bug 8190703 + * @library / + * @requires vm.gc.Serial + * @requires (vm.bits == "64") & (os.maxMemory >= 6G) + * @summary Stress the Serial GC full GC by allocating objects of different lifetimes concurrently with System.gc(). + * @run main/othervm/timeout=300 -XX:+UnlockExperimentalVMOptions -XX:+UseAltGCForwarding -Xlog:gc*=info -Xmx6g -XX:+UseSerialGC gc.stress.systemgc.TestSystemGCWithSerial 270 + */ public class TestSystemGCWithSerial { public static void main(String[] args) throws Exception { TestSystemGC.main(args); diff --git a/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithShenandoah.java b/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithShenandoah.java index 1b12e22b62f..d7322c7e3d2 100644 --- a/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithShenandoah.java +++ b/test/hotspot/jtreg/gc/stress/systemgc/TestSystemGCWithShenandoah.java @@ -41,6 +41,23 @@ * gc.stress.systemgc.TestSystemGCWithShenandoah 270 */ +/* + * @test id=alt-forwarding + * @key stress + * @library / + * @requires vm.gc.Shenandoah + * @summary Stress the Shenandoah GC full GC by allocating objects of different lifetimes concurrently with System.gc(). 
+ * + * @run main/othervm/timeout=300 -Xlog:gc*=info -Xmx512m -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions + * -XX:+UseShenandoahGC -XX:+UseAltGCForwarding + * -XX:+ShenandoahVerify + * gc.stress.systemgc.TestSystemGCWithShenandoah 270 + * + * @run main/othervm/timeout=300 -Xlog:gc*=info -Xmx512m -XX:+UnlockExperimentalVMOptions -XX:+UnlockDiagnosticVMOptions + * -XX:+UseShenandoahGC + * gc.stress.systemgc.TestSystemGCWithShenandoah 270 + */ + /* * @test id=iu * @key stress diff --git a/test/hotspot/jtreg/gtest/LockStackGtests.java b/test/hotspot/jtreg/gtest/LockStackGtests.java new file mode 100644 index 00000000000..b51cebdbf98 --- /dev/null +++ b/test/hotspot/jtreg/gtest/LockStackGtests.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* @test + * @summary Run LockStack gtests with LockingMode=2 + * @library /test/lib + * @modules java.base/jdk.internal.misc + * java.xml + * @requires vm.flagless + * @run main/native GTestWrapper --gtest_filter=LockStackTest* -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 + */ diff --git a/test/hotspot/jtreg/gtest/NativeHeapTrimmerGtest.java b/test/hotspot/jtreg/gtest/NativeHeapTrimmerGtest.java new file mode 100644 index 00000000000..7aa3dd8a322 --- /dev/null +++ b/test/hotspot/jtreg/gtest/NativeHeapTrimmerGtest.java @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023 Red Hat, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/* @test + * @summary Run a subset of gtests with the native trimmer activated. + * @library /test/lib + * @modules java.base/jdk.internal.misc + * java.xml + * @run main/native GTestWrapper --gtest_filter=os.trim* -Xlog:trimnative -XX:+UnlockExperimentalVMOptions -XX:TrimNativeHeapInterval=100 + */ diff --git a/test/hotspot/jtreg/runtime/FieldLayout/BaseOffsets.java b/test/hotspot/jtreg/runtime/FieldLayout/BaseOffsets.java new file mode 100644 index 00000000000..9b521d2d449 --- /dev/null +++ b/test/hotspot/jtreg/runtime/FieldLayout/BaseOffsets.java @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test id=default + * @library /test/lib / + * @modules java.base/jdk.internal.misc + * java.management + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI BaseOffsets + */ +/* + * @test id=no-coops + * @library /test/lib / + * @requires vm.bits == "64" + * @modules java.base/jdk.internal.misc + * java.management + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-UseCompressedOops BaseOffsets + */ + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Comparator; +import jdk.internal.misc.Unsafe; + +import jdk.test.lib.Asserts; +import jdk.test.lib.Platform; +import jdk.test.whitebox.WhiteBox; + +public class BaseOffsets { + + static class LIClass { + public int i; + } + + static final long INT_OFFSET; + static final int INT_ARRAY_OFFSET; + static final int LONG_ARRAY_OFFSET; + static { + WhiteBox WB = WhiteBox.getWhiteBox(); + if (!Platform.is64bit() || WB.getBooleanVMFlag("UseCompactObjectHeaders")) { + INT_OFFSET = 8; + INT_ARRAY_OFFSET = 12; + LONG_ARRAY_OFFSET = 16; + } else if (WB.getBooleanVMFlag("UseCompressedClassPointers")) { + INT_OFFSET = 12; + INT_ARRAY_OFFSET = 16; + LONG_ARRAY_OFFSET = 16; + } else { + INT_OFFSET = 16; + INT_ARRAY_OFFSET = 20; + LONG_ARRAY_OFFSET = 24; + } + } + + static public void main(String[] args) { + Unsafe unsafe = Unsafe.getUnsafe(); + Class c = LIClass.class; + Field[] fields = c.getFields(); + for (int i = 0; i < fields.length; i++) { + long offset = unsafe.objectFieldOffset(fields[i]); + if (fields[i].getType() == int.class) { + Asserts.assertEquals(offset, INT_OFFSET, "Misplaced int field"); + } else { + Asserts.fail("Unexpected field type"); + } + } + + Asserts.assertEquals(unsafe.arrayBaseOffset(boolean[].class), INT_ARRAY_OFFSET, "Misplaced boolean array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(byte[].class), INT_ARRAY_OFFSET, "Misplaced byte array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(char[].class), INT_ARRAY_OFFSET, "Misplaced char array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(short[].class), INT_ARRAY_OFFSET, "Misplaced short array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(int[].class), INT_ARRAY_OFFSET, "Misplaced int array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(long[].class), LONG_ARRAY_OFFSET, "Misplaced long array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(float[].class), INT_ARRAY_OFFSET, "Misplaced float array base"); + Asserts.assertEquals(unsafe.arrayBaseOffset(double[].class), LONG_ARRAY_OFFSET, "Misplaced double array base"); + boolean narrowOops = System.getProperty("java.vm.compressedOopsMode") != null || + !Platform.is64bit(); + int expected_objary_offset = narrowOops ? INT_ARRAY_OFFSET : LONG_ARRAY_OFFSET; + Asserts.assertEquals(unsafe.arrayBaseOffset(Object[].class), expected_objary_offset, "Misplaced object array base"); + } +} diff --git a/test/hotspot/jtreg/runtime/FieldLayout/OldLayoutCheck.java b/test/hotspot/jtreg/runtime/FieldLayout/OldLayoutCheck.java index a68b0a9efaf..e59d2e0a523 100644 --- a/test/hotspot/jtreg/runtime/FieldLayout/OldLayoutCheck.java +++ b/test/hotspot/jtreg/runtime/FieldLayout/OldLayoutCheck.java @@ -25,20 +25,24 @@ * @test * @bug 8239014 * @summary -XX:-UseEmptySlotsInSupers sometime fails to reproduce the layout of the old code - * @library /test/lib + * @library /test/lib / * @modules java.base/jdk.internal.misc * java.management * @requires vm.bits == "64" & vm.opt.final.UseCompressedOops == true & vm.gc != "Z" - * @run main/othervm -XX:+UseCompressedClassPointers -XX:-UseEmptySlotsInSupers OldLayoutCheck + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. 
-XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:+UseCompressedClassPointers -XX:-UseEmptySlotsInSupers OldLayoutCheck */ /* * @test * @requires vm.bits == "32" - * @library /test/lib + * @library /test/lib / * @modules java.base/jdk.internal.misc * java.management - * @run main/othervm -XX:-UseEmptySlotsInSupers OldLayoutCheck + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:-UseEmptySlotsInSupers OldLayoutCheck */ import java.lang.reflect.Field; @@ -48,6 +52,7 @@ import jdk.test.lib.Asserts; import jdk.test.lib.Platform; +import jdk.test.whitebox.WhiteBox; public class OldLayoutCheck { @@ -56,11 +61,18 @@ static class LIClass { public int i; } - // 32-bit VMs: @0: 8 byte header, @8: long field, @16: int field - // 64-bit VMs: @0: 12 byte header, @12: int field, @16: long field - static final long INT_OFFSET = Platform.is64bit() ? 12L : 16L; - static final long LONG_OFFSET = Platform.is64bit() ? 16L : 8L; - + private static final long INT_OFFSET; + private static final long LONG_OFFSET; + static { + WhiteBox WB = WhiteBox.getWhiteBox(); + if (!Platform.is64bit() || WB.getBooleanVMFlag("UseCompactObjectHeaders")) { + INT_OFFSET = 16L; + LONG_OFFSET = 8L; + } else { + INT_OFFSET = 12L; + LONG_OFFSET = 16L; + } + } static public void main(String[] args) { Unsafe unsafe = Unsafe.getUnsafe(); Class c = LIClass.class; diff --git a/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java b/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java new file mode 100644 index 00000000000..604bfb678ac --- /dev/null +++ b/test/hotspot/jtreg/runtime/cds/CdsDifferentCompactObjectHeaders.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test CdsDifferentCompactObjectHeaders + * @summary Testing CDS (class data sharing) using opposite compact object header settings. + * Using a different compact object headers setting for each dump/load pair. + * This is a negative test; using a compact headers setting for loading that + * is different from the setting used for creating the CDS file + * should fail when loading.
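+ * (The archive records the dump-time UseCompactObjectHeaders setting; as the expected error message below shows, the VM refuses to load an archive whose recorded setting does not equal the current one.)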
+ * @requires vm.cds + * @requires vm.bits == 64 + * @library /test/lib + * @run driver CdsDifferentCompactObjectHeaders + */ + +import jdk.test.lib.cds.CDSTestUtils; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.Platform; + +public class CdsDifferentCompactObjectHeaders { + + public static void main(String[] args) throws Exception { + createAndLoadSharedArchive(true, false); + createAndLoadSharedArchive(false, true); + } + + // Parameters indicate whether UseCompactObjectHeaders is enabled for the dump run and for the load run, respectively + private static void + createAndLoadSharedArchive(boolean createCompactHeaders, boolean loadCompactHeaders) + throws Exception { + String createCompactHeadersArg = "-XX:" + (createCompactHeaders ? "+" : "-") + "UseCompactObjectHeaders"; + String loadCompactHeadersArg = "-XX:" + (loadCompactHeaders ? "+" : "-") + "UseCompactObjectHeaders"; + String expectedErrorMsg = + String.format( + "The shared archive file's UseCompactObjectHeaders setting (%s)" + + " does not equal the current UseCompactObjectHeaders setting (%s)", + createCompactHeaders ? "enabled" : "disabled", + loadCompactHeaders ? "enabled" : "disabled"); + + CDSTestUtils.createArchiveAndCheck("-XX:+UnlockExperimentalVMOptions", createCompactHeadersArg); + + OutputAnalyzer out = CDSTestUtils.runWithArchive("-Xlog:cds", "-XX:+UnlockExperimentalVMOptions", loadCompactHeadersArg); + CDSTestUtils.checkExecExpectError(out, 1, expectedErrorMsg); + } +} diff --git a/test/hotspot/jtreg/runtime/cds/appcds/TestZGCWithCDS.java b/test/hotspot/jtreg/runtime/cds/appcds/TestZGCWithCDS.java index 585840c9f2a..05a0d9b6538 100644 --- a/test/hotspot/jtreg/runtime/cds/appcds/TestZGCWithCDS.java +++ b/test/hotspot/jtreg/runtime/cds/appcds/TestZGCWithCDS.java @@ -47,6 +47,8 @@ public static void main(String... args) throws Exception { .dump(helloJar, new String[] {"Hello"}, "-XX:+UseZGC", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds"); out.shouldContain("Dumping shared data to file:"); out.shouldHaveExitValue(0); @@ -55,6 +57,8 @@ public static void main(String... args) throws Exception { out = TestCommon .exec(helloJar, "-XX:+UseZGC", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(HELLO); @@ -66,6 +70,8 @@ public static void main(String... args) throws Exception { "-XX:-UseZGC", "-XX:+UseCompressedOops", // in case turned off by vmoptions "-XX:+UseCompressedClassPointers", // by jtreg + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(UNABLE_TO_USE_ARCHIVE); @@ -78,6 +84,8 @@ public static void main(String... args) throws Exception { "-XX:+UseSerialGC", "-XX:-UseCompressedOops", "-XX:-UseCompressedClassPointers", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(UNABLE_TO_USE_ARCHIVE); @@ -90,6 +98,8 @@ public static void main(String... args) throws Exception { "-XX:+UseSerialGC", "-XX:-UseCompressedOops", "-XX:+UseCompressedClassPointers", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(HELLO); @@ -101,6 +111,8 @@ public static void main(String... args) throws Exception { "-XX:+UseSerialGC", "-XX:+UseCompressedOops", "-XX:-UseCompressedClassPointers", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(UNABLE_TO_USE_ARCHIVE); @@ -113,6 +125,8 @@ public static void main(String...
args) throws Exception { "-XX:+UseSerialGC", "-XX:+UseCompressedOops", "-XX:+UseCompressedClassPointers", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(UNABLE_TO_USE_ARCHIVE); @@ -126,6 +140,8 @@ public static void main(String... args) throws Exception { "-XX:+UseSerialGC", "-XX:-UseCompressedOops", "-XX:+UseCompressedClassPointers", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds"); out.shouldContain("Dumping shared data to file:"); out.shouldHaveExitValue(0); @@ -134,6 +150,8 @@ public static void main(String... args) throws Exception { out = TestCommon .exec(helloJar, "-XX:+UseZGC", + "-XX:+UnlockExperimentalVMOptions", + "-XX:-UseCompactObjectHeaders", "-Xlog:cds", "Hello"); out.shouldContain(HELLO); diff --git a/test/hotspot/jtreg/runtime/cds/appcds/jcmd/JCmdTestDumpBase.java b/test/hotspot/jtreg/runtime/cds/appcds/jcmd/JCmdTestDumpBase.java index 563e0698479..27b794e178a 100644 --- a/test/hotspot/jtreg/runtime/cds/appcds/jcmd/JCmdTestDumpBase.java +++ b/test/hotspot/jtreg/runtime/cds/appcds/jcmd/JCmdTestDumpBase.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -63,7 +63,8 @@ public static void runTest(JCmdTest t) throws Exception { private static final String TEST_CLASSES[] = {"JCmdTestLingeredApp", "jdk/test/lib/apps/LingeredApp", - "jdk/test/lib/apps/LingeredApp$1"}; + "jdk/test/lib/apps/LingeredApp$1", + "jdk/test/lib/apps/LingeredApp$SteadyStateLock"}; private static final String BOOT_CLASSES[] = {"Hello"}; protected static String testJar = null; diff --git a/test/hotspot/jtreg/runtime/lockStack/TestLockStackCapacity.java b/test/hotspot/jtreg/runtime/lockStack/TestLockStackCapacity.java new file mode 100644 index 00000000000..61c78e68322 --- /dev/null +++ b/test/hotspot/jtreg/runtime/lockStack/TestLockStackCapacity.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test TestLockStackCapacity + * @summary Tests the interaction between recursive lightweight locking and + * when the lock stack capacity is exceeded. 
+ * @requires vm.flagless + * @library /testlibrary /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xint -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 TestLockStackCapacity + */ + +import jdk.test.lib.Asserts; +import jdk.test.whitebox.WhiteBox; +import jtreg.SkippedException; + +public class TestLockStackCapacity { + static final WhiteBox WB = WhiteBox.getWhiteBox(); + static final int LockingMode = WB.getIntVMFlag("LockingMode").intValue(); + static final int LM_LIGHTWEIGHT = 2; + + static class SynchronizedObject { + static final SynchronizedObject OUTER = new SynchronizedObject(); + static final SynchronizedObject INNER = new SynchronizedObject(); + static final int LockStackCapacity = WB.getLockStackCapacity(); + + synchronized void runInner(int depth) { + assertNotInflated(); + if (depth == 1) { + return; + } else { + runInner(depth - 1); + } + assertNotInflated(); + } + + synchronized void runOuter(int depth, SynchronizedObject inner) { + assertNotInflated(); + if (depth == 1) { + inner.runInner(LockStackCapacity); + } else { + runOuter(depth - 1, inner); + } + assertInflated(); + } + + public static void runTest() { + // Test Requires a capacity of at least 2. + Asserts.assertGTE(LockStackCapacity, 2); + + // Just checking + OUTER.assertNotInflated(); + INNER.assertNotInflated(); + + synchronized(OUTER) { + OUTER.assertNotInflated(); + INNER.assertNotInflated(); + OUTER.runOuter(LockStackCapacity - 1, INNER); + + OUTER.assertInflated(); + INNER.assertNotInflated(); + } + } + + void assertNotInflated() { + Asserts.assertFalse(WB.isMonitorInflated(this)); + } + + void assertInflated() { + Asserts.assertTrue(WB.isMonitorInflated(this)); + } + } + + public static void main(String... args) throws Exception { + if (LockingMode != LM_LIGHTWEIGHT) { + throw new SkippedException("Test only valid for LM_LIGHTWEIGHT"); + } + + if (!WB.supportsRecursiveLightweightLocking()) { + throw new SkippedException("Test only valid if LM_LIGHTWEIGHT supports recursion"); + } + + SynchronizedObject.runTest(); + } +} diff --git a/test/hotspot/jtreg/runtime/lockStack/TestStackWalk.java b/test/hotspot/jtreg/runtime/lockStack/TestStackWalk.java new file mode 100644 index 00000000000..3bac0ce3375 --- /dev/null +++ b/test/hotspot/jtreg/runtime/lockStack/TestStackWalk.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/* + * @test + * @bug 8317262 + * @library /testlibrary /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:+HandshakeALot -XX:GuaranteedSafepointInterval=1 TestStackWalk + */ + +import jdk.test.lib.Asserts; +import jdk.test.whitebox.WhiteBox; +import java.util.concurrent.CountDownLatch; + +public class TestStackWalk { + static Thread worker1; + static Thread worker2; + static volatile boolean done; + static volatile int counter = 0; + static Object lock = new Object(); + + public static void main(String... args) throws Exception { + worker1 = new Thread(() -> syncedWorker()); + worker1.start(); + worker2 = new Thread(() -> syncedWorker()); + worker2.start(); + Thread worker3 = new Thread(() -> stackWalker()); + worker3.start(); + + worker1.join(); + worker2.join(); + worker3.join(); + } + + public static void syncedWorker() { + synchronized (lock) { + while (!done) { + counter++; + } + } + } + + public static void stackWalker() { + // Suspend workers so the one looping waiting for "done" + // doesn't execute the handshake below, increasing the + // chances the VMThread will do it. + suspendWorkers(); + + WhiteBox wb = WhiteBox.getWhiteBox(); + long end = System.currentTimeMillis() + 20000; + while (end > System.currentTimeMillis()) { + wb.handshakeWalkStack(worker1, false /* all_threads */); + wb.handshakeWalkStack(worker2, false /* all_threads */); + } + + resumeWorkers(); + done = true; + } + + static void suspendWorkers() { + worker1.suspend(); + worker2.suspend(); + } + + static void resumeWorkers() { + worker1.resume(); + worker2.resume(); + } +} diff --git a/test/hotspot/jtreg/runtime/logging/MonitorInflationTest.java b/test/hotspot/jtreg/runtime/logging/MonitorInflationTest.java index 568055617f9..c29131a01be 100644 --- a/test/hotspot/jtreg/runtime/logging/MonitorInflationTest.java +++ b/test/hotspot/jtreg/runtime/logging/MonitorInflationTest.java @@ -37,7 +37,7 @@ public class MonitorInflationTest { static void analyzeOutputOn(ProcessBuilder pb) throws Exception { OutputAnalyzer output = new OutputAnalyzer(pb.start()); - output.shouldContain("inflate(has_locker):"); + output.shouldContain("inflate:"); output.shouldContain("type='MonitorInflationTest$Waiter'"); output.shouldContain("I've been waiting."); output.shouldHaveExitValue(0); diff --git a/test/hotspot/jtreg/runtime/oom/TestOOM.java b/test/hotspot/jtreg/runtime/oom/TestOOM.java new file mode 100644 index 00000000000..6ab5e5f0c0d --- /dev/null +++ b/test/hotspot/jtreg/runtime/oom/TestOOM.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8303027 + * @requires vm.bits == "64" + * @summary Test that we're failing with OOME and not with VM crash + * @run main/othervm -Xmx1g -XX:-UseCompressedOops TestOOM + */ +/* + * @test + * @bug 8303027 + * @requires vm.bits == "32" + * @summary Test that we're failing with OOME and not with VM crash + * @run main/othervm -Xmx1g TestOOM + */ +public class TestOOM { + public static void main(String[] args) { + // Test that it exits with OOME and not with VM crash. + try { + LinkedInsanity previous = null; + while (true) { + previous = new LinkedInsanity(previous); + } + } catch (OutOfMemoryError e) { + // That's expected + } + } + + private static class LinkedInsanity { + private final LinkedInsanity previous; + private final int[] padding = new int[64000]; + + public LinkedInsanity(LinkedInsanity previous) { + this.previous = previous; + } + } +} diff --git a/test/hotspot/jtreg/runtime/os/TestTrimNative.java b/test/hotspot/jtreg/runtime/os/TestTrimNative.java new file mode 100644 index 00000000000..50b6e561734 --- /dev/null +++ b/test/hotspot/jtreg/runtime/os/TestTrimNative.java @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2023 SAP SE. All rights reserved. + * Copyright (c) 2023 Red Hat, Inc. All rights reserved. + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
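TestOOM above rests on a simple size estimate: every LinkedInsanity node keeps an int[64000] alive, roughly 256 KB of payload, so a few thousand nodes suffice to exhaust the 1 GB heap the test runs with and trigger the expected OutOfMemoryError. A rough back-of-the-envelope sketch of that estimate (the header and reference sizes are assumptions):

public class OomEstimateSketch {
    public static void main(String[] args) {
        long heapBytes = 1L << 30;               // matches the test's -Xmx1g
        long perNode = 64000L * 4                // int[64000] payload
                     + 16 + 16 + 8;              // assumed array header, object header, reference field
        System.out.println("bytes per node   ~ " + perNode);
        System.out.println("nodes until OOME ~ " + heapBytes / perNode); // prints roughly 4190
    }
}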
+ * + */ + +/* + * @test id=trimNative + * @requires (os.family=="linux") & !vm.musl + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run driver TestTrimNative trimNative + */ + +/* + * @test id=trimNativeHighInterval + * @summary High interval trimming should not even kick in for short program runtimes + * @requires (os.family=="linux") & !vm.musl + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run driver TestTrimNative trimNativeHighInterval + */ + +/* + * @test id=trimNativeLowInterval + * @summary Very low (sub-second) interval, nothing should explode + * @requires (os.family=="linux") & !vm.musl + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run driver TestTrimNative trimNativeLowInterval + */ + +/* + * @test id=testOffByDefault + * @summary Test that trimming is disabled by default + * @requires (os.family=="linux") & !vm.musl + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run driver TestTrimNative testOffByDefault + */ + +/* + * @test id=testOffExplicit + * @summary Test that trimming can be disabled explicitly + * @requires (os.family=="linux") & !vm.musl + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run driver TestTrimNative testOffExplicit + */ + +/* + * @test id=testOffOnNonCompliantPlatforms + * @summary Test that trimming is correctly reported as unavailable if unavailable + * @requires (os.family!="linux") | vm.musl + * @modules java.base/jdk.internal.misc + * @library /test/lib + * @build jdk.test.whitebox.WhiteBox + * @run driver jdk.test.lib.helpers.ClassFileInstaller jdk.test.whitebox.WhiteBox + * @run driver TestTrimNative testOffOnNonCompliantPlatforms + */ + +import jdk.test.lib.Platform; +import jdk.test.lib.process.OutputAnalyzer; +import jdk.test.lib.process.ProcessTools; + +import java.io.IOException; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import jdk.test.whitebox.WhiteBox; + +public class TestTrimNative { + + // Actual RSS increase is a lot larger than 4 MB. Depends on glibc overhead, and NMT malloc headers in debug VMs. + // We need small-grained allocations to make sure they actually increase RSS (all touched) and to see the + // glibc-retaining-memory effect. 
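The parsing helper further down in this test extracts the RSS+Swap change from unified-logging lines of the shape "Periodic Trim (<n>): <before><unit>-><after><unit>" and sums up the reductions. A worked example of that conversion on an illustrative line (the log decorations here are invented; only the portion matched by the test's regex matters):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TrimLineParseSketch {
    public static void main(String[] args) {
        String line = "[2.345s][debug][trimnative] Periodic Trim (3): 740M->320M (-420M)"; // illustrative only
        Pattern pat = Pattern.compile(".*\\[trimnative\\] Periodic Trim \\(\\d+\\): (\\d+)([BKMG])->(\\d+)([BKMG]).*");
        Matcher mat = pat.matcher(line);
        if (mat.matches()) {
            int unitBefore = "BKMG".indexOf(mat.group(2)); // B=0, K=1, M=2, G=3
            int unitAfter  = "BKMG".indexOf(mat.group(4));
            long before = Long.parseLong(mat.group(1)) * (1L << (10 * unitBefore));
            long after  = Long.parseLong(mat.group(3)) * (1L << (10 * unitAfter));
            System.out.println("RSS+Swap reduction: " + (before - after) + " bytes"); // 440401920, i.e. 420 MiB
        }
    }
}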
+ static final int szAllocations = 128; + static final int totalAllocationsSize = 128 * 1024 * 1024; // 128 MB total + static final int numAllocations = totalAllocationsSize / szAllocations; + + static long[] ptrs = new long[numAllocations]; + + enum Unit { + B(1), K(1024), M(1024*1024), G(1024*1024*1024); + public final long size; + Unit(long size) { this.size = size; } + } + + private static String[] prepareOptions(String[] extraVMOptions, String[] programOptions) { + List allOptions = new ArrayList(); + if (extraVMOptions != null) { + allOptions.addAll(Arrays.asList(extraVMOptions)); + } + allOptions.add("-Xmx128m"); + allOptions.add("-Xms128m"); // Stabilize RSS + allOptions.add("-XX:+AlwaysPreTouch"); // Stabilize RSS + allOptions.add("-XX:+UnlockDiagnosticVMOptions"); // For whitebox + allOptions.add("-XX:+WhiteBoxAPI"); + allOptions.add("-Xbootclasspath/a:."); + allOptions.add("-XX:-ExplicitGCInvokesConcurrent"); // Invoke explicit GC on System.gc + allOptions.add("-Xlog:trimnative=debug"); + allOptions.add("--add-exports=java.base/jdk.internal.misc=ALL-UNNAMED"); + if (programOptions != null) { + allOptions.addAll(Arrays.asList(programOptions)); + } + return allOptions.toArray(new String[0]); + } + + private static OutputAnalyzer runTestWithOptions(String[] extraOptions, String[] programOptions) throws IOException { + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(prepareOptions(extraOptions, programOptions)); + OutputAnalyzer output = new OutputAnalyzer(pb.start()); + output.shouldHaveExitValue(0); + return output; + } + + private static void checkExpectedLogMessages(OutputAnalyzer output, boolean expectEnabled, + int expectedInterval) { + if (expectEnabled) { + output.shouldContain("Periodic native trim enabled (interval: " + expectedInterval + " ms"); + output.shouldContain("Native heap trimmer start"); + output.shouldContain("Native heap trimmer stop"); + } else { + output.shouldNotContain("Periodic native trim enabled"); + } + } + + /** + * Given JVM output, look for one or more log lines that describes a successful negative trim. The total amount + * of trims should be matching about what the test program allocated. + * @param output + * @param minTrimsExpected min number of periodic trim lines expected in UL log + * @param maxTrimsExpected min number of periodic trim lines expected in UL log + */ + private static void parseOutputAndLookForNegativeTrim(OutputAnalyzer output, int minTrimsExpected, + int maxTrimsExpected) { + output.reportDiagnosticSummary(); + List lines = output.asLines(); + Pattern pat = Pattern.compile(".*\\[trimnative\\] Periodic Trim \\(\\d+\\): (\\d+)([BKMG])->(\\d+)([BKMG]).*"); + int numTrimsFound = 0; + long rssReductionTotal = 0; + for (String line : lines) { + Matcher mat = pat.matcher(line); + if (mat.matches()) { + long rss1 = Long.parseLong(mat.group(1)) * Unit.valueOf(mat.group(2)).size; + long rss2 = Long.parseLong(mat.group(3)) * Unit.valueOf(mat.group(4)).size; + if (rss1 > rss2) { + rssReductionTotal += (rss1 - rss2); + } + numTrimsFound ++; + } + if (numTrimsFound > maxTrimsExpected) { + throw new RuntimeException("Abnormal high number of periodic trim attempts found (more than " + maxTrimsExpected + + "). Does the interval setting not work?"); + } + } + if (numTrimsFound < minTrimsExpected) { + throw new RuntimeException("We found fewer (periodic) trim lines in UL log than expected (expected at least " + minTrimsExpected + + ", found " + numTrimsFound + ")."); + } + if (maxTrimsExpected > 0) { + // This is very fuzzy. 
Test program malloced X bytes, then freed them again and trimmed. But the log line prints change in RSS. + // Which, of course, is influenced by a lot of other factors. But we expect to see *some* reasonable reduction in RSS + // due to trimming. + float fudge = 0.5f; + // On ppc, we see a vastly diminished return (~3M reduction instead of ~200), I suspect because of the underlying + // 64k pages lead to a different geometry. Manual tests with larger reclaim sizes show that autotrim works. For + // this test, we just reduce the fudge factor. + if (Platform.isPPC()) { // le and be both + fudge = 0.01f; + } + long expectedMinimalReduction = (long) (totalAllocationsSize * fudge); + if (rssReductionTotal < expectedMinimalReduction) { + throw new RuntimeException("We did not see the expected RSS reduction in the UL log. Expected (with fudge)" + + " to see at least a combined reduction of " + expectedMinimalReduction + "."); + } + } + } + + static class Tester { + public static void main(String[] args) throws Exception { + long sleeptime = Long.parseLong(args[0]); + + System.out.println("Will spike now..."); + WhiteBox wb = WhiteBox.getWhiteBox(); + for (int i = 0; i < numAllocations; i++) { + ptrs[i] = wb.NMTMalloc(szAllocations); + wb.preTouchMemory(ptrs[i], szAllocations); + } + for (int i = 0; i < numAllocations; i++) { + wb.NMTFree(ptrs[i]); + } + System.out.println("Done spiking."); + + System.out.println("GC..."); + System.gc(); + + // give GC time to react + System.out.println("Sleeping..."); + Thread.sleep(sleeptime); + System.out.println("Done."); + } + } + + public static void main(String[] args) throws Exception { + + if (args.length == 0) { + throw new RuntimeException("Argument error"); + } + + switch (args[0]) { + case "trimNative": { + long trimInterval = 500; // twice per second + long ms1 = System.currentTimeMillis(); + OutputAnalyzer output = runTestWithOptions( + new String[] { "-XX:+UnlockExperimentalVMOptions", "-XX:TrimNativeHeapInterval=" + trimInterval }, + new String[] { TestTrimNative.Tester.class.getName(), "5000" } + ); + long ms2 = System.currentTimeMillis(); + long runtime_ms = ms2 - ms1; + + checkExpectedLogMessages(output, true, 500); + + long maxTrimsExpected = runtime_ms / trimInterval; + long minTrimsExpected = maxTrimsExpected / 2; + parseOutputAndLookForNegativeTrim(output, (int) minTrimsExpected, (int) maxTrimsExpected); + } break; + + case "trimNativeHighInterval": { + OutputAnalyzer output = runTestWithOptions( + new String[] { "-XX:+UnlockExperimentalVMOptions", "-XX:TrimNativeHeapInterval=" + Integer.MAX_VALUE }, + new String[] { TestTrimNative.Tester.class.getName(), "5000" } + ); + checkExpectedLogMessages(output, true, Integer.MAX_VALUE); + // We should not see any trims since the interval would prevent them + parseOutputAndLookForNegativeTrim(output, 0, 0); + } break; + + case "trimNativeLowInterval": { + OutputAnalyzer output = runTestWithOptions( + new String[] { "-XX:+UnlockExperimentalVMOptions", "-XX:TrimNativeHeapInterval=1" }, + new String[] { TestTrimNative.Tester.class.getName(), "0" } + ); + checkExpectedLogMessages(output, true, 1); + parseOutputAndLookForNegativeTrim(output, 1, 3000); + } break; + + case "testOffOnNonCompliantPlatforms": { + OutputAnalyzer output = runTestWithOptions( + new String[] { "-XX:+UnlockExperimentalVMOptions", "-XX:TrimNativeHeapInterval=1" }, + new String[] { "-version" } + ); + checkExpectedLogMessages(output, false, 0); + parseOutputAndLookForNegativeTrim(output, 0, 0); + // The following output is expected 
to be printed with warning level, so it should not need -Xlog + output.shouldContain("[warning][trimnative] Native heap trim is not supported on this platform"); + } break; + + case "testOffExplicit": { + OutputAnalyzer output = runTestWithOptions( + new String[] { "-XX:+UnlockExperimentalVMOptions", "-XX:TrimNativeHeapInterval=0" }, + new String[] { "-version" } + ); + checkExpectedLogMessages(output, false, 0); + parseOutputAndLookForNegativeTrim(output, 0, 0); + } break; + + case "testOffByDefault": { + OutputAnalyzer output = runTestWithOptions(null, new String[] { "-version" } ); + checkExpectedLogMessages(output, false, 0); + parseOutputAndLookForNegativeTrim(output, 0, 0); + } break; + + default: + throw new RuntimeException("Invalid test " + args[0]); + + } + } +} diff --git a/test/hotspot/jtreg/runtime/whitebox/TestWBDeflateIdleMonitors.java b/test/hotspot/jtreg/runtime/whitebox/TestWBDeflateIdleMonitors.java index e2b85484fc8..9c5dbff664a 100644 --- a/test/hotspot/jtreg/runtime/whitebox/TestWBDeflateIdleMonitors.java +++ b/test/hotspot/jtreg/runtime/whitebox/TestWBDeflateIdleMonitors.java @@ -61,12 +61,11 @@ public static class InflateMonitorsTest { static WhiteBox wb = WhiteBox.getWhiteBox(); public static Object obj; - public static void main(String args[]) { + public static void main(String args[]) throws Exception { obj = new Object(); synchronized (obj) { - // HotSpot implementation detail: asking for the hash code - // when the object is locked causes monitor inflation. - if (obj.hashCode() == 0xBAD) System.out.println("!"); + // The current implementation of notify-wait requires inflation. + obj.wait(1); Asserts.assertEQ(wb.isMonitorInflated(obj), true, "Monitor should be inflated."); } diff --git a/test/hotspot/jtreg/serviceability/dcmd/vm/TrimLibcHeapTest.java b/test/hotspot/jtreg/serviceability/dcmd/vm/TrimLibcHeapTest.java index 2688c8e8fe7..75cfcdfaf24 100644 --- a/test/hotspot/jtreg/serviceability/dcmd/vm/TrimLibcHeapTest.java +++ b/test/hotspot/jtreg/serviceability/dcmd/vm/TrimLibcHeapTest.java @@ -22,6 +22,7 @@ * questions. */ +import jdk.test.lib.Platform; import org.testng.annotations.Test; import jdk.test.lib.dcmd.CommandExecutor; import jdk.test.lib.dcmd.JMXExecutor; @@ -31,7 +32,7 @@ * @test * @summary Test of diagnostic command VM.trim_libc_heap * @library /test/lib - * @requires os.family == "linux" + * @requires (os.family=="linux") & !vm.musl * @modules java.base/jdk.internal.misc * java.compiler * java.management @@ -42,9 +43,10 @@ public class TrimLibcHeapTest { public void run(CommandExecutor executor) { OutputAnalyzer output = executor.execute("System.trim_native_heap"); output.reportDiagnosticSummary(); - output.shouldMatch("(Done|Not available)"); // Not available could happen on Linux + non-glibc (eg. muslc) - if (output.firstMatch("Done") != null) { - output.shouldMatch("(Virtual size before|RSS before|Swap before|No details available)"); + if (Platform.isMusl()) { + output.shouldContain("Not available"); + } else { + output.shouldMatch("Trim native heap: RSS\\+Swap: \\d+[BKMG]->\\d+[BKMG] \\(-\\d+[BKMG]\\)"); } } diff --git a/test/hotspot/jtreg/serviceability/sa/TestObjectMonitorIterate.java b/test/hotspot/jtreg/serviceability/sa/TestObjectMonitorIterate.java index e2d79b1ce9d..a4b13a3fcbf 100644 --- a/test/hotspot/jtreg/serviceability/sa/TestObjectMonitorIterate.java +++ b/test/hotspot/jtreg/serviceability/sa/TestObjectMonitorIterate.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2021, NTT DATA. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -82,6 +82,7 @@ private static void createAnotherToAttach(long lingeredAppPid) throws Exception Long.toString(lingeredAppPid)); SATestUtils.addPrivilegesIfNeeded(processBuilder); OutputAnalyzer SAOutput = ProcessTools.executeProcess(processBuilder); + SAOutput.shouldContain("SteadyStateLock"); SAOutput.shouldHaveExitValue(0); System.out.println(SAOutput.getOutput()); } diff --git a/test/jdk/com/sun/jdi/EATests.java b/test/jdk/com/sun/jdi/EATests.java index 7aed62f0a70..cb8200ea252 100644 --- a/test/jdk/com/sun/jdi/EATests.java +++ b/test/jdk/com/sun/jdi/EATests.java @@ -42,6 +42,7 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:+UseBiasedLocking + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 * @run driver EATests * -XX:+UnlockDiagnosticVMOptions * -Xms256m -Xmx256m @@ -50,6 +51,7 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:-EliminateLocks -XX:+EliminateNestedLocks -XX:+UseBiasedLocking -XX:-UseOptoBiasInlining + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 * @run driver EATests * -XX:+UnlockDiagnosticVMOptions * -Xms256m -Xmx256m @@ -58,6 +60,7 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:+DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:+UseBiasedLocking + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 * @run driver EATests * -XX:+UnlockDiagnosticVMOptions * -Xms256m -Xmx256m @@ -66,6 +69,7 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:-DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:+UseBiasedLocking + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 * @run driver EATests * -XX:+UnlockDiagnosticVMOptions * -Xms256m -Xmx256m @@ -74,6 +78,7 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:-UseBiasedLocking + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 * @run driver EATests * -XX:+UnlockDiagnosticVMOptions * -Xms256m -Xmx256m @@ -82,6 +87,7 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:+DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:-UseBiasedLocking + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 * @run driver EATests * -XX:+UnlockDiagnosticVMOptions * -Xms256m -Xmx256m @@ -90,6 +96,44 @@ * -XX:+WhiteBoxAPI * -Xbatch * -XX:-DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:-UseBiasedLocking + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 + * + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. + * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. 
+ * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:-EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. + * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:+DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. + * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:-DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 * * @comment Excercise -XX:+DeoptimizeObjectsALot. Mostly to prevent bit-rot because the option is meant to stress object deoptimization * with non-synthetic workloads. @@ -103,7 +147,46 @@ * -XX:-DoEscapeAnalysis -XX:-EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks -XX:-UseBiasedLocking * -XX:+IgnoreUnrecognizedVMOptions -XX:+DeoptimizeObjectsALot * + * @bug 8324881 + * @comment Regression test for using the wrong thread when logging during re-locking from deoptimization. + * + * @comment DiagnoseSyncOnValueBasedClasses=2 will cause logging when locking on \@ValueBased objects. + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. + * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 + * -XX:DiagnoseSyncOnValueBasedClasses=2 + * + * @comment Re-lock may inflate monitors when re-locking, which cause monitorinflation trace logging. + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. + * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=2 + * -Xlog:monitorinflation=trace:file=monitorinflation.log + * + * @comment Re-lock may race with deflation. + * @run driver EATests + * -XX:+UnlockDiagnosticVMOptions + * -Xms256m -Xmx256m + * -Xbootclasspath/a:. 
+ * -XX:CompileCommand=dontinline,*::dontinline_* + * -XX:+WhiteBoxAPI + * -Xbatch + * -XX:+DoEscapeAnalysis -XX:+EliminateAllocations -XX:+EliminateLocks -XX:+EliminateNestedLocks + * -XX:+UnlockExperimentalVMOptions -XX:LockingMode=0 + * -XX:GuaranteedAsyncDeflationInterval=1000 */ + /** * @test * @bug 8227745 @@ -233,9 +316,11 @@ public static void main(String[] args) { // Relocking test cases new EARelockingSimpleTarget() .run(); new EARelockingSimple_2Target() .run(); + new EARelockingSimpleWithAccessInOtherThreadTarget() .run(); new EARelockingRecursiveTarget() .run(); new EARelockingNestedInflatedTarget() .run(); new EARelockingNestedInflated_02Target() .run(); + new EARelockingNestedInflated_03Target() .run(); new EARelockingArgEscapeLWLockedInCalleeFrameTarget() .run(); new EARelockingArgEscapeLWLockedInCalleeFrame_2Target() .run(); new EARelockingArgEscapeLWLockedInCalleeFrame_3Target() .run(); @@ -243,6 +328,7 @@ public static void main(String[] args) { new EAGetOwnedMonitorsTarget() .run(); new EAEntryCountTarget() .run(); new EARelockingObjectCurrentlyWaitingOnTarget() .run(); + new EARelockingValueBasedTarget() .run(); // Test cases that require deoptimization even though neither // locks nor allocations are eliminated at the point where @@ -353,9 +439,11 @@ protected void runTests() throws Exception { // Relocking test cases new EARelockingSimple() .run(this); new EARelockingSimple_2() .run(this); + new EARelockingSimpleWithAccessInOtherThread() .run(this); new EARelockingRecursive() .run(this); new EARelockingNestedInflated() .run(this); new EARelockingNestedInflated_02() .run(this); + new EARelockingNestedInflated_03() .run(this); new EARelockingArgEscapeLWLockedInCalleeFrame() .run(this); new EARelockingArgEscapeLWLockedInCalleeFrame_2() .run(this); new EARelockingArgEscapeLWLockedInCalleeFrame_3() .run(this); @@ -363,6 +451,7 @@ protected void runTests() throws Exception { new EAGetOwnedMonitors() .run(this); new EAEntryCount() .run(this); new EARelockingObjectCurrentlyWaitingOn() .run(this); + new EARelockingValueBased() .run(this); // Test cases that require deoptimization even though neither // locks nor allocations are eliminated at the point where @@ -1787,6 +1876,7 @@ public void dontinline_testMethod() { */ class EARelockingSimple_2 extends EATestCaseBaseDebugger { + public void runTestCase() throws Exception { BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); printStack(bpe.thread()); @@ -1812,6 +1902,66 @@ public void dontinline_testMethod() { ///////////////////////////////////////////////////////////////////////////// +// The debugger reads and publishes an object with eliminated locking to an instance field. +// A 2nd thread in the debuggee finds it there and changes its state using a synchronized method. +// Without eager relocking the accesses are unsynchronized which can be observed. 
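The comment above describes the race the new test case targets: once the debuggee object escapes to the second thread, that thread's synchronized inc() calls must not interleave with the increments performed inside the region whose lock C2 had eliminated, which is exactly what eager relocking guarantees. A plain-Java sketch of the invariant being relied on, with no JDI and no escape analysis involved (class and field names are illustrative):

public class SyncCounterInvariantSketch {
    static class SyncCounter {
        private int val;
        synchronized int inc() { return val++; }
    }

    static volatile SyncCounter shared;

    public static void main(String[] args) {
        Thread other = new Thread(() -> {
            while (!Thread.currentThread().isInterrupted()) {
                SyncCounter c = shared;
                if (c != null) {
                    c.inc();           // blocks while the main thread holds the monitor
                }
            }
        });
        other.setDaemon(true);
        other.start();

        SyncCounter c = new SyncCounter();
        synchronized (c) {             // stands in for the lock that would be eliminated
            c.inc();                   // returns 0
            shared = c;                // "publish", as the debugger does via setField
            int second = c.inc();      // must return 1 as long as the lock is really held
            System.out.println("second inc() returned " + second);
        }
        other.interrupt();
    }
}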
+class EARelockingSimpleWithAccessInOtherThread extends EATestCaseBaseDebugger { + + public void runTestCase() throws Exception { + BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); + printStack(bpe.thread()); + String l1ClassName = EARelockingSimpleWithAccessInOtherThreadTarget.SyncCounter.class.getName(); + ObjectReference ctr = getLocalRef(bpe.thread().frame(1), l1ClassName, "l1"); + setField(testCase, "sharedCounter", ctr); + terminateEndlessLoop(); + } +} + +class EARelockingSimpleWithAccessInOtherThreadTarget extends EATestCaseBaseTarget { + + public static class SyncCounter { + private int val; + public synchronized int inc() { return val++; } + } + + public volatile SyncCounter sharedCounter; + + @Override + public void setUp() { + super.setUp(); + doLoop = true; + Thread t = new Thread() { + public void run() { + while (doLoop) { + SyncCounter ctr = sharedCounter; + if (ctr != null) { + ctr.inc(); + } + } + } + }; + t.setDaemon(true); + t.start(); + } + + public void dontinline_testMethod() { + SyncCounter l1 = new SyncCounter(); + synchronized (l1) { // Eliminated locking + l1.inc(); + dontinline_brkpt(); // Debugger publishes l1 to sharedCounter. + iResult = l1.inc(); // Changes by the 2nd thread will be observed if l1 + // was not relocked before passing it to the debugger. + } + } + + @Override + public int getExpectedIResult() { + return 1; + } +} + +///////////////////////////////////////////////////////////////////////////// + // Test recursive locking class EARelockingRecursiveTarget extends EATestCaseBaseTarget { @@ -1930,6 +2080,94 @@ public void testMethod_inlined(XYVal l2) { ///////////////////////////////////////////////////////////////////////////// +/** + * Like {@link EARelockingNestedInflated_02} with the difference that the + * inflation of the lock happens because of contention. + */ +class EARelockingNestedInflated_03 extends EATestCaseBaseDebugger { + + public void runTestCase() throws Exception { + BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); + printStack(bpe.thread()); + @SuppressWarnings("unused") + ObjectReference o = getLocalRef(bpe.thread().frame(2), XYVAL_NAME, "l1"); + } +} + +class EARelockingNestedInflated_03Target extends EATestCaseBaseTarget { + + public XYVal lockInflatedByContention; + public boolean doLockNow; + public EATestCaseBaseTarget testCase; + + @Override + public void setUp() { + super.setUp(); + testMethodDepth = 2; + lockInflatedByContention = new XYVal(1, 1); + testCase = this; + } + + @Override + public void warmupDone() { + super.warmupDone(); + // Use new lock. lockInflatedByContention might have been inflated because of recursion. 
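The contender thread set up just after this point exists only to make lockInflatedByContention contended, because a contended monitor entry is what turns a thin lock into an inflated ObjectMonitor. A compact sketch of that mechanism, assuming the WhiteBox API is installed as in the test's @run lines; the polling loop and sleep interval are arbitrary, and the exact inflation point varies with LockingMode:

import jdk.test.whitebox.WhiteBox;

public class ContentionInflationSketch {
    public static void main(String[] args) throws Exception {
        WhiteBox wb = WhiteBox.getWhiteBox();
        Object lock = new Object();
        Thread contender;
        synchronized (lock) {
            System.out.println("inflated before contention: " + wb.isMonitorInflated(lock));
            contender = new Thread(() -> {
                synchronized (lock) {
                    // Blocks until the main thread leaves its synchronized block.
                }
            });
            contender.start();
            while (!wb.isMonitorInflated(lock)) {  // the blocked contender forces inflation
                Thread.sleep(10);
            }
            System.out.println("inflated under contention: " + wb.isMonitorInflated(lock));
        }
        contender.join();
    }
}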
+ lockInflatedByContention = new XYVal(1, 1); + // Start thread that tries to enter lockInflatedByContention while the main thread owns it -> inflation + new Thread(() -> { + while (true) { + synchronized (testCase) { + try { + if (doLockNow) { + doLockNow = false; // reset for main thread + testCase.notify(); + break; + } + testCase.wait(); + } catch (InterruptedException e) { /* ignored */ } + } + } + synchronized (lockInflatedByContention) { // will block and trigger inflation + msg(Thread.currentThread().getName() + ": acquired lockInflatedByContention"); + } + }, testCaseName + ": Lock Contender (test thread)").start(); + } + + public void dontinline_testMethod() { + @SuppressWarnings("unused") + XYVal xy = new XYVal(1, 1); // scalar replaced + XYVal l1 = lockInflatedByContention; // read by debugger + synchronized (l1) { + testMethod_inlined(l1); + } + } + + public void testMethod_inlined(XYVal l2) { + synchronized (l2) { // eliminated nested locking + dontinline_notifyOtherThread(); + dontinline_brkpt(); + } + } + + public void dontinline_notifyOtherThread() { + if (!warmupDone) { + return; + } + synchronized (testCase) { + doLockNow = true; + testCase.notify(); + // wait for other thread to reset doLockNow again + while (doLockNow) { + try { + testCase.wait(); + } catch (InterruptedException e) { /* ignored */ } + } + } + } +} + +///////////////////////////////////////////////////////////////////////////// + /** * Checks if an eliminated lock of an ArgEscape object l1 can be relocked if * l1 is locked in a callee frame. @@ -2109,6 +2347,48 @@ public boolean testFrameShouldBeDeoptimized() { ///////////////////////////////////////////////////////////////////////////// +/** + * Similar to {@link EARelockingArgEscapeLWLockedInCalleeFrame_2Target}. It does + * not use recursive locking and exposed a bug in the lightweight-locking implementation. + */ +class EARelockingArgEscapeLWLockedInCalleeFrameNoRecursive extends EATestCaseBaseDebugger { + + public void runTestCase() throws Exception { + BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); + printStack(bpe.thread()); + @SuppressWarnings("unused") + ObjectReference o = getLocalRef(bpe.thread().frame(2), XYVAL_NAME, "l1"); + } +} + +class EARelockingArgEscapeLWLockedInCalleeFrameNoRecursiveTarget extends EATestCaseBaseTarget { + + @Override + public void setUp() { + super.setUp(); + testMethodDepth = 2; + } + + public void dontinline_testMethod() { + XYVal l1 = new XYVal(1, 1); // NoEscape, scalar replaced + XYVal l2 = new XYVal(4, 2); // NoEscape, scalar replaced + XYVal l3 = new XYVal(5, 3); // ArgEscape + synchronized (l1) { // eliminated + synchronized (l2) { // eliminated + l3.dontinline_sync_method(this); // l3 escapes + } + } + iResult = l2.x + l2.y; + } + + @Override + public int getExpectedIResult() { + return 6; + } +} + +///////////////////////////////////////////////////////////////////////////// + /** * Test relocking eliminated (nested) locks of an object on which the * target thread currently waits. @@ -2202,6 +2482,32 @@ public void dontinline_waitWhenWarmupDone(ForLocking l2) throws Exception { } } + +///////////////////////////////////////////////////////////////////////////// + +/** + * Test relocking eliminated @ValueBased object. 
+ */ +class EARelockingValueBased extends EATestCaseBaseDebugger { + + public void runTestCase() throws Exception { + BreakpointEvent bpe = resumeTo(TARGET_TESTCASE_BASE_NAME, "dontinline_brkpt", "()V"); + printStack(bpe.thread()); + @SuppressWarnings("unused") + ObjectReference o = getLocalRef(bpe.thread().frame(1), Integer.class.getName(), "l1"); + } +} + +class EARelockingValueBasedTarget extends EATestCaseBaseTarget { + + public void dontinline_testMethod() { + Integer l1 = new Integer(255); + synchronized (l1) { + dontinline_brkpt(); + } + } +} + ///////////////////////////////////////////////////////////////////////////// // // Test cases that require deoptimization even though neither locks diff --git a/test/jdk/java/lang/instrument/GetObjectSizeIntrinsicsTest.java b/test/jdk/java/lang/instrument/GetObjectSizeIntrinsicsTest.java index a4fdab2ec47..3f4e8841912 100644 --- a/test/jdk/java/lang/instrument/GetObjectSizeIntrinsicsTest.java +++ b/test/jdk/java/lang/instrument/GetObjectSizeIntrinsicsTest.java @@ -313,6 +313,9 @@ public class GetObjectSizeIntrinsicsTest extends ASimpleInstrumentationTestCase static final int LARGE_INT_ARRAY_SIZE = 1024*1024*1024 + 1024; static final int LARGE_OBJ_ARRAY_SIZE = (4096/(int)REF_SIZE)*1024*1024 + 1024; + static final boolean COMPACT_HEADERS = WhiteBox.getWhiteBox().getBooleanVMFlag("UseCompactObjectHeaders"); + static final int HEADER_SIZE = COMPACT_HEADERS ? 8 : (Platform.is64bit() ? 16 : 8); + final String mode; public GetObjectSizeIntrinsicsTest(String name, String mode) { @@ -372,14 +375,14 @@ private static long roundUp(long v, long a) { } private void testSize_newObject() { - long expected = roundUp(Platform.is64bit() ? 16 : 8, OBJ_ALIGN); + long expected = roundUp(HEADER_SIZE, OBJ_ALIGN); for (int c = 0; c < ITERS; c++) { assertEquals(expected, fInst.getObjectSize(new Object())); } } private void testSize_localObject() { - long expected = roundUp(Platform.is64bit() ? 16 : 8, OBJ_ALIGN); + long expected = roundUp(HEADER_SIZE, OBJ_ALIGN); Object o = new Object(); for (int c = 0; c < ITERS; c++) { assertEquals(expected, fInst.getObjectSize(o)); @@ -389,7 +392,7 @@ private void testSize_localObject() { static Object staticO = new Object(); private void testSize_fieldObject() { - long expected = roundUp(Platform.is64bit() ? 16 : 8, OBJ_ALIGN); + long expected = roundUp(HEADER_SIZE, OBJ_ALIGN); for (int c = 0; c < ITERS; c++) { assertEquals(expected, fInst.getObjectSize(staticO)); } diff --git a/test/jtreg-ext/requires/VMProps.java b/test/jtreg-ext/requires/VMProps.java index ec4ca37bd4b..e39c3cff233 100644 --- a/test/jtreg-ext/requires/VMProps.java +++ b/test/jtreg-ext/requires/VMProps.java @@ -524,7 +524,11 @@ private String isFlagless() { // added by run-test framework "MaxRAMPercentage", // added by test environment - "CreateCoredumpOnCrash" + "CreateCoredumpOnCrash", + // experimental features unlocking flag does not affect behavior + "UnlockExperimentalVMOptions", + // all compact headers settings should run flagless tests + "UseCompactObjectHeaders" ); result &= allFlags.stream() .filter(s -> s.startsWith("-XX:")) diff --git a/test/lib/jdk/test/lib/apps/LingeredApp.java b/test/lib/jdk/test/lib/apps/LingeredApp.java index 732c988ebe7..d78dba76e01 100644 --- a/test/lib/jdk/test/lib/apps/LingeredApp.java +++ b/test/lib/jdk/test/lib/apps/LingeredApp.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2024, Oracle and/or its affiliates. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -557,6 +557,7 @@ public void run() { } } + static class SteadyStateLock {}; /** * This part is the application itself. First arg is optional "forceCrash". @@ -586,7 +587,7 @@ public static void main(String args[]) { Path path = Paths.get(theLockFileName); try { - Object steadyStateObj = new Object(); + Object steadyStateObj = new SteadyStateLock(); synchronized(steadyStateObj) { startSteadyStateThread(steadyStateObj); if (forceCrash) { diff --git a/test/lib/jdk/test/whitebox/WhiteBox.java b/test/lib/jdk/test/whitebox/WhiteBox.java index 846c9ed614c..72a756c2b97 100644 --- a/test/lib/jdk/test/whitebox/WhiteBox.java +++ b/test/lib/jdk/test/whitebox/WhiteBox.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -119,6 +119,10 @@ public boolean isMonitorInflated(Object obj) { return isMonitorInflated0(obj); } + public native int getLockStackCapacity(); + + public native boolean supportsRecursiveLightweightLocking(); + public native void forceSafepoint(); private native long getConstantPool0(Class aClass); @@ -646,4 +650,6 @@ public native int validateCgroup(String procCgroups, public native void lockCritical(); public native void unlockCritical(); + + public native void preTouchMemory(long addr, long size); } diff --git a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java index ea25250f45b..7785f7498dc 100644 --- a/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java +++ b/test/micro/org/openjdk/bench/vm/lang/LockUnlock.java @@ -48,6 +48,8 @@ public class LockUnlock { public Object lockObject1; public Object lockObject2; + public volatile Object lockObject3Inflated; + public volatile Object lockObject4Inflated; public int factorial; public int dummyInt1; public int dummyInt2; @@ -56,13 +58,28 @@ public class LockUnlock { public void setup() { lockObject1 = new Object(); lockObject2 = new Object(); + lockObject3Inflated = new Object(); + lockObject4Inflated = new Object(); + + // Inflate the lock to use an ObjectMonitor + try { + synchronized (lockObject3Inflated) { + lockObject3Inflated.wait(1); + } + synchronized (lockObject4Inflated) { + lockObject4Inflated.wait(1); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + dummyInt1 = 47; dummyInt2 = 11; // anything } /** Perform a synchronized on a local object within a loop. */ @Benchmark - public void testSimpleLockUnlock() { + public void testBasicSimpleLockUnlockLocal() { Object localObject = lockObject1; for (int i = 0; i < innerCount; i++) { synchronized (localObject) { @@ -72,9 +89,43 @@ public void testSimpleLockUnlock() { } } + /** Perform a synchronized on an object within a loop. */ + @Benchmark + public void testBasicSimpleLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject1) { + dummyInt1++; + dummyInt2++; + } + } + } + + /** Perform a synchronized on a local object within a loop. 
*/ + @Benchmark + public void testInflatedSimpleLockUnlockLocal() { + Object localObject = lockObject3Inflated; + for (int i = 0; i < innerCount; i++) { + synchronized (localObject) { + dummyInt1++; + dummyInt2++; + } + } + } + + /** Perform a synchronized on an object within a loop. */ + @Benchmark + public void testInflatedSimpleLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject3Inflated) { + dummyInt1++; + dummyInt2++; + } + } + } + /** Perform a recursive synchronized on a local object within a loop. */ @Benchmark - public void testRecursiveLockUnlock() { + public void testBasicRecursiveLockUnlockLocal() { Object localObject = lockObject1; for (int i = 0; i < innerCount; i++) { synchronized (localObject) { @@ -86,9 +137,22 @@ public void testRecursiveLockUnlock() { } } + /** Perform a recursive synchronized on an object within a loop. */ + @Benchmark + public void testBasicRecursiveLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject1) { + synchronized (lockObject1) { + dummyInt1++; + dummyInt2++; + } + } + } + } + /** Perform two synchronized after each other on the same local object. */ @Benchmark - public void testSerialLockUnlock() { + public void testBasicSerialLockUnlockLocal() { Object localObject = lockObject1; for (int i = 0; i < innerCount; i++) { synchronized (localObject) { @@ -100,6 +164,126 @@ public void testSerialLockUnlock() { } } + /** Perform two synchronized after each other on the same object. */ + @Benchmark + public void testBasicSerialLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject1) { + dummyInt1++; + } + synchronized (lockObject1) { + dummyInt2++; + } + } + } + + /** Perform two synchronized after each other on the same local object. */ + @Benchmark + public void testInflatedSerialLockUnlockLocal() { + Object localObject = lockObject3Inflated; + for (int i = 0; i < innerCount; i++) { + synchronized (localObject) { + dummyInt1++; + } + synchronized (localObject) { + dummyInt2++; + } + } + } + + /** Perform two synchronized after each other on the same object. */ + @Benchmark + public void testInflatedSerialLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject3Inflated) { + dummyInt1++; + } + synchronized (lockObject3Inflated) { + dummyInt2++; + } + } + } + + /** Perform two synchronized after each other on the same object. */ + @Benchmark + public void testInflatedMultipleSerialLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject3Inflated) { + dummyInt1++; + } + synchronized (lockObject4Inflated) { + dummyInt2++; + } + } + } + + /** Perform two synchronized after each other on the same object. */ + @Benchmark + public void testInflatedMultipleRecursiveLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject3Inflated) { + dummyInt1++; + synchronized (lockObject4Inflated) { + dummyInt2++; + } + } + } + } + + /** Perform a recursive-only synchronized on a local object within a loop. */ + @Benchmark + public void testInflatedRecursiveOnlyLockUnlockLocal() { + Object localObject = lockObject3Inflated; + synchronized (localObject) { + for (int i = 0; i < innerCount; i++) { + synchronized (localObject) { + dummyInt1++; + dummyInt2++; + } + } + } + } + + /** Perform a recursive-only synchronized on an object within a loop. 
*/ + @Benchmark + public void testInflatedRecursiveOnlyLockUnlock() { + synchronized (lockObject3Inflated) { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject3Inflated) { + dummyInt1++; + dummyInt2++; + } + } + } + } + + /** Perform a recursive-only synchronized on a local object within a loop. */ + @Benchmark + public void testInflatedRecursiveLockUnlockLocal() { + Object localObject = lockObject3Inflated; + for (int i = 0; i < innerCount; i++) { + synchronized (localObject) { + synchronized (localObject) { + dummyInt1++; + dummyInt2++; + } + } + } + } + + /** Perform a recursive-only synchronized on an object within a loop. */ + @Benchmark + public void testInflatedRecursiveLockUnlock() { + for (int i = 0; i < innerCount; i++) { + synchronized (lockObject3Inflated) { + synchronized (lockObject3Inflated) { + dummyInt1++; + dummyInt2++; + } + } + } + } + /** * Performs recursive synchronizations on the same local object. *