Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions runtime/codert_vm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,11 @@ target_include_directories(j9codert_vm

if(OMR_ARCH_X86)
j9vm_gen_asm(xnathelp.m4)
j9vm_gen_asm(xvector.m4)

target_sources(j9codert_vm PRIVATE
xnathelp.s
xvector.s
)
elseif(OMR_ARCH_POWER)
j9vm_gen_asm(pnathelp.m4)
Expand Down
22 changes: 22 additions & 0 deletions runtime/codert_vm/cnathelp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ samePCs(void *pc1, void *pc2)
#define samePCs(pc1, pc2) (MASK_PC(pc1) == MASK_PC(pc2))
#endif /* J9ZOS390 && !J9VM_ENV_DATA64 */

#if defined(J9HAMMER) && (JAVA_SPEC_VERSION >= 17)
/*
 * Declarations for the vector register save/restore routines that are
 * implemented in assembly (START_PROC entries in xvector.m4).
 *
 * JIT_HELPER gives each symbol C linkage and a void() signature. The uses
 * visible in this file only take the address of these symbols (cast to
 * void * and stored in J9JITConfig by initPureCFunctionTable).
 * NOTE(review): the void() signature looks like a placeholder rather than a
 * callable C prototype - the routines expect their own entry conditions
 * (see the header comment in xvector.m4); confirm before calling from C/C++.
 */
#define JIT_HELPER(x) extern "C" void x()
/* 512-bit variants: ZMM registers plus the opmask registers. */
JIT_HELPER(jitSaveVectorRegistersAVX512);
JIT_HELPER(jitRestoreVectorRegistersAVX512);

/* 256-bit variants: YMM registers only. */
JIT_HELPER(jitSaveVectorRegistersAVX);
JIT_HELPER(jitRestoreVectorRegistersAVX);
#endif /* defined(J9HAMMER) && (JAVA_SPEC_VERSION >= 17) */

/**
* Fix the java and decompilation stacks for cases where exceptions can be
* thrown from inside a JIT synthetic exception handler. There must be a
Expand Down Expand Up @@ -4125,6 +4134,19 @@ initPureCFunctionTable(J9JavaVM *vm)
jitConfig->old_slow_jitReportInstanceFieldWrite = (void*)old_slow_jitReportInstanceFieldWrite;
jitConfig->old_slow_jitReportStaticFieldRead = (void*)old_slow_jitReportStaticFieldRead;
jitConfig->old_slow_jitReportStaticFieldWrite = (void*)old_slow_jitReportStaticFieldWrite;

#if defined(J9HAMMER) && (JAVA_SPEC_VERSION >= 17)
if (J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags3, J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512))
{
Comment on lines +4139 to +4140
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Formatting: braces:

	if (J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags3, J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512)) {
		jitConfig->saveVectorRegisters = (void *)jitSaveVectorRegistersAVX512;
		jitConfig->restoreVectorRegisters = (void *)jitRestoreVectorRegistersAVX512;
	} else if (J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags3, J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256)) {
		jitConfig->saveVectorRegisters = (void *)jitSaveVectorRegistersAVX;
		jitConfig->restoreVectorRegisters = (void *)jitRestoreVectorRegistersAVX;
	}

jitConfig->saveVectorRegisters = (void *)jitSaveVectorRegistersAVX512;
jitConfig->restoreVectorRegisters = (void *)jitRestoreVectorRegistersAVX512;
}
else if (J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags3, J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256))
{
jitConfig->saveVectorRegisters = (void *)jitSaveVectorRegistersAVX;
jitConfig->restoreVectorRegisters = (void *)jitRestoreVectorRegistersAVX;
}
#endif /* defined(J9HAMMER) && (JAVA_SPEC_VERSION >= 17) */
}

} /* extern "C" */
7 changes: 5 additions & 2 deletions runtime/codert_vm/xnathelp.m4
Original file line number Diff line number Diff line change
Expand Up @@ -1070,10 +1070,13 @@ START_PROC(jitReferenceArrayCopy)
mov PARM_REG(2),_rcx
mov PARM_REG(1),_rbp
call FASTCALL_SYMBOL(impl_jitReferenceArrayCopy,2)
dnl set ZF if succeed
test _rax,_rax
dnl Save return value to check later.
dnl We don't check it now because restoring the register clobbers flags.
mov dword ptr J9TR_VMThread_floatTemp3[_rbp],eax
RESTORE_C_VOLATILE_REGS
SWITCH_TO_JAVA_STACK
dnl Set ZF on success.
test dword ptr J9TR_VMThread_floatTemp3[_rbp], -1
push uword ptr J9TR_VMThread_jitReturnAddress[_rbp]
ret
END_PROC(jitReferenceArrayCopy)
Expand Down
123 changes: 123 additions & 0 deletions runtime/codert_vm/xvector.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
dnl Copyright IBM Corp. and others 2023
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The commit was authored in 2021, so this should say 2021.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not completely sure because xvector.m4 did not exist at the time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It never got committed before, so I suppose the date should be this year.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Devin worked on this and committed it no later than 2021. I believe that should be the first copyright year.

dnl
dnl This program and the accompanying materials are made available under
dnl the terms of the Eclipse Public License 2.0 which accompanies this
dnl distribution and is available at https://www.eclipse.org/legal/epl-2.0/
dnl or the Apache License, Version 2.0 which accompanies this distribution and
dnl is available at https://www.apache.org/licenses/LICENSE-2.0.
dnl
dnl This Source Code may also be made available under the following
dnl Secondary Licenses when the conditions for such availability set
dnl forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
dnl General Public License, version 2 with the GNU Classpath
dnl Exception [1] and GNU General Public License, version 2 with the
dnl OpenJDK Assembly Exception [2].
dnl
dnl [1] https://www.gnu.org/software/classpath/license.html
dnl [2] https://openjdk.org/legal/assembly-exception.html
dnl
dnl SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0

include(xhelpers.m4)

FILE_START

dnl For all of these functions, on entry:
dnl
dnl 1) return address on the stack
dnl 2) r8 is a scratch register on 64-bit
dnl 3) eax is a scratch register on 32-bit

dnl jitSaveVectorRegistersAVX512
dnl
dnl Stores the ZMM registers and the opmask registers k0-k7 into the
dnl jitFPRs and maskRegisters areas addressed through J9TR_cframe_jitFPRs
dnl and J9TR_cframe_maskRegisters.
dnl
dnl 64-bit: zmm0-zmm31, one 64-byte slot each; scratch register is r8.
dnl 32-bit: zmm0-zmm7, one 64-byte slot each; scratch register is eax.
dnl
dnl The return address is popped into the scratch register before any slot
dnl is written and pushed back immediately before the ret, so the scratch
dnl register is clobbered on return.
dnl NOTE(review): presumably the pop is what makes the J9TR_cframe_* offsets
dnl relative to the stack pointer valid - confirm against SAVE_ZMM_REG and
dnl SAVE_MASK_64 in xhelpers.m4.
dnl NOTE(review): the 64-bit path writes 32 * 64 bytes starting at jitFPRs;
dnl confirm that every J9CInterpreterStackFrame layout this routine can run
dnl against reserves that much (one layout declares jitFPRs as 6 * 64 bytes).
START_PROC(jitSaveVectorRegistersAVX512)
lfence

dnl save ZMM registers
dnl (slot N lives at J9TR_cframe_jitFPRs + N*64)

ifdef({ASM_J9VM_ENV_DATA64},{
pop r8
forloop({REG_CTR}, 0, 31, {SAVE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think original PR (14632) was only saving registers 0 to 5. We need to save only volatile registers. @0xdaryl @gacholio do you know what the system ABI is for zmm and ymm registers?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On Linux, the System ABI specifies that all XMM, YMM, and ZMM registers are volatile.

On Windows, the x64 ABI specifies that XMM6-XMM15 are preserved. For YMM/ZMM6-15, the upper bits are volatile and the lower bits preserved. XMM/YMM/ZMM 0-5 and 16-31 are all volatile.

}, { dnl ASM_J9VM_ENV_DATA64
pop eax
forloop({REG_CTR}, 0, 7, {SAVE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
})

dnl The full-width contents are in memory at this point, so the upper
dnl register bits can be zeroed.
dnl NOTE(review): presumably done to avoid AVX/SSE transition penalties in
dnl the code that runs next - confirm.
vzeroupper

dnl save Opmask registers
dnl (k0-k7, slot N lives at J9TR_cframe_maskRegisters + N*8)
forloop({REG_CTR}, 0, 7, {SAVE_MASK_64(REG_CTR, J9TR_cframe_maskRegisters+(REG_CTR*8))})

dnl Put the return address back on the stack for the ret below.
ifdef({ASM_J9VM_ENV_DATA64},{
push r8
}, { dnl ASM_J9VM_ENV_DATA64
push eax
})
ret
END_PROC(jitSaveVectorRegistersAVX512)

dnl jitRestoreVectorRegistersAVX512
dnl
dnl Reloads the ZMM registers and the opmask registers k0-k7 from the
dnl jitFPRs and maskRegisters areas addressed through J9TR_cframe_jitFPRs
dnl and J9TR_cframe_maskRegisters. It uses the same slot layout as
dnl jitSaveVectorRegistersAVX512.
dnl
dnl 64-bit: zmm0-zmm31, one 64-byte slot each; scratch register is r8.
dnl 32-bit: zmm0-zmm7, one 64-byte slot each; scratch register is eax.
dnl
dnl The return address is popped into the scratch register before any slot
dnl is read and pushed back immediately before the ret, so the scratch
dnl register is clobbered on return.
dnl NOTE(review): the addressing done by RESTORE_ZMM_REG and RESTORE_MASK_64
dnl is defined in xhelpers.m4 - presumably stack-pointer relative; confirm.
START_PROC(jitRestoreVectorRegistersAVX512)
lfence

dnl restore ZMM registers
dnl (slot N lives at J9TR_cframe_jitFPRs + N*64)
ifdef({ASM_J9VM_ENV_DATA64},{
pop r8
forloop({REG_CTR}, 0, 31, {RESTORE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
}, { dnl ASM_J9VM_ENV_DATA64
pop eax
forloop({REG_CTR}, 0, 7, {RESTORE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
})

dnl restore Opmask registers
dnl (k0-k7, slot N lives at J9TR_cframe_maskRegisters + N*8)
forloop({REG_CTR}, 0, 7, {RESTORE_MASK_64(REG_CTR, J9TR_cframe_maskRegisters+(REG_CTR*8))})

dnl Put the return address back on the stack for the ret below.
ifdef({ASM_J9VM_ENV_DATA64},{
push r8
}, { dnl ASM_J9VM_ENV_DATA64
push eax
})
ret
END_PROC(jitRestoreVectorRegistersAVX512)

dnl jitSaveVectorRegistersAVX
dnl
dnl Stores the YMM registers with unaligned moves (vmovdqu) into the jitFPRs
dnl area, addressed as J9TR_cframe_jitFPRs + N*32 relative to _rsp.
dnl
dnl 64-bit: ymm0-ymm15; scratch register is r8.
dnl 32-bit: ymm0-ymm7; scratch register is eax.
dnl
dnl The return address is popped into the scratch register first, so _rsp
dnl has its pre-call value while the slots are written. It is pushed back
dnl immediately before the ret. The scratch register is clobbered on return.
dnl NOTE(review): the 64-bit path writes 16 * 32 bytes starting at jitFPRs;
dnl confirm that every J9CInterpreterStackFrame layout this routine can run
dnl against reserves that much (one layout declares jitFPRs as 6 * 64 bytes).
START_PROC(jitSaveVectorRegistersAVX)
lfence

dnl save YMM registers

ifdef({ASM_J9VM_ENV_DATA64},{
pop r8
forloop({REG_CTR}, 0, 15, {vmovdqu ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp],ymm{}REG_CTR})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to zmm, which exactly registers do we need to save/restore?

}, { dnl ASM_J9VM_ENV_DATA64
pop eax
forloop({REG_CTR}, 0, 7, {vmovdqu ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp],ymm{}REG_CTR})
})

dnl The 256-bit contents are in memory at this point, so the upper
dnl register bits can be zeroed.
dnl NOTE(review): presumably done to avoid AVX/SSE transition penalties in
dnl the code that runs next - confirm.
vzeroupper

dnl Put the return address back on the stack for the ret below.
ifdef({ASM_J9VM_ENV_DATA64},{
push r8
}, { dnl ASM_J9VM_ENV_DATA64
push eax
})
ret
END_PROC(jitSaveVectorRegistersAVX)

dnl jitRestoreVectorRegistersAVX
dnl
dnl Reloads the YMM registers with unaligned moves (vmovdqu) from the jitFPRs
dnl area, addressed as J9TR_cframe_jitFPRs + N*32 relative to _rsp. It uses
dnl the same slot layout as jitSaveVectorRegistersAVX.
dnl
dnl 64-bit: ymm0-ymm15; scratch register is r8.
dnl 32-bit: ymm0-ymm7; scratch register is eax.
dnl
dnl The return address is popped into the scratch register first, so _rsp
dnl has its pre-call value while the slots are read. It is pushed back
dnl immediately before the ret. The scratch register is clobbered on return.
START_PROC(jitRestoreVectorRegistersAVX)
lfence

dnl restore YMM registers
ifdef({ASM_J9VM_ENV_DATA64},{
pop r8
forloop({REG_CTR}, 0, 15, {vmovdqu ymm{}REG_CTR,ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp]})
}, { dnl ASM_J9VM_ENV_DATA64
pop eax
forloop({REG_CTR}, 0, 7, {vmovdqu ymm{}REG_CTR,ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp]})
})

dnl Put the return address back on the stack for the ret below.
ifdef({ASM_J9VM_ENV_DATA64},{
push r8
}, { dnl ASM_J9VM_ENV_DATA64
push eax
})
ret
END_PROC(jitRestoreVectorRegistersAVX)

FILE_END
6 changes: 6 additions & 0 deletions runtime/jilgen/jilconsts.c
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_machineBP", offsetof(J9CInterpreterStackFrame, machineBP)) |
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_jitGPRs", offsetof(J9CInterpreterStackFrame, jitGPRs)) |
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_jitFPRs", offsetof(J9CInterpreterStackFrame, jitFPRs)) |
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_maskRegisters", offsetof(J9CInterpreterStackFrame, maskRegisters)) |
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_rax", offsetof(J9CInterpreterStackFrame, jitGPRs.jitGPRs.named.rax)) |
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_rbx", offsetof(J9CInterpreterStackFrame, jitGPRs.jitGPRs.named.rbx)) |
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_rcx", offsetof(J9CInterpreterStackFrame, jitGPRs.jitGPRs.named.rcx)) |
Expand Down Expand Up @@ -505,6 +506,7 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_bytecodeLoop", offsetof(J9JavaVM, bytecodeLoop)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_extendedRuntimeFlags", offsetof(J9JavaVM, extendedRuntimeFlags)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_extendedRuntimeFlags2", offsetof(J9JavaVM, extendedRuntimeFlags2)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_extendedRuntimeFlags3", offsetof(J9JavaVM, extendedRuntimeFlags3)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVMInternalFunctionTable", offsetof(J9JavaVM, internalVMFunctions)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_memoryManagerFunctions", offsetof(J9JavaVM, memoryManagerFunctions)) |
#if defined(OMR_GC_CONCURRENT_SCAVENGER) && defined(J9VM_ARCH_S390)
Expand Down Expand Up @@ -669,6 +671,8 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_slow_jitReportInstanceFieldWrite", offsetof(J9JITConfig, old_slow_jitReportInstanceFieldWrite)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_slow_jitReportStaticFieldRead", offsetof(J9JITConfig, old_slow_jitReportStaticFieldRead)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_slow_jitReportStaticFieldWrite", offsetof(J9JITConfig, old_slow_jitReportStaticFieldWrite)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_saveVectorRegisters", offsetof(J9JITConfig, saveVectorRegisters)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_restoreVectorRegisters", offsetof(J9JITConfig, restoreVectorRegisters)) |

writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_fast_jitGetFlattenableField", offsetof(J9JITConfig, old_fast_jitGetFlattenableField)) |
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_fast_jitCloneValueType", offsetof(J9JITConfig, old_fast_jitCloneValueType)) |
Expand Down Expand Up @@ -777,6 +781,8 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
writeConstant(OMRPORTLIB, fd, "J9TR_ELSSize", sizeof(J9VMEntryLocalStorage)) |
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME_DEBUG_MODE", J9_EXTENDED_RUNTIME_DEBUG_MODE) |
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME_USE_VECTOR_REGISTERS", J9_EXTENDED_RUNTIME_USE_VECTOR_REGISTERS) |
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256", J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256) |
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512", J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512) |
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME2_COMPRESS_OBJECT_REFERENCES", J9_EXTENDED_RUNTIME2_COMPRESS_OBJECT_REFERENCES) |
writeConstant(OMRPORTLIB, fd, "J9TR_J9_INLINE_JNI_MAX_ARG_COUNT", J9_INLINE_JNI_MAX_ARG_COUNT) |

Expand Down
2 changes: 2 additions & 0 deletions runtime/oti/j9consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,8 @@ extern "C" {
#define J9_EXTENDED_RUNTIME3_YIELD_PINNED_CONTINUATION 0x2
#define J9_EXTENDED_RUNTIME3_CACHE_MAPS 0x4
#define J9_EXTENDED_RUNTIME3_MODULE_PACKAGES_INITIALIZED 0x8
#define J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256 0x10
#define J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512 0x20

#define J9_OBJECT_HEADER_AGE_DEFAULT 0xA /* OBJECT_HEADER_AGE_DEFAULT */
#define J9_OBJECT_HEADER_SHAPE_MASK 0xE /* OBJECT_HEADER_SHAPE_MASK */
Expand Down
11 changes: 8 additions & 3 deletions runtime/oti/j9nonbuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -4305,6 +4305,8 @@ typedef struct J9JITConfig {
void *old_slow_jitReportInstanceFieldWrite;
void *old_slow_jitReportStaticFieldRead;
void *old_slow_jitReportStaticFieldWrite;
void *saveVectorRegisters;
void *restoreVectorRegisters;
struct J9MemorySegment* codeCache;
struct J9MemorySegment* dataCache;
struct J9MemorySegmentList* codeCacheList;
Expand Down Expand Up @@ -6906,8 +6908,9 @@ typedef struct J9CInterpreterStackFrame {
*
* Stack must be 16-byte aligned.
*/
U_8 jitFPRs[6 * 16]; /* xmm0-5 128-bit OR xmm0-7 64-bit */
U_8 jitFPRs[6 * 64]; /* zmm0-5 512-bit OR xmm0-7 64-bit */
U_8 preservedFPRs[10 * 16]; /* xmm6-15 128-bit */
U_8 maskRegisters[8 * 8]; /* k0-k7 */
UDATA align[1];
/* r15,r14,r13,r12,rdi,rsi,rbx,rbp,return address
* RSP is 16-byte aligned at this point
Expand All @@ -6917,7 +6920,8 @@ typedef struct J9CInterpreterStackFrame {
*
* Stack must be 16-byte aligned.
*/
U_8 jitFPRs[16 * 16]; /* xmm0-15 128-bit OR xmm0-7 64-bit */
U_8 jitFPRs[32 * 64]; /* zmm0-31 512-bit OR xmm0-7 64-bit */
U_8 maskRegisters[8 * 8]; /* k0-k7 */
UDATA align[1];
/* r15,r14,r13,r12,rbx,rbp,return address
* RSP is 16-byte aligned at this point
Expand All @@ -6930,7 +6934,8 @@ typedef struct J9CInterpreterStackFrame {
*/
J9JITGPRSpillArea jitGPRs;
UDATA align1[2];
U_8 jitFPRs[8 * 16]; /* xmm0-7 128-bit */
U_8 jitFPRs[8 * 64]; /* zmm0-7 512-bit */
U_8 maskRegisters[8 * 8]; /* k0-k7 */
UDATA align2[1];
/* ebx,edi,esi
* ESP is forcibly 16-byte aligned at this point
Expand Down
1 change: 1 addition & 0 deletions runtime/oti/jvminit.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,7 @@ enum INIT_STAGE {
#define VMOPT_XCONCURRENTBACKGROUND "-Xconcurrentbackground"
#define VMOPT_XGCTHREADS "-Xgcthreads"
#define VMOPT_XGCMAXTHREADS "-Xgcmaxthreads"
#define VMOPT_PRESERVE_VECTORS "-XPreserveExtendedRegs"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I repeat my suggestion that we should have both positive and negative versions of this option:

#define VMOPT_PRESERVE_VECTORS "-XX:+PreserveExtendedRegs"
#define VMOPT_NO_PRESERVE_VECTORS "-XX:-PreserveExtendedRegs"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed - the convention should be followed.


#define VMOPT_XXSHOW_EXTENDED_NPE_MESSAGE "-XX:+ShowCodeDetailsInExceptionMessages"
#define VMOPT_XXNOSHOW_EXTENDED_NPE_MESSAGE "-XX:-ShowCodeDetailsInExceptionMessages"
Expand Down
Loading