Skip to content

Commit 3ced5a4

Browse files
Devin NakamuraBradleyWood
andcommitted
x86: Preserve ymm/zmm registers under option
This commit restores #14632 and applies a number of changes. Extended registers (ymm/zmm/opmask) are preserved only if the "-XPreserveExtendedRegs" VM option is provided. Two new VM flags are introduced: one marks AVX-256 register preservation, the other marks AVX-512 register preservation. When extended vector preservation is enabled, registers of vector length 256/512 are saved and restored by an out-of-line sequence that is reached through a function pointer. This is done to prevent speculative execution from causing CPU frequency regressions even when AVX-2 / AVX-512 code is never executed. Co-authored-by: Devin Nakamura <[email protected]> Co-authored-by: Bradley Wood <[email protected]>
1 parent c372bf7 commit 3ced5a4

File tree

10 files changed

+413
-26
lines changed

10 files changed

+413
-26
lines changed

runtime/codert_vm/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,11 @@ target_include_directories(j9codert_vm
4949

5050
if(OMR_ARCH_X86)
5151
j9vm_gen_asm(xnathelp.m4)
52+
j9vm_gen_asm(xvector.m4)
5253

5354
target_sources(j9codert_vm PRIVATE
5455
xnathelp.s
56+
xvector.s
5557
)
5658
elseif(OMR_ARCH_POWER)
5759
j9vm_gen_asm(pnathelp.m4)

runtime/codert_vm/cnathelp.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,15 @@ samePCs(void *pc1, void *pc2)
7171
#define samePCs(pc1, pc2) (MASK_PC(pc1) == MASK_PC(pc2))
7272
#endif /* J9ZOS390 && !J9VM_ENV_DATA64 */
7373

74+
#if defined(J9HAMMER) && JAVA_SPEC_VERSION >= 17
75+
#define JIT_HELPER(x) extern "C" void x()
76+
JIT_HELPER(jitSaveVectorRegistersAVX512);
77+
JIT_HELPER(jitRestoreVectorRegistersAVX512);
78+
79+
JIT_HELPER(jitSaveVectorRegistersAVX);
80+
JIT_HELPER(jitRestoreVectorRegistersAVX);
81+
#endif
82+
7483
/**
7584
* Fix the java and decompilation stacks for cases where exceptions can be
7685
* thrown from inside a JIT synthetic exception handler. There must be a
@@ -4125,6 +4134,19 @@ initPureCFunctionTable(J9JavaVM *vm)
41254134
jitConfig->old_slow_jitReportInstanceFieldWrite = (void*)old_slow_jitReportInstanceFieldWrite;
41264135
jitConfig->old_slow_jitReportStaticFieldRead = (void*)old_slow_jitReportStaticFieldRead;
41274136
jitConfig->old_slow_jitReportStaticFieldWrite = (void*)old_slow_jitReportStaticFieldWrite;
4137+
4138+
#if defined(J9HAMMER) && (JAVA_SPEC_VERSION >= 17)
4139+
if (J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags3, J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512))
4140+
{
4141+
jitConfig->saveVectorRegisters = (void *)jitSaveVectorRegistersAVX512;
4142+
jitConfig->restoreVectorRegisters = (void *)jitRestoreVectorRegistersAVX512;
4143+
}
4144+
else if (J9_ARE_ANY_BITS_SET(vm->extendedRuntimeFlags3, J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256))
4145+
{
4146+
jitConfig->saveVectorRegisters = (void *)jitSaveVectorRegistersAVX;
4147+
jitConfig->restoreVectorRegisters = (void *)jitRestoreVectorRegistersAVX;
4148+
}
4149+
#endif /* defined(J9HAMMER) && (JAVA_SPEC_VERSION >= 17) */
41284150
}
41294151

41304152
} /* extern "C" */

runtime/codert_vm/xnathelp.m4

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,10 +1070,13 @@ START_PROC(jitReferenceArrayCopy)
10701070
mov PARM_REG(2),_rcx
10711071
mov PARM_REG(1),_rbp
10721072
call FASTCALL_SYMBOL(impl_jitReferenceArrayCopy,2)
1073-
dnl set ZF if succeed
1074-
test _rax,_rax
1073+
dnl Save return value to check later.
1074+
dnl We don't check it now because restoring the register clobbers flags.
1075+
mov dword ptr J9TR_VMThread_floatTemp3[_rbp],eax
10751076
RESTORE_C_VOLATILE_REGS
10761077
SWITCH_TO_JAVA_STACK
1078+
dnl Set ZF on success.
1079+
test dword ptr J9TR_VMThread_floatTemp3[_rbp], -1
10771080
push uword ptr J9TR_VMThread_jitReturnAddress[_rbp]
10781081
ret
10791082
END_PROC(jitReferenceArrayCopy)

runtime/codert_vm/xvector.m4

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
dnl Copyright IBM Corp. and others 2023
2+
dnl
3+
dnl This program and the accompanying materials are made available under
4+
dnl the terms of the Eclipse Public License 2.0 which accompanies this
5+
dnl distribution and is available at https://www.eclipse.org/legal/epl-2.0/
6+
dnl or the Apache License, Version 2.0 which accompanies this distribution and
7+
dnl is available at https://www.apache.org/licenses/LICENSE-2.0.
8+
dnl
9+
dnl This Source Code may also be made available under the following
10+
dnl Secondary Licenses when the conditions for such availability set
11+
dnl forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
12+
dnl General Public License, version 2 with the GNU Classpath
13+
dnl Exception [1] and GNU General Public License, version 2 with the
14+
dnl OpenJDK Assembly Exception [2].
15+
dnl
16+
dnl [1] https://www.gnu.org/software/classpath/license.html
17+
dnl [2] https://openjdk.org/legal/assembly-exception.html
18+
dnl
19+
dnl SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
20+
21+
include(xhelpers.m4)
22+
23+
FILE_START
24+
25+
dnl For all of these functions, on entry:
26+
dnl
27+
dnl 1) return address on the stack
28+
dnl 2) r8 is a scratch register on 64-bit
29+
dnl 3) eax is a scratch register on 32-bit
30+
31+
dnl jitSaveVectorRegistersAVX512
dnl
dnl Stores the ZMM vector registers and the eight opmask registers into the
dnl areas located by J9TR_cframe_jitFPRs and J9TR_cframe_maskRegisters.
dnl   64-bit: ZMM 0 through 31 are stored, r8 is clobbered.
dnl   32-bit: ZMM 0 through 7 are stored, eax is clobbered.
dnl Each ZMM register uses a 64-byte slot and each opmask an 8-byte slot.
dnl On entry the return address is on top of the stack.
START_PROC(jitSaveVectorRegistersAVX512)
32+
dnl NOTE(review): the lfence presumably keeps the wide vector instructions
dnl below from being reached speculatively - confirm the intent.
lfence
33+
34+
dnl Save ZMM registers.
dnl The return address is popped into the scratch register first and pushed
dnl back before ret. NOTE(review): presumably the store macro addresses the
dnl frame relative to _rsp, so the offsets must not include the return
dnl address - confirm against the macro definition in xhelpers.m4.
35+
36+
ifdef({ASM_J9VM_ENV_DATA64},{
37+
pop r8
38+
forloop({REG_CTR}, 0, 31, {SAVE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
39+
}, { dnl ASM_J9VM_ENV_DATA64
40+
pop eax
41+
forloop({REG_CTR}, 0, 7, {SAVE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
42+
})
43+
dnl The full register contents were stored above, so the upper vector bits
dnl can be cleared without losing state.
44+
vzeroupper
45+
46+
dnl Save Opmask registers 0 through 7, 8 bytes apart.
47+
forloop({REG_CTR}, 0, 7, {SAVE_MASK_64(REG_CTR, J9TR_cframe_maskRegisters+(REG_CTR*8))})
48+
dnl Put the return address back on the stack for the ret below.
49+
ifdef({ASM_J9VM_ENV_DATA64},{
50+
push r8
51+
}, { dnl ASM_J9VM_ENV_DATA64
52+
push eax
53+
})
54+
ret
55+
END_PROC(jitSaveVectorRegistersAVX512)
56+
57+
dnl jitRestoreVectorRegistersAVX512
dnl
dnl Reloads the ZMM vector registers and the eight opmask registers from the
dnl areas located by J9TR_cframe_jitFPRs and J9TR_cframe_maskRegisters,
dnl using the same slot layout as jitSaveVectorRegistersAVX512
dnl (64 bytes per ZMM register, 8 bytes per opmask).
dnl   64-bit: ZMM 0 through 31 are reloaded, r8 is clobbered.
dnl   32-bit: ZMM 0 through 7 are reloaded, eax is clobbered.
dnl On entry the return address is on top of the stack.
START_PROC(jitRestoreVectorRegistersAVX512)
58+
dnl NOTE(review): the lfence presumably keeps the wide vector instructions
dnl below from being reached speculatively - confirm the intent.
lfence
59+
60+
dnl Restore ZMM registers.
dnl The return address is held in the scratch register while the frame is
dnl read and is pushed back before ret. NOTE(review): presumably the load
dnl macro addresses the frame relative to _rsp - confirm in xhelpers.m4.
61+
ifdef({ASM_J9VM_ENV_DATA64},{
62+
pop r8
63+
forloop({REG_CTR}, 0, 31, {RESTORE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
64+
}, { dnl ASM_J9VM_ENV_DATA64
65+
pop eax
66+
forloop({REG_CTR}, 0, 7, {RESTORE_ZMM_REG(REG_CTR, J9TR_cframe_jitFPRs+(REG_CTR*64))})
67+
})
68+
69+
dnl Restore Opmask registers 0 through 7, 8 bytes apart.
70+
forloop({REG_CTR}, 0, 7, {RESTORE_MASK_64(REG_CTR, J9TR_cframe_maskRegisters+(REG_CTR*8))})
71+
dnl Put the return address back on the stack for the ret below.
72+
ifdef({ASM_J9VM_ENV_DATA64},{
73+
push r8
74+
}, { dnl ASM_J9VM_ENV_DATA64
75+
push eax
76+
})
77+
ret
78+
END_PROC(jitRestoreVectorRegistersAVX512)
79+
80+
dnl jitSaveVectorRegistersAVX
dnl
dnl Stores the YMM vector registers into the area located by
dnl J9TR_cframe_jitFPRs, one 32-byte slot per register, using unaligned
dnl stores relative to _rsp.
dnl   64-bit: YMM 0 through 15 are stored, r8 is clobbered.
dnl   32-bit: YMM 0 through 7 are stored, eax is clobbered.
dnl On entry the return address is on top of the stack.
START_PROC(jitSaveVectorRegistersAVX)
81+
dnl NOTE(review): the lfence presumably keeps the AVX instructions below
dnl from being reached speculatively - confirm the intent.
lfence
82+
83+
dnl Save YMM registers.
dnl The return address is popped into the scratch register first, so the
dnl _rsp-relative offsets below are applied with the return address removed.
dnl It is pushed back before ret.
84+
85+
ifdef({ASM_J9VM_ENV_DATA64},{
86+
pop r8
87+
forloop({REG_CTR}, 0, 15, {vmovdqu ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp],ymm{}REG_CTR})
88+
}, { dnl ASM_J9VM_ENV_DATA64
89+
pop eax
90+
forloop({REG_CTR}, 0, 7, {vmovdqu ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp],ymm{}REG_CTR})
91+
})
92+
dnl The full register contents were stored above, so the upper vector bits
dnl can be cleared without losing state.
93+
vzeroupper
94+
dnl Put the return address back on the stack for the ret below.
95+
ifdef({ASM_J9VM_ENV_DATA64},{
96+
push r8
97+
}, { dnl ASM_J9VM_ENV_DATA64
98+
push eax
99+
})
100+
ret
101+
END_PROC(jitSaveVectorRegistersAVX)
102+
103+
dnl jitRestoreVectorRegistersAVX
dnl
dnl Reloads the YMM vector registers from the area located by
dnl J9TR_cframe_jitFPRs, one 32-byte slot per register, using unaligned
dnl loads relative to _rsp. The layout matches jitSaveVectorRegistersAVX.
dnl   64-bit: YMM 0 through 15 are reloaded, r8 is clobbered.
dnl   32-bit: YMM 0 through 7 are reloaded, eax is clobbered.
dnl On entry the return address is on top of the stack.
START_PROC(jitRestoreVectorRegistersAVX)
104+
dnl NOTE(review): the lfence presumably keeps the AVX instructions below
dnl from being reached speculatively - confirm the intent.
lfence
105+
106+
dnl Restore YMM registers.
dnl The return address is popped into the scratch register first, so the
dnl _rsp-relative offsets below are applied with the return address removed.
dnl It is pushed back before ret.
107+
ifdef({ASM_J9VM_ENV_DATA64},{
108+
pop r8
109+
forloop({REG_CTR}, 0, 15, {vmovdqu ymm{}REG_CTR,ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp]})
110+
}, { dnl ASM_J9VM_ENV_DATA64
111+
pop eax
112+
forloop({REG_CTR}, 0, 7, {vmovdqu ymm{}REG_CTR,ymmword ptr J9TR_cframe_jitFPRs+(REG_CTR*32)[_rsp]})
113+
})
114+
dnl Put the return address back on the stack for the ret below.
115+
ifdef({ASM_J9VM_ENV_DATA64},{
116+
push r8
117+
}, { dnl ASM_J9VM_ENV_DATA64
118+
push eax
119+
})
120+
ret
121+
END_PROC(jitRestoreVectorRegistersAVX)
122+
123+
FILE_END

runtime/jilgen/jilconsts.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
399399
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_machineBP", offsetof(J9CInterpreterStackFrame, machineBP)) |
400400
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_jitGPRs", offsetof(J9CInterpreterStackFrame, jitGPRs)) |
401401
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_jitFPRs", offsetof(J9CInterpreterStackFrame, jitFPRs)) |
402+
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_maskRegisters", offsetof(J9CInterpreterStackFrame, maskRegisters)) |
402403
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_rax", offsetof(J9CInterpreterStackFrame, jitGPRs.jitGPRs.named.rax)) |
403404
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_rbx", offsetof(J9CInterpreterStackFrame, jitGPRs.jitGPRs.named.rbx)) |
404405
writeConstant(OMRPORTLIB, fd, "J9TR_cframe_rcx", offsetof(J9CInterpreterStackFrame, jitGPRs.jitGPRs.named.rcx)) |
@@ -505,6 +506,7 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
505506
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_bytecodeLoop", offsetof(J9JavaVM, bytecodeLoop)) |
506507
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_extendedRuntimeFlags", offsetof(J9JavaVM, extendedRuntimeFlags)) |
507508
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_extendedRuntimeFlags2", offsetof(J9JavaVM, extendedRuntimeFlags2)) |
509+
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_extendedRuntimeFlags3", offsetof(J9JavaVM, extendedRuntimeFlags3)) |
508510
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVMInternalFunctionTable", offsetof(J9JavaVM, internalVMFunctions)) |
509511
writeConstant(OMRPORTLIB, fd, "J9TR_JavaVM_memoryManagerFunctions", offsetof(J9JavaVM, memoryManagerFunctions)) |
510512
#if defined(OMR_GC_CONCURRENT_SCAVENGER) && defined(J9VM_ARCH_S390)
@@ -669,6 +671,8 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
669671
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_slow_jitReportInstanceFieldWrite", offsetof(J9JITConfig, old_slow_jitReportInstanceFieldWrite)) |
670672
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_slow_jitReportStaticFieldRead", offsetof(J9JITConfig, old_slow_jitReportStaticFieldRead)) |
671673
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_slow_jitReportStaticFieldWrite", offsetof(J9JITConfig, old_slow_jitReportStaticFieldWrite)) |
674+
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_saveVectorRegisters", offsetof(J9JITConfig, saveVectorRegisters)) |
675+
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_restoreVectorRegisters", offsetof(J9JITConfig, restoreVectorRegisters)) |
672676

673677
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_fast_jitGetFlattenableField", offsetof(J9JITConfig, old_fast_jitGetFlattenableField)) |
674678
writeConstant(OMRPORTLIB, fd, "J9TR_JitConfig_old_fast_jitCloneValueType", offsetof(J9JITConfig, old_fast_jitCloneValueType)) |
@@ -777,6 +781,8 @@ writeConstants(OMRPortLibrary *OMRPORTLIB, IDATA fd)
777781
writeConstant(OMRPORTLIB, fd, "J9TR_ELSSize", sizeof(J9VMEntryLocalStorage)) |
778782
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME_DEBUG_MODE", J9_EXTENDED_RUNTIME_DEBUG_MODE) |
779783
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME_USE_VECTOR_REGISTERS", J9_EXTENDED_RUNTIME_USE_VECTOR_REGISTERS) |
784+
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256", J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256) |
785+
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512", J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512) |
780786
writeConstant(OMRPORTLIB, fd, "J9TR_J9_EXTENDED_RUNTIME2_COMPRESS_OBJECT_REFERENCES", J9_EXTENDED_RUNTIME2_COMPRESS_OBJECT_REFERENCES) |
781787
writeConstant(OMRPORTLIB, fd, "J9TR_J9_INLINE_JNI_MAX_ARG_COUNT", J9_INLINE_JNI_MAX_ARG_COUNT) |
782788

runtime/oti/j9consts.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,8 @@ extern "C" {
374374
#define J9_EXTENDED_RUNTIME3_YIELD_PINNED_CONTINUATION 0x2
375375
#define J9_EXTENDED_RUNTIME3_CACHE_MAPS 0x4
376376
#define J9_EXTENDED_RUNTIME3_MODULE_PACKAGES_INITIALIZED 0x8
377+
#define J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_256 0x10
378+
#define J9_EXTENDED_RUNTIME3_USE_VECTOR_LENGTH_512 0x20
377379

378380
#define J9_OBJECT_HEADER_AGE_DEFAULT 0xA /* OBJECT_HEADER_AGE_DEFAULT */
379381
#define J9_OBJECT_HEADER_SHAPE_MASK 0xE /* OBJECT_HEADER_SHAPE_MASK */

runtime/oti/j9nonbuilder.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4305,6 +4305,8 @@ typedef struct J9JITConfig {
43054305
void *old_slow_jitReportInstanceFieldWrite;
43064306
void *old_slow_jitReportStaticFieldRead;
43074307
void *old_slow_jitReportStaticFieldWrite;
4308+
void *saveVectorRegisters;
4309+
void *restoreVectorRegisters;
43084310
struct J9MemorySegment* codeCache;
43094311
struct J9MemorySegment* dataCache;
43104312
struct J9MemorySegmentList* codeCacheList;
@@ -6906,8 +6908,9 @@ typedef struct J9CInterpreterStackFrame {
69066908
*
69076909
* Stack must be 16-byte aligned.
69086910
*/
6909-
U_8 jitFPRs[6 * 16]; /* xmm0-5 128-bit OR xmm0-7 64-bit */
6911+
U_8 jitFPRs[6 * 64]; /* zmm0-5 512-bit OR xmm0-7 64-bit */
69106912
U_8 preservedFPRs[10 * 16]; /* xmm6-15 128-bit */
6913+
U_8 maskRegisters[8 * 8]; /* k0-k7 */
69116914
UDATA align[1];
69126915
/* r15,r14,r13,r12,rdi,rsi,rbx,rbp,return address
69136916
* RSP is 16-byte aligned at this point
@@ -6917,7 +6920,8 @@ typedef struct J9CInterpreterStackFrame {
69176920
*
69186921
* Stack must be 16-byte aligned.
69196922
*/
6920-
U_8 jitFPRs[16 * 16]; /* xmm0-15 128-bit OR xmm0-7 64-bit */
6923+
U_8 jitFPRs[32 * 64]; /* zmm0-31 512-bit OR xmm0-7 64-bit */
6924+
U_8 maskRegisters[8 * 8]; /* k0-k7 */
69216925
UDATA align[1];
69226926
/* r15,r14,r13,r12,rbx,rbp,return address
69236927
* RSP is 16-byte aligned at this point
@@ -6930,7 +6934,8 @@ typedef struct J9CInterpreterStackFrame {
69306934
*/
69316935
J9JITGPRSpillArea jitGPRs;
69326936
UDATA align1[2];
6933-
U_8 jitFPRs[8 * 16]; /* xmm0-7 128-bit */
6937+
U_8 jitFPRs[8 * 64]; /* zmm0-7 512-bit */
6938+
U_8 maskRegisters[8 * 8]; /* k0-k7 */
69346939
UDATA align2[1];
69356940
/* ebx,edi,esi
69366941
* ESP is forcibly 16-byte aligned at this point

runtime/oti/jvminit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,7 @@ enum INIT_STAGE {
664664
#define VMOPT_XCONCURRENTBACKGROUND "-Xconcurrentbackground"
665665
#define VMOPT_XGCTHREADS "-Xgcthreads"
666666
#define VMOPT_XGCMAXTHREADS "-Xgcmaxthreads"
667+
#define VMOPT_PRESERVE_VECTORS "-XPreserveExtendedRegs"
667668

668669
#define VMOPT_XXSHOW_EXTENDED_NPE_MESSAGE "-XX:+ShowCodeDetailsInExceptionMessages"
669670
#define VMOPT_XXNOSHOW_EXTENDED_NPE_MESSAGE "-XX:-ShowCodeDetailsInExceptionMessages"

0 commit comments

Comments
 (0)