Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ rand_distr = "0.5.1"
rand_seeder = "0.4.0"
range-requests = { path = "range-requests" }
ratatui = "0.29.0"
raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "0a8dbd2311263f6a59ea58089e33c8331436ff3a" }
raw-cpuid = { git = "https://github.com/oxidecomputer/rust-cpuid.git", rev = "a4cf01df76f35430ff5d39dc2fe470bcb953503b" }
rayon = "1.10"
rcgen = "0.12.1"
reconfigurator-cli = { path = "dev-tools/reconfigurator-cli" }
Expand Down
325 changes: 322 additions & 3 deletions nexus/src/app/instance_platform/cpu_platform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,12 @@ pub fn functionally_same(base: CpuIdDump, target: CpuIdDump) -> bool {
if base_info.has_fp256() != target_info.has_fp256() {
return false;
}

// TODO: same as above: we probably just need to require "base" has
// the same or wider FPU datapath than "target"
if base_info.has_fp512() != target_info.has_fp512() {
return false;
}
}
_ => {
// Specific cases here may be acceptable, but for expediency (and
Expand Down Expand Up @@ -500,8 +506,8 @@ fn milan_ideal() -> CpuIdDump {

// Set up processor optimization info (leaf 8000_001Ah)
let mut leaf = PerformanceOptimizationInfo::empty();
leaf.set_movu(true); // TODO: BREAKING
leaf.set_fp256(true); // TODO: BREAKINGISH?
leaf.set_movu(true);
leaf.set_fp256(true);
cpuid
.set_performance_optimization_info(Some(leaf))
.expect("can set leaf 8000_001Ah");
Expand Down Expand Up @@ -548,6 +554,249 @@ fn milan_ideal() -> CpuIdDump {
dump
}

pub fn turin_v1() -> CpuIdDump {
// For VMs, a Turin-like CPU is very much like Milan with AVX-512 features,
// so start from Milan.
let baseline = milan_ideal();

let mut cpuid = CpuId::with_cpuid_reader(baseline);

let mut leaf =
cpuid.get_feature_info().expect("baseline Milan defines leaf 1");

// Set up EAX: Family 1Ah model 2h stepping 1.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably useful to indicate why we're setting it to show up as a C1 processor. I'm guessing this is because that's the production stepping.

//
// This corresponds to processor revision C1, the production stepping of Turin processors.
leaf.set_extended_family_id(0x0B);
leaf.set_base_family_id(0x0F);
leaf.set_base_model_id(0x02);
leaf.set_stepping_id(0x01);

// EBX, ECX, EDX are all unchanged from Milan (same cache line flush size,
// leaf 1 features are unchanged)

cpuid.set_feature_info(Some(leaf)).expect("can set leaf 1");

let mut leaf = cpuid
.get_extended_feature_info()
.expect("baseline Milan defines leaf 7");

// Same as with initial Milan profiles, `rdseed` is not supported by the
// virt stack, so we should hide it from guests for now.
leaf.set_rdseed(false);

// Turin supports the TSC_ADJUST MSR but guest plumbing is not present for
// it and it's not clear what a guest would productively do with it anyway.
leaf.set_tsc_adjust_msr(false);

// Turin supports MOVDIR64B and MOVDIRI, so pass them through.
leaf.set_movdir64b(true);
leaf.set_movdiri(true);

// These AVX512 features are present for all Turin processors.
leaf.set_avx512f(true);
leaf.set_avx512dq(true);
leaf.set_avx512_ifma(true);
leaf.set_avx512cd(true);
leaf.set_avx512bw(true);
leaf.set_avx512vl(true);

leaf.set_avx512vbmi(true);
leaf.set_avx512vbmi2(true);
leaf.set_gfni(true);
leaf.set_avx512vnni(true);
leaf.set_avx512bitalg(true);
leaf.set_avx512vpopcntdq(true);
// While hardware supports 57-bit virtual addresses, the bhyve support is
// not there yet.
leaf.set_la57(false);

leaf.set_avx512_vp2intersect(true);

leaf.set_avx512_bf16(true);
leaf.set_avx_vnni(true);

cpuid.set_extended_feature_info(Some(leaf)).expect("can set leaf 7h");

// This is the same information for leaf D as in Milan, but with the new
// AVX-512 bits in Turin.
// TODO: kind of gross to have to pass an empty `CpuIdDump` here...
let mut state = ExtendedStateInfo::empty(CpuIdDump::new());
state.set_xcr0_supports_legacy_x87(true);
state.set_xcr0_supports_sse_128(true);
state.set_xcr0_supports_avx_256(true);
// Update leaf D for the larger XCR0 set
state.set_xcr0_supports_avx512_opmask(true);
state.set_xcr0_supports_avx512_zmm_hi256(true);
state.set_xcr0_supports_avx512_zmm_hi16(true);
// Managed dynamically in practice.
state.set_xsave_area_size_enabled_features(0x980);
// `Core::X86::Cpuid::ProcExtStateEnumEcx00`, but minus the MPK support we
// don't make available to guests.
state.set_xsave_area_size_supported_features(0x980);

state.set_xsaveopt(true);
state.set_xsavec(true);
state.set_xgetbv(true);
state.set_xsave_size(0x980);

let mut leaves = state.into_leaves().to_vec();
let mut ymm_state = ExtendedState::empty();
ymm_state.set_size(0x100);
ymm_state.set_offset(0x240);
leaves.push(Some(ymm_state.into_leaf()));
// level 3
leaves.push(None);
// level 4
leaves.push(None);
// levels 5, 6, and 7 are described in the PPR:
// `Core::X86::Cpuid::ProcExtStateEnumEax06`
//
// level 5
let mut kregs_state = ExtendedState::empty();
kregs_state.set_size(0x040);
kregs_state.set_offset(0x340);
leaves.push(Some(kregs_state.into_leaf()));
// level 6
let mut zmmhi_state = ExtendedState::empty();
zmmhi_state.set_size(0x200);
zmmhi_state.set_offset(0x380);
leaves.push(Some(zmmhi_state.into_leaf()));
// level 7
let mut zmmhi16_state = ExtendedState::empty();
zmmhi16_state.set_size(0x400);
zmmhi16_state.set_offset(0x580);
leaves.push(Some(zmmhi16_state.into_leaf()));

cpuid.set_extended_state_info(Some(&leaves[..])).expect("can set leaf Dh");

let mut leaf = cpuid
.get_extended_processor_and_feature_identifiers()
.expect("baseline Milan defines leaf 8000_0001");

// This is the same as the leaf 1 EAX configured earlier.
leaf.set_extended_signature(0x00B00F21);

// Hide topology extensions. We'd want to set this and set
// ThreadsPerComputeUnit to indicate SMT is active, but we'd run afoul of
// https://github.com/oxidecomputer/propolis/issues/940, which in turn
// really needs us to disallow VM shapes with odd vCPU counts. For now, just
// hide topology extensions and we'll get sockets into shape in a later CPU
// platform rev.
leaf.set_topology_extensions(false);
// This is just strange. bhyve supports all six performance counters, so we
// *should* be free to set this bit. Linux is fine with this. But
// experimentally I've seen that with this bit set and TopologyExtensions
// *not* set (and leaves 8000_001D,8000_001E zeroed), Windows Server 2022
// gets into an infinite loop somewhere early in boot.
//
// We want to hide topology extensions for a bit still - we'd like to
// indicate SMT there, but that wants some other changes (see above or
// Propolis#940)
//
// So, if we don't have TopologyExtensions, apparently Windows can't have
// six perf counters?
leaf.set_perf_cntr_extensions(false);
// RDTSCP requires some bhyve and Propolis work to support, so it is masked
// off for now.
leaf.set_rdtscp(false);
cpuid
.set_extended_processor_and_feature_identifiers(Some(leaf))
.expect("can set leaf 8000_0001h");
Comment on lines +704 to +705
Copy link
Member Author

@iximeow iximeow Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in a truly distressing case of the ankle bone being connected to the wrist bone, if PerfCtrExtCore is set and TopologyExtensions is not, Windows Server 2022 sits in a loop at boot. I noticed this in checking out a fix for oxidecomputer/propolis#959, an initial version of which just cleared TopologyExtensions bit to match discarding leaf 0x8000_001E. Both bits together are fine. Having topology extensions and not six perf counters (as we've had on Milan for a while) is fine. Having neither is fine. Having six perf counters and no topology extensions does a loop at boot.

I'm a little suspicious there's some relationship between this and the incomplete representation of SMT, so I'm going to set this to a more Milan-like situation where we hide perf counter extensions for now, and omit topology extensions, and then see how this looks with issues like oxidecomputer/propolis#940 sorted out.

edit: these bits are now both cleared, and boy will I feel silly if I've overlooked something here

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does this tie into the definition of 8000_0022 %ecx NumPerfCtrCore?

Copy link
Member Author

@iximeow iximeow Oct 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

8000_0022 eax is zero so guests shouldn't care, but if we were filling it in I'd pick 4 without PerfCtrExtCore and 6 with.


cpuid
.set_processor_brand_string(Some(b"Oxide Virtual Turin-like Processor"))
.expect("can set vCPU brand string");

let mut leaf = cpuid
.get_processor_capacity_feature_info()
.expect("can get leaf 8000_0008h");

// Support for `wbnoinvd` is hidden in bhyve for the time being. This would
// probably be fine to pass through, but it is as-yet untested. Continue
// hiding this instruction.
leaf.set_wbnoinvd(false);

// "Processor is not vulnerable to Branch Type Confusion"
// This is 1 for all Turin processors and does not require particular MSR
// settings or hypervisor support, so pass it along.
leaf.set_btc_no(true);

// PSFD, SSBD, STIBP, and IBRS are all supported on Turin, but guests
// are not yet allowed to access SPEC_CTRL to enable (or confirm they are
// enabled).
leaf.set_psfd(false);
leaf.set_ssbd(false);
leaf.set_stibp(false);
leaf.set_ibrs(false);

cpuid
.set_processor_capacity_feature_info(Some(leaf))
.expect("can set leaf 8000_0008h");

let mut leaf = cpuid
.get_performance_optimization_info()
.expect("baseline Milan defines 8000_001Ah");
leaf.set_fp256(false);
leaf.set_fp512(true);
cpuid
.set_performance_optimization_info(Some(leaf))
.expect("can set leaf 8000_001Ah");

let mut leaf = cpuid
.get_extended_feature_identification_2()
.expect("can get leaf 8000_0021h");

// We don't support access to MSR `BP_CFG`, so SRSO_MSR_FIX stays hidden.
leaf.set_srso_msr_fix(false);
// SRSO_USER_KERNEL_NO is advice about vulnerabilities the processor is not
// affected by; no bhyve/Propolis support needed.
leaf.set_srso_user_kernel_no(true);
// SRSO_NO, more generally, is clear on Turin.
leaf.set_srso_no(false);
// IBPB_BRTYPE and SBPB are hidden because PRED_CMD and SPEC_CTRL generally
// aren't guest-accessible yet.
leaf.set_ibpb_brtype(false);
leaf.set_sbpb(false);
// Enhanced return address predictor security is another "this is just how
// the processor behaves" bit.
leaf.set_eraps(true);
leaf.set_prefetchi(true);
// FP512 downgrade is configurable via MSR, but the MSR is not made
// available to guests. The other bits are present on all Turin processors.
leaf.set_fp512_downgrade(false);
leaf.set_fast_rep_scasb(true);
leaf.set_epsf(true);
leaf.set_opcode_0f_017_reclaim(true);
leaf.set_amd_ermsb(true);
leaf.set_fast_short_repe_cmpsb(true);
leaf.set_fast_short_rep_stosb(true);
// The EFER write is permitted in bhyve, so this *should* work? But I'm not
// very familiar with how this is used in practice or where guest OSes would
// find it beneficial. Hide it for now and we'll come back to this for a
// broader speculative controls enablement with SPEC_CTRL/PRED_CMD later.
leaf.set_automatic_ibrs(false);
// The EFER write is permitted in bhyve, so this *should* work? But the
// forward utility of this bit is not as clear, so hide it.
leaf.set_upper_address_ignore(false);
// Architectural behavior, so we should pass this through.
leaf.set_fs_gs_base_write_not_serializing(true);

cpuid
.set_extended_feature_identification_2(Some(leaf))
.expect("can set leaf 8000_0021h");

// Cache topology leaves are otherwise left zeroed; if we can avoid getting
// into it, let's try!

let mut source = cpuid.into_source();
// We've cleared `topology_extensions` above, now remove the leaves so
// Propolis doesn't try specializing these; we don't want them presented yet!
source.set_leaf(0x8000_001D, None);
source.set_leaf(0x8000_001E, None);
source
}

pub fn milan_rfd314() -> CpuIdDump {
// This is the Milan we'd "want" to expose, absent any other constraints.
let baseline = milan_ideal();
Expand Down Expand Up @@ -753,7 +1002,7 @@ pub fn dump_to_cpuid_entries(dump: CpuIdDump) -> Vec<CpuidEntry> {
#[cfg(test)]
mod test {
use crate::app::instance_platform::cpu_platform::{
dump_to_cpuid_entries, milan_rfd314,
dump_to_cpuid_entries, milan_rfd314, turin_v1,
};
use raw_cpuid::{
CpuId, CpuIdReader, CpuIdResult, CpuIdWriter, L1CacheTlbInfo,
Expand Down Expand Up @@ -846,6 +1095,76 @@ mod test {
cpuid_leaf!(0x80000021, 0x00000045, 0x00000000, 0x00000000, 0x00000000),
];

// This CPUID leaf blob is some small tweaks on top of the "ideal Milan",
// maintaining some details that are disabled due to needed bhyve support
// and including Turin-specific features as supported and relevant to
// guests.
const TURIN_V1_CPUID: [CpuidEntry; 25] = [
cpuid_leaf!(0x0, 0x0000000D, 0x68747541, 0x444D4163, 0x69746E65),
cpuid_leaf!(0x1, 0x00B00F21, 0x00000800, 0xF6D83203, 0x078BFBFF),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

%ecx bit 31 is the bit to indicate hypervisor leafs are present right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, funnily this has more words in the APM than PPR..

cpuid_leaf!(0x5, 0x00000000, 0x00000000, 0x00000000, 0x00000000),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is leaf 5 zero here because we don't actually indicate support for monitor / mwait?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, specifically it's zero and in this list because I want to confirm the assembled profile has this leaf zeroed in addition to leaf 1 ECX monitor being clear.

cpuid_leaf!(0x6, 0x00000004, 0x00000000, 0x00000000, 0x00000000),
cpuid_subleaf!(
0x7, 0x0, 0x00000001, 0xF1BB03A9, 0x18005F42, 0x00000110
),
cpuid_subleaf!(
0x7, 0x1, 0x00000030, 0x00000000, 0x00000000, 0x00000000
),
cpuid_subleaf!(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume leaf B is left out here because it's dynamically generated?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's right, Propolis will fill it in with a level 0 and 1 that look the same as Milan (https://github.com/oxidecomputer/propolis/blob/ff52055/lib/propolis/src/cpuid.rs#L370-L378)

0xD, 0x0, 0x000000E7, 0x00000980, 0x00000980, 0x00000000

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do we get 980 in %ebx at this state without feeding in the value of %xcr0?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here and D.1 ebx are managed on the read in bhyve, so the value here doesn't have any bearing on a VM. so I arbitrarily picked the largest (and I think most likely) values we'd see in these leaves at runtime.

),
cpuid_subleaf!(
0xD, 0x1, 0x00000007, 0x00000980, 0x00000000, 0x00000000

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above about %ebx, but this time for xfeature.

),
cpuid_subleaf!(
0xD, 0x2, 0x00000100, 0x00000240, 0x00000000, 0x00000000
),
/*
* subleaves 3 and 4 are all-zero
*/
cpuid_subleaf!(
0xD, 0x5, 0x00000040, 0x00000340, 0x00000000, 0x00000000
),
cpuid_subleaf!(
0xD, 0x6, 0x00000200, 0x00000380, 0x00000000, 0x00000000
),
cpuid_subleaf!(
0xD, 0x7, 0x00000400, 0x00000580, 0x00000000, 0x00000000
),
cpuid_leaf!(0x80000000, 0x80000021, 0x68747541, 0x444D4163, 0x69746E65),
cpuid_leaf!(0x80000001, 0x00B00F21, 0x40000000, 0x440001F1, 0x25D3FBFF),
cpuid_leaf!(0x80000002, 0x6469784F, 0x69562065, 0x61757472, 0x7554206C),
cpuid_leaf!(0x80000003, 0x2D6E6972, 0x656B696C, 0x6F725020, 0x73736563),
cpuid_leaf!(0x80000004, 0x2020726F, 0x20202020, 0x20202020, 0x00202020),
cpuid_leaf!(0x80000007, 0x00000000, 0x00000000, 0x00000000, 0x00000100),
cpuid_leaf!(0x80000008, 0x00003030, 0x20000005, 0x00000000, 0x00000000),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just confirming, we cap %eax at 0x30/0t48 because we don't support virtualizing 5 level paging, right?

cpuid_leaf!(0x8000000A, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
cpuid_leaf!(0x8000001A, 0x0000000A, 0x00000000, 0x00000000, 0x00000000),
cpuid_leaf!(0x8000001B, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
cpuid_leaf!(0x8000001C, 0x00000000, 0x00000000, 0x00000000, 0x00000000),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume leaf 8000_001e is filled dynamically.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually omit 8000_001E entirely (and clear TopologyExtensions), this goes to the kind of awkward semi-SMT situation I want to fix with an early virtual platform change (oxidecomputer/propolis#940), because I think we want to disallow VM shapes with odd vCPU counts. Otherwise Linux for example will assume the 8000_001E leaf is bogus if it indicates an SMT sibling that doesn't exist. Not somewhere I'd love to rely on the grace of guest OSes..

8000_001E with ThreadsPerCore = 0 would be fine even now, but there's no API surface to not have Propolis indicate SMT when filling in CPU topology, so.. out with this leaf for now.

cpuid_leaf!(0x8000001F, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
cpuid_leaf!(0x80000021, 0x411D8C47, 0x00000000, 0x00000000, 0x00000000),
];

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume that right now we're not including the extended leaf 8000_0026 bits.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right. that would need a smidge of Propolis work and more importantly won't be particularly interesting beyond 8000_001E for the time being.


// Test that Turin V1 matches the predetermined CPUID leaves written above
// (e.g. that the collection of builders behind `turin_v1` produce this
// profile as used for testing and elsewhere).
//
// This is largely "baseline Milan" with Turin-specific additions.
//
// Entries are compared pairwise, in order, against `TURIN_V1_CPUID`, and the
// total entry count is then checked so that missing or extra trailing leaves
// in the computed profile are also reported as failures.
#[test]
fn turin_v1_is_as_described() {
    let computed = dump_to_cpuid_entries(turin_v1());

    // Compare entry-by-entry first so a mismatch names the specific
    // leaf/subleaf that differs rather than only reporting a count.
    for (l, r) in TURIN_V1_CPUID.iter().zip(computed.iter()) {
        eprintln!("comparing {:#08x}.{:?}", l.leaf, l.subleaf);
        assert_eq!(
            l, r,
            "leaf 0x{:08x} (subleaf? {:?}) did not match",
            l.leaf, l.subleaf
        );
    }

    // `zip` stops at the shorter of the two sequences, so on its own the loop
    // above would pass even if `turin_v1` produced fewer leaves than expected
    // or additional leaves past the end of `TURIN_V1_CPUID`.
    assert_eq!(
        TURIN_V1_CPUID.len(),
        computed.len(),
        "computed Turin V1 profile has a different number of CPUID entries \
         than the expected profile"
    );
}

// Test that the initial RFD 314 definition matches what we compute as the
// CPUID profile with that configuration in `milan_rfd314()`.
#[test]
Expand Down
9 changes: 7 additions & 2 deletions nexus/src/app/instance_platform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,13 +509,18 @@ fn cpuid_from_vmm_cpu_platform(
) -> Option<Cpuid> {
let cpuid = match platform {
db::model::VmmCpuPlatform::SledDefault => return None,
db::model::VmmCpuPlatform::AmdMilan
| db::model::VmmCpuPlatform::AmdTurin => Cpuid {
db::model::VmmCpuPlatform::AmdMilan => Cpuid {
entries: cpu_platform::dump_to_cpuid_entries(
cpu_platform::milan_rfd314(),
),
vendor: CpuidVendor::Amd,
},
db::model::VmmCpuPlatform::AmdTurin => Cpuid {
entries: cpu_platform::dump_to_cpuid_entries(
cpu_platform::turin_v1(),
),
vendor: CpuidVendor::Amd,
},
};

Some(cpuid)
Expand Down
Loading