From f89a61c7a0a133e4b9b51069c9a797e675252ccf Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Oct 2022 20:11:35 +0300 Subject: [PATCH 001/213] security/tpm: improve tlcl_extend() signature Until now tcg-2.0/tss.c was just assuming certain buffer size and hash algorithm. Change it to accept digest type, which the call sites know. Also drop `uint8_t *out_digest` parameter which was always `NULL` and was handled only by tcg-1.2 code. Change-Id: I944302b502e3424c5041b17c713a867b0fc535c4 Signed-off-by: Sergii Dmytruk Reviewed-on: https://review.coreboot.org/c/coreboot/+/68745 Reviewed-by: Julius Werner Tested-by: build bot (Jenkins) Reviewed-by: Frans Hendriks --- src/security/tpm/tspi/crtm.c | 2 +- src/security/tpm/tspi/tspi.c | 2 +- src/security/tpm/tss.h | 5 +-- src/security/tpm/tss/tcg-1.2/tss.c | 19 ++++------- src/security/tpm/tss/tcg-2.0/tss.c | 37 ++++++++++++++++----- src/vendorcode/eltan/security/mboot/mboot.c | 3 +- 6 files changed, 42 insertions(+), 26 deletions(-) diff --git a/src/security/tpm/tspi/crtm.c b/src/security/tpm/tspi/crtm.c index 24133d9f9fc..8eefc11a362 100644 --- a/src/security/tpm/tspi/crtm.c +++ b/src/security/tpm/tspi/crtm.c @@ -171,7 +171,7 @@ int tspi_measure_cache_to_pcr(void) tce->name, tce->pcr); int result = tlcl_extend(tce->pcr, tce->digest, - NULL); + TPM_MEASURE_ALGO); if (result != TPM_SUCCESS) { printk(BIOS_ERR, "TPM: Writing digest" " of %s into PCR failed with error" diff --git a/src/security/tpm/tspi/tspi.c b/src/security/tpm/tspi/tspi.c index 891f9153272..fb7a5d50c76 100644 --- a/src/security/tpm/tspi/tspi.c +++ b/src/security/tpm/tspi/tspi.c @@ -233,7 +233,7 @@ uint32_t tpm_extend_pcr(int pcr, enum vb2_hash_algorithm digest_algo, } printk(BIOS_DEBUG, "TPM: Extending digest for `%s` into PCR %d\n", name, pcr); - result = tlcl_extend(pcr, digest, NULL); + result = tlcl_extend(pcr, digest, digest_algo); if (result != TPM_SUCCESS) { printk(BIOS_ERR, "TPM: Extending hash for `%s` into PCR %d failed.\n", name, pcr); diff --git a/src/security/tpm/tss.h b/src/security/tpm/tss.h index f68e1f47bab..a85503d7fdf 100644 --- a/src/security/tpm/tss.h +++ b/src/security/tpm/tss.h @@ -10,6 +10,7 @@ #define TSS_H_ #include +#include #include #include @@ -187,8 +188,8 @@ uint32_t tlcl_lock_nv_write(uint32_t index); /** * Perform a TPM_Extend. */ -uint32_t tlcl_extend(int pcr_num, const uint8_t *in_digest, - uint8_t *out_digest); +uint32_t tlcl_extend(int pcr_num, const uint8_t *digest_data, + enum vb2_hash_algorithm digest_algo); /** * Disable platform hierarchy. Specific to TPM2. The TPM error code is returned. 
diff --git a/src/security/tpm/tss/tcg-1.2/tss.c b/src/security/tpm/tss/tcg-1.2/tss.c index 52bc2722b25..6b79aabe871 100644 --- a/src/security/tpm/tss/tcg-1.2/tss.c +++ b/src/security/tpm/tss/tcg-1.2/tss.c @@ -331,25 +331,20 @@ uint32_t tlcl_set_global_lock(void) return tlcl_write(TPM_NV_INDEX0, NULL, 0); } -uint32_t tlcl_extend(int pcr_num, const uint8_t *in_digest, - uint8_t *out_digest) +uint32_t tlcl_extend(int pcr_num, const uint8_t *digest_data, + enum vb2_hash_algorithm digest_algo) { struct s_tpm_extend_cmd cmd; uint8_t response[kTpmResponseHeaderLength + kPcrDigestLength]; - uint32_t result; + + if (digest_algo != VB2_HASH_SHA1) + return TPM_E_INVALID_ARG; memcpy(&cmd, &tpm_extend_cmd, sizeof(cmd)); to_tpm_uint32(cmd.buffer + tpm_extend_cmd.pcrNum, pcr_num); - memcpy(cmd.buffer + cmd.inDigest, in_digest, kPcrDigestLength); - - result = tlcl_send_receive(cmd.buffer, response, sizeof(response)); - if (result != TPM_SUCCESS) - return result; + memcpy(cmd.buffer + cmd.inDigest, digest_data, kPcrDigestLength); - if (out_digest) - memcpy(out_digest, response + kTpmResponseHeaderLength, - kPcrDigestLength); - return result; + return tlcl_send_receive(cmd.buffer, response, sizeof(response)); } uint32_t tlcl_get_permissions(uint32_t index, uint32_t *permissions) diff --git a/src/security/tpm/tss/tcg-2.0/tss.c b/src/security/tpm/tss/tcg-2.0/tss.c index 8c9d12f7b08..06b0d6b8f92 100644 --- a/src/security/tpm/tss/tcg-2.0/tss.c +++ b/src/security/tpm/tss/tcg-2.0/tss.c @@ -118,21 +118,40 @@ uint32_t tlcl_assert_physical_presence(void) return TPM_SUCCESS; } -/* - * The caller will provide the digest in a 32 byte buffer, let's consider it a - * sha256 digest. - */ -uint32_t tlcl_extend(int pcr_num, const uint8_t *in_digest, - uint8_t *out_digest) +static TPM_ALG_ID tpmalg_from_vb2_hash(enum vb2_hash_algorithm hash_type) +{ + switch (hash_type) { + case VB2_HASH_SHA1: + return TPM_ALG_SHA1; + case VB2_HASH_SHA256: + return TPM_ALG_SHA256; + case VB2_HASH_SHA384: + return TPM_ALG_SHA384; + case VB2_HASH_SHA512: + return TPM_ALG_SHA512; + + default: + return TPM_ALG_ERROR; + } +} + +uint32_t tlcl_extend(int pcr_num, const uint8_t *digest_data, + enum vb2_hash_algorithm digest_type) { struct tpm2_pcr_extend_cmd pcr_ext_cmd; struct tpm2_response *response; + TPM_ALG_ID alg; + + alg = tpmalg_from_vb2_hash(digest_type); + if (alg == TPM_ALG_ERROR) + return TPM_E_HASH_ERROR; pcr_ext_cmd.pcrHandle = HR_PCR + pcr_num; pcr_ext_cmd.digests.count = 1; - pcr_ext_cmd.digests.digests[0].hashAlg = TPM_ALG_SHA256; - memcpy(pcr_ext_cmd.digests.digests[0].digest.sha256, in_digest, - sizeof(pcr_ext_cmd.digests.digests[0].digest.sha256)); + pcr_ext_cmd.digests.digests[0].hashAlg = alg; + /* Always copying to sha512 as it's the largest one */ + memcpy(pcr_ext_cmd.digests.digests[0].digest.sha512, digest_data, + vb2_digest_size(digest_type)); response = tpm_process_command(TPM2_PCR_Extend, &pcr_ext_cmd); diff --git a/src/vendorcode/eltan/security/mboot/mboot.c b/src/vendorcode/eltan/security/mboot/mboot.c index 575c5fc0220..50ca0256f39 100644 --- a/src/vendorcode/eltan/security/mboot/mboot.c +++ b/src/vendorcode/eltan/security/mboot/mboot.c @@ -136,7 +136,8 @@ int mboot_hash_extend_log(uint64_t flags, uint8_t *hashData, uint32_t hashDataLe printk(BIOS_DEBUG, "%s: SHA256 Hash Digest:\n", __func__); mboot_print_buffer(digest->digest.sha256, VB2_SHA256_DIGEST_SIZE); - return (tlcl_extend(newEventHdr->pcrIndex, (uint8_t *)&(newEventHdr->digest), NULL)); + return (tlcl_extend(newEventHdr->pcrIndex, (uint8_t 
*)&(newEventHdr->digest), + VB2_HASH_SHA256)); } /* From 4077a8d0fd1d9f2bca10420500d9d0d5a9dd49f6 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Oct 2022 00:24:37 +0300 Subject: [PATCH 002/213] security/tpm: make log format configurable via Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit doesn't add any new format options, just makes selecting existing format explicit. Ticket: https://ticket.coreboot.org/issues/422 Change-Id: I3903aff54e01093bc9ea75862bbf5989cc6e6c55 Signed-off-by: Sergii Dmytruk Reviewed-on: https://review.coreboot.org/c/coreboot/+/68746 Tested-by: build bot (Jenkins) Reviewed-by: Michał Żygowski --- src/security/tpm/Kconfig | 12 ++++++++++++ src/security/tpm/Makefile.inc | 10 +++++----- src/security/tpm/tspi/crtm.h | 14 +++++++++++++- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/security/tpm/Kconfig b/src/security/tpm/Kconfig index fc339a2b3af..5bc817d7f31 100644 --- a/src/security/tpm/Kconfig +++ b/src/security/tpm/Kconfig @@ -94,6 +94,18 @@ config TPM_MEASURED_BOOT help Enables measured boot (experimental) +choice + prompt "TPM event log format" + depends on TPM_MEASURED_BOOT + default TPM_LOG_CB + +config TPM_LOG_CB + bool "coreboot's custom format" + help + Custom coreboot-specific format of the log derived from TPM1 log format. + +endchoice + config TPM_MEASURED_BOOT_INIT_BOOTBLOCK bool depends on TPM_MEASURED_BOOT && !VBOOT diff --git a/src/security/tpm/Makefile.inc b/src/security/tpm/Makefile.inc index 8f633a89bf7..7083c00e33b 100644 --- a/src/security/tpm/Makefile.inc +++ b/src/security/tpm/Makefile.inc @@ -55,10 +55,10 @@ romstage-y += tspi/crtm.c ramstage-y += tspi/crtm.c postcar-y += tspi/crtm.c -ramstage-y += tspi/log.c -romstage-y += tspi/log.c -verstage-y += tspi/log.c -postcar-y += tspi/log.c -bootblock-y += tspi/log.c +ramstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c +romstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c +verstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c +postcar-$(CONFIG_TPM_LOG_CB) += tspi/log.c +bootblock-$(CONFIG_TPM_LOG_CB) += tspi/log.c endif # CONFIG_TPM_MEASURED_BOOT diff --git a/src/security/tpm/tspi/crtm.h b/src/security/tpm/tspi/crtm.h index bd5bc5785d0..e8e44fd7450 100644 --- a/src/security/tpm/tspi/crtm.h +++ b/src/security/tpm/tspi/crtm.h @@ -16,7 +16,19 @@ */ #define TPM_RUNTIME_DATA_PCR 3 -#define TPM_MEASURE_ALGO (CONFIG(TPM1) ? VB2_HASH_SHA1 : VB2_HASH_SHA256) +#if CONFIG(TPM_LOG_CB) && CONFIG(TPM1) +# define TPM_MEASURE_ALGO VB2_HASH_SHA1 +#elif CONFIG(TPM_LOG_CB) && CONFIG(TPM2) +# define TPM_MEASURE_ALGO VB2_HASH_SHA256 +#endif + +#if !defined(TPM_MEASURE_ALGO) +# if !CONFIG(TPM_MEASURED_BOOT) +# define TPM_MEASURE_ALGO VB2_HASH_INVALID +# else +# error "Misconfiguration: failed to determine TPM hashing algorithm" +# endif +#endif /** * Measure digests cached in TCPA log entries into PCRs From 6aefa39dab33a3c26a46d4fe87be8331844794bb Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 10 Nov 2022 00:40:51 +0200 Subject: [PATCH 003/213] treewide: stop calling custom TPM log "TCPA" TCPA usually refers to log described by TPM 1.2 specification. 
Change-Id: I896bd94f18b34d6c4b280f58b011d704df3d4022 Ticket: https://ticket.coreboot.org/issues/423 Signed-off-by: Sergii Dmytruk Reviewed-on: https://review.coreboot.org/c/coreboot/+/69444 Tested-by: build bot (Jenkins) Reviewed-by: Julius Werner --- src/arch/x86/car.ld | 4 +- .../bsd/include/commonlib/bsd/cbmem_id.h | 4 +- .../include/commonlib/coreboot_tables.h | 2 +- .../include/commonlib/tcpa_log_serialized.h | 29 ----- .../include/commonlib/tpm_log_serialized.h | 30 +++++ src/include/memlayout.h | 6 +- src/include/symbols.h | 2 +- src/lib/cbfs.c | 2 +- src/lib/coreboot_table.c | 2 +- src/security/tpm/tspi.h | 24 ++-- src/security/tpm/tspi/crtm.c | 32 +++--- src/security/tpm/tspi/crtm.h | 2 +- src/security/tpm/tspi/log.c | 104 +++++++++--------- src/security/tpm/tspi/tspi.c | 3 +- src/soc/cavium/cn81xx/memlayout.ld | 2 +- src/soc/mediatek/mt8173/memlayout.ld | 2 +- src/soc/mediatek/mt8183/memlayout.ld | 2 +- .../mediatek/mt8186/include/soc/memlayout.ld | 2 +- .../mediatek/mt8188/include/soc/memlayout.ld | 2 +- .../mediatek/mt8192/include/soc/memlayout.ld | 2 +- .../mediatek/mt8195/include/soc/memlayout.ld | 2 +- src/soc/nvidia/tegra124/memlayout.ld | 2 +- src/soc/nvidia/tegra210/memlayout.ld | 2 +- src/soc/qualcomm/sc7180/memlayout.ld | 2 +- src/soc/samsung/exynos5250/memlayout.ld | 2 +- util/cbmem/cbmem.c | 42 +++---- 26 files changed, 156 insertions(+), 154 deletions(-) delete mode 100644 src/commonlib/include/commonlib/tcpa_log_serialized.h create mode 100644 src/commonlib/include/commonlib/tpm_log_serialized.h diff --git a/src/arch/x86/car.ld b/src/arch/x86/car.ld index 132937f4ee9..1b8307ddf32 100644 --- a/src/arch/x86/car.ld +++ b/src/arch/x86/car.ld @@ -20,9 +20,9 @@ VBOOT2_WORK(., 12K) #endif #if CONFIG(TPM_MEASURED_BOOT) - /* Vboot measured boot TCPA log measurements. + /* Vboot measured boot TPM log measurements. * Needs to be transferred until CBMEM is available */ - TPM_TCPA_LOG(., 2K) + TPM_LOG(., 2K) #endif /* Stack for CAR stages. Since it persists across all stages that * use CAR it can be reused. 
The chipset/SoC is expected to provide diff --git a/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h b/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h index 0cf9c7fdef6..7f2824e70fa 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h +++ b/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h @@ -59,7 +59,7 @@ #define CBMEM_ID_STAGEx_CACHE 0x57a9e100 #define CBMEM_ID_STAGEx_RAW 0x57a9e200 #define CBMEM_ID_STORAGE_DATA 0x53746f72 -#define CBMEM_ID_TCPA_LOG 0x54435041 +#define CBMEM_ID_TPM_CB_LOG 0x54435041 #define CBMEM_ID_TCPA_TCG_LOG 0x54445041 #define CBMEM_ID_TIMESTAMP 0x54494d45 #define CBMEM_ID_TPM2_TCG_LOG 0x54504d32 @@ -135,7 +135,7 @@ { CBMEM_ID_SMBIOS, "SMBIOS " }, \ { CBMEM_ID_SMM_SAVE_SPACE, "SMM BACKUP " }, \ { CBMEM_ID_STORAGE_DATA, "SD/MMC/eMMC" }, \ - { CBMEM_ID_TCPA_LOG, "TCPA LOG " }, \ + { CBMEM_ID_TPM_CB_LOG, "TPM CB LOG " }, \ { CBMEM_ID_TCPA_TCG_LOG, "TCPA TCGLOG" }, \ { CBMEM_ID_TIMESTAMP, "TIME STAMP " }, \ { CBMEM_ID_TPM2_TCG_LOG, "TPM2 TCGLOG" }, \ diff --git a/src/commonlib/include/commonlib/coreboot_tables.h b/src/commonlib/include/commonlib/coreboot_tables.h index 3f7ff2df29a..422fedfa99b 100644 --- a/src/commonlib/include/commonlib/coreboot_tables.h +++ b/src/commonlib/include/commonlib/coreboot_tables.h @@ -77,7 +77,7 @@ enum { LB_TAG_MAC_ADDRS = 0x0033, LB_TAG_VBOOT_WORKBUF = 0x0034, LB_TAG_MMC_INFO = 0x0035, - LB_TAG_TCPA_LOG = 0x0036, + LB_TAG_TPM_CB_LOG = 0x0036, LB_TAG_FMAP = 0x0037, LB_TAG_PLATFORM_BLOB_VERSION = 0x0038, LB_TAG_SMMSTOREV2 = 0x0039, diff --git a/src/commonlib/include/commonlib/tcpa_log_serialized.h b/src/commonlib/include/commonlib/tcpa_log_serialized.h deleted file mode 100644 index 4190a7db647..00000000000 --- a/src/commonlib/include/commonlib/tcpa_log_serialized.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef __TCPA_LOG_SERIALIZED_H__ -#define __TCPA_LOG_SERIALIZED_H__ - -#include - -#define MAX_TCPA_LOG_ENTRIES 50 -#define TCPA_DIGEST_MAX_LENGTH 64 -#define TCPA_PCR_HASH_NAME 50 -#define TCPA_PCR_HASH_LEN 10 -/* Assumption of 2K TCPA log size reserved for CAR/SRAM */ -#define MAX_PRERAM_TCPA_LOG_ENTRIES 15 - -struct tcpa_entry { - uint32_t pcr; - char digest_type[TCPA_PCR_HASH_LEN]; - uint8_t digest[TCPA_DIGEST_MAX_LENGTH]; - uint32_t digest_length; - char name[TCPA_PCR_HASH_NAME]; -} __packed; - -struct tcpa_table { - uint16_t max_entries; - uint16_t num_entries; - struct tcpa_entry entries[0]; /* Variable number of entries */ -} __packed; - -#endif diff --git a/src/commonlib/include/commonlib/tpm_log_serialized.h b/src/commonlib/include/commonlib/tpm_log_serialized.h new file mode 100644 index 00000000000..dc58dc09c3e --- /dev/null +++ b/src/commonlib/include/commonlib/tpm_log_serialized.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef COMMONLIB_TPM_LOG_SERIALIZED_H +#define COMMONLIB_TPM_LOG_SERIALIZED_H + +#include +#include + +#define MAX_TPM_LOG_ENTRIES 50 +#define TPM_CB_LOG_DIGEST_MAX_LENGTH 64 +#define TPM_CB_LOG_PCR_HASH_NAME 50 +#define TPM_CB_LOG_PCR_HASH_LEN 10 +/* Assumption of 2K TCPA log size reserved for CAR/SRAM */ +#define MAX_PRERAM_TPM_LOG_ENTRIES 15 + +struct tpm_cb_log_entry { + uint32_t pcr; + char digest_type[TPM_CB_LOG_PCR_HASH_LEN]; + uint8_t digest[TPM_CB_LOG_DIGEST_MAX_LENGTH]; + uint32_t digest_length; + char name[TPM_CB_LOG_PCR_HASH_NAME]; +} __packed; + +struct tpm_cb_log_table { + uint16_t max_entries; + uint16_t num_entries; + struct tpm_cb_log_entry entries[0]; /* Variable number of entries */ +} __packed; + +#endif diff 
--git a/src/include/memlayout.h b/src/include/memlayout.h index 1e9fca81980..dee5eaf3b6e 100644 --- a/src/include/memlayout.h +++ b/src/include/memlayout.h @@ -168,9 +168,9 @@ STR(vboot2 work buffer size must be equivalent to \ VB2_FIRMWARE_WORKBUF_RECOMMENDED_SIZE! (sz))); -#define TPM_TCPA_LOG(addr, size) \ - REGION(tpm_tcpa_log, addr, size, 16) \ - _ = ASSERT(size >= 2K, "tpm tcpa log buffer must be at least 2K!"); +#define TPM_LOG(addr, size) \ + REGION(tpm_log, addr, size, 16) \ + _ = ASSERT(size >= 2K, "tpm log buffer must be at least 2K!"); #if ENV_SEPARATE_VERSTAGE #define VERSTAGE(addr, sz) \ diff --git a/src/include/symbols.h b/src/include/symbols.h index ee7c5031fa8..a03af08463f 100644 --- a/src/include/symbols.h +++ b/src/include/symbols.h @@ -37,7 +37,7 @@ DECLARE_OPTIONAL_REGION(postram_cbfs_cache) DECLARE_OPTIONAL_REGION(cbfs_cache) DECLARE_REGION(cbfs_mcache) DECLARE_REGION(fmap_cache) -DECLARE_REGION(tpm_tcpa_log) +DECLARE_REGION(tpm_log) #if ENV_ROMSTAGE && CONFIG(ASAN_IN_ROMSTAGE) DECLARE_REGION(bss) diff --git a/src/lib/cbfs.c b/src/lib/cbfs.c index e1334f41528..40800005c13 100644 --- a/src/lib/cbfs.c +++ b/src/lib/cbfs.c @@ -176,7 +176,7 @@ static bool cbfs_file_hash_mismatch(const void *buffer, size_t size, } if (tspi_cbfs_measurement(mdata->h.filename, be32toh(mdata->h.type), hash)) - ERROR("failed to measure '%s' into TCPA log\n", mdata->h.filename); + ERROR("failed to measure '%s' into TPM log\n", mdata->h.filename); /* We intentionally continue to boot on measurement errors. */ } diff --git a/src/lib/coreboot_table.c b/src/lib/coreboot_table.c index 2a7ccc5f55c..77e449cd510 100644 --- a/src/lib/coreboot_table.c +++ b/src/lib/coreboot_table.c @@ -263,7 +263,7 @@ static void add_cbmem_pointers(struct lb_header *header) {CBMEM_ID_ACPI_CNVS, LB_TAG_ACPI_CNVS}, {CBMEM_ID_VPD, LB_TAG_VPD}, {CBMEM_ID_WIFI_CALIBRATION, LB_TAG_WIFI_CALIBRATION}, - {CBMEM_ID_TCPA_LOG, LB_TAG_TCPA_LOG}, + {CBMEM_ID_TPM_CB_LOG, LB_TAG_TPM_CB_LOG}, {CBMEM_ID_FMAP, LB_TAG_FMAP}, {CBMEM_ID_VBOOT_WORKBUF, LB_TAG_VBOOT_WORKBUF}, {CBMEM_ID_TYPE_C_INFO, LB_TAG_TYPE_C_INFO}, diff --git a/src/security/tpm/tspi.h b/src/security/tpm/tspi.h index 7157b4d7303..aee38aa9029 100644 --- a/src/security/tpm/tspi.h +++ b/src/security/tpm/tspi.h @@ -4,7 +4,7 @@ #define TSPI_H_ #include -#include +#include #include #include @@ -13,33 +13,33 @@ /** * Get the pointer to the single instance of global - * tcpa log data, and initialize it when necessary + * TPM log data, and initialize it when necessary */ -struct tcpa_table *tcpa_log_init(void); +struct tpm_cb_log_table *tpm_log_init(void); /** - * Clears the pre-RAM tcpa log data and initializes + * Clears the pre-RAM TPM log data and initializes * any content with default values */ -void tcpa_preram_log_clear(void); +void tpm_preram_log_clear(void); /** - * Add table entry for cbmem TCPA log. + * Add table entry for cbmem TPM log. 
* @param name Name of the hashed data * @param pcr PCR used to extend hashed data * @param diget_algo sets the digest algorithm * @param digest sets the hash extended into the tpm * @param digest_len the length of the digest */ -void tcpa_log_add_table_entry(const char *name, const uint32_t pcr, - enum vb2_hash_algorithm digest_algo, - const uint8_t *digest, - const size_t digest_len); +void tpm_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); /** - * Dump TCPA log entries on console + * Dump TPM log entries on console */ -void tcpa_log_dump(void *unused); +void tpm_log_dump(void *unused); /** * Ask vboot for a digest and extend a TPM PCR with it. diff --git a/src/security/tpm/tspi/crtm.c b/src/security/tpm/tspi/crtm.c index 8eefc11a362..6e4fadad2bd 100644 --- a/src/security/tpm/tspi/crtm.c +++ b/src/security/tpm/tspi/crtm.c @@ -6,11 +6,11 @@ #include "crtm.h" #include -static int tcpa_log_initialized; -static inline int tcpa_log_available(void) +static int tpm_log_initialized; +static inline int tpm_log_available(void) { if (ENV_BOOTBLOCK) - return tcpa_log_initialized; + return tpm_log_initialized; return 1; } @@ -33,10 +33,10 @@ static inline int tcpa_log_available(void) */ static uint32_t tspi_init_crtm(void) { - /* Initialize TCPA PRERAM log. */ - if (!tcpa_log_available()) { - tcpa_preram_log_clear(); - tcpa_log_initialized = 1; + /* Initialize TPM PRERAM log. */ + if (!tpm_log_available()) { + tpm_preram_log_clear(); + tpm_log_initialized = 1; } else { printk(BIOS_WARNING, "TSPI: CRTM already initialized!\n"); return VB2_SUCCESS; @@ -109,9 +109,9 @@ static bool is_runtime_data(const char *name) uint32_t tspi_cbfs_measurement(const char *name, uint32_t type, const struct vb2_hash *hash) { uint32_t pcr_index; - char tcpa_metadata[TCPA_PCR_HASH_NAME]; + char tpm_log_metadata[TPM_CB_LOG_PCR_HASH_NAME]; - if (!tcpa_log_available()) { + if (!tpm_log_available()) { if (tspi_init_crtm() != VB2_SUCCESS) { printk(BIOS_WARNING, "Initializing CRTM failed!\n"); @@ -142,29 +142,29 @@ uint32_t tspi_cbfs_measurement(const char *name, uint32_t type, const struct vb2 break; } - snprintf(tcpa_metadata, TCPA_PCR_HASH_NAME, "CBFS: %s", name); + snprintf(tpm_log_metadata, TPM_CB_LOG_PCR_HASH_NAME, "CBFS: %s", name); return tpm_extend_pcr(pcr_index, hash->algo, hash->raw, vb2_digest_size(hash->algo), - tcpa_metadata); + tpm_log_metadata); } int tspi_measure_cache_to_pcr(void) { int i; - struct tcpa_table *tclt = tcpa_log_init(); + struct tpm_cb_log_table *tclt = tpm_log_init(); /* This means the table is empty. 
*/ - if (!tcpa_log_available()) + if (!tpm_log_available()) return VB2_SUCCESS; if (!tclt) { - printk(BIOS_WARNING, "TCPA: Log non-existent!\n"); + printk(BIOS_WARNING, "TPM LOG: log non-existent!\n"); return VB2_ERROR_UNKNOWN; } - printk(BIOS_DEBUG, "TPM: Write digests cached in TCPA log to PCR\n"); + printk(BIOS_DEBUG, "TPM: Write digests cached in TPM log to PCR\n"); for (i = 0; i < tclt->num_entries; i++) { - struct tcpa_entry *tce = &tclt->entries[i]; + struct tpm_cb_log_entry *tce = &tclt->entries[i]; if (tce) { printk(BIOS_DEBUG, "TPM: Write digest for" " %s into PCR %d\n", diff --git a/src/security/tpm/tspi/crtm.h b/src/security/tpm/tspi/crtm.h index e8e44fd7450..97ef09a9b72 100644 --- a/src/security/tpm/tspi/crtm.h +++ b/src/security/tpm/tspi/crtm.h @@ -31,7 +31,7 @@ #endif /** - * Measure digests cached in TCPA log entries into PCRs + * Measure digests cached in TPM log entries into PCRs */ int tspi_measure_cache_to_pcr(void); diff --git a/src/security/tpm/tspi/log.c b/src/security/tpm/tspi/log.c index 296cb2d1082..fa95b80e81f 100644 --- a/src/security/tpm/tspi/log.c +++ b/src/security/tpm/tspi/log.c @@ -9,20 +9,20 @@ #include #include -static struct tcpa_table *tcpa_cbmem_init(void) +static struct tpm_cb_log_table *tpm_log_cbmem_init(void) { - static struct tcpa_table *tclt; + static struct tpm_cb_log_table *tclt; if (tclt) return tclt; if (cbmem_possibly_online()) { - tclt = cbmem_find(CBMEM_ID_TCPA_LOG); + tclt = cbmem_find(CBMEM_ID_TPM_CB_LOG); if (!tclt) { - size_t tcpa_log_len = sizeof(struct tcpa_table) + - MAX_TCPA_LOG_ENTRIES * sizeof(struct tcpa_entry); - tclt = cbmem_add(CBMEM_ID_TCPA_LOG, tcpa_log_len); + size_t tpm_log_len = sizeof(struct tpm_cb_log_table) + + MAX_TPM_LOG_ENTRIES * sizeof(struct tpm_cb_log_entry); + tclt = cbmem_add(CBMEM_ID_TPM_CB_LOG, tpm_log_len); if (tclt) { - tclt->max_entries = MAX_TCPA_LOG_ENTRIES; + tclt->max_entries = MAX_TPM_LOG_ENTRIES; tclt->num_entries = 0; } } @@ -30,39 +30,39 @@ static struct tcpa_table *tcpa_cbmem_init(void) return tclt; } -struct tcpa_table *tcpa_log_init(void) +struct tpm_cb_log_table *tpm_log_init(void) { - static struct tcpa_table *tclt; + static struct tpm_cb_log_table *tclt; /* We are dealing here with pre CBMEM environment. 
* If cbmem isn't available use CAR or SRAM */ if (!cbmem_possibly_online() && !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) - return (struct tcpa_table *)_tpm_tcpa_log; + return (struct tpm_cb_log_table *)_tpm_log; else if (ENV_CREATES_CBMEM && !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) { - tclt = tcpa_cbmem_init(); + tclt = tpm_log_cbmem_init(); if (!tclt) - return (struct tcpa_table *)_tpm_tcpa_log; + return (struct tpm_cb_log_table *)_tpm_log; } else { - tclt = tcpa_cbmem_init(); + tclt = tpm_log_cbmem_init(); } return tclt; } -void tcpa_log_dump(void *unused) +void tpm_log_dump(void *unused) { int i, j; - struct tcpa_table *tclt; + struct tpm_cb_log_table *tclt; - tclt = tcpa_log_init(); + tclt = tpm_log_init(); if (!tclt) return; - printk(BIOS_INFO, "coreboot TCPA measurements:\n\n"); + printk(BIOS_INFO, "coreboot TPM log measurements:\n\n"); for (i = 0; i < tclt->num_entries; i++) { - struct tcpa_entry *tce = &tclt->entries[i]; + struct tpm_cb_log_entry *tce = &tclt->entries[i]; if (tce) { printk(BIOS_INFO, " PCR-%u ", tce->pcr); @@ -76,85 +76,87 @@ void tcpa_log_dump(void *unused) printk(BIOS_INFO, "\n"); } -void tcpa_log_add_table_entry(const char *name, const uint32_t pcr, - enum vb2_hash_algorithm digest_algo, - const uint8_t *digest, - const size_t digest_len) +void tpm_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) { - struct tcpa_table *tclt = tcpa_log_init(); + struct tpm_cb_log_table *tclt = tpm_log_init(); if (!tclt) { - printk(BIOS_WARNING, "TCPA: Log non-existent!\n"); + printk(BIOS_WARNING, "TPM LOG: Log non-existent!\n"); return; } if (tclt->num_entries >= tclt->max_entries) { - printk(BIOS_WARNING, "TCPA: TCPA log table is full\n"); + printk(BIOS_WARNING, "TPM LOG: log table is full\n"); return; } if (!name) { - printk(BIOS_WARNING, "TCPA: TCPA entry name not set\n"); + printk(BIOS_WARNING, "TPM LOG: entry name not set\n"); return; } - struct tcpa_entry *tce = &tclt->entries[tclt->num_entries++]; - strncpy(tce->name, name, TCPA_PCR_HASH_NAME - 1); + struct tpm_cb_log_entry *tce = &tclt->entries[tclt->num_entries++]; + strncpy(tce->name, name, TPM_CB_LOG_PCR_HASH_NAME - 1); tce->pcr = pcr; - if (digest_len > TCPA_DIGEST_MAX_LENGTH) { - printk(BIOS_WARNING, "TCPA: PCR digest too long for TCPA log entry\n"); + if (digest_len > TPM_CB_LOG_DIGEST_MAX_LENGTH) { + printk(BIOS_WARNING, "TPM LOG: PCR digest too long for log entry\n"); return; } strncpy(tce->digest_type, - vb2_get_hash_algorithm_name(digest_algo), - TCPA_PCR_HASH_LEN - 1); + vb2_get_hash_algorithm_name(digest_algo), + TPM_CB_LOG_PCR_HASH_LEN - 1); tce->digest_length = digest_len; memcpy(tce->digest, digest, tce->digest_length); } -void tcpa_preram_log_clear(void) +void tpm_preram_log_clear(void) { - printk(BIOS_INFO, "TCPA: Clearing coreboot TCPA log\n"); - struct tcpa_table *tclt = (struct tcpa_table *)_tpm_tcpa_log; - tclt->max_entries = MAX_TCPA_LOG_ENTRIES; + printk(BIOS_INFO, "TPM LOG: clearing preram log\n"); + struct tpm_cb_log_table *tclt = (struct tpm_cb_log_table *)_tpm_log; + tclt->max_entries = MAX_TPM_LOG_ENTRIES; tclt->num_entries = 0; } #if !CONFIG(VBOOT_RETURN_FROM_VERSTAGE) -static void recover_tcpa_log(int is_recovery) +static void recover_tpm_log(int is_recovery) { - struct tcpa_table *preram_log = (struct tcpa_table *)_tpm_tcpa_log; - struct tcpa_table *ram_log = NULL; + struct tpm_cb_log_table *preram_log = (struct tpm_cb_log_table *)_tpm_log; + struct tpm_cb_log_table *ram_log = NULL; int i; - if 
(preram_log->num_entries > MAX_PRERAM_TCPA_LOG_ENTRIES) { - printk(BIOS_WARNING, "TCPA: Pre-RAM TCPA log is too full, possible corruption\n"); + if (preram_log->num_entries > MAX_PRERAM_TPM_LOG_ENTRIES) { + printk(BIOS_WARNING, "TPM LOG: pre-RAM log is too full, possible corruption\n"); return; } - ram_log = tcpa_cbmem_init(); + ram_log = tpm_log_cbmem_init(); if (!ram_log) { - printk(BIOS_WARNING, "TCPA: CBMEM not available something went wrong\n"); + printk(BIOS_WARNING, "TPM LOG: CBMEM not available something went wrong\n"); return; } for (i = 0; i < preram_log->num_entries; i++) { - struct tcpa_entry *tce = &ram_log->entries[ram_log->num_entries++]; - strncpy(tce->name, preram_log->entries[i].name, TCPA_PCR_HASH_NAME - 1); + struct tpm_cb_log_entry *tce = &ram_log->entries[ram_log->num_entries++]; + strncpy(tce->name, preram_log->entries[i].name, TPM_CB_LOG_PCR_HASH_NAME - 1); tce->pcr = preram_log->entries[i].pcr; - if (preram_log->entries[i].digest_length > TCPA_DIGEST_MAX_LENGTH) { - printk(BIOS_WARNING, "TCPA: PCR digest too long for TCPA log entry\n"); + if (preram_log->entries[i].digest_length > TPM_CB_LOG_DIGEST_MAX_LENGTH) { + printk(BIOS_WARNING, "TPM LOG: PCR digest too long for log entry\n"); return; } - strncpy(tce->digest_type, preram_log->entries[i].digest_type, TCPA_PCR_HASH_LEN - 1); - tce->digest_length = MIN(preram_log->entries[i].digest_length, TCPA_DIGEST_MAX_LENGTH); + strncpy(tce->digest_type, preram_log->entries[i].digest_type, + TPM_CB_LOG_PCR_HASH_LEN - 1); + tce->digest_length = MIN(preram_log->entries[i].digest_length, + TPM_CB_LOG_DIGEST_MAX_LENGTH); memcpy(tce->digest, preram_log->entries[i].digest, tce->digest_length); } } -CBMEM_CREATION_HOOK(recover_tcpa_log); +CBMEM_CREATION_HOOK(recover_tpm_log); #endif -BOOT_STATE_INIT_ENTRY(BS_PAYLOAD_BOOT, BS_ON_ENTRY, tcpa_log_dump, NULL); +BOOT_STATE_INIT_ENTRY(BS_PAYLOAD_BOOT, BS_ON_ENTRY, tpm_log_dump, NULL); diff --git a/src/security/tpm/tspi/tspi.c b/src/security/tpm/tspi/tspi.c index fb7a5d50c76..72615fc3067 100644 --- a/src/security/tpm/tspi/tspi.c +++ b/src/security/tpm/tspi/tspi.c @@ -242,8 +242,7 @@ uint32_t tpm_extend_pcr(int pcr, enum vb2_hash_algorithm digest_algo, } if (CONFIG(TPM_MEASURED_BOOT)) - tcpa_log_add_table_entry(name, pcr, digest_algo, - digest, digest_len); + tpm_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); printk(BIOS_DEBUG, "TPM: Digest of `%s` to PCR %d %s\n", name, pcr, tspi_tpm_is_setup() ? 
"measured" : "logged"); diff --git a/src/soc/cavium/cn81xx/memlayout.ld b/src/soc/cavium/cn81xx/memlayout.ld index 0257b23ae3c..41f091415ec 100644 --- a/src/soc/cavium/cn81xx/memlayout.ld +++ b/src/soc/cavium/cn81xx/memlayout.ld @@ -23,7 +23,7 @@ SECTIONS BOOTBLOCK(BOOTROM_OFFSET + 0x20000, 56K) CBFS_MCACHE(BOOTROM_OFFSET + 0x2e000, 8K) VBOOT2_WORK(BOOTROM_OFFSET + 0x30000, 12K) - TPM_TCPA_LOG(BOOTROM_OFFSET + 0x33000, 2K) + TPM_LOG(BOOTROM_OFFSET + 0x33000, 2K) VERSTAGE(BOOTROM_OFFSET + 0x33800, 50K) ROMSTAGE(BOOTROM_OFFSET + 0x40000, 256K) diff --git a/src/soc/mediatek/mt8173/memlayout.ld b/src/soc/mediatek/mt8173/memlayout.ld index 092cfdf2bf4..8dce4284de9 100644 --- a/src/soc/mediatek/mt8173/memlayout.ld +++ b/src/soc/mediatek/mt8173/memlayout.ld @@ -26,7 +26,7 @@ SECTIONS SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) PRERAM_CBMEM_CONSOLE(0x00104000, 12K) WATCHDOG_TOMBSTONE(0x00107000, 4) diff --git a/src/soc/mediatek/mt8183/memlayout.ld b/src/soc/mediatek/mt8183/memlayout.ld index 0acd174c84f..390842693f5 100644 --- a/src/soc/mediatek/mt8183/memlayout.ld +++ b/src/soc/mediatek/mt8183/memlayout.ld @@ -23,7 +23,7 @@ SECTIONS { SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) WATCHDOG_TOMBSTONE(0x00104000, 4) PRERAM_CBMEM_CONSOLE(0x00104004, 63K - 4) diff --git a/src/soc/mediatek/mt8186/include/soc/memlayout.ld b/src/soc/mediatek/mt8186/include/soc/memlayout.ld index 1764632f291..f8bb0fa898e 100644 --- a/src/soc/mediatek/mt8186/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8186/include/soc/memlayout.ld @@ -31,7 +31,7 @@ SECTIONS /* EMPTY(0x0010a804, 1K - 4) */ /* Regions that can also be moved to SRAM_L2C. */ TIMESTAMP(0x0010ac00, 1K) - TPM_TCPA_LOG(0x0010b000, 2K) + TPM_LOG(0x0010b000, 2K) FMAP_CACHE(0x0010b800, 2K) CBFS_MCACHE(0x0010c000, 16K) SRAM_END(0x00110000) diff --git a/src/soc/mediatek/mt8188/include/soc/memlayout.ld b/src/soc/mediatek/mt8188/include/soc/memlayout.ld index dc4090d74ba..8d1f2bde650 100644 --- a/src/soc/mediatek/mt8188/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8188/include/soc/memlayout.ld @@ -34,7 +34,7 @@ SECTIONS CBFS_MCACHE(0x00120000, 16k) VBOOT2_WORK(0x00124000, 12K) FMAP_CACHE(0x00127000, 2k) - TPM_TCPA_LOG(0x00127800, 2k) + TPM_LOG(0x00127800, 2k) TIMESTAMP(0x00128000, 1k) /* End of regions that can also be moved to SRAM_L2C. 
*/ /* EMPTY(0x00128400, 31K) */ diff --git a/src/soc/mediatek/mt8192/include/soc/memlayout.ld b/src/soc/mediatek/mt8192/include/soc/memlayout.ld index 150bfdde788..6c238c7d8fc 100644 --- a/src/soc/mediatek/mt8192/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8192/include/soc/memlayout.ld @@ -23,7 +23,7 @@ SECTIONS { SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) WATCHDOG_TOMBSTONE(0x00104000, 4) CBFS_MCACHE(0x00107c00, 8K) diff --git a/src/soc/mediatek/mt8195/include/soc/memlayout.ld b/src/soc/mediatek/mt8195/include/soc/memlayout.ld index e8b51d24047..8b8463716fc 100644 --- a/src/soc/mediatek/mt8195/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8195/include/soc/memlayout.ld @@ -26,7 +26,7 @@ SECTIONS { SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) WATCHDOG_TOMBSTONE(0x00104000, 4) EARLY_INIT(0x00104010, 128) diff --git a/src/soc/nvidia/tegra124/memlayout.ld b/src/soc/nvidia/tegra124/memlayout.ld index 68c70c10545..ed386f1fdc7 100644 --- a/src/soc/nvidia/tegra124/memlayout.ld +++ b/src/soc/nvidia/tegra124/memlayout.ld @@ -19,7 +19,7 @@ SECTIONS CBFS_MCACHE(0x40006000, 8K) PRERAM_CBFS_CACHE(0x40008000, 6K) VBOOT2_WORK(0x40009800, 12K) - TPM_TCPA_LOG(0x4000D800, 2K) + TPM_LOG(0x4000D800, 2K) STACK(0x4000E000, 8K) BOOTBLOCK(0x40010000, 32K) VERSTAGE(0x40018000, 70K) diff --git a/src/soc/nvidia/tegra210/memlayout.ld b/src/soc/nvidia/tegra210/memlayout.ld index 42f21646447..4898fc14696 100644 --- a/src/soc/nvidia/tegra210/memlayout.ld +++ b/src/soc/nvidia/tegra210/memlayout.ld @@ -20,7 +20,7 @@ SECTIONS PRERAM_CBFS_CACHE(0x40001000, 20K) CBFS_MCACHE(0x40006000, 8K) VBOOT2_WORK(0x40008000, 12K) - TPM_TCPA_LOG(0x4000B000, 2K) + TPM_LOG(0x4000B000, 2K) #if ENV_ARM64 STACK(0x4000B800, 3K) #else /* AVP gets a separate stack to avoid any chance of handoff races. 
*/ diff --git a/src/soc/qualcomm/sc7180/memlayout.ld b/src/soc/qualcomm/sc7180/memlayout.ld index 938f3e1e422..e956c647ff5 100644 --- a/src/soc/qualcomm/sc7180/memlayout.ld +++ b/src/soc/qualcomm/sc7180/memlayout.ld @@ -32,7 +32,7 @@ SECTIONS REGION(pbl_timestamps, 0x14800000, 83K, 4K) WATCHDOG_TOMBSTONE(0x14814FFC, 4) BOOTBLOCK(0x14815000, 48K) - TPM_TCPA_LOG(0x14821000, 2K) + TPM_LOG(0x14821000, 2K) PRERAM_CBFS_CACHE(0x14821800, 60K) PRERAM_CBMEM_CONSOLE(0x14830800, 32K) TIMESTAMP(0x14838800, 1K) diff --git a/src/soc/samsung/exynos5250/memlayout.ld b/src/soc/samsung/exynos5250/memlayout.ld index eec9f60a91f..142a8924b06 100644 --- a/src/soc/samsung/exynos5250/memlayout.ld +++ b/src/soc/samsung/exynos5250/memlayout.ld @@ -21,7 +21,7 @@ SECTIONS PRERAM_CBFS_CACHE(0x205C000, 68K) CBFS_MCACHE(0x206D000, 8K) FMAP_CACHE(0x206F000, 2K) - TPM_TCPA_LOG(0x206F800, 2K) + TPM_LOG(0x206F800, 2K) VBOOT2_WORK(0x2070000, 12K) STACK(0x2074000, 16K) SRAM_END(0x2078000) diff --git a/util/cbmem/cbmem.c b/util/cbmem/cbmem.c index 15431f1d6e2..b4106a1568b 100644 --- a/util/cbmem/cbmem.c +++ b/util/cbmem/cbmem.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #ifdef __OpenBSD__ @@ -267,7 +267,7 @@ static int find_cbmem_entry(uint32_t id, uint64_t *addr, size_t *size) static struct lb_cbmem_ref timestamps; static struct lb_cbmem_ref console; -static struct lb_cbmem_ref tcpa_log; +static struct lb_cbmem_ref tpm_cb_log; static struct lb_memory_range cbmem; /* This is a work-around for a nasty problem introduced by initially having @@ -336,9 +336,9 @@ static int parse_cbtable_entries(const struct mapping *table_mapping) console = parse_cbmem_ref((struct lb_cbmem_ref *)lbr_p); continue; } - case LB_TAG_TCPA_LOG: { - debug(" Found tcpa log table.\n"); - tcpa_log = + case LB_TAG_TPM_CB_LOG: { + debug(" Found TPM CB log table.\n"); + tpm_cb_log = parse_cbmem_ref((struct lb_cbmem_ref *)lbr_p); continue; } @@ -843,35 +843,35 @@ static void timestamp_add_now(uint32_t timestamp_id) unmap_memory(×tamp_mapping); } -/* dump the tcpa log table */ -static void dump_tcpa_log(void) +/* dump the TPM CB log table */ +static void dump_tpm_cb_log(void) { - const struct tcpa_table *tclt_p; + const struct tpm_cb_log_table *tclt_p; size_t size; - struct mapping tcpa_mapping; + struct mapping log_mapping; - if (tcpa_log.tag != LB_TAG_TCPA_LOG) { - fprintf(stderr, "No tcpa log found in coreboot table.\n"); + if (tpm_cb_log.tag != LB_TAG_TPM_CB_LOG) { + fprintf(stderr, "No TPM log found in coreboot table.\n"); return; } size = sizeof(*tclt_p); - tclt_p = map_memory(&tcpa_mapping, tcpa_log.cbmem_addr, size); + tclt_p = map_memory(&log_mapping, tpm_cb_log.cbmem_addr, size); if (!tclt_p) - die("Unable to map tcpa log header\n"); + die("Unable to map TPM log header\n"); size += tclt_p->num_entries * sizeof(tclt_p->entries[0]); - unmap_memory(&tcpa_mapping); + unmap_memory(&log_mapping); - tclt_p = map_memory(&tcpa_mapping, tcpa_log.cbmem_addr, size); + tclt_p = map_memory(&log_mapping, tpm_cb_log.cbmem_addr, size); if (!tclt_p) - die("Unable to map full tcpa log table\n"); + die("Unable to map full TPM log table\n"); - printf("coreboot TCPA log:\n\n"); + printf("coreboot TPM log:\n\n"); for (uint16_t i = 0; i < tclt_p->num_entries; i++) { - const struct tcpa_entry *tce = &tclt_p->entries[i]; + const struct tpm_cb_log_entry *tce = &tclt_p->entries[i]; printf(" PCR-%u ", tce->pcr); @@ -881,7 +881,7 @@ static void dump_tcpa_log(void) printf(" %s [%s]\n", tce->digest_type, tce->name); } - unmap_memory(&tcpa_mapping); + 
unmap_memory(&log_mapping); } struct cbmem_console { @@ -1339,7 +1339,7 @@ static void print_usage(const char *name, int exit_code) " -T | --parseable-timestamps: print parseable timestamps\n" " -S | --stacked-timestamps: print stacked timestamps (e.g. for flame graph tools)\n" " -a | --add-timestamp ID: append timestamp with ID\n" - " -L | --tcpa-log print TCPA log\n" + " -L | --tcpa-log print TPM log\n" " -V | --verbose: verbose (debugging) output\n" " -v | --version: print the version\n" " -h | --help: print this help\n" @@ -1677,7 +1677,7 @@ int main(int argc, char** argv) dump_timestamps(timestamp_type); if (print_tcpa_log) - dump_tcpa_log(); + dump_tpm_cb_log(); unmap_memory(&lbtable_mapping); From 817267c273269142dcca653bacf8f80d8104eab3 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 10 Nov 2022 17:56:26 +0200 Subject: [PATCH 004/213] security/tpm: make tspi/crtm.c agnostic to log format Change-Id: I3013bd5f29f1412fbe646dc74d8946704b750a66 Ticket: https://ticket.coreboot.org/issues/423 Signed-off-by: Sergii Dmytruk Reviewed-on: https://review.coreboot.org/c/coreboot/+/69445 Tested-by: build bot (Jenkins) Reviewed-by: Julius Werner --- .../include/commonlib/tpm_log_serialized.h | 3 - src/security/tpm/tspi.h | 27 +++++- src/security/tpm/tspi/crtm.c | 78 +++++++++++---- src/security/tpm/tspi/log.c | 94 +++++++++---------- 4 files changed, 132 insertions(+), 70 deletions(-) diff --git a/src/commonlib/include/commonlib/tpm_log_serialized.h b/src/commonlib/include/commonlib/tpm_log_serialized.h index dc58dc09c3e..8372f94dab6 100644 --- a/src/commonlib/include/commonlib/tpm_log_serialized.h +++ b/src/commonlib/include/commonlib/tpm_log_serialized.h @@ -6,12 +6,9 @@ #include #include -#define MAX_TPM_LOG_ENTRIES 50 #define TPM_CB_LOG_DIGEST_MAX_LENGTH 64 #define TPM_CB_LOG_PCR_HASH_NAME 50 #define TPM_CB_LOG_PCR_HASH_LEN 10 -/* Assumption of 2K TCPA log size reserved for CAR/SRAM */ -#define MAX_PRERAM_TPM_LOG_ENTRIES 15 struct tpm_cb_log_entry { uint32_t pcr; diff --git a/src/security/tpm/tspi.h b/src/security/tpm/tspi.h index aee38aa9029..b081439c443 100644 --- a/src/security/tpm/tspi.h +++ b/src/security/tpm/tspi.h @@ -10,12 +10,21 @@ #define TPM_PCR_MAX_LEN 64 #define HASH_DATA_CHUNK_SIZE 1024 +#define MAX_TPM_LOG_ENTRIES 50 +/* Assumption of 2K TCPA log size reserved for CAR/SRAM */ +#define MAX_PRERAM_TPM_LOG_ENTRIES 15 /** * Get the pointer to the single instance of global * TPM log data, and initialize it when necessary */ -struct tpm_cb_log_table *tpm_log_init(void); +void *tpm_log_init(void); + +/** + * Get the pointer to the single CBMEM instance of global + * TPM log data, and initialize it when necessary + */ +void *tpm_log_cbmem_init(void); /** * Clears the pre-RAM TPM log data and initializes @@ -23,6 +32,22 @@ struct tpm_cb_log_table *tpm_log_init(void); */ void tpm_preram_log_clear(void); +/** + * Retrieves number of entries currently stored in the log. + */ +uint16_t tpm_log_get_size(const void *log_table); + +/** + * Copies data from pre-RAM TPM log to CBMEM (RAM) log + */ +void tpm_log_copy_entries(const void *from, void *to); + +/** + * Retrieves an entry from a log. Returns non-zero on invalid index or error. + */ +int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); + /** * Add table entry for cbmem TPM log. 
* @param name Name of the hashed data diff --git a/src/security/tpm/tspi/crtm.c b/src/security/tpm/tspi/crtm.c index 6e4fadad2bd..a7efcf21453 100644 --- a/src/security/tpm/tspi/crtm.c +++ b/src/security/tpm/tspi/crtm.c @@ -2,7 +2,9 @@ #include #include +#include #include +#include #include "crtm.h" #include @@ -148,39 +150,79 @@ uint32_t tspi_cbfs_measurement(const char *name, uint32_t type, const struct vb2 tpm_log_metadata); } +void *tpm_log_init(void) +{ + static void *tclt; + + /* We are dealing here with pre CBMEM environment. + * If cbmem isn't available use CAR or SRAM */ + if (!cbmem_possibly_online() && + !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) + return _tpm_log; + else if (ENV_CREATES_CBMEM + && !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) { + tclt = tpm_log_cbmem_init(); + if (!tclt) + return _tpm_log; + } else { + tclt = tpm_log_cbmem_init(); + } + + return tclt; +} + int tspi_measure_cache_to_pcr(void) { int i; - struct tpm_cb_log_table *tclt = tpm_log_init(); + int pcr; + const char *event_name; + const uint8_t *digest_data; + enum vb2_hash_algorithm digest_algo; /* This means the table is empty. */ if (!tpm_log_available()) return VB2_SUCCESS; - if (!tclt) { + if (tpm_log_init() == NULL) { printk(BIOS_WARNING, "TPM LOG: log non-existent!\n"); return VB2_ERROR_UNKNOWN; } printk(BIOS_DEBUG, "TPM: Write digests cached in TPM log to PCR\n"); - for (i = 0; i < tclt->num_entries; i++) { - struct tpm_cb_log_entry *tce = &tclt->entries[i]; - if (tce) { - printk(BIOS_DEBUG, "TPM: Write digest for" - " %s into PCR %d\n", - tce->name, tce->pcr); - int result = tlcl_extend(tce->pcr, - tce->digest, - TPM_MEASURE_ALGO); - if (result != TPM_SUCCESS) { - printk(BIOS_ERR, "TPM: Writing digest" - " of %s into PCR failed with error" - " %d\n", - tce->name, result); - return VB2_ERROR_UNKNOWN; - } + i = 0; + while (!tpm_log_get(i++, &pcr, &digest_data, &digest_algo, &event_name)) { + printk(BIOS_DEBUG, "TPM: Write digest for %s into PCR %d\n", event_name, pcr); + int result = tlcl_extend(pcr, digest_data, digest_algo); + if (result != TPM_SUCCESS) { + printk(BIOS_ERR, + "TPM: Writing digest of %s into PCR failed with error %d\n", + event_name, result); + return VB2_ERROR_UNKNOWN; } } return VB2_SUCCESS; } + +#if !CONFIG(VBOOT_RETURN_FROM_VERSTAGE) +static void recover_tpm_log(int is_recovery) +{ + const void *preram_log = _tpm_log; + void *ram_log = tpm_log_cbmem_init(); + + if (tpm_log_get_size(preram_log) > MAX_PRERAM_TPM_LOG_ENTRIES) { + printk(BIOS_WARNING, "TPM LOG: pre-RAM log is too full, possible corruption\n"); + return; + } + + if (ram_log == NULL) { + printk(BIOS_WARNING, "TPM LOG: CBMEM not available, something went wrong\n"); + return; + } + + tpm_log_copy_entries(_tpm_log, ram_log); +} +CBMEM_CREATION_HOOK(recover_tpm_log); +#endif + +BOOT_STATE_INIT_ENTRY(BS_PAYLOAD_BOOT, BS_ON_ENTRY, tpm_log_dump, NULL); diff --git a/src/security/tpm/tspi/log.c b/src/security/tpm/tspi/log.c index fa95b80e81f..96c3087f958 100644 --- a/src/security/tpm/tspi/log.c +++ b/src/security/tpm/tspi/log.c @@ -6,10 +6,9 @@ #include #include #include -#include #include -static struct tpm_cb_log_table *tpm_log_cbmem_init(void) +void *tpm_log_cbmem_init(void) { static struct tpm_cb_log_table *tclt; if (tclt) @@ -30,27 +29,6 @@ static struct tpm_cb_log_table *tpm_log_cbmem_init(void) return tclt; } -struct tpm_cb_log_table *tpm_log_init(void) -{ - static struct tpm_cb_log_table *tclt; - - /* We are dealing here with pre CBMEM environment. 
- * If cbmem isn't available use CAR or SRAM */ - if (!cbmem_possibly_online() && - !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) - return (struct tpm_cb_log_table *)_tpm_log; - else if (ENV_CREATES_CBMEM - && !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) { - tclt = tpm_log_cbmem_init(); - if (!tclt) - return (struct tpm_cb_log_table *)_tpm_log; - } else { - tclt = tpm_log_cbmem_init(); - } - - return tclt; -} - void tpm_log_dump(void *unused) { int i, j; @@ -121,42 +99,62 @@ void tpm_preram_log_clear(void) tclt->num_entries = 0; } -#if !CONFIG(VBOOT_RETURN_FROM_VERSTAGE) -static void recover_tpm_log(int is_recovery) +int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) { - struct tpm_cb_log_table *preram_log = (struct tpm_cb_log_table *)_tpm_log; - struct tpm_cb_log_table *ram_log = NULL; - int i; + struct tpm_cb_log_table *tclt; + struct tpm_cb_log_entry *tce; + enum vb2_hash_algorithm algo; - if (preram_log->num_entries > MAX_PRERAM_TPM_LOG_ENTRIES) { - printk(BIOS_WARNING, "TPM LOG: pre-RAM log is too full, possible corruption\n"); - return; - } + tclt = tpm_log_init(); + if (!tclt) + return 1; - ram_log = tpm_log_cbmem_init(); - if (!ram_log) { - printk(BIOS_WARNING, "TPM LOG: CBMEM not available something went wrong\n"); - return; + if (entry_idx < 0 || entry_idx >= tclt->num_entries) + return 1; + + tce = &tclt->entries[entry_idx]; + + *pcr = tce->pcr; + *digest_data = tce->digest; + *event_name = tce->name; + + *digest_algo = VB2_HASH_INVALID; + for (algo = VB2_HASH_INVALID; algo != VB2_HASH_ALG_COUNT; ++algo) { + if (strcmp(tce->digest_type, vb2_hash_names[algo]) == 0) { + *digest_algo = algo; + break; + } } + return 0; +} - for (i = 0; i < preram_log->num_entries; i++) { - struct tpm_cb_log_entry *tce = &ram_log->entries[ram_log->num_entries++]; - strncpy(tce->name, preram_log->entries[i].name, TPM_CB_LOG_PCR_HASH_NAME - 1); - tce->pcr = preram_log->entries[i].pcr; +uint16_t tpm_log_get_size(const void *log_table) +{ + const struct tpm_cb_log_table *tclt = log_table; + return tclt->num_entries; +} + +void tpm_log_copy_entries(const void *from, void *to) +{ + const struct tpm_cb_log_table *from_log = from; + struct tpm_cb_log_table *to_log = to; + int i; + + for (i = 0; i < from_log->num_entries; i++) { + struct tpm_cb_log_entry *tce = &to_log->entries[to_log->num_entries++]; + strncpy(tce->name, from_log->entries[i].name, TPM_CB_LOG_PCR_HASH_NAME - 1); + tce->pcr = from_log->entries[i].pcr; - if (preram_log->entries[i].digest_length > TPM_CB_LOG_DIGEST_MAX_LENGTH) { + if (from_log->entries[i].digest_length > TPM_CB_LOG_DIGEST_MAX_LENGTH) { printk(BIOS_WARNING, "TPM LOG: PCR digest too long for log entry\n"); return; } - strncpy(tce->digest_type, preram_log->entries[i].digest_type, + strncpy(tce->digest_type, from_log->entries[i].digest_type, TPM_CB_LOG_PCR_HASH_LEN - 1); - tce->digest_length = MIN(preram_log->entries[i].digest_length, + tce->digest_length = MIN(from_log->entries[i].digest_length, TPM_CB_LOG_DIGEST_MAX_LENGTH); - memcpy(tce->digest, preram_log->entries[i].digest, tce->digest_length); + memcpy(tce->digest, from_log->entries[i].digest, tce->digest_length); } } -CBMEM_CREATION_HOOK(recover_tpm_log); -#endif - -BOOT_STATE_INIT_ENTRY(BS_PAYLOAD_BOOT, BS_ON_ENTRY, tpm_log_dump, NULL); From a1807c8793237987f08ddb221c52640a02991a6b Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Oct 2022 00:34:32 +0300 Subject: [PATCH 005/213] security/tpm: add TPM log format as per 1.2 spec MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used by default for all boards with TPM1 which don't specify log format explicitly. Ticket: https://ticket.coreboot.org/issues/423 Change-Id: I89720615a75573d44dd0a39ad3d7faa78f125843 Signed-off-by: Michał Żygowski Signed-off-by: Sergii Dmytruk Reviewed-on: https://review.coreboot.org/c/coreboot/+/68747 Tested-by: build bot (Jenkins) Reviewed-by: Julius Werner --- .../bsd/include/commonlib/bsd/cbmem_id.h | 6 +- .../bsd/include/commonlib/bsd/tpm_log_defs.h | 42 ++++ src/security/tpm/Kconfig | 7 + src/security/tpm/Makefile.inc | 6 + src/security/tpm/tpm1_log_serialized.h | 47 +++++ src/security/tpm/tspi.h | 69 +++++-- src/security/tpm/tspi/crtm.h | 2 + src/security/tpm/tspi/log-tpm1.c | 179 ++++++++++++++++++ src/security/tpm/tspi/log.c | 23 +-- src/security/tpm/tspi/logs.h | 39 ++++ src/security/tpm/tspi/tspi.c | 1 + 11 files changed, 396 insertions(+), 25 deletions(-) create mode 100644 src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h create mode 100644 src/security/tpm/tpm1_log_serialized.h create mode 100644 src/security/tpm/tspi/log-tpm1.c create mode 100644 src/security/tpm/tspi/logs.h diff --git a/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h b/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h index 7f2824e70fa..76e495af720 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h +++ b/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h @@ -59,10 +59,10 @@ #define CBMEM_ID_STAGEx_CACHE 0x57a9e100 #define CBMEM_ID_STAGEx_RAW 0x57a9e200 #define CBMEM_ID_STORAGE_DATA 0x53746f72 -#define CBMEM_ID_TPM_CB_LOG 0x54435041 -#define CBMEM_ID_TCPA_TCG_LOG 0x54445041 +#define CBMEM_ID_TPM_CB_LOG 0x54435041 /* TPM log in coreboot-specific format */ +#define CBMEM_ID_TCPA_TCG_LOG 0x54445041 /* TPM log per TPM 1.2 specification */ #define CBMEM_ID_TIMESTAMP 0x54494d45 -#define CBMEM_ID_TPM2_TCG_LOG 0x54504d32 +#define CBMEM_ID_TPM2_TCG_LOG 0x54504d32 /* TPM log per TPM 2.0 specification */ #define CBMEM_ID_TPM_PPI 0x54505049 #define CBMEM_ID_VBOOT_HANDOFF 0x780074f0 /* deprecated */ #define CBMEM_ID_VBOOT_SEL_REG 0x780074f1 /* deprecated */ diff --git a/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h new file mode 100644 index 00000000000..a45a2127468 --- /dev/null +++ b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef COMMONLIB_BSD_TPM_LOG_DEFS_H +#define COMMONLIB_BSD_TPM_LOG_DEFS_H + +#include +#include + +#define TCPA_SPEC_ID_EVENT_SIGNATURE "Spec ID Event00" +#define TCG_EFI_SPEC_ID_EVENT_SIGNATURE "Spec ID Event03" + +#define EV_PREBOOT_CERT 0x00000000 +#define EV_POST_CODE 0x00000001 +#define EV_UNUSED 0x00000002 +#define EV_NO_ACTION 0x00000003 +#define EV_SEPARATOR 0x00000004 +#define EV_ACTION 0x00000005 +#define EV_EVENT_TAG 0x00000006 +#define EV_S_CRTM_CONTENTS 0x00000007 +#define EV_S_CRTM_VERSION 0x00000008 +#define EV_CPU_MICROCODE 0x00000009 +#define EV_PLATFORM_CONFIG_FLAGS 0x0000000A +#define EV_TABLE_OF_DEVICES 0x0000000B +#define EV_COMPACT_HASH 0x0000000C +#define EV_IPL 0x0000000D +#define EV_IPL_PARTITION_DATA 0x0000000E +#define EV_NONHOST_CODE 0x0000000F +#define EV_NONHOST_CONFIG 0x00000010 +#define EV_NONHOST_INFO 0x00000011 +#define EV_OMIT_BOOT_DEVICE_EVENTS 0x00000012 + +struct spec_id_event_data { + char signature[16]; + uint32_t platform_class; + uint8_t spec_version_minor; + uint8_t spec_version_major; + uint8_t spec_errata; + uint8_t 
reserved; + uint8_t vendor_info_size; +} __packed; + +#endif diff --git a/src/security/tpm/Kconfig b/src/security/tpm/Kconfig index 5bc817d7f31..467773dee46 100644 --- a/src/security/tpm/Kconfig +++ b/src/security/tpm/Kconfig @@ -97,12 +97,19 @@ config TPM_MEASURED_BOOT choice prompt "TPM event log format" depends on TPM_MEASURED_BOOT + default TPM_LOG_TPM1 if TPM1 default TPM_LOG_CB config TPM_LOG_CB bool "coreboot's custom format" help Custom coreboot-specific format of the log derived from TPM1 log format. +config TPM_LOG_TPM1 + bool "TPM 1.2 format" + depends on TPM1 + help + Log per TPM 1.2 specification. + See "TCG PC Client Specific Implementation Specification for Conventional BIOS". endchoice diff --git a/src/security/tpm/Makefile.inc b/src/security/tpm/Makefile.inc index 7083c00e33b..a8c25d6fd80 100644 --- a/src/security/tpm/Makefile.inc +++ b/src/security/tpm/Makefile.inc @@ -61,4 +61,10 @@ verstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c postcar-$(CONFIG_TPM_LOG_CB) += tspi/log.c bootblock-$(CONFIG_TPM_LOG_CB) += tspi/log.c +ramstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +romstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +verstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +postcar-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +bootblock-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c + endif # CONFIG_TPM_MEASURED_BOOT diff --git a/src/security/tpm/tpm1_log_serialized.h b/src/security/tpm/tpm1_log_serialized.h new file mode 100644 index 00000000000..a8b7a60798a --- /dev/null +++ b/src/security/tpm/tpm1_log_serialized.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef __TPM1_LOG_SERIALIZED_H__ +#define __TPM1_LOG_SERIALIZED_H__ + +#include +#include +#include + +#define TPM_1_LOG_DIGEST_MAX_LENGTH 20 +#define TPM_1_LOG_DATA_MAX_LENGTH 50 + +#define TPM_1_LOG_VI_MAGIC 0x31544243 /* "CBT1" in LE */ +#define TPM_1_LOG_VI_MAJOR 1 +#define TPM_1_LOG_VI_MINOR 0 + +struct tpm_1_log_entry { + uint32_t pcr; + uint32_t event_type; + uint8_t digest[TPM_1_LOG_DIGEST_MAX_LENGTH]; + uint32_t data_length; + uint8_t data[TPM_1_LOG_DATA_MAX_LENGTH]; +} __packed; + +struct tpm_1_vendor { + uint8_t reserved; + uint8_t version_major; + uint8_t version_minor; + uint32_t magic; + uint16_t max_entries; + uint16_t num_entries; + uint32_t entry_size; +} __packed; + +struct tpm_1_log_table { + /* The first entry of the log is inlined and describes the log itself */ + uint32_t pcr; + uint32_t event_type; + uint8_t digest[TPM_1_LOG_DIGEST_MAX_LENGTH]; + uint32_t spec_id_size; + struct spec_id_event_data spec_id; + struct tpm_1_vendor vendor; + + struct tpm_1_log_entry entries[0]; /* Variable number of entries */ +} __packed; + +#endif diff --git a/src/security/tpm/tspi.h b/src/security/tpm/tspi.h index b081439c443..5de0aa2dd3f 100644 --- a/src/security/tpm/tspi.h +++ b/src/security/tpm/tspi.h @@ -3,6 +3,8 @@ #ifndef TSPI_H_ #define TSPI_H_ +#include +#include #include #include #include @@ -24,29 +26,62 @@ void *tpm_log_init(void); * Get the pointer to the single CBMEM instance of global * TPM log data, and initialize it when necessary */ -void *tpm_log_cbmem_init(void); +static inline void *tpm_log_cbmem_init(void) +{ + if (CONFIG(TPM_LOG_CB)) + return tpm_cb_log_cbmem_init(); + if (CONFIG(TPM_LOG_TPM1)) + return tpm1_log_cbmem_init(); + return NULL; +} /** * Clears the pre-RAM TPM log data and initializes * any content with default values */ -void tpm_preram_log_clear(void); +static inline void tpm_preram_log_clear(void) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_preram_log_clear(); + else if 
(CONFIG(TPM_LOG_TPM1)) + tpm1_preram_log_clear(); +} /** * Retrieves number of entries currently stored in the log. */ -uint16_t tpm_log_get_size(const void *log_table); +static inline uint16_t tpm_log_get_size(const void *log_table) +{ + if (CONFIG(TPM_LOG_CB)) + return tpm_cb_log_get_size(log_table); + if (CONFIG(TPM_LOG_TPM1)) + return tpm1_log_get_size(log_table); + return 0; +} /** * Copies data from pre-RAM TPM log to CBMEM (RAM) log */ -void tpm_log_copy_entries(const void *from, void *to); +static inline void tpm_log_copy_entries(const void *from, void *to) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_log_copy_entries(from, to); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_log_copy_entries(from, to); +} /** * Retrieves an entry from a log. Returns non-zero on invalid index or error. */ -int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, - enum vb2_hash_algorithm *digest_algo, const char **event_name); +static inline int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) +{ + if (CONFIG(TPM_LOG_CB)) + return tpm_cb_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); + if (CONFIG(TPM_LOG_TPM1)) + return tpm1_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); + return 1; +} /** * Add table entry for cbmem TPM log. @@ -56,15 +91,27 @@ int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, * @param digest sets the hash extended into the tpm * @param digest_len the length of the digest */ -void tpm_log_add_table_entry(const char *name, const uint32_t pcr, - enum vb2_hash_algorithm digest_algo, - const uint8_t *digest, - const size_t digest_len); +static inline void tpm_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); +} /** * Dump TPM log entries on console */ -void tpm_log_dump(void *unused); +static inline void tpm_log_dump(void *unused) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_log_dump(); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_log_dump(); +} /** * Ask vboot for a digest and extend a TPM PCR with it. diff --git a/src/security/tpm/tspi/crtm.h b/src/security/tpm/tspi/crtm.h index 97ef09a9b72..241607757b7 100644 --- a/src/security/tpm/tspi/crtm.h +++ b/src/security/tpm/tspi/crtm.h @@ -20,6 +20,8 @@ # define TPM_MEASURE_ALGO VB2_HASH_SHA1 #elif CONFIG(TPM_LOG_CB) && CONFIG(TPM2) # define TPM_MEASURE_ALGO VB2_HASH_SHA256 +#elif CONFIG(TPM_LOG_TPM1) +# define TPM_MEASURE_ALGO VB2_HASH_SHA1 #endif #if !defined(TPM_MEASURE_ALGO) diff --git a/src/security/tpm/tspi/log-tpm1.c b/src/security/tpm/tspi/log-tpm1.c new file mode 100644 index 00000000000..52944263044 --- /dev/null +++ b/src/security/tpm/tspi/log-tpm1.c @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Unlike log.c this implements TPM log according to TPM1.2 specification + * rather than using coreboot-specific log format. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +void *tpm1_log_cbmem_init(void) +{ + static struct tpm_1_log_table *tclt; + if (tclt) + return tclt; + + if (cbmem_possibly_online()) { + size_t tpm_log_len; + struct spec_id_event_data *hdr; + + tclt = cbmem_find(CBMEM_ID_TCPA_TCG_LOG); + if (tclt) + return tclt; + + tpm_log_len = sizeof(*tclt) + MAX_TPM_LOG_ENTRIES * sizeof(tclt->entries[0]); + tclt = cbmem_add(CBMEM_ID_TCPA_TCG_LOG, tpm_log_len); + if (!tclt) + return NULL; + + memset(tclt, 0, sizeof(*tclt)); + hdr = &tclt->spec_id; + + /* Fill in first "header" entry. */ + tclt->event_type = htole32(EV_NO_ACTION); + tclt->spec_id_size = htole32(sizeof(tclt->spec_id) + sizeof(tclt->vendor)); + strcpy(hdr->signature, TCPA_SPEC_ID_EVENT_SIGNATURE); + hdr->platform_class = htole32(0x00); // client platform + hdr->spec_version_minor = 0x02; + hdr->spec_version_major = 0x01; + hdr->spec_errata = 0x01; + hdr->vendor_info_size = sizeof(tclt->vendor); + + tclt->vendor.reserved = 0; + tclt->vendor.version_major = TPM_1_LOG_VI_MAJOR; + tclt->vendor.version_minor = TPM_1_LOG_VI_MINOR; + tclt->vendor.magic = htole32(TPM_1_LOG_VI_MAGIC); + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); + tclt->vendor.entry_size = htole32(sizeof(tclt->entries[0])); + } + + return tclt; +} + +void tpm1_log_dump(void) +{ + int i, j; + struct tpm_1_log_table *tclt; + + tclt = tpm_log_init(); + if (!tclt) + return; + + printk(BIOS_INFO, "coreboot TPM 1.2 measurements:\n\n"); + for (i = 0; i < le16toh(tclt->vendor.num_entries); i++) { + struct tpm_1_log_entry *tce = &tclt->entries[i]; + + printk(BIOS_INFO, " PCR-%u ", le32toh(tce->pcr)); + + for (j = 0; j < TPM_1_LOG_DIGEST_MAX_LENGTH; j++) + printk(BIOS_INFO, "%02x", tce->digest[j]); + + printk(BIOS_INFO, " %s [%s]\n", "SHA1", (char *)tce->data); + } + printk(BIOS_INFO, "\n"); +} + +void tpm1_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) +{ + struct tpm_1_log_table *tclt; + struct tpm_1_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) { + printk(BIOS_WARNING, "TPM LOG: non-existent!\n"); + return; + } + + if (!name) { + printk(BIOS_WARNING, "TPM LOG: entry name not set\n"); + return; + } + + if (digest_algo != VB2_HASH_SHA1) { + printk(BIOS_WARNING, "TPM LOG: unsupported hash algorithm\n"); + return; + } + + if (le16toh(tclt->vendor.num_entries) >= le16toh(tclt->vendor.max_entries)) { + printk(BIOS_WARNING, "TPM LOG: log table is full\n"); + return; + } + + tce = &tclt->entries[le16toh(tclt->vendor.num_entries)]; + tclt->vendor.num_entries = htole16(le16toh(tclt->vendor.num_entries) + 1); + + tce->pcr = htole32(pcr); + tce->event_type = htole32(EV_ACTION); + + memcpy(tce->digest, digest, digest_len); + + tce->data_length = htole32(TPM_1_LOG_DATA_MAX_LENGTH); + strncpy((char *)tce->data, name, sizeof(tce->data) - 1); + tce->data[sizeof(tce->data) - 1] = '\0'; +} + +void tpm1_preram_log_clear(void) +{ + printk(BIOS_INFO, "TPM LOG: clearing the log\n"); + /* + * Pre-RAM log is only for internal use and isn't exported anywhere, hence it's header + * is not initialized. 
+ */ + struct tpm_1_log_table *tclt = (struct tpm_1_log_table *)_tpm_log; + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); +} + +int tpm1_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) +{ + struct tpm_1_log_table *tclt; + struct tpm_1_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) + return 1; + + if (entry_idx < 0 || entry_idx >= le16toh(tclt->vendor.num_entries)) + return 1; + + tce = &tclt->entries[entry_idx]; + + *pcr = le32toh(tce->pcr); + *digest_data = tce->digest; + *digest_algo = VB2_HASH_SHA1; + *event_name = (char *)tce->data; + return 0; +} + +uint16_t tpm1_log_get_size(const void *log_table) +{ + const struct tpm_1_log_table *tclt = log_table; + return le16toh(tclt->vendor.num_entries); +} + +void tpm1_log_copy_entries(const void *from, void *to) +{ + const struct tpm_1_log_table *from_log = from; + struct tpm_1_log_table *to_log = to; + int i; + + for (i = 0; i < le16toh(from_log->vendor.num_entries); i++) { + struct tpm_1_log_entry *tce = + &to_log->entries[le16toh(to_log->vendor.num_entries)]; + memcpy(tce, &from_log->entries[i], sizeof(*tce)); + + to_log->vendor.num_entries = htole16(le16toh(to_log->vendor.num_entries) + 1); + } +} diff --git a/src/security/tpm/tspi/log.c b/src/security/tpm/tspi/log.c index 96c3087f958..9a8fde347b9 100644 --- a/src/security/tpm/tspi/log.c +++ b/src/security/tpm/tspi/log.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include @@ -8,7 +9,7 @@ #include #include -void *tpm_log_cbmem_init(void) +void *tpm_cb_log_cbmem_init(void) { static struct tpm_cb_log_table *tclt; if (tclt) @@ -29,7 +30,7 @@ void *tpm_log_cbmem_init(void) return tclt; } -void tpm_log_dump(void *unused) +void tpm_cb_log_dump(void) { int i, j; struct tpm_cb_log_table *tclt; @@ -54,10 +55,10 @@ void tpm_log_dump(void *unused) printk(BIOS_INFO, "\n"); } -void tpm_log_add_table_entry(const char *name, const uint32_t pcr, - enum vb2_hash_algorithm digest_algo, - const uint8_t *digest, - const size_t digest_len) +void tpm_cb_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) { struct tpm_cb_log_table *tclt = tpm_log_init(); if (!tclt) { @@ -91,7 +92,7 @@ void tpm_log_add_table_entry(const char *name, const uint32_t pcr, memcpy(tce->digest, digest, tce->digest_length); } -void tpm_preram_log_clear(void) +void tpm_cb_preram_log_clear(void) { printk(BIOS_INFO, "TPM LOG: clearing preram log\n"); struct tpm_cb_log_table *tclt = (struct tpm_cb_log_table *)_tpm_log; @@ -99,8 +100,8 @@ void tpm_preram_log_clear(void) tclt->num_entries = 0; } -int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, - enum vb2_hash_algorithm *digest_algo, const char **event_name) +int tpm_cb_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) { struct tpm_cb_log_table *tclt; struct tpm_cb_log_entry *tce; @@ -129,13 +130,13 @@ int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, return 0; } -uint16_t tpm_log_get_size(const void *log_table) +uint16_t tpm_cb_log_get_size(const void *log_table) { const struct tpm_cb_log_table *tclt = log_table; return tclt->num_entries; } -void tpm_log_copy_entries(const void *from, void *to) +void tpm_cb_log_copy_entries(const void *from, void *to) { const struct tpm_cb_log_table *from_log 
= from; struct tpm_cb_log_table *to_log = to; diff --git a/src/security/tpm/tspi/logs.h b/src/security/tpm/tspi/logs.h new file mode 100644 index 00000000000..417017628ee --- /dev/null +++ b/src/security/tpm/tspi/logs.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LOGS_H_ +#define LOGS_H_ + +#include +#include + +/* coreboot-specific TPM log format */ + +void *tpm_cb_log_init(void); +void *tpm_cb_log_cbmem_init(void); +void tpm_cb_preram_log_clear(void); +uint16_t tpm_cb_log_get_size(const void *log_table); +void tpm_cb_log_copy_entries(const void *from, void *to); +int tpm_cb_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); +void tpm_cb_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); +void tpm_cb_log_dump(void); + +/* TPM 1.2 log format */ + +void *tpm1_log_init(void); +void *tpm1_log_cbmem_init(void); +void tpm1_preram_log_clear(void); +uint16_t tpm1_log_get_size(const void *log_table); +void tpm1_log_copy_entries(const void *from, void *to); +int tpm1_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); +void tpm1_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); +void tpm1_log_dump(void); + +#endif /* LOGS_H_ */ diff --git a/src/security/tpm/tspi/tspi.c b/src/security/tpm/tspi/tspi.c index 72615fc3067..633834918cd 100644 --- a/src/security/tpm/tspi/tspi.c +++ b/src/security/tpm/tspi/tspi.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include From b8f188ccab3d8a0b687140975bf2f5c942c78eaf Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Oct 2022 00:47:55 +0300 Subject: [PATCH 006/213] security/tpm: add TPM log format as per 2.0 spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used by default for all boards with TPM2 which don't specify log format explicitly. 
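For illustration only (not part of this change), a board that enables measured
boot on a TPM2 platform and keeps the defaults introduced here would typically
end up with a configuration along these lines:

    CONFIG_TPM_MEASURED_BOOT=y
    CONFIG_TPM2=y
    CONFIG_TPM_LOG_TPM2=y
    CONFIG_TPM_HASH_SHA256=y

With that selection, TPM_MEASURE_ALGO in crtm.h resolves to VB2_HASH_SHA256 and
each log entry carries a single SHA-256 digest.
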
Change-Id: I0fac386bebab1b7104378ae3424957c6497e84e1 Ticket: https://ticket.coreboot.org/issues/422 Ticket: https://ticket.coreboot.org/issues/423 Signed-off-by: Michał Żygowski Signed-off-by: Sergii Dmytruk Reviewed-on: https://review.coreboot.org/c/coreboot/+/68748 Tested-by: build bot (Jenkins) Reviewed-by: Julius Werner --- .../bsd/include/commonlib/bsd/tpm_log_defs.h | 37 +++ src/security/tpm/Kconfig | 25 +- src/security/tpm/Makefile.inc | 6 + src/security/tpm/tpm2_log_serialized.h | 70 ++++++ src/security/tpm/tspi.h | 15 ++ src/security/tpm/tspi/crtm.h | 13 + src/security/tpm/tspi/log-tpm2.c | 230 ++++++++++++++++++ src/security/tpm/tspi/logs.h | 15 ++ 8 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 src/security/tpm/tpm2_log_serialized.h create mode 100644 src/security/tpm/tspi/log-tpm2.c diff --git a/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h index a45a2127468..144d55a3319 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h +++ b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h @@ -9,6 +9,21 @@ #define TCPA_SPEC_ID_EVENT_SIGNATURE "Spec ID Event00" #define TCG_EFI_SPEC_ID_EVENT_SIGNATURE "Spec ID Event03" +#define TPM2_ALG_ERROR 0x0000 +#define TPM2_ALG_HMAC 0x0005 +#define TPM2_ALG_NULL 0x0010 +#define TPM2_ALG_SHA1 0x0004 +#define TPM2_ALG_SHA256 0x000B +#define TPM2_ALG_SHA384 0x000C +#define TPM2_ALG_SHA512 0x000D +#define TPM2_ALG_SM3_256 0x0012 + +#define SHA1_DIGEST_SIZE 20 +#define SHA256_DIGEST_SIZE 32 +#define SHA384_DIGEST_SIZE 48 +#define SHA512_DIGEST_SIZE 64 +#define SM3_256_DIGEST_SIZE 32 + #define EV_PREBOOT_CERT 0x00000000 #define EV_POST_CODE 0x00000001 #define EV_UNUSED 0x00000002 @@ -39,4 +54,26 @@ struct spec_id_event_data { uint8_t vendor_info_size; } __packed; +struct tpm_digest_sizes { + uint16_t alg_id; + uint16_t digest_size; +} __packed; + +struct tcg_efi_spec_id_event { + uint32_t pcr_index; + uint32_t event_type; + uint8_t digest[20]; + uint32_t event_size; + uint8_t signature[16]; + uint32_t platform_class; + uint8_t spec_version_minor; + uint8_t spec_version_major; + uint8_t spec_errata; + uint8_t uintn_size; + uint32_t num_of_algorithms; + struct tpm_digest_sizes digest_sizes[0]; /* variable number of members */ + /* uint8_t vendor_info_size; */ + /* uint8_t vendor_info[vendor_info_size]; */ +} __packed; + #endif diff --git a/src/security/tpm/Kconfig b/src/security/tpm/Kconfig index 467773dee46..8466d80dbe6 100644 --- a/src/security/tpm/Kconfig +++ b/src/security/tpm/Kconfig @@ -98,7 +98,7 @@ choice prompt "TPM event log format" depends on TPM_MEASURED_BOOT default TPM_LOG_TPM1 if TPM1 - default TPM_LOG_CB + default TPM_LOG_TPM2 if TPM2 config TPM_LOG_CB bool "coreboot's custom format" @@ -110,6 +110,29 @@ config TPM_LOG_TPM1 help Log per TPM 1.2 specification. See "TCG PC Client Specific Implementation Specification for Conventional BIOS". +config TPM_LOG_TPM2 + bool "TPM 2.0 format" + depends on TPM2 + help + Log per TPM 2.0 specification. + See "TCG PC Client Platform Firmware Profile Specification". 
+ +endchoice + +choice + prompt "TPM2 hashing algorithm" + depends on TPM_MEASURED_BOOT && TPM_LOG_TPM2 + default TPM_HASH_SHA1 if TPM1 + default TPM_HASH_SHA256 if TPM2 + +config TPM_HASH_SHA1 + bool "SHA1" +config TPM_HASH_SHA256 + bool "SHA256" +config TPM_HASH_SHA384 + bool "SHA384" +config TPM_HASH_SHA512 + bool "SHA512" endchoice diff --git a/src/security/tpm/Makefile.inc b/src/security/tpm/Makefile.inc index a8c25d6fd80..ae06cb0ea6c 100644 --- a/src/security/tpm/Makefile.inc +++ b/src/security/tpm/Makefile.inc @@ -67,4 +67,10 @@ verstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c postcar-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c bootblock-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +ramstage-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +romstage-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +verstage-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +postcar-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +bootblock-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c + endif # CONFIG_TPM_MEASURED_BOOT diff --git a/src/security/tpm/tpm2_log_serialized.h b/src/security/tpm/tpm2_log_serialized.h new file mode 100644 index 00000000000..2b4e43c6354 --- /dev/null +++ b/src/security/tpm/tpm2_log_serialized.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef __TPM2_LOG_SERIALIZED_H__ +#define __TPM2_LOG_SERIALIZED_H__ + +#include + +#define TPM_20_SPEC_ID_EVENT_SIGNATURE "Spec ID Event03" +#define TPM_20_LOG_DATA_MAX_LENGTH 50 + +#define TPM_20_LOG_VI_MAGIC 0x32544243 /* "CBT2" in LE */ +#define TPM_20_LOG_VI_MAJOR 1 +#define TPM_20_LOG_VI_MINOR 0 + +/* + * TPM2.0 log entries can't be generally represented as C structures due to + * varying number of digests and their sizes. However, it works as long as + * we're only using single kind of digests. + */ +#if CONFIG(TPM_LOG_TPM2) +# if CONFIG(TPM_HASH_SHA1) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA1_DIGEST_SIZE +# endif +# if CONFIG(TPM_HASH_SHA256) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA256_DIGEST_SIZE +# endif +# if CONFIG(TPM_HASH_SHA384) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA384_DIGEST_SIZE +# endif +# if CONFIG(TPM_HASH_SHA512) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA512_DIGEST_SIZE +# endif + +# ifndef TPM_20_LOG_DIGEST_MAX_LENGTH +# error "Misconfiguration: failed to determine TPM hashing algorithm" +# endif +#else +# define TPM_20_LOG_DIGEST_MAX_LENGTH 1 /* To avoid compilation error */ +#endif + +/* TCG_PCR_EVENT2 */ +struct tpm_2_log_entry { + uint32_t pcr; + uint32_t event_type; + uint32_t digest_count; /* Always 1 in current implementation */ + uint16_t digest_type; + uint8_t digest[TPM_20_LOG_DIGEST_MAX_LENGTH]; + uint32_t data_length; + uint8_t data[TPM_20_LOG_DATA_MAX_LENGTH]; +} __packed; + +struct tpm_2_vendor { + uint8_t reserved; + uint8_t version_major; + uint8_t version_minor; + uint32_t magic; + uint16_t max_entries; + uint16_t num_entries; + uint32_t entry_size; +} __packed; + +struct tpm_2_log_table { + struct tcg_efi_spec_id_event header; /* TCG_PCR_EVENT actually */ + struct tpm_digest_sizes digest_sizes[1]; + uint8_t vendor_info_size; + struct tpm_2_vendor vendor; + struct tpm_2_log_entry entries[0]; /* Variable number of entries */ +} __packed; + +#endif diff --git a/src/security/tpm/tspi.h b/src/security/tpm/tspi.h index 5de0aa2dd3f..33f363cab6c 100644 --- a/src/security/tpm/tspi.h +++ b/src/security/tpm/tspi.h @@ -4,6 +4,7 @@ #define TSPI_H_ #include +#include #include #include #include @@ -32,6 +33,8 @@ static inline void *tpm_log_cbmem_init(void) return tpm_cb_log_cbmem_init(); if (CONFIG(TPM_LOG_TPM1)) return 
tpm1_log_cbmem_init(); + if (CONFIG(TPM_LOG_TPM2)) + return tpm2_log_cbmem_init(); return NULL; } @@ -45,6 +48,8 @@ static inline void tpm_preram_log_clear(void) tpm_cb_preram_log_clear(); else if (CONFIG(TPM_LOG_TPM1)) tpm1_preram_log_clear(); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_preram_log_clear(); } /** @@ -56,6 +61,8 @@ static inline uint16_t tpm_log_get_size(const void *log_table) return tpm_cb_log_get_size(log_table); if (CONFIG(TPM_LOG_TPM1)) return tpm1_log_get_size(log_table); + if (CONFIG(TPM_LOG_TPM2)) + return tpm2_log_get_size(log_table); return 0; } @@ -68,6 +75,8 @@ static inline void tpm_log_copy_entries(const void *from, void *to) tpm_cb_log_copy_entries(from, to); else if (CONFIG(TPM_LOG_TPM1)) tpm1_log_copy_entries(from, to); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_log_copy_entries(from, to); } /** @@ -80,6 +89,8 @@ static inline int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_da return tpm_cb_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); if (CONFIG(TPM_LOG_TPM1)) return tpm1_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); + if (CONFIG(TPM_LOG_TPM2)) + return tpm2_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); return 1; } @@ -100,6 +111,8 @@ static inline void tpm_log_add_table_entry(const char *name, const uint32_t pcr, tpm_cb_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); else if (CONFIG(TPM_LOG_TPM1)) tpm1_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); } /** @@ -111,6 +124,8 @@ static inline void tpm_log_dump(void *unused) tpm_cb_log_dump(); else if (CONFIG(TPM_LOG_TPM1)) tpm1_log_dump(); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_log_dump(); } /** diff --git a/src/security/tpm/tspi/crtm.h b/src/security/tpm/tspi/crtm.h index 241607757b7..ffa4867594c 100644 --- a/src/security/tpm/tspi/crtm.h +++ b/src/security/tpm/tspi/crtm.h @@ -22,6 +22,19 @@ # define TPM_MEASURE_ALGO VB2_HASH_SHA256 #elif CONFIG(TPM_LOG_TPM1) # define TPM_MEASURE_ALGO VB2_HASH_SHA1 +#elif CONFIG(TPM_LOG_TPM2) +# if CONFIG(TPM_HASH_SHA1) +# define TPM_MEASURE_ALGO VB2_HASH_SHA1 +# endif +# if CONFIG(TPM_HASH_SHA256) +# define TPM_MEASURE_ALGO VB2_HASH_SHA256 +# endif +# if CONFIG(TPM_HASH_SHA384) +# define TPM_MEASURE_ALGO VB2_HASH_SHA384 +# endif +# if CONFIG(TPM_HASH_SHA512) +# define TPM_MEASURE_ALGO VB2_HASH_SHA512 +# endif #endif #if !defined(TPM_MEASURE_ALGO) diff --git a/src/security/tpm/tspi/log-tpm2.c b/src/security/tpm/tspi/log-tpm2.c new file mode 100644 index 00000000000..897ccedbff3 --- /dev/null +++ b/src/security/tpm/tspi/log-tpm2.c @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Unlike log.c this implements TPM log according to TPM2.0 specification + * rather then using coreboot-specific log format. + * + * First entry is in TPM1.2 format and serves as a header, the rest are in + * a newer (agile) format which supports SHA256 and multiple hashes, but we + * store only one hash. + * + * This is defined in "TCG EFI Protocol Specification". 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static uint16_t tpmalg_from_vb2_hash(enum vb2_hash_algorithm hash_type) +{ + switch (hash_type) { + case VB2_HASH_SHA1: + return TPM2_ALG_SHA1; + case VB2_HASH_SHA256: + return TPM2_ALG_SHA256; + case VB2_HASH_SHA384: + return TPM2_ALG_SHA384; + case VB2_HASH_SHA512: + return TPM2_ALG_SHA512; + + default: + return 0xFF; + } +} + +void *tpm2_log_cbmem_init(void) +{ + static struct tpm_2_log_table *tclt; + if (tclt) + return tclt; + + if (cbmem_possibly_online()) { + size_t tpm_log_len; + struct tcg_efi_spec_id_event *hdr; + + tclt = cbmem_find(CBMEM_ID_TPM2_TCG_LOG); + if (tclt) + return tclt; + + tpm_log_len = sizeof(struct tpm_2_log_table) + + MAX_TPM_LOG_ENTRIES * sizeof(struct tpm_2_log_entry); + tclt = cbmem_add(CBMEM_ID_TPM2_TCG_LOG, tpm_log_len); + if (!tclt) + return NULL; + + memset(tclt, 0, tpm_log_len); + hdr = &tclt->header; + + hdr->event_type = htole32(EV_NO_ACTION); + hdr->event_size = htole32(33 + sizeof(tclt->vendor)); + strcpy((char *)hdr->signature, TPM_20_SPEC_ID_EVENT_SIGNATURE); + hdr->platform_class = htole32(0x00); // client platform + hdr->spec_version_minor = 0x00; + hdr->spec_version_major = 0x02; + hdr->spec_errata = 0x00; + hdr->uintn_size = 0x02; // 64-bit UINT + hdr->num_of_algorithms = htole32(1); + hdr->digest_sizes[0].alg_id = htole16(tpmalg_from_vb2_hash(TPM_MEASURE_ALGO)); + hdr->digest_sizes[0].digest_size = htole16(vb2_digest_size(TPM_MEASURE_ALGO)); + + tclt->vendor_info_size = sizeof(tclt->vendor); + tclt->vendor.reserved = 0; + tclt->vendor.version_major = TPM_20_LOG_VI_MAJOR; + tclt->vendor.version_minor = TPM_20_LOG_VI_MINOR; + tclt->vendor.magic = htole32(TPM_20_LOG_VI_MAGIC); + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); + tclt->vendor.entry_size = htole32(sizeof(struct tpm_2_log_entry)); + } + + return tclt; +} + +void tpm2_log_dump(void) +{ + int i, j; + struct tpm_2_log_table *tclt; + int hash_size; + const char *alg_name; + + tclt = tpm_log_init(); + if (!tclt) + return; + + hash_size = vb2_digest_size(TPM_MEASURE_ALGO); + alg_name = vb2_get_hash_algorithm_name(TPM_MEASURE_ALGO); + + printk(BIOS_INFO, "coreboot TPM 2.0 measurements:\n\n"); + for (i = 0; i < le16toh(tclt->vendor.num_entries); i++) { + struct tpm_2_log_entry *tce = &tclt->entries[i]; + + printk(BIOS_INFO, " PCR-%u ", le32toh(tce->pcr)); + + for (j = 0; j < hash_size; j++) + printk(BIOS_INFO, "%02x", tce->digest[j]); + + printk(BIOS_INFO, " %s [%s]\n", alg_name, tce->data); + } + printk(BIOS_INFO, "\n"); +} + +void tpm2_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) +{ + struct tpm_2_log_table *tclt; + struct tpm_2_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) { + printk(BIOS_WARNING, "TPM LOG: non-existent!\n"); + return; + } + + if (!name) { + printk(BIOS_WARNING, "TPM LOG: entry name not set\n"); + return; + } + + if (digest_algo != TPM_MEASURE_ALGO) { + printk(BIOS_WARNING, "TPM LOG: digest is of unsupported type: %s\n", + vb2_get_hash_algorithm_name(digest_algo)); + return; + } + + if (digest_len != vb2_digest_size(TPM_MEASURE_ALGO)) { + printk(BIOS_WARNING, "TPM LOG: digest has invalid length: %d\n", + (int)digest_len); + return; + } + + if (le16toh(tclt->vendor.num_entries) >= le16toh(tclt->vendor.max_entries)) { + printk(BIOS_WARNING, "TPM LOG: log table is full\n"); + return; + } + + tce = 
&tclt->entries[le16toh(tclt->vendor.num_entries)]; + tclt->vendor.num_entries = htole16(le16toh(tclt->vendor.num_entries) + 1); + + tce->pcr = htole32(pcr); + tce->event_type = htole32(EV_ACTION); + + tce->digest_count = htole32(1); + tce->digest_type = htole16(tpmalg_from_vb2_hash(TPM_MEASURE_ALGO)); + memcpy(tce->digest, digest, vb2_digest_size(TPM_MEASURE_ALGO)); + + tce->data_length = htole32(sizeof(tce->data)); + strncpy((char *)tce->data, name, sizeof(tce->data) - 1); + tce->data[sizeof(tce->data) - 1] = '\0'; +} + +int tpm2_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) +{ + struct tpm_2_log_table *tclt; + struct tpm_2_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) + return 1; + + if (entry_idx < 0 || entry_idx >= le16toh(tclt->vendor.num_entries)) + return 1; + + tce = &tclt->entries[entry_idx]; + + *pcr = le32toh(tce->pcr); + *digest_data = tce->digest; + *digest_algo = TPM_MEASURE_ALGO; /* We validate algorithm on addition */ + *event_name = (char *)tce->data; + return 0; +} + +uint16_t tpm2_log_get_size(const void *log_table) +{ + const struct tpm_2_log_table *tclt = log_table; + return le16toh(tclt->vendor.num_entries); +} + +void tpm2_preram_log_clear(void) +{ + printk(BIOS_INFO, "TPM LOG: clearing the log\n"); + /* + * Pre-RAM log is only for internal use and isn't exported anywhere, hence it's header + * is not initialized. + */ + struct tpm_2_log_table *tclt = (struct tpm_2_log_table *)_tpm_log; + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); +} + +void tpm2_log_copy_entries(const void *from, void *to) +{ + const struct tpm_2_log_table *from_log = from; + struct tpm_2_log_table *to_log = to; + int i; + + for (i = 0; i < le16toh(from_log->vendor.num_entries); i++) { + struct tpm_2_log_entry *tce = + &to_log->entries[le16toh(to_log->vendor.num_entries)]; + to_log->vendor.num_entries = htole16(le16toh(to_log->vendor.num_entries) + 1); + + tce->pcr = from_log->entries[i].pcr; + tce->event_type = from_log->entries[i].event_type; + + tce->digest_count = from_log->entries[i].digest_count; + tce->digest_type = from_log->entries[i].digest_type; + memcpy(tce->digest, from_log->entries[i].digest, sizeof(tce->digest)); + + tce->data_length = from_log->entries[i].data_length; + memcpy(tce->data, from_log->entries[i].data, sizeof(tce->data)); + } +} diff --git a/src/security/tpm/tspi/logs.h b/src/security/tpm/tspi/logs.h index 417017628ee..2d802f0bc5f 100644 --- a/src/security/tpm/tspi/logs.h +++ b/src/security/tpm/tspi/logs.h @@ -36,4 +36,19 @@ void tpm1_log_add_table_entry(const char *name, const uint32_t pcr, const size_t digest_len); void tpm1_log_dump(void); +/* TPM 2.0 log format */ + +void *tpm2_log_init(void); +void *tpm2_log_cbmem_init(void); +void tpm2_preram_log_clear(void); +uint16_t tpm2_log_get_size(const void *log_table); +void tpm2_log_copy_entries(const void *from, void *to); +int tpm2_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); +void tpm2_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); +void tpm2_log_dump(void); + #endif /* LOGS_H_ */ From 3331413de021a6cacb815bd1fad28923928a81f5 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 22 Oct 2021 01:02:32 +0300 Subject: [PATCH 007/213] drivers/ipmi: prepare for adding more interfaces De-duplicate 
common initialization code (self-test and device identification) and put it in a new ipmi_if.c unit, which is supposed to work with any underlying IPMI interface. Change-Id: Ia99da6fb63adb7bf556d3d6f7964b34831be8a2f Signed-off-by: Sergii Dmytruk --- Documentation/drivers/ipmi_kcs.md | 9 ++ src/drivers/ipmi/Makefile.inc | 4 +- src/drivers/ipmi/chip.h | 5 +- src/drivers/ipmi/ipmi_fru.c | 3 +- src/drivers/ipmi/ipmi_if.c | 101 ++++++++++++++++++++ src/drivers/ipmi/ipmi_if.h | 77 +++++++++++++++ src/drivers/ipmi/ipmi_kcs.c | 7 +- src/drivers/ipmi/ipmi_kcs.h | 56 +---------- src/drivers/ipmi/ipmi_kcs_ops.c | 77 +-------------- src/drivers/ipmi/ipmi_kcs_ops_premem.c | 113 ----------------------- src/drivers/ipmi/ipmi_ops.c | 13 +-- src/drivers/ipmi/ipmi_ops.h | 2 +- src/drivers/ipmi/ipmi_ops_premem.c | 53 +++++++++++ src/drivers/ipmi/ocp/ipmi_ocp.c | 7 +- src/drivers/ipmi/ocp/ipmi_ocp_romstage.c | 24 ++--- src/drivers/ipmi/supermicro_oem.c | 14 +-- src/mainboard/ocp/deltalake/ipmi.c | 2 +- src/mainboard/ocp/deltalake/romstage.c | 4 +- src/mainboard/ocp/tiogapass/ipmi.c | 2 +- src/mainboard/ocp/tiogapass/romstage.c | 4 +- 20 files changed, 293 insertions(+), 284 deletions(-) create mode 100644 src/drivers/ipmi/ipmi_if.c create mode 100644 src/drivers/ipmi/ipmi_if.h delete mode 100644 src/drivers/ipmi/ipmi_kcs_ops_premem.c create mode 100644 src/drivers/ipmi/ipmi_ops_premem.c diff --git a/Documentation/drivers/ipmi_kcs.md b/Documentation/drivers/ipmi_kcs.md index f6f0fb986a6..c4db492e008 100644 --- a/Documentation/drivers/ipmi_kcs.md +++ b/Documentation/drivers/ipmi_kcs.md @@ -42,6 +42,15 @@ The following registers can be set: * `gpe_interrupt` * Integer * The bit in GPE (SCI) used to notify about a change on the KCS. +* `wait_for_bmc` + * Boolean + * Wait for BMC to boot. This can be used if the BMC takes a long time to boot + after PoR: + - AST2400 on Supermicro X11SSH: 34 s +* `bmc_boot_timeout` + * Integer + * The timeout in seconds to wait for the IPMI service to be loaded. + Will be used if wait_for_bmc is true. [IPMI]: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf diff --git a/src/drivers/ipmi/Makefile.inc b/src/drivers/ipmi/Makefile.inc index e4bcf313ebc..85f3dde4374 100644 --- a/src/drivers/ipmi/Makefile.inc +++ b/src/drivers/ipmi/Makefile.inc @@ -1,8 +1,10 @@ +ramstage-$(CONFIG_IPMI_KCS) += ipmi_if.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_kcs.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_kcs_ops.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_ops.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_fru.c ramstage-$(CONFIG_DRIVERS_IPMI_SUPERMICRO_OEM) += supermicro_oem.c -romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_kcs_ops_premem.c +romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_if.c +romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops_premem.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_kcs.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops.c diff --git a/src/drivers/ipmi/chip.h b/src/drivers/ipmi/chip.h index 4e9d9e1985b..3b970c9fa40 100644 --- a/src/drivers/ipmi/chip.h +++ b/src/drivers/ipmi/chip.h @@ -8,6 +8,7 @@ #include struct drivers_ipmi_config { +#if CONFIG(IPMI_KCS) u8 bmc_i2c_address; u8 have_nv_storage; u8 nv_storage_device_address; @@ -25,6 +26,9 @@ struct drivers_ipmi_config { /* "POST complete" GPIO and polarity */ u32 post_complete_gpio; bool post_complete_invert; + unsigned int uid; /* Auto-filled by ipmi_ssdt() */ +#endif + /* * Wait for BMC to boot. 
* This can be used if the BMC takes a long time to boot after PoR: @@ -36,7 +40,6 @@ struct drivers_ipmi_config { * Will be used if wait_for_bmc is true. */ u16 bmc_boot_timeout; - unsigned int uid; /* Auto-filled by ipmi_ssdt() */ }; #endif /* _IMPI_CHIP_H_ */ diff --git a/src/drivers/ipmi/ipmi_fru.c b/src/drivers/ipmi/ipmi_fru.c index 822e5bfa23c..f16530705fe 100644 --- a/src/drivers/ipmi/ipmi_fru.c +++ b/src/drivers/ipmi/ipmi_fru.c @@ -5,6 +5,7 @@ #include #include +#include "ipmi_if.h" #include "ipmi_ops.h" #define MAX_FRU_BUSY_RETRY 5 @@ -34,7 +35,7 @@ static enum cb_err ipmi_read_fru(const int port, struct ipmi_read_fru_data_req * req->count = CONFIG_IPMI_FRU_SINGLE_RW_SZ; while (retry_count <= MAX_FRU_BUSY_RETRY) { - ret = ipmi_kcs_message(port, IPMI_NETFN_STORAGE, 0x0, + ret = ipmi_message(port, IPMI_NETFN_STORAGE, 0x0, IPMI_READ_FRU_DATA, (const unsigned char *) req, sizeof(*req), (unsigned char *) &rsp, sizeof(rsp)); if (rsp.resp.completion_code == 0x81) { diff --git a/src/drivers/ipmi/ipmi_if.c b/src/drivers/ipmi/ipmi_if.c new file mode 100644 index 00000000000..4ff90047417 --- /dev/null +++ b/src/drivers/ipmi/ipmi_if.c @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "ipmi_if.h" + +#include +#include + +#include "chip.h" + +int ipmi_get_device_id(const struct device *dev, struct ipmi_devid_rsp *rsp) +{ + int ret; + + ret = ipmi_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0, + IPMI_BMC_GET_DEVICE_ID, NULL, 0, (u8 *)rsp, + sizeof(*rsp)); + if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) { + printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n", + __func__, ret, rsp->resp.completion_code); + return 1; + } + if (ret != sizeof(*rsp)) { + printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__); + return 1; + } + return 0; +} + +static int ipmi_get_bmc_self_test_result(const struct device *dev, + struct ipmi_selftest_rsp *rsp) +{ + int ret; + + ret = ipmi_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0, + IPMI_BMC_GET_SELFTEST_RESULTS, NULL, 0, (u8 *)rsp, + sizeof(*rsp)); + + if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) { + printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n", + __func__, ret, rsp->resp.completion_code); + return 1; + } + if (ret != sizeof(*rsp)) { + printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__); + return 1; + } + + return 0; +} + +int ipmi_process_self_test_result(const struct device *dev) +{ + int failure = 0; + uint8_t retry_count = 0; + struct ipmi_selftest_rsp selftestrsp = {0}; + + const struct drivers_ipmi_config *conf = dev->chip_info; + uint8_t retry_limit = 0; + + if (conf && conf->wait_for_bmc) + retry_limit = conf->bmc_boot_timeout; + + if (retry_limit == 0) + /* Try to get self-test results at least once */ + retry_limit = 1; + + printk(BIOS_INFO, "Get BMC self test result..."); + for (retry_count = 0; retry_count < retry_limit; retry_count++) { + if (!ipmi_get_bmc_self_test_result(dev, &selftestrsp)) + break; + + mdelay(1000); + } + + switch (selftestrsp.result) { + case IPMI_APP_SELFTEST_NO_ERROR: /* 0x55 */ + printk(BIOS_DEBUG, "No Error\n"); + break; + case IPMI_APP_SELFTEST_NOT_IMPLEMENTED: /* 0x56 */ + printk(BIOS_DEBUG, "Function Not Implemented\n"); + break; + case IPMI_APP_SELFTEST_ERROR: /* 0x57 */ + printk(BIOS_ERR, "BMC: Corrupted or inaccessible data or device\n"); + failure = 1; + break; + case IPMI_APP_SELFTEST_FATAL_HW_ERROR: /* 0x58 */ + printk(BIOS_ERR, "BMC: Fatal Hardware Error\n"); + failure = 1; + break; + case 
IPMI_APP_SELFTEST_RESERVED: /* 0xFF */ + printk(BIOS_DEBUG, "Reserved\n"); + break; + + default: /* Other Device Specific Hardware Error */ + printk(BIOS_ERR, "BMC: Device Specific Error: 0x%02x\n", selftestrsp.result); + failure = 1; + break; + } + + return failure; +} diff --git a/src/drivers/ipmi/ipmi_if.h b/src/drivers/ipmi/ipmi_if.h new file mode 100644 index 00000000000..22b5db38c26 --- /dev/null +++ b/src/drivers/ipmi/ipmi_if.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IPMI_IF_H +#define __IPMI_IF_H + +/* Common API and code for different IPMI interfaces in different stages */ + +#include + +#define IPMI_NETFN_CHASSIS 0x00 +#define IPMI_NETFN_BRIDGE 0x02 +#define IPMI_NETFN_SENSOREVENT 0x04 +#define IPMI_NETFN_APPLICATION 0x06 +#define IPMI_BMC_GET_DEVICE_ID 0x01 +#define IPMI_IPMI_VERSION_MINOR(x) ((x) >> 4) +#define IPMI_IPMI_VERSION_MAJOR(x) ((x) & 0xf) +#define IPMI_BMC_GET_SELFTEST_RESULTS 0x04 +#define IPMI_APP_SELFTEST_RESERVED 0xFF +#define IPMI_APP_SELFTEST_NO_ERROR 0x55 +#define IPMI_APP_SELFTEST_NOT_IMPLEMENTED 0x56 +#define IPMI_APP_SELFTEST_ERROR 0x57 +#define IPMI_APP_SELFTEST_FATAL_HW_ERROR 0x58 + +#define IPMI_NETFN_FIRMWARE 0x08 +#define IPMI_NETFN_STORAGE 0x0a +#define IPMI_READ_FRU_DATA 0x11 +#define IPMI_ADD_SEL_ENTRY 0x44 +#define IPMI_NETFN_TRANSPORT 0x0c + +#define IPMI_CMD_ACPI_POWERON 0x06 + +struct ipmi_rsp { + uint8_t lun; + uint8_t cmd; + uint8_t completion_code; +} __packed; + +/* Get Device ID */ +struct ipmi_devid_rsp { + struct ipmi_rsp resp; + uint8_t device_id; + uint8_t device_revision; + uint8_t fw_rev1; + uint8_t fw_rev2; + uint8_t ipmi_version; + uint8_t additional_device_support; + uint8_t manufacturer_id[3]; + uint8_t product_id[2]; +} __packed; + +/* Get Self Test Results */ +struct ipmi_selftest_rsp { + struct ipmi_rsp resp; + uint8_t result; + uint8_t param; +} __packed; + +struct device; + +/* + * Sends a command and reads its response. Input buffer is for payload, but + * output includes `struct ipmi_rsp` as a header. Returns number of bytes copied + * into the buffer or -1. + */ +int ipmi_message(int port, int netfn, int lun, int cmd, + const unsigned char *inmsg, int inlen, + unsigned char *outmsg, int outlen); + +/* Run basic IPMI init functions in romstage from the provided PnP device, + * returns CB_SUCCESS on success and CB_ERR if an error occurred. 
*/ +enum cb_err ipmi_premem_init(const uint16_t port, const uint16_t device); + +int ipmi_get_device_id(const struct device *dev, struct ipmi_devid_rsp *rsp); + +int ipmi_process_self_test_result(const struct device *dev); + +#endif /* __IPMI_IF_H */ diff --git a/src/drivers/ipmi/ipmi_kcs.c b/src/drivers/ipmi/ipmi_kcs.c index 12cbe82f141..271a6a25386 100644 --- a/src/drivers/ipmi/ipmi_kcs.c +++ b/src/drivers/ipmi/ipmi_kcs.c @@ -4,6 +4,7 @@ #include #include #include +#include "ipmi_if.h" #include "ipmi_kcs.h" #define IPMI_KCS_STATE(_x) ((_x) >> 6) @@ -219,9 +220,9 @@ static int ipmi_kcs_read_message(int port, unsigned char *msg, int len) return ret; } -int ipmi_kcs_message(int port, int netfn, int lun, int cmd, - const unsigned char *inmsg, int inlen, - unsigned char *outmsg, int outlen) +int ipmi_message(int port, int netfn, int lun, int cmd, + const unsigned char *inmsg, int inlen, + unsigned char *outmsg, int outlen) { if (ipmi_kcs_send_message(port, netfn, lun, cmd, inmsg, inlen)) { printk(BIOS_ERR, "ipmi_kcs_send_message failed\n"); diff --git a/src/drivers/ipmi/ipmi_kcs.h b/src/drivers/ipmi/ipmi_kcs.h index 33ddd5f016f..17298b1038c 100644 --- a/src/drivers/ipmi/ipmi_kcs.h +++ b/src/drivers/ipmi/ipmi_kcs.h @@ -3,62 +3,8 @@ #ifndef __IPMI_KCS_H #define __IPMI_KCS_H -#define IPMI_NETFN_CHASSIS 0x00 -#define IPMI_NETFN_BRIDGE 0x02 -#define IPMI_NETFN_SENSOREVENT 0x04 -#define IPMI_NETFN_APPLICATION 0x06 -#define IPMI_BMC_GET_DEVICE_ID 0x01 -#define IPMI_IPMI_VERSION_MINOR(x) ((x) >> 4) -#define IPMI_IPMI_VERSION_MAJOR(x) ((x) & 0xf) -#define IPMI_BMC_GET_SELFTEST_RESULTS 0x04 -#define IPMI_APP_SELFTEST_RESERVED 0xFF -#define IPMI_APP_SELFTEST_NO_ERROR 0x55 -#define IPMI_APP_SELFTEST_NOT_IMPLEMENTED 0x56 -#define IPMI_APP_SELFTEST_ERROR 0x57 -#define IPMI_APP_SELFTEST_FATAL_HW_ERROR 0x58 - -#define IPMI_NETFN_FIRMWARE 0x08 -#define IPMI_NETFN_STORAGE 0x0a -#define IPMI_READ_FRU_DATA 0x11 -#define IPMI_ADD_SEL_ENTRY 0x44 -#define IPMI_NETFN_TRANSPORT 0x0c - -#define IPMI_CMD_ACPI_POWERON 0x06 - -extern int ipmi_kcs_message(int port, int netfn, int lun, int cmd, - const unsigned char *inmsg, int inlen, - unsigned char *outmsg, int outlen); - -/* Run basic IPMI init functions in romstage from the provided PnP device, - * returns CB_SUCCESS on success and CB_ERR if an error occurred. 
*/ -enum cb_err ipmi_kcs_premem_init(const u16 port, const u16 device); +#include void ipmi_bmc_version(uint8_t *ipmi_bmc_major_revision, uint8_t *ipmi_bmc_minor_revision); -struct ipmi_rsp { - uint8_t lun; - uint8_t cmd; - uint8_t completion_code; -} __packed; - -/* Get Device ID */ -struct ipmi_devid_rsp { - struct ipmi_rsp resp; - uint8_t device_id; - uint8_t device_revision; - uint8_t fw_rev1; - uint8_t fw_rev2; - uint8_t ipmi_version; - uint8_t additional_device_support; - uint8_t manufacturer_id[3]; - uint8_t product_id[2]; -} __packed; - -/* Get Self Test Results */ -struct ipmi_selftest_rsp { - struct ipmi_rsp resp; - uint8_t result; - uint8_t param; -} __packed; - #endif diff --git a/src/drivers/ipmi/ipmi_kcs_ops.c b/src/drivers/ipmi/ipmi_kcs_ops.c index 4ffa91fe231..48b8e065ef0 100644 --- a/src/drivers/ipmi/ipmi_kcs_ops.c +++ b/src/drivers/ipmi/ipmi_kcs_ops.c @@ -25,6 +25,7 @@ #include #include #include "ipmi_kcs.h" +#include "ipmi_if.h" #include "ipmi_supermicro_oem.h" #include "chip.h" @@ -37,46 +38,6 @@ static u8 bmc_revision_minor = 0x0; static struct boot_state_callback bscb_post_complete; -static int ipmi_get_device_id(struct device *dev, struct ipmi_devid_rsp *rsp) -{ - int ret; - - ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0, - IPMI_BMC_GET_DEVICE_ID, NULL, 0, (u8 *)rsp, - sizeof(*rsp)); - if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) { - printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n", - __func__, ret, rsp->resp.completion_code); - return 1; - } - if (ret != sizeof(*rsp)) { - printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__); - return 1; - } - return 0; -} - -static int ipmi_get_bmc_self_test_result(struct device *dev, struct ipmi_selftest_rsp *rsp) -{ - int ret; - - ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0, - IPMI_BMC_GET_SELFTEST_RESULTS, NULL, 0, (u8 *)rsp, - sizeof(*rsp)); - - if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) { - printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n", - __func__, ret, rsp->resp.completion_code); - return 1; - } - if (ret != sizeof(*rsp)) { - printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__); - return 1; - } - - return 0; -} - static void bmc_set_post_complete_gpio_callback(void *arg) { struct drivers_ipmi_config *conf = arg; @@ -103,8 +64,6 @@ static void ipmi_kcs_init(struct device *dev) uint32_t man_id = 0, prod_id = 0; struct drivers_ipmi_config *conf = dev->chip_info; const struct gpio_operations *gpio_ops; - struct ipmi_selftest_rsp selftestrsp = {0}; - uint8_t retry_count; if (!conf) { printk(BIOS_WARNING, "IPMI: chip_info is missing! 
Skip init.\n"); @@ -154,41 +113,9 @@ static void ipmi_kcs_init(struct device *dev) } } - printk(BIOS_INFO, "Get BMC self test result..."); - for (retry_count = 0; retry_count < conf->bmc_boot_timeout; retry_count++) { - if (!ipmi_get_bmc_self_test_result(dev, &selftestrsp)) - break; - - mdelay(1000); - } - - switch (selftestrsp.result) { - case IPMI_APP_SELFTEST_NO_ERROR: /* 0x55 */ - printk(BIOS_DEBUG, "No Error\n"); - break; - case IPMI_APP_SELFTEST_NOT_IMPLEMENTED: /* 0x56 */ - printk(BIOS_DEBUG, "Function Not Implemented\n"); - break; - case IPMI_APP_SELFTEST_ERROR: /* 0x57 */ - printk(BIOS_ERR, "BMC: Corrupted or inaccessible data or device\n"); - /* Don't write tables if communication failed */ - dev->enabled = 0; - break; - case IPMI_APP_SELFTEST_FATAL_HW_ERROR: /* 0x58 */ - printk(BIOS_ERR, "BMC: Fatal Hardware Error\n"); - /* Don't write tables if communication failed */ - dev->enabled = 0; - break; - case IPMI_APP_SELFTEST_RESERVED: /* 0xFF */ - printk(BIOS_DEBUG, "Reserved\n"); - break; - - default: /* Other Device Specific Hardware Error */ - printk(BIOS_ERR, "BMC: Device Specific Error\n"); + if (ipmi_process_self_test_result(dev)) /* Don't write tables if communication failed */ dev->enabled = 0; - break; - } if (!ipmi_get_device_id(dev, &rsp)) { /* Queried the IPMI revision from BMC */ diff --git a/src/drivers/ipmi/ipmi_kcs_ops_premem.c b/src/drivers/ipmi/ipmi_kcs_ops_premem.c deleted file mode 100644 index e1ae0dc3e5b..00000000000 --- a/src/drivers/ipmi/ipmi_kcs_ops_premem.c +++ /dev/null @@ -1,113 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#include -#include -#include -#include -#include - -#include "ipmi_kcs.h" -#include "chip.h" - -static int ipmi_get_bmc_self_test_result(const struct device *dev, - struct ipmi_selftest_rsp *rsp) -{ - int ret; - - ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0, - IPMI_BMC_GET_SELFTEST_RESULTS, NULL, 0, (u8 *)rsp, - sizeof(*rsp)); - - if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) { - printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n", - __func__, ret, rsp->resp.completion_code); - return 1; - } - if (ret != sizeof(*rsp)) { - printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__); - return 1; - } - - return 0; -} - -enum cb_err ipmi_kcs_premem_init(const u16 port, const u16 device) -{ - const struct drivers_ipmi_config *conf = NULL; - struct ipmi_selftest_rsp selftestrsp = {0}; - uint8_t retry_count; - const struct device *dev; - - /* Find IPMI PNP device from devicetree in romstage */ - dev = dev_find_slot_pnp(port, device); - - if (!dev) { - printk(BIOS_ERR, "IPMI: Cannot find PNP device port: %x, device %x\n", - port, device); - return CB_ERR; - } - if (!dev->enabled) { - printk(BIOS_ERR, "IPMI: device is not enabled\n"); - return CB_ERR; - } - printk(BIOS_DEBUG, "IPMI: romstage PNP KCS 0x%x\n", dev->path.pnp.port); - if (dev->chip_info) - conf = dev->chip_info; - - if (conf && conf->wait_for_bmc && conf->bmc_boot_timeout) { - struct stopwatch sw; - stopwatch_init_msecs_expire(&sw, conf->bmc_boot_timeout * 1000); - printk(BIOS_DEBUG, "IPMI: Waiting for BMC...\n"); - - while (!stopwatch_expired(&sw)) { - if (inb(dev->path.pnp.port) != 0xff) - break; - mdelay(100); - } - if (stopwatch_expired(&sw)) { - printk(BIOS_INFO, "IPMI: Waiting for BMC timed out\n"); - return CB_ERR; - } - } - - printk(BIOS_INFO, "Get BMC self test result..."); - if (conf && conf->bmc_boot_timeout) { - for (retry_count = 0; retry_count < conf->bmc_boot_timeout; retry_count++) { - if 
(!ipmi_get_bmc_self_test_result(dev, &selftestrsp)) - break; - - mdelay(1000); - } - } else { - /* At least run once */ - ipmi_get_bmc_self_test_result(dev, &selftestrsp); - } - - int ret = CB_ERR; - switch (selftestrsp.result) { - case IPMI_APP_SELFTEST_NO_ERROR: /* 0x55 */ - printk(BIOS_DEBUG, "No Error\n"); - ret = CB_SUCCESS; - break; - case IPMI_APP_SELFTEST_NOT_IMPLEMENTED: /* 0x56 */ - printk(BIOS_DEBUG, "Function Not Implemented\n"); - ret = CB_SUCCESS; - break; - case IPMI_APP_SELFTEST_ERROR: /* 0x57 */ - printk(BIOS_ERR, "Corrupted or inaccessible data or device\n"); - break; - case IPMI_APP_SELFTEST_FATAL_HW_ERROR: /* 0x58 */ - printk(BIOS_ERR, "Fatal Hardware Error\n"); - break; - case IPMI_APP_SELFTEST_RESERVED: /* 0xFF */ - printk(BIOS_DEBUG, "Reserved\n"); - ret = CB_SUCCESS; - break; - - default: /* Other Device Specific Hardware Error */ - printk(BIOS_ERR, "Device Specific Error 0x%x 0x%x\n", selftestrsp.result, - selftestrsp.param); - break; - } - return ret; -} diff --git a/src/drivers/ipmi/ipmi_ops.c b/src/drivers/ipmi/ipmi_ops.c index 73a02e1f37c..d9b3256eec3 100644 --- a/src/drivers/ipmi/ipmi_ops.c +++ b/src/drivers/ipmi/ipmi_ops.c @@ -2,6 +2,7 @@ #include #include "ipmi_ops.h" +#include "ipmi_if.h" #include #include @@ -18,7 +19,7 @@ enum cb_err ipmi_init_and_start_bmc_wdt(const int port, uint16_t countdown, /* clear BIOS FRB2 expiration flag */ req.timer_use_expiration_flags_clr = 2; req.initial_countdown_val = countdown; - ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0, + ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0, IPMI_BMC_SET_WDG_TIMER, (const unsigned char *) &req, sizeof(req), (unsigned char *) &rsp, sizeof(rsp)); @@ -32,7 +33,7 @@ enum cb_err ipmi_init_and_start_bmc_wdt(const int port, uint16_t countdown, } /* Reset command to start timer */ - ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0, + ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0, IPMI_BMC_RESET_WDG_TIMER, NULL, 0, (unsigned char *) &rsp, sizeof(rsp)); @@ -56,7 +57,7 @@ enum cb_err ipmi_stop_bmc_wdt(const int port) struct ipmi_rsp resp; /* Get current timer first */ - ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0, + ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0, IPMI_BMC_GET_WDG_TIMER, NULL, 0, (unsigned char *) &rsp, sizeof(rsp)); @@ -76,7 +77,7 @@ enum cb_err ipmi_stop_bmc_wdt(const int port) rsp.data.timer_use &= ~(1 << 6); rsp.data.initial_countdown_val = 0; req = rsp.data; - ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0, + ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0, IPMI_BMC_SET_WDG_TIMER, (const unsigned char *) &req, sizeof(req), (unsigned char *) &resp, sizeof(resp)); @@ -104,7 +105,7 @@ enum cb_err ipmi_get_system_guid(const int port, uint8_t *uuid) return CB_ERR; } - ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0, + ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0, IPMI_BMC_GET_SYSTEM_GUID, NULL, 0, (unsigned char *) &rsp, sizeof(rsp)); @@ -128,7 +129,7 @@ enum cb_err ipmi_add_sel(const int port, struct sel_event_record *sel) return CB_ERR; } - ret = ipmi_kcs_message(port, IPMI_NETFN_STORAGE, 0x0, + ret = ipmi_message(port, IPMI_NETFN_STORAGE, 0x0, IPMI_ADD_SEL_ENTRY, (const unsigned char *) sel, 16, (unsigned char *) &rsp, sizeof(rsp)); diff --git a/src/drivers/ipmi/ipmi_ops.h b/src/drivers/ipmi/ipmi_ops.h index d900272e38a..7a92a28121a 100644 --- a/src/drivers/ipmi/ipmi_ops.h +++ b/src/drivers/ipmi/ipmi_ops.h @@ -4,7 +4,7 @@ #define __IPMI_OPS_H #include -#include "ipmi_kcs.h" +#include "ipmi_if.h" 
#define IPMI_BMC_RESET_WDG_TIMER 0x22 #define IPMI_BMC_SET_WDG_TIMER 0x24 #define IPMI_BMC_GET_WDG_TIMER 0x25 diff --git a/src/drivers/ipmi/ipmi_ops_premem.c b/src/drivers/ipmi/ipmi_ops_premem.c new file mode 100644 index 00000000000..99c5842bb3d --- /dev/null +++ b/src/drivers/ipmi/ipmi_ops_premem.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "ipmi_if.h" +#include "chip.h" + +enum cb_err ipmi_premem_init(const u16 port, const u16 device) +{ + const struct drivers_ipmi_config *conf = NULL; + const struct device *dev; + + /* Find IPMI PNP device from devicetree in romstage */ + dev = dev_find_slot_pnp(port, device); + + if (!dev) { + printk(BIOS_ERR, "IPMI: Cannot find PNP device port: %x, device %x\n", + port, device); + return CB_ERR; + } + if (!dev->enabled) { + printk(BIOS_ERR, "IPMI: device is not enabled\n"); + return CB_ERR; + } + printk(BIOS_DEBUG, "IPMI: romstage PNP KCS 0x%x\n", dev->path.pnp.port); + if (dev->chip_info) + conf = dev->chip_info; + + if (conf && conf->wait_for_bmc && conf->bmc_boot_timeout) { + struct stopwatch sw; + stopwatch_init_msecs_expire(&sw, conf->bmc_boot_timeout * 1000); + printk(BIOS_DEBUG, "IPMI: Waiting for BMC...\n"); + + while (!stopwatch_expired(&sw)) { + if (inb(dev->path.pnp.port) != 0xff) + break; + mdelay(100); + } + if (stopwatch_expired(&sw)) { + printk(BIOS_INFO, "IPMI: Waiting for BMC timed out\n"); + return CB_ERR; + } + } + + if (ipmi_process_self_test_result(dev)) + return CB_ERR; + + return CB_SUCCESS; +} diff --git a/src/drivers/ipmi/ocp/ipmi_ocp.c b/src/drivers/ipmi/ocp/ipmi_ocp.c index 11161a8ae47..9f583be934e 100644 --- a/src/drivers/ipmi/ocp/ipmi_ocp.c +++ b/src/drivers/ipmi/ocp/ipmi_ocp.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -28,8 +28,9 @@ static enum cb_err ipmi_set_ppin(struct device *dev) req.cpu1_lo = xeon_sp_ppin[1].lo; req.cpu1_hi = xeon_sp_ppin[1].hi; } - ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_OEM, 0x0, IPMI_OEM_SET_PPIN, - (const unsigned char *) &req, sizeof(req), (unsigned char *) &rsp, sizeof(rsp)); + ret = ipmi_message(dev->path.pnp.port, IPMI_NETFN_OEM, 0x0, IPMI_OEM_SET_PPIN, + (const unsigned char *) &req, sizeof(req), + (unsigned char *) &rsp, sizeof(rsp)); if (ret < sizeof(struct ipmi_rsp) || rsp.completion_code) { printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n", diff --git a/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c b/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c index 8e43d8d159e..7b0b9ea0360 100644 --- a/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c +++ b/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include -#include +#include #include "ipmi_ocp.h" @@ -10,9 +10,9 @@ enum cb_err ipmi_set_post_start(const int port) int ret; struct ipmi_rsp rsp; - ret = ipmi_kcs_message(port, IPMI_NETFN_OEM, 0x0, - IPMI_BMC_SET_POST_START, NULL, 0, (u8 *) &rsp, - sizeof(rsp)); + ret = ipmi_message(port, IPMI_NETFN_OEM, 0x0, + IPMI_BMC_SET_POST_START, NULL, 0, (u8 *) &rsp, + sizeof(rsp)); if (ret < sizeof(struct ipmi_rsp) || rsp.completion_code) { printk(BIOS_ERR, "IPMI: %s command failed (ret=%d rsp=0x%x)\n", @@ -42,10 +42,10 @@ enum cb_err ipmi_set_cmos_clear(void) /* IPMI OEM get bios boot order command to check if the valid bit and the CMOS clear bit are both set from the response BootMode byte. 
*/ - ret = ipmi_kcs_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0, - IPMI_OEM_GET_BIOS_BOOT_ORDER, - NULL, 0, - (unsigned char *) &rsp, sizeof(rsp)); + ret = ipmi_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0, + IPMI_OEM_GET_BIOS_BOOT_ORDER, + NULL, 0, + (unsigned char *) &rsp, sizeof(rsp)); if (ret < sizeof(struct ipmi_rsp) || rsp.resp.completion_code) { printk(BIOS_ERR, "IPMI: %s command failed (read ret=%d resp=0x%x)\n", @@ -56,10 +56,10 @@ enum cb_err ipmi_set_cmos_clear(void) if (!IS_CMOS_AND_VALID_BIT(rsp.data.boot_mode)) { req = rsp.data; SET_CMOS_AND_VALID_BIT(req.boot_mode); - ret = ipmi_kcs_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0, - IPMI_OEM_SET_BIOS_BOOT_ORDER, - (const unsigned char *) &req, sizeof(req), - (unsigned char *) &rsp, sizeof(rsp)); + ret = ipmi_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0, + IPMI_OEM_SET_BIOS_BOOT_ORDER, + (const unsigned char *) &req, sizeof(req), + (unsigned char *) &rsp, sizeof(rsp)); if (ret < sizeof(struct ipmi_rsp) || rsp.resp.completion_code) { printk(BIOS_ERR, "IPMI: %s command failed (sent ret=%d resp=0x%x)\n", diff --git a/src/drivers/ipmi/supermicro_oem.c b/src/drivers/ipmi/supermicro_oem.c index 9d5ffc77a46..7af4e3b8a47 100644 --- a/src/drivers/ipmi/supermicro_oem.c +++ b/src/drivers/ipmi/supermicro_oem.c @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include "ipmi_supermicro_oem.h" @@ -35,9 +35,9 @@ static void set_coreboot_ver(const uint16_t kcs_port) bios_ver.str[i] = 0; bios_ver.ver = IPMI_LUN0_AC_SET_BIOS_VER; - ret = ipmi_kcs_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING, - (const unsigned char *) &bios_ver, sizeof(bios_ver), - (unsigned char *) &rsp, sizeof(rsp)); + ret = ipmi_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING, + (const unsigned char *) &bios_ver, sizeof(bios_ver), + (unsigned char *) &rsp, sizeof(rsp)); if (ret < sizeof(rsp) || rsp.completion_code) { printk(BIOS_ERR, "BMC_IPMI: %s command failed (ret=%d resp=0x%x)\n", __func__, ret, rsp.completion_code); @@ -54,9 +54,9 @@ static void set_coreboot_date(const uint16_t kcs_port) bios_ver.str[15] = 0; bios_ver.ver = IPMI_LUN0_AC_SET_BIOS_DATE; - ret = ipmi_kcs_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING, - (const unsigned char *) &bios_ver, sizeof(bios_ver), - (unsigned char *) &rsp, sizeof(rsp)); + ret = ipmi_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING, + (const unsigned char *) &bios_ver, sizeof(bios_ver), + (unsigned char *) &rsp, sizeof(rsp)); if (ret < sizeof(rsp) || rsp.completion_code) { printk(BIOS_ERR, "BMC_IPMI: %s command failed (ret=%d resp=0x%x)\n", __func__, ret, rsp.completion_code); diff --git a/src/mainboard/ocp/deltalake/ipmi.c b/src/mainboard/ocp/deltalake/ipmi.c index 25a5a27cba6..1695d73589c 100644 --- a/src/mainboard/ocp/deltalake/ipmi.c +++ b/src/mainboard/ocp/deltalake/ipmi.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ #include -#include +#include #include #include #include diff --git a/src/mainboard/ocp/deltalake/romstage.c b/src/mainboard/ocp/deltalake/romstage.c index 05a7188da41..2efed1ca84d 100644 --- a/src/mainboard/ocp/deltalake/romstage.c +++ b/src/mainboard/ocp/deltalake/romstage.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -188,7 +188,7 @@ void mainboard_memory_init_params(FSPM_UPD *mupd) /* Since it's the first IPMI command, it's better to run get BMC selftest result first */ - if (ipmi_kcs_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) { + if 
(ipmi_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) { ipmi_set_post_start(CONFIG_BMC_KCS_BASE); init_frb2_wdt(); }
diff --git a/src/mainboard/ocp/tiogapass/ipmi.c b/src/mainboard/ocp/tiogapass/ipmi.c
index 74f96fe36c5..e97341b2b67 100644
--- a/src/mainboard/ocp/tiogapass/ipmi.c
+++ b/src/mainboard/ocp/tiogapass/ipmi.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */ #include
-#include
+#include
 #include #include #include
diff --git a/src/mainboard/ocp/tiogapass/romstage.c b/src/mainboard/ocp/tiogapass/romstage.c
index 20c74660ced..842e977d3e1 100644
--- a/src/mainboard/ocp/tiogapass/romstage.c
+++ b/src/mainboard/ocp/tiogapass/romstage.c
@@ -2,7 +2,7 @@
 #include #include
-#include
+#include
 #include #include #include
@@ -54,7 +54,7 @@ static void mainboard_config_iio(FSPM_UPD *mupd)
 void mainboard_memory_init_params(FSPM_UPD *mupd) { /* It's better to run get BMC selftest result first */
- if (ipmi_kcs_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) {
+ if (ipmi_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) {
 ipmi_set_post_start(CONFIG_BMC_KCS_BASE); init_frb2_wdt(); }

From 12e24bcbdf438eaa5c6434c1a078a7ed84f84de3 Mon Sep 17 00:00:00 2001
From: Sergii Dmytruk
Date: Fri, 22 Oct 2021 01:04:49 +0300
Subject: [PATCH 008/213] drivers/ipmi: add BT interface

Unlike the already implemented Keyboard Controller Style (KCS) interface, the
Block Transfer (BT) interface is not byte-oriented and implies that the device
is capable of buffering a command before processing it. Another difference is
that polling can be replaced with interrupts, although this implementation does
not use them.

Change-Id: Idb67972d1c38bbae04c7b4de3405350c229a05b9
Signed-off-by: Sergii Dmytruk
---
 Documentation/drivers/ipmi_bt.md | 34 +++++
 src/drivers/ipmi/Kconfig | 28 ++++
 src/drivers/ipmi/Makefile.inc | 10 ++
 src/drivers/ipmi/ipmi_bt.c | 200 +++++++++++++++++++++++++++++
 src/drivers/ipmi/ipmi_bt.h | 11 ++
 src/drivers/ipmi/ipmi_bt_ops.c | 100 +++++++++++++++
 src/drivers/ipmi/ipmi_ops_premem.c | 12 +-
 7 files changed, 394 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/drivers/ipmi_bt.md
 create mode 100644 src/drivers/ipmi/ipmi_bt.c
 create mode 100644 src/drivers/ipmi/ipmi_bt.h
 create mode 100644 src/drivers/ipmi/ipmi_bt_ops.c
diff --git a/Documentation/drivers/ipmi_bt.md b/Documentation/drivers/ipmi_bt.md
new file mode 100644
index 00000000000..18b97c6c8a0
--- /dev/null
+++ b/Documentation/drivers/ipmi_bt.md
@@ -0,0 +1,34 @@
+# IPMI BT driver
+
+The driver can be found in `src/drivers/ipmi/` (same as KCS). It works with BMCs
+that provide a BT I/O interface as specified in the [IPMI] standard.
+
+The driver detects the IPMI version and reserves the I/O space in coreboot's
+resource allocator.
+
+## For developers
+
+To use the driver, select the `IPMI_BT` Kconfig and add the following PNP
+device (for example, for a BT interface at 0xe4):
+
+```
+ chip drivers/ipmi
+ device pnp e4.0 on end # IPMI BT
+ end
+```
+
+**Note:** The I/O base address needs to be aligned to 4.
+
+The following registers can be set:
+
+* `wait_for_bmc`
+ * Boolean
+ * Wait for BMC to boot. This can be used if the BMC takes a long time to boot
+ after PoR.
+* `bmc_boot_timeout`
+ * Integer
+ * The timeout in seconds to wait for the IPMI service to be loaded. Will be used if wait_for_bmc is true.
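+
+For example (the values below are illustrative, not defaults), a board whose BMC
+needs up to a minute to become responsive after power-on could set both
+registers in its devicetree:
+
+```
+ chip drivers/ipmi
+ device pnp e4.0 on # IPMI BT
+  register "wait_for_bmc" = "1"
+  register "bmc_boot_timeout" = "60"
+ end
+ end
+```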
+ + +[IPMI]: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf diff --git a/src/drivers/ipmi/Kconfig b/src/drivers/ipmi/Kconfig index 012f67887c6..ef19ca43422 100644 --- a/src/drivers/ipmi/Kconfig +++ b/src/drivers/ipmi/Kconfig @@ -53,3 +53,31 @@ config DRIVERS_IPMI_SUPERMICRO_OEM The following features are implemented: * Communicates the BIOS version to the BMC * Communicates the BIOS date to the BMC + +config IPMI_BT + bool + default n + depends on !IPMI_KCS + +config IPMI_BT_ROMSTAGE + bool + default n + depends on IPMI_BT + help + IPMI BT support in romstage. + +config BMC_BT_BASE + hex + default 0xe4 + depends on IPMI_BT + help + The PNP base address of BMC BT. It must be equal to the + pnp port value defined in devicetree for chip drivers/ipmi. + +config IPMI_BT_TIMEOUT_MS + int + default 5000 + depends on IPMI_BT + help + The time unit is millisecond for each IPMI BT transfer. + The default is the same as for KCS as the implementation uses polling. diff --git a/src/drivers/ipmi/Makefile.inc b/src/drivers/ipmi/Makefile.inc index 85f3dde4374..83859c6c613 100644 --- a/src/drivers/ipmi/Makefile.inc +++ b/src/drivers/ipmi/Makefile.inc @@ -8,3 +8,13 @@ romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_if.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops_premem.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_kcs.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops.c + +ramstage-$(CONFIG_IPMI_BT) += ipmi_if.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_bt.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_bt_ops.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_ops.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_fru.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_if.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_ops_premem.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_bt.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_ops.c diff --git a/src/drivers/ipmi/ipmi_bt.c b/src/drivers/ipmi/ipmi_bt.c new file mode 100644 index 00000000000..0fefcef0db0 --- /dev/null +++ b/src/drivers/ipmi/ipmi_bt.c @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * IPMI specification: + * https://www.intel.com/content/www/us/en/servers/ipmi/ipmi-intelligent-platform-mgt-interface-spec-2nd-gen-v2-0-spec-update.html + * + * LUN seems to be always zero. + */ + +#include "ipmi_bt.h" + +#include +#include +#include +#include +#include +#include + +#include "ipmi_if.h" + +#define MAX_SIZE 255 +#define MAX_PAYLOAD_SIZE (MAX_SIZE - 4) + +#define BT_CTRL_INC 0 // Typical address of BT_CTRL is 0xE4 +#define HOST2BMC_INC 1 // Typical address of HOST2BMC is 0xE5 +#define BMC2HOST_INC 1 // Typical address of BMC2HOST is 0xE5 + +/* Bits of BT_CTRL */ +#define B_BUSY (1 << 7) +#define H_BUSY (1 << 6) +#define OEM0 (1 << 5) +#define EVT_ATN (1 << 4) +#define B2H_ATN (1 << 3) +#define H2B_ATN (1 << 2) +#define CLR_RD_PTR (1 << 1) +#define CLR_WR_PTR (1 << 0) + +static int wait_for_control_bit(uint16_t port, uint8_t bit, int set) +{ + uint16_t bt_ctrl_port = port + BT_CTRL_INC; + if (!wait_ms(CONFIG_IPMI_BT_TIMEOUT_MS, ((inb(bt_ctrl_port) & bit) != 0) == set)) { + printk(BIOS_ERR, "%s(0x%04x, 0x%02x, %d) timeout!\n", + __func__, port, bit, set); + return 1; + } + + return 0; +} + +int ipmi_bt_clear(uint16_t port) +{ + uint8_t bt_ctrl; + + /* + * First, set H_BUSY (if not set already) so BMC won't try to write new + * commands while we're resetting pointers. 
+ */ + if ((inb(port + BT_CTRL_INC) & H_BUSY) == 0) + outb(H_BUSY, port + BT_CTRL_INC); + + /* If BMC is already in the process of writing, wait until it's done */ + if (wait_for_control_bit(port, B_BUSY, 0)) + return 1; + + bt_ctrl = inb(port + BT_CTRL_INC); + + printk(BIOS_SPEW, "BT_CTRL = %#2.2x\n", bt_ctrl); + + /* + * Clear all bits which are already set (they are either toggle bits or + * write-1-to-clear) and reset buffer pointers. This also clears H_BUSY. + */ + outb(bt_ctrl | CLR_RD_PTR | CLR_WR_PTR, port + BT_CTRL_INC); + + return 0; +} + +static int ipmi_bt_send(uint16_t port, uint8_t addr, uint8_t cmd, + const uint8_t *payload, uint8_t payload_len, + uint8_t seq_num) +{ + uint16_t i; + uint16_t len; + uint8_t buf[MAX_SIZE]; + + len = 3 + payload_len; + + buf[0] = len; + buf[1] = addr; + buf[2] = seq_num; + buf[3] = cmd; + memcpy(&buf[4], payload, payload_len); + + /* Wait for BMC to be available */ + if (wait_for_control_bit(port, B_BUSY, 0)) + return 1; + + /* Clear write pointer */ + outb(CLR_WR_PTR, port + BT_CTRL_INC); + + /* Send our message */ + for (i = 0; i < len + 1; ++i) + outb(buf[i], port + HOST2BMC_INC); + + /* Tell BMC to process the data */ + outb(H2B_ATN, port + BT_CTRL_INC); + + return 0; +} + +static int ipmi_bt_recv(uint16_t port, uint8_t addr, uint8_t cmd, + uint8_t *response, uint8_t response_len, + uint8_t seq_num) +{ + uint16_t i; + uint16_t len; + uint8_t buf[MAX_SIZE]; + + /* Wait for BMC's response */ + if (wait_for_control_bit(port, B2H_ATN, 1)) + return -1; + + /* Tell BMC that host is busy */ + outb(H_BUSY, port + BT_CTRL_INC); + + /* Acknowledge that response is being processed */ + outb(B2H_ATN, port + BT_CTRL_INC); + + /* Clear read pointer */ + outb(CLR_RD_PTR, port + BT_CTRL_INC); + + /* Receive response */ + len = inb(port + BMC2HOST_INC); + for (i = 0; i < len; ++i) + buf[i] = inb(port + BMC2HOST_INC); + + /* Indicate that the host is done working with the buffer */ + outb(H_BUSY, port + BT_CTRL_INC); + + if (buf[0] != addr) { + printk(BIOS_ERR, + "Invalid NETFN/LUN field in IPMI BT response: 0x%02x instead of 0x%02x\n", + buf[0], addr); + goto error; + } + if (buf[1] != seq_num) { + printk(BIOS_ERR, + "Invalid SEQ field in IPMI BT response: 0x%02x instead of 0x%02x\n", + buf[1], seq_num); + goto error; + } + if (buf[2] != cmd) { + printk(BIOS_ERR, + "Invalid CMD field in IPMI BT response: 0x%02x instead of 0x%02x\n", + buf[2], cmd); + goto error; + } + + if (response_len < len) + len = response_len; + + /* + * Copy response skipping sequence number to match KCS messages. + * Sequence number is really an implementation detail anyway. 
+ */ + if (response_len != 0) + response[0] = buf[0]; + memcpy(&response[1], &buf[2], len - 1); + + return len; + +error: + printk(BIOS_ERR, " IPMI response length field: 0x%02x\n", len); + printk(BIOS_ERR, " IPMI netfn/lun: 0x%02x\n", addr); + printk(BIOS_ERR, " IPMI SEQ: 0x%02x\n", seq_num); + printk(BIOS_ERR, " IPMI command: 0x%02x\n", cmd); + return -1; +} + +int ipmi_message(int port, int netfn, int lun, int cmd, + const uint8_t *payload, int payload_len, + uint8_t *response, int response_len) +{ + static uint8_t seq_num = 0xff; + + uint8_t addr; + + assert(payload_len >= 0 && payload_len < MAX_PAYLOAD_SIZE); + assert(netfn >= 0 && netfn <= 0x3f); + assert(lun >= 0 && lun <= 0x3); + + addr = (netfn << 2) | (lun & 0x3); + if (ipmi_bt_send(port, addr, cmd, payload, payload_len, ++seq_num)) { + printk(BIOS_ERR, "Failed to send IPMI BT command 0x%02x\n", cmd); + return -1; + } + + addr = ((netfn + 1) << 2) | (lun & 0x3); + return ipmi_bt_recv(port, addr, cmd, response, response_len, seq_num); +} diff --git a/src/drivers/ipmi/ipmi_bt.h b/src/drivers/ipmi/ipmi_bt.h new file mode 100644 index 00000000000..afd4adaf894 --- /dev/null +++ b/src/drivers/ipmi/ipmi_bt.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __IPMI_BT_H +#define __IPMI_BT_H + +#include + +/* Drops events from BMC and resets state of the BT interface, returns zero on success. */ +int ipmi_bt_clear(uint16_t port); + +#endif /* __IPMI_BT_H */ diff --git a/src/drivers/ipmi/ipmi_bt_ops.c b/src/drivers/ipmi/ipmi_bt_ops.c new file mode 100644 index 00000000000..669ca8cff82 --- /dev/null +++ b/src/drivers/ipmi/ipmi_bt_ops.c @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Place in devicetree.cb: + * + * chip drivers/ipmi + * device pnp e4.0 on end # IPMI BT + * end + */ + +#include +#include +#include + +#include "ipmi_if.h" +#include "ipmi_bt.h" + +static void ipmi_bt_init(struct device *dev) +{ + struct ipmi_devid_rsp rsp; + struct drivers_ipmi_config *conf = dev->chip_info; + + if (!conf) { + printk(BIOS_WARNING, "IPMI: chip_info is missing! 
Skip init.\n"); + return; + } + + printk(BIOS_DEBUG, "IPMI: PNP BT 0x%x\n", dev->path.pnp.port); + + if (ipmi_process_self_test_result(dev)) + dev->enabled = 0; + + if (!ipmi_get_device_id(dev, &rsp)) { + uint32_t man_id = 0; + uint32_t prod_id = 0; + + /* 4 bit encoding */ + u8 ipmi_revision_minor = IPMI_IPMI_VERSION_MINOR(rsp.ipmi_version); + u8 ipmi_revision_major = IPMI_IPMI_VERSION_MAJOR(rsp.ipmi_version); + + memcpy(&man_id, rsp.manufacturer_id, sizeof(rsp.manufacturer_id)); + + memcpy(&prod_id, rsp.product_id, sizeof(rsp.product_id)); + + printk(BIOS_INFO, "IPMI: Found man_id 0x%06x, prod_id 0x%04x\n", + man_id, prod_id); + + printk(BIOS_INFO, "IPMI: Version %01x.%01x\n", + ipmi_revision_major, ipmi_revision_minor); + } else { + dev->enabled = 0; + } + + if (ipmi_bt_clear(dev->path.pnp.port)) + dev->enabled = 0; +} + +static void ipmi_set_resources(struct device *dev) +{ + struct resource *res; + + for (res = dev->resource_list; res; res = res->next) { + if (!(res->flags & IORESOURCE_ASSIGNED)) + continue; + + res->flags |= IORESOURCE_STORED; + report_resource_stored(dev, res, ""); + } +} + +static void ipmi_read_resources(struct device *dev) +{ + struct resource *res = new_resource(dev, 0); + res->base = dev->path.pnp.port; + res->size = 3; + res->flags = IORESOURCE_IO | IORESOURCE_ASSIGNED | IORESOURCE_FIXED; +} + +static struct device_operations ops = { + .read_resources = ipmi_read_resources, + .set_resources = ipmi_set_resources, + .init = ipmi_bt_init, +}; + +static void enable_dev(struct device *dev) +{ + if (dev->path.type != DEVICE_PATH_PNP) + printk(BIOS_ERR, "%s: Unsupported device type\n", + dev_path(dev)); + else if (dev->path.pnp.port & 3) + printk(BIOS_ERR, "%s: Base address needs to be aligned to 4\n", + dev_path(dev)); + else + dev->ops = &ops; +} + +struct chip_operations drivers_ipmi_ops = { + CHIP_NAME("IPMI BT") + .enable_dev = enable_dev, +}; diff --git a/src/drivers/ipmi/ipmi_ops_premem.c b/src/drivers/ipmi/ipmi_ops_premem.c index 99c5842bb3d..3aae0a8a3d6 100644 --- a/src/drivers/ipmi/ipmi_ops_premem.c +++ b/src/drivers/ipmi/ipmi_ops_premem.c @@ -9,6 +9,10 @@ #include "ipmi_if.h" #include "chip.h" +#if CONFIG(IPMI_BT) +#include "ipmi_bt.h" +#endif + enum cb_err ipmi_premem_init(const u16 port, const u16 device) { const struct drivers_ipmi_config *conf = NULL; @@ -26,7 +30,8 @@ enum cb_err ipmi_premem_init(const u16 port, const u16 device) printk(BIOS_ERR, "IPMI: device is not enabled\n"); return CB_ERR; } - printk(BIOS_DEBUG, "IPMI: romstage PNP KCS 0x%x\n", dev->path.pnp.port); + printk(BIOS_DEBUG, "IPMI: romstage PNP %s 0x%x\n", + CONFIG(IPMI_KCS) ? 
"KCS" : "BT", dev->path.pnp.port); if (dev->chip_info) conf = dev->chip_info; @@ -49,5 +54,10 @@ enum cb_err ipmi_premem_init(const u16 port, const u16 device) if (ipmi_process_self_test_result(dev)) return CB_ERR; +#if CONFIG(IPMI_BT) + if (ipmi_bt_clear(dev->path.pnp.port)) + return CB_ERR; +#endif + return CB_SUCCESS; } From d11664f8b65335403a41ec5c76e9fa34967a6527 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 14 May 2022 23:59:49 +0300 Subject: [PATCH 009/213] payloads/external/skiboot/Makefile: fix output on `make clean` skiboot's Makefile always executes $(CC) to determine whether its clang or GCC and not setting CROSS for clean target results in this annoying output (assuming `powerpc64-linux-gcc` isn't available): make[2]: powerpc64-linux-gcc: No such file or directory Change-Id: I242b2d7c1bdf1bbd70fd4e4e0605341fe8301ca5 Signed-off-by: Sergii Dmytruk --- payloads/external/skiboot/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/payloads/external/skiboot/Makefile b/payloads/external/skiboot/Makefile index 5cf630ea254..e4db9bb5616 100644 --- a/payloads/external/skiboot/Makefile +++ b/payloads/external/skiboot/Makefile @@ -32,5 +32,6 @@ distclean: clean clean: # Redefine RM because it's used like `$(RM) non-existent-file` # Also ignore useless messages about removing test files - [ ! -d $(skiboot_dir) ] || $(MAKE) -C $(skiboot_dir) RM="rm -rf" clean > /dev/null + [ ! -d $(skiboot_dir) ] || \ + $(MAKE) -C $(skiboot_dir) RM="rm -rf" CROSS="$(skiboot_cross)" clean > /dev/null rm -rf $(build_dir) From 6ef621a7cb7aec5478fadad01d2c01f2a1447a82 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 00:11:25 +0200 Subject: [PATCH 010/213] src/cpu/power9: move part of scom.h to scom.c Reset function, constants and include are not used outside of scom.c and not going to be. Change-Id: Iff4e98ae52c7099954f0c20fcb639eb87af15534 Signed-off-by: Sergii Dmytruk --- src/cpu/power9/scom.c | 34 ++++++++++++++++++---------------- src/include/cpu/power/scom.h | 5 ----- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c index e55d149bff3..3e973e52e3b 100644 --- a/src/cpu/power9/scom.c +++ b/src/cpu/power9/scom.c @@ -4,6 +4,9 @@ #include // HMER #include +#define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) +#define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) + #define XSCOM_DATA_IND_READ PPC_BIT(0) #define XSCOM_DATA_IND_COMPLETE PPC_BIT(32) #define XSCOM_DATA_IND_ERR PPC_BITMASK(33, 35) @@ -15,6 +18,21 @@ #define XSCOM_LOG_REG 0x00090012 #define XSCOM_ERR_REG 0x00090013 +static void reset_scom_engine(void) +{ + /* + * With cross-CPU SCOM accesses, first register should be cleared on the + * executing CPU, the other two on target CPU. In that case it may be + * necessary to do the remote writes in assembly directly to skip checking + * HMER and possibly end in a loop. + */ + write_scom_direct(XSCOM_RCVED_STAT_REG, 0); + write_scom_direct(XSCOM_LOG_REG, 0); + write_scom_direct(XSCOM_ERR_REG, 0); + clear_hmer(); + eieio(); +} + uint64_t read_scom_direct(uint64_t reg_address) { uint64_t val; @@ -117,19 +135,3 @@ uint64_t read_scom_indirect(uint64_t reg_address) return data & XSCOM_DATA_IND_DATA; } - -/* This function should be rarely called, don't make it inlined */ -void reset_scom_engine(void) -{ - /* - * With cross-CPU SCOM accesses, first register should be cleared on the - * executing CPU, the other two on target CPU. 
In that case it may be - * necessary to do the remote writes in assembly directly to skip checking - * HMER and possibly end in a loop. - */ - write_scom_direct(XSCOM_RCVED_STAT_REG, 0); - write_scom_direct(XSCOM_LOG_REG, 0); - write_scom_direct(XSCOM_ERR_REG, 0); - clear_hmer(); - eieio(); -} diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index f5354c83148..42eacc62483 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -30,13 +30,10 @@ // Higher bits specify indirect address #define XSCOM_ADDR_IND_FLAG PPC_BIT(0) -#define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) -#define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) #ifndef __ASSEMBLER__ #include #include -#include // TODO: these are probably specific to POWER9 typedef enum { @@ -88,8 +85,6 @@ typedef enum { EC23_CHIPLET_ID = 0x37 ///< Core23 chiplet (Quad5, EX11, C1) } chiplet_id_t; -void reset_scom_engine(void); - uint64_t read_scom_direct(uint64_t reg_address); void write_scom_direct(uint64_t reg_address, uint64_t data); From c104b8b21af7f0e1b05214a419e9f0a0f3eb2418 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 23 Feb 2021 16:37:59 +0100 Subject: [PATCH 011/213] device/dram/ddr4.c: fill missing ECC info from SPD Change-Id: I80fccfa6d108b68d6f33a3d47766205b423a41ff Signed-off-by: Krystian Hebel --- src/device/dram/ddr4.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/device/dram/ddr4.c b/src/device/dram/ddr4.c index a66ee86fd15..813222cb88b 100644 --- a/src/device/dram/ddr4.c +++ b/src/device/dram/ddr4.c @@ -248,8 +248,11 @@ int spd_decode_ddr4(struct dimm_attr_ddr4_st *dimm, spd_raw_data spd) dimm->vdd_voltage = 1200; /* calculate size */ + /* FIXME: this is wrong for 3DS devices */ dimm->size_mb = cap_per_die_mbit / 8 * bus_width / sdram_width * dimm->ranks; + dimm->ecc_extension = spd[13] & SPD_ECC_8BIT; + /* make sure we have the manufacturing information block */ if (spd_bytes_used > 320) { dimm->manufacturer_id = (spd[351] << 8) | spd[350]; From 74530e0decf39d479e396fa51f9708661c457260 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 21 May 2021 13:49:38 +0200 Subject: [PATCH 012/213] device/dram: add DDR4 MRS commands Change-Id: I9d4f048c859bc89897d50a5a07468c3375aa1dcf Signed-off-by: Krystian Hebel --- src/device/dram/ddr4.c | 221 ++++++++++++++++++++++++ src/include/device/dram/ddr4.h | 298 +++++++++++++++++++++++++++++++++ 2 files changed, 519 insertions(+) diff --git a/src/device/dram/ddr4.c b/src/device/dram/ddr4.c index 813222cb88b..314bc6b7e42 100644 --- a/src/device/dram/ddr4.c +++ b/src/device/dram/ddr4.c @@ -328,3 +328,224 @@ enum cb_err spd_add_smbios17_ddr4(const u8 channel, const u8 slot, const u16 sel return CB_SUCCESS; } + +static mrs_cmd_t ddr4_wr_to_mr0_map(u8 wr) +{ + static const u16 enc[] = {0, 1, 2, 3, 4, 5, 7, 6, 8}; + int wr_idx = wr/2 - 5; + if (wr_idx < 0 || wr_idx >= ARRAY_SIZE(enc)) + die("WR out of bounds\n"); + + return enc[wr_idx] << 9; +} + +static mrs_cmd_t ddr4_cas_to_mr0_map(u8 cas) +{ + static const u16 enc[] = + { + /* V VVV V + *111111 + *5432109876543210 */ + 0b0000000000000000, /* CL = 9 */ + 0b0000000000000100, /* CL = 10 */ + 0b0000000000010000, /* CL = 11 */ + 0b0000000000010100, /* CL = 12 */ + 0b0000000000100000, /* CL = 13 */ + 0b0000000000100100, /* CL = 14 */ + 0b0000000000110000, /* CL = 15 */ + 0b0000000000110100, /* CL = 16 */ + 0b0000000001100100, /* CL = 17 */ + 0b0000000001000000, /* CL = 18 */ + 0b0000000001110000, /* CL = 19 */ + 0b0000000001000100, /* CL = 20 */ + 0b0000000001110100, /* CL = 21 */ + 
0b0000000001010000, /* CL = 22 */ + 0b0000000001100000, /* CL = 23 */ + 0b0000000001010100, /* CL = 24 */ + 0b0001000000000000, /* CL = 25 */ + 0b0001000000000100, /* CL = 26 */ + 0b0001000000010000, /* CL = 27 (only 3DS) */ + 0b0001000000010100, /* CL = 28 */ + 0b0001000000100000, /* reserved for CL = 29 */ + 0b0001000000100100, /* CL = 30 */ + 0b0001000000110000, /* reserved for CL = 31 */ + 0b0001000000110100, /* CL = 32 */ + }; + + int cas_idx = cas - 9; + if (cas_idx < 0 || cas_idx >= ARRAY_SIZE(enc)) + die("CL out of bounds\n"); + + return enc[cas_idx]; +} + +mrs_cmd_t ddr4_get_mr0(u8 write_recovery, + enum ddr4_mr0_dll_reset dll_reset, + enum ddr4_mr0_mode mode, + u8 cas, + enum ddr4_mr0_burst_type burst_type, + enum ddr4_mr0_burst_length burst_length) +{ + mrs_cmd_t cmd = 0 << 20; + + cmd |= ddr4_wr_to_mr0_map(write_recovery); + cmd |= dll_reset << 8; + cmd |= mode << 7; + cmd |= ddr4_cas_to_mr0_map(cas); + cmd |= burst_type << 3; + cmd |= burst_length << 0; + + return cmd; +} + +mrs_cmd_t ddr4_get_mr1(enum ddr4_mr1_qoff qoff, + enum ddr4_mr1_tqds tqds, + enum ddr4_mr1_rtt_nom rtt_nom, + enum ddr4_mr1_write_leveling write_leveling, + enum ddr4_mr1_odimp output_drive_impedance, + enum ddr4_mr1_additive_latency additive_latency, + enum ddr4_mr1_dll dll_enable) +{ + mrs_cmd_t cmd = 1 << 20; + + cmd |= qoff << 12; + cmd |= tqds << 11; + cmd |= rtt_nom << 8; + cmd |= write_leveling << 7; + cmd |= output_drive_impedance << 1; + cmd |= additive_latency << 3; + cmd |= dll_enable << 0; + + return cmd; +} + +static mrs_cmd_t ddr4_cwl_to_mr2_map(u8 cwl) +{ + /* Encoding is (starting with 0): 9, 10, 11, 12, 14, 16, 18, 20 */ + if (cwl < 14) { + cwl -= 9; + } else { + cwl = (cwl - 14) / 2 + 4; + } + + return cwl << 3; +} + +mrs_cmd_t ddr4_get_mr2(enum ddr4_mr2_wr_crc wr_crc, + enum ddr4_mr2_rtt_wr rtt_wr, + enum ddr4_mr2_lp_asr self_refresh, u8 cwl) +{ + mrs_cmd_t cmd = 2 << 20; + + cmd |= wr_crc << 12; + cmd |= rtt_wr << 9; + cmd |= self_refresh << 6; + cmd |= ddr4_cwl_to_mr2_map(cwl); + + return cmd; +} + +mrs_cmd_t ddr4_get_mr3(enum ddr4_mr3_mpr_read_format mpr_read_format, + enum ddr4_mr3_wr_cmd_lat_crc_dm command_latency_crc_dm, + enum ddr4_mr3_fine_gran_ref fine_refresh, + enum ddr4_mr3_temp_sensor_readout temp_sensor, + enum ddr4_mr3_pda pda, + enum ddr4_mr3_geardown_mode geardown, + enum ddr4_mr3_mpr_operation mpr_operation, + u8 mpr_page) +{ + mrs_cmd_t cmd = 3 << 20; + + cmd |= mpr_read_format << 11; + cmd |= command_latency_crc_dm << 9; + cmd |= fine_refresh << 6; + cmd |= temp_sensor << 5; + cmd |= pda << 4; + cmd |= geardown << 3; + cmd |= mpr_operation << 2; + cmd |= (mpr_page & 3) << 0; + + return cmd; +} + +mrs_cmd_t ddr4_get_mr4(enum ddr4_mr4_hppr hppr, + enum ddr4_mr4_wr_preamble wr_preamble, + enum ddr4_mr4_rd_preamble rd_preamble, + enum ddr4_mr4_rd_preamble_training rd_preamble_train, + enum ddr4_mr4_self_refr_abort self_ref_abrt, + enum ddr4_mr4_cs_to_cmd_latency cs2cmd_lat, + enum ddr4_mr4_sppr sppr, + enum ddr4_mr4_internal_vref_mon int_vref_mon, + enum ddr4_mr4_temp_controlled_refr temp_ctrl_ref, + enum ddr4_mr4_max_pd_mode max_pd) +{ + mrs_cmd_t cmd = 4 << 20; + + cmd |= hppr << 13; + cmd |= wr_preamble << 12; + cmd |= rd_preamble << 11; + cmd |= rd_preamble_train << 10; + cmd |= self_ref_abrt << 9; + cmd |= cs2cmd_lat << 6; + cmd |= sppr << 5; + cmd |= int_vref_mon << 4; + cmd |= temp_ctrl_ref << 2; + cmd |= max_pd << 1; + + return cmd; +} + +mrs_cmd_t ddr4_get_mr5(enum ddr4_mr5_rd_dbi rd_dbi, + enum ddr4_mr5_wr_dbi wr_dbi, + enum ddr4_mr5_data_mask dm, + enum 
ddr4_mr5_rtt_park rtt_park, + enum ddr4_mr5_odt_pd odt_pd, + enum ddr4_mr5_ca_parity_lat pl) +{ + mrs_cmd_t cmd = 5 << 20; + + cmd |= rd_dbi << 12; + cmd |= wr_dbi << 11; + cmd |= dm << 10; + cmd |= rtt_park << 6; + cmd |= odt_pd << 5; + cmd |= pl << 0; + + return cmd; +} + +static mrs_cmd_t ddr4_tccd_l_to_mr6_map(u8 tccd_l) +{ + if (tccd_l < 4 || tccd_l > 8) + die("tCCD_l out of range\n"); + + return (tccd_l - 4) << 10; +} + +mrs_cmd_t ddr4_get_mr6(u8 tccd_l, + enum ddr4_mr6_vrefdq_training vrefdq_training, + enum ddr4_mr6_vrefdq_training_range range, + u8 vrefdq_value) +{ + mrs_cmd_t cmd = 6 << 20; + + cmd |= ddr4_tccd_l_to_mr6_map(tccd_l); + cmd |= vrefdq_training << 7; + cmd |= range << 6; + cmd |= vrefdq_value & 0x3F; + + return cmd; +} + +/* + * ZQCL: A16 = H, A15 = H, A14 = L, A10 = H, rest either L or H + * ZQCS: A16 = H, A15 = H, A14 = L, A10 = L, rest either L or H + */ +mrs_cmd_t ddr4_get_zqcal_cmd(enum ddr4_zqcal_ls long_short) +{ + mrs_cmd_t cmd = 1 << 16 | 1 << 15; + + cmd |= long_short << 10; + + return cmd; +} diff --git a/src/include/device/dram/ddr4.h b/src/include/device/dram/ddr4.h index ee7a1ea5f87..b185b36601f 100644 --- a/src/include/device/dram/ddr4.h +++ b/src/include/device/dram/ddr4.h @@ -74,4 +74,302 @@ enum cb_err spd_add_smbios17_ddr4(const u8 channel, const u8 slot, */ uint16_t ddr4_speed_mhz_to_reported_mts(uint16_t speed_mhz); +/** + * \brief Representation of an MRS command + * + * This represents an MRS command as seen by the DIMM. This is not a memory + * address that can be read to generate an MRS command. The mapping of CPU + * to memory pins is hardware-dependent. + * \n + * The idea is to generalize the MRS code, and only need a hardware-specific + * function to map the MRS bits to CPU address bits. An MRS command can be + * sent like: + * @code{.c} + * u32 addr; + * mrs_cmd_t mrs; + * chipset_enable_mrs_command_mode(); + * mrs = ddr4_get_mr0(rtt_wr, srt, asr, cwl) + * if (rank_has_mirrorred_pins) + * mrs = ddr4_mrs_mirror_pins(mrs); + * addr = chipset_specific_get_mrs_addr(mrs); + * volatile_read(addr); + * @endcode + * + * The MRS representation has the following structure: + * - cmd[17:0] = Address pins A[13:0] + * - cmd[21:20] = Bank address BA[1:0] + * - cmd[23:22] = Bank group BG[1:0] + * + * Address pins A[16:14] are always low for MRS commands. A17 is reserved for + * future use, cmd[19:18] is left as a placeholder in case it is needed. 
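+ *
+ * As a worked example (values are only illustrative): ddr4_get_mr1() with
+ * RTT_NOM = RZQ/4, the DLL enabled and every other argument zero returns
+ * 0x100101, i.e. BA[1:0] = 1 selects MR1, A8 is set for RTT_NOM and A0 is
+ * set for DLL enable.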
+ */ +typedef u32 mrs_cmd_t; + +/* Swap A3<->A4, A5<->A6, A7<->A8, A11<->A13, BA0<->BA1, BG0<->BG1 */ +static inline mrs_cmd_t ddr4_mrs_mirror_pins(mrs_cmd_t x) +{ + x = (x & 0x5000A8) << 1 | + (x & 0xA00150) >> 1 | + (x & ~0xF001F8); + x = (x & 0x000800) << 2 | + (x & 0x002000) >> 2 | + (x & ~0x002800); + + return x; +} + +enum ddr4_mr0_mode { + DDR4_MR0_MODE_NORMAL = 0, + DDR4_MR0_MODE_TEST = 1, +}; +enum ddr4_mr0_dll_reset { + DDR4_MR0_DLL_RESET_NO = 0, + DDR4_MR0_DLL_RESET_YES = 1, +}; +enum ddr4_mr0_burst_type { + DDR4_MR0_BURST_TYPE_SEQUENTIAL = 0, + DDR4_MR0_BURST_TYPE_INTERLEAVED = 1, +}; +enum ddr4_mr0_burst_length { + DDR4_MR0_BURST_LENGTH_FIXED_8 = 0, + DDR4_MR0_BURST_LENGTH_OTF = 1, + DDR4_MR0_BURST_LENGTH_FIXED_4 = 2, +}; + +mrs_cmd_t ddr4_get_mr0(u8 write_recovery, + enum ddr4_mr0_dll_reset dll_reset, + enum ddr4_mr0_mode mode, + u8 cas, + enum ddr4_mr0_burst_type burst_type, + enum ddr4_mr0_burst_length burst_length); + +enum ddr4_mr1_qoff { + DDR4_MR1_QOFF_ENABLE = 0, + DDR4_MR1_QOFF_DISABLE = 1, +}; +enum ddr4_mr1_tqds { + DDR4_MR1_TQDS_DISABLE = 0, + DDR4_MR1_TQDS_ENABLE = 1, +}; +enum ddr4_mr1_rtt_nom { + DDR4_MR1_RTT_NOM_OFF = 0, + DDR4_MR1_RTT_NOM_RZQ_4 = 1, + DDR4_MR1_RTT_NOM_RZQ_2 = 2, + DDR4_MR1_RTT_NOM_RZQ_6 = 3, + DDR4_MR1_RTT_NOM_RZQ_1 = 4, + DDR4_MR1_RTT_NOM_RZQ_5 = 5, + DDR4_MR1_RTT_NOM_RZQ_3 = 6, + DDR4_MR1_RTT_NOM_RZQ_7 = 7, +}; +enum ddr4_mr1_write_leveling { + DDR4_MR1_WRLVL_DISABLE = 0, + DDR4_MR1_WRLVL_ENABLE = 1, +}; +enum ddr4_mr1_additive_latency { + DDR4_MR1_AL_DISABLE = 0, + DDR4_MR1_AL_CL_MINUS_1 = 1, + DDR4_MR1_AL_CL_MINUS_2 = 2, +}; +enum ddr4_mr1_odimp { + DDR4_MR1_ODIMP_RZQ_7 = 0, + DDR4_MR1_ODIMP_RZQ_5 = 1, +}; +enum ddr4_mr1_dll { + DDR4_MR1_DLL_DISABLE = 0, + DDR4_MR1_DLL_ENABLE = 1, +}; + +mrs_cmd_t ddr4_get_mr1(enum ddr4_mr1_qoff qoff, + enum ddr4_mr1_tqds tqds, + enum ddr4_mr1_rtt_nom rtt_nom, + enum ddr4_mr1_write_leveling write_leveling, + enum ddr4_mr1_odimp output_drive_impedance, + enum ddr4_mr1_additive_latency additive_latency, + enum ddr4_mr1_dll dll_enable); + +enum ddr4_mr2_wr_crc { + DDR4_MR2_WR_CRC_DISABLE = 0, + DDR4_MR2_WR_CRC_ENABLE = 1, +}; +enum ddr4_mr2_rtt_wr { + DDR4_MR2_RTT_WR_OFF = 0, + DDR4_MR2_RTT_WR_RZQ_2 = 1, + DDR4_MR2_RTT_WR_RZQ_1 = 2, + DDR4_MR2_RTT_WR_HI_Z = 3, + DDR4_MR2_RTT_WR_RZQ_3 = 4, +}; +enum ddr4_mr2_lp_asr { + DDR4_MR2_ASR_MANUAL_NORMAL_RANGE = 0, + DDR4_MR2_ASR_MANUAL_REDUCED_RANGE = 1, + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE = 2, + DDR4_MR2_ASR_AUTO = 3, +}; + +mrs_cmd_t ddr4_get_mr2(enum ddr4_mr2_wr_crc wr_crc, + enum ddr4_mr2_rtt_wr rtt_wr, + enum ddr4_mr2_lp_asr self_refresh, u8 cwl); + +enum ddr4_mr3_mpr_read_format { + DDR4_MR3_MPR_SERIAL = 0, + DDR4_MR3_MPR_PARALLEL = 1, + DDR4_MR3_MPR_STAGGERED = 2, +}; +enum ddr4_mr3_wr_cmd_lat_crc_dm { + DDR4_MR3_CRC_DM_4 = 0, + DDR4_MR3_CRC_DM_5 = 1, + DDR4_MR3_CRC_DM_6 = 2, +}; +enum ddr4_mr3_fine_gran_ref { + DDR4_MR3_FINE_GRAN_REF_NORMAL = 0, + DDR4_MR3_FINE_GRAN_REF_FIXED_2 = 1, + DDR4_MR3_FINE_GRAN_REF_FIXED_4 = 2, + /* Two reserved values */ + DDR4_MR3_FINE_GRAN_REF_OTF_2 = 5, + DDR4_MR3_FINE_GRAN_REF_OTF_4 = 6, +}; +enum ddr4_mr3_temp_sensor_readout { + DDR4_MR3_TEMP_SENSOR_DISABLE = 0, + DDR4_MR3_TEMP_SENSOR_ENABLE = 1, +}; +enum ddr4_mr3_pda { + DDR4_MR3_PDA_DISABLE = 0, + DDR4_MR3_PDA_ENABLE = 1, +}; +enum ddr4_mr3_geardown_mode { + DDR4_MR3_GEARDOWN_1_2_RATE = 0, + DDR4_MR3_GEARDOWN_1_4_RATE = 1, +}; +enum ddr4_mr3_mpr_operation { + DDR4_MR3_MPR_NORMAL = 0, + DDR4_MR3_MPR_MPR = 1, +}; + +mrs_cmd_t ddr4_get_mr3(enum ddr4_mr3_mpr_read_format 
mpr_read_format, + enum ddr4_mr3_wr_cmd_lat_crc_dm command_latency_crc_dm, + enum ddr4_mr3_fine_gran_ref fine_refresh, + enum ddr4_mr3_temp_sensor_readout temp_sensor, + enum ddr4_mr3_pda pda, + enum ddr4_mr3_geardown_mode geardown, + enum ddr4_mr3_mpr_operation mpr_operation, + u8 mpr_page); + +enum ddr4_mr4_hppr { + DDR4_MR4_HPPR_DISABLE = 0, + DDR4_MR4_HPPR_ENABLE = 1, +}; +enum ddr4_mr4_wr_preamble { + DDR4_MR4_WR_PREAMBLE_1 = 0, + DDR4_MR4_WR_PREAMBLE_2 = 1, +}; +enum ddr4_mr4_rd_preamble { + DDR4_MR4_RD_PREAMBLE_1 = 0, + DDR4_MR4_RD_PREAMBLE_2 = 1, +}; +enum ddr4_mr4_rd_preamble_training { + DDR4_MR4_RD_PREAMBLE_TRAINING_DISABLE = 0, + DDR4_MR4_RD_PREAMBLE_TRAINING_ENABLE = 1, +}; +enum ddr4_mr4_self_refr_abort { + DDR4_MR4_SELF_REFRESH_ABORT_DISABLE = 0, + DDR4_MR4_SELF_REFRESH_ABORT_ENABLE = 1, +}; +enum ddr4_mr4_cs_to_cmd_latency { + DDR4_MR4_CS_TO_CMD_LAT_DISABLE = 0, + DDR4_MR4_CS_TO_CMD_LAT_3 = 1, + DDR4_MR4_CS_TO_CMD_LAT_4 = 2, + DDR4_MR4_CS_TO_CMD_LAT_5 = 3, + DDR4_MR4_CS_TO_CMD_LAT_6 = 4, + DDR4_MR4_CS_TO_CMD_LAT_8 = 5, +}; +enum ddr4_mr4_sppr { + DDR4_MR4_SPPR_DISABLE = 0, + DDR4_MR4_SPPR_ENABLE = 1, +}; +enum ddr4_mr4_internal_vref_mon { + DDR4_MR4_INTERNAL_VREF_MON_DISABLE = 0, + DDR4_MR4_INTERNAL_VREF_MON_ENABLE = 1, +}; +enum ddr4_mr4_temp_controlled_refr { + DDR4_MR4_TEMP_CONTROLLED_REFR_DISABLE = 0, + DDR4_MR4_TEMP_CONTROLLED_REFR_NORMAL = 2, + DDR4_MR4_TEMP_CONTROLLED_REFR_EXTENDED = 3, +}; +enum ddr4_mr4_max_pd_mode { + DDR4_MR4_MAX_PD_MODE_DISABLE = 0, + DDR4_MR4_MAX_PD_MODE_ENABLE = 1, +}; + +mrs_cmd_t ddr4_get_mr4(enum ddr4_mr4_hppr hppr, + enum ddr4_mr4_wr_preamble wr_preamble, + enum ddr4_mr4_rd_preamble rd_preamble, + enum ddr4_mr4_rd_preamble_training rd_preamble_train, + enum ddr4_mr4_self_refr_abort self_ref_abrt, + enum ddr4_mr4_cs_to_cmd_latency cs2cmd_lat, + enum ddr4_mr4_sppr sppr, + enum ddr4_mr4_internal_vref_mon int_vref_mon, + enum ddr4_mr4_temp_controlled_refr temp_ctrl_ref, + enum ddr4_mr4_max_pd_mode max_pd); + +enum ddr4_mr5_rd_dbi { + DDR4_MR5_RD_DBI_DISABLE = 0, + DDR4_MR5_RD_DBI_ENABLE = 1, +}; +enum ddr4_mr5_wr_dbi { + DDR4_MR5_WR_DBI_DISABLE = 0, + DDR4_MR5_WR_DBI_ENABLE = 1, +}; +enum ddr4_mr5_data_mask { + DDR4_MR5_DATA_MASK_DISABLE = 0, + DDR4_MR5_DATA_MASK_ENABLE = 1, +}; +enum ddr4_mr5_rtt_park { + DDR4_MR5_RTT_PARK_OFF = 0, + DDR4_MR5_RTT_PARK_RZQ_4 = 1, + DDR4_MR5_RTT_PARK_RZQ_2 = 2, + DDR4_MR5_RTT_PARK_RZQ_6 = 3, + DDR4_MR5_RTT_PARK_RZQ_1 = 4, + DDR4_MR5_RTT_PARK_RZQ_5 = 5, + DDR4_MR5_RTT_PARK_RZQ_3 = 6, + DDR4_MR5_RTT_PARK_RZQ_7 = 7, +}; +enum ddr4_mr5_odt_pd { + DDR4_MR5_ODT_PD_ACTIVADED = 0, + DDR4_MR5_ODT_PD_DEACTIVADED = 1, +}; +enum ddr4_mr5_ca_parity_lat { + DDR4_MR5_CA_PARITY_LAT_DISABLE = 0, + DDR4_MR5_CA_PARITY_LAT_4 = 1, /* 1600-2133 MT/s */ + DDR4_MR5_CA_PARITY_LAT_5 = 2, /* 2400-2666 MT/s */ + DDR4_MR5_CA_PARITY_LAT_6 = 3, /* 2933-3200 MT/s */ + DDR4_MR5_CA_PARITY_LAT_8 = 4, /* RFU */ +}; + +mrs_cmd_t ddr4_get_mr5(enum ddr4_mr5_rd_dbi rd_dbi, + enum ddr4_mr5_wr_dbi wr_dbi, + enum ddr4_mr5_data_mask dm, + enum ddr4_mr5_rtt_park rtt_park, + enum ddr4_mr5_odt_pd odt_pd, + enum ddr4_mr5_ca_parity_lat pl); + +enum ddr4_mr6_vrefdq_training { + DDR4_MR6_VREFDQ_TRAINING_DISABLE = 0, + DDR4_MR6_VREFDQ_TRAINING_ENABLE = 1, +}; +enum ddr4_mr6_vrefdq_training_range { + DDR4_MR6_VREFDQ_TRAINING_RANGE_1 = 0, /* 60% to 92.50% in 0.65% steps */ + DDR4_MR6_VREFDQ_TRAINING_RANGE_2 = 1, /* 40% to 77.50% in 0.65% steps */ +}; + +mrs_cmd_t ddr4_get_mr6(u8 tccd_l, + enum ddr4_mr6_vrefdq_training vrefdq_training, + enum 
ddr4_mr6_vrefdq_training_range range, + u8 vrefdq_value); + +enum ddr4_zqcal_ls { + DDR4_ZQCAL_SHORT = 0, + DDR4_ZQCAL_LONG = 1, +}; + +mrs_cmd_t ddr4_get_zqcal_cmd(enum ddr4_zqcal_ls long_short); + #endif /* DEVICE_DRAM_DDR4L_H */ From 56331ecf6801d7722b2ef4b7b91b59cd0664c4f2 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 25 Mar 2021 15:05:27 +0100 Subject: [PATCH 013/213] device/dram: add RCD I2C access functions Change-Id: Ie4e6cfaeae16aba1853b33d527eddebadfbd3887 Signed-off-by: Krystian Hebel --- src/device/dram/Makefile.inc | 2 +- src/device/dram/rcd.c | 214 ++++++++++++++++++++++++++++++++++ src/include/device/dram/rcd.h | 61 ++++++++++ 3 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 src/device/dram/rcd.c create mode 100644 src/include/device/dram/rcd.h diff --git a/src/device/dram/Makefile.inc b/src/device/dram/Makefile.inc index b91b128f85c..04c25759d60 100644 --- a/src/device/dram/Makefile.inc +++ b/src/device/dram/Makefile.inc @@ -1,3 +1,3 @@ -romstage-y += lpddr4.c ddr4.c ddr3.c ddr2.c ddr_common.c +romstage-y += lpddr4.c ddr4.c ddr3.c ddr2.c ddr_common.c rcd.c ramstage-y += lpddr4.c ddr4.c ddr3.c ddr2.c ddr_common.c spd.c diff --git a/src/device/dram/rcd.c b/src/device/dram/rcd.c new file mode 100644 index 00000000000..588bed79067 --- /dev/null +++ b/src/device/dram/rcd.c @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +/** + * + * Registering Clock Driver (RCD) is responsible for driving address and control + * nets on RDIMM and LRDIMM applications. Its operation is configurable by a set + * of Register Control Words (RCWs). There are two ways of accessing RCWs: + * in-band on the memory channel as an MRS commands ("MR7") or through I2C. + * + * From JESD82-31: "For changes to the control word setting, (...) the + * controller needs to wait tMRD after _the last control word access_, before + * further access _to the DRAM_ can take place". MRS is passed to rank 0 of the + * DRAM, but MR7 is reserved so it is ignored by DRAM. tMRD (8nCK) applies here, + * unless longer delay is needed for RCWs which control the clock timing (see + * JESD82-31 for list of such). This makes sense from DRAMs point of view, + * however we are talking to the Registering Clock Driver (RCD), not DRAM. From + * parts marked in the sentence above one may assume that only one delay at the + * end is necessary and RCWs can be written back to back; however, in the same + * document in table 141 tMRD is defined as "Number of clock cycles between two + * control word accesses, MRS accesses, or any DRAM commands". + * + * I2C access to RCWs is required to support byte (8b), word (16b) and double + * word (32b) write size. Bigger blocks are not required. Reads must always be + * 32b, 32b-aligned blocks, even when reading just one RCW. RCD ignores the two + * lowest bits so unaligned accesses would return shifted values. RCWs are + * tightly packed in I2C space, so it is not possible to write just one 4b RCW + * without writing its neighbor. This is especially important for F0RC06, + * Command Space Control Word, as it it able to reset the state of RCD. For this + * reason, the mentioned register has NOP command (all 1's). JESD82-31 does not + * specify timeouts required for such multi-RCWs writes, or any other writes. + * These are not MRS accesses, so it would be strange to apply those timeouts. + * Perhaps only the registers that actually change the clock settings require + * time to stabilize. 
On the other hand, I2C is relatively slow, so it is + * possible that the write itself is long enough. + * + * RCD I2C address is 0xBx (or 0x58 + DIMM number, depending on convention), it + * is located on the same bus as SPD. It uses a bus command encoding, see + * section 3.3 in JESD82-31 for description of reading and writing register + * values. + * + * This file includes only functions for access through I2C - it is generic, + * while MRS commands are passed to memory controller registers in an + * implementation specific way. + */ + +#define RCD_CMD_BEGIN 0x80 +#define RCD_CMD_END 0x40 +#define RCD_CMD_PEC 0x10 +#define RCD_CMD_RD_DWORD 0x00 +#define RCD_CMD_WR_BYTE 0x04 +#define RCD_CMD_WR_WORD 0x08 +#define RCD_CMD_WR_DWORD 0x0C +#define RCD_CMD_BUS_BYTE 0x00 +#define RCD_CMD_BUS_BLOCK 0x02 + +/* Shorthand for block transfers */ +#define RCD_CMD_BLOCK (RCD_CMD_BEGIN | RCD_CMD_END | RCD_CMD_BUS_BLOCK) + +/* Excluding size of data */ +#define RCD_CMD_BYTES 4 + +/* Use byte fields to get rid of endianness issues. */ +struct rcd_i2c_cmd { + uint8_t cmd; + uint8_t bytes; /* From next byte up to PEC (excluding) */ + uint8_t reserved; + uint8_t devfun; + uint8_t reg_h; + uint8_t reg_l; + union { /* Not used for reads, can use 1, 2 or 4 for writes */ + uint8_t bdata[4]; + uint16_t wdata[2]; + uint32_t ddata; + }; + /* Optional PEC */ +} __packed; + +#define RCD_STS_SUCCESS 0x01 +#define RCD_STS_INTERNAL_TARGET_ABORT 0x10 + +/* Always 4 bytes data + status (for block commands) */ +#define RCD_RSP_BYTES 5 + +struct rcd_i2c_rsp { + uint8_t bytes; /* From next byte up to PEC (excluding) */ + uint8_t status; + union { + uint8_t bdata[4]; + uint32_t ddata; + }; + /* Optional PEC */ +} __packed; + +static inline int rcd_readd(unsigned int bus, uint8_t slave, uint8_t reg, + uint32_t *data) +{ + struct i2c_msg seg[2]; + struct rcd_i2c_cmd cmd = { + .cmd = RCD_CMD_BLOCK | RCD_CMD_RD_DWORD, + .bytes = RCD_CMD_BYTES, + .reg_l = reg + }; + struct rcd_i2c_rsp rsp = { 0xaa, 0x55 }; + + seg[0].flags = 0; + seg[0].slave = slave; + seg[0].buf = (uint8_t *)&cmd; + seg[0].len = cmd.bytes + 2; /* + .cmd and .bytes fields */ + + i2c_transfer(bus, seg, 1); + + seg[0].len = 1; /* Send just the command again */ + seg[1].flags = I2C_M_RD; + seg[1].slave = slave; + seg[1].buf = (uint8_t *)&rsp; + seg[1].len = RCD_RSP_BYTES + 1; /* + .bytes field */ + + i2c_transfer(bus, seg, ARRAY_SIZE(seg)); + + /* Data is sent MSB to LSB, i.e. higher registers to lower, reverse it. 
*/ + *data = swab32(rsp.ddata); + + return rsp.status == RCD_STS_SUCCESS; +} + +static inline int rcd_writed(unsigned int bus, uint8_t slave, uint8_t reg, + uint32_t data) +{ + struct i2c_msg seg; + struct rcd_i2c_cmd cmd = { + .cmd = RCD_CMD_BLOCK | RCD_CMD_WR_DWORD, + .bytes = RCD_CMD_BYTES + sizeof(data), + .reg_l = reg, + .ddata = swab32(data) + }; + + seg.flags = 0; + seg.slave = slave; + seg.buf = (uint8_t *)&cmd; + seg.len = cmd.bytes + 2; /* + .cmd and .bytes fields */ + + return i2c_transfer(bus, &seg, 1); +} + +static inline int rcd_writeb(unsigned int bus, uint8_t slave, uint8_t reg, + uint8_t data) +{ + struct i2c_msg seg; + struct rcd_i2c_cmd cmd = { + .cmd = RCD_CMD_BLOCK | RCD_CMD_WR_BYTE, + .bytes = RCD_CMD_BYTES + sizeof(data), + .reg_l = reg, + .bdata[0] = data + }; + + seg.flags = 0; + seg.slave = slave; + seg.buf = (uint8_t *)&cmd; + seg.len = cmd.bytes + 2; /* + .cmd and .bytes fields */ + + return i2c_transfer(bus, &seg, 1); +} + +int rcd_write_reg(unsigned int bus, uint8_t slave, enum rcw_idx reg, + uint8_t data) +{ + if (reg < F0RC00_01 || reg > F0RCFx) { + printk(BIOS_ERR, "Trying to write to illegal RCW %#2.2x\n", + reg); + return 0; + } + + return rcd_writeb(bus, slave, reg, data); +} + +int rcd_write_32b(unsigned int bus, uint8_t slave, enum rcw_idx reg, + uint32_t data) +{ + if (reg < F0RC00_01 || reg > F0RCFx) { + printk(BIOS_ERR, "Trying to write to illegal RCW %#2.2x\n", + reg); + return 0; + } + + if (reg & 3) { + /* + * RCD would silently mask out the lowest bits, assume that this + * is not what caller wanted. + */ + printk(BIOS_ERR, "Unaligned RCW %#2.2x, aborting\n", reg); + return 0; + } + + return rcd_writed(bus, slave, reg, data); +} + +void dump_rcd(unsigned int bus, u8 addr) +{ + /* Can only read in 32b chunks */ + uint8_t buf[RCW_ALL_ALIGNED]; + int i; + + for (i = 0; i < RCW_ALL_ALIGNED; i += sizeof(uint32_t)) { + rcd_readd(bus, addr, i, (uint32_t *) &buf[i]); + } + + printk(BIOS_DEBUG, "RCD dump for I2C address %#2.2x:\n", addr); + hexdump(buf, sizeof(buf)); +} diff --git a/src/include/device/dram/rcd.h b/src/include/device/dram/rcd.h new file mode 100644 index 00000000000..86655dd7154 --- /dev/null +++ b/src/include/device/dram/rcd.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef DEVICE_DRAM_RCD_H +#define DEVICE_DRAM_RCD_H + +#include +#include +#include + +/* Maybe these should land in types.h */ +typedef uint32_t le_uint32_t; +typedef uint16_t le_uint16_t; + +enum rcw_idx { + VEN_ID_L, + VEN_ID_H, + DEV_ID_L, + DEV_ID_H, + REV_ID, + RES_05, + RES_06, + RES_07, + F0RC00_01, + F0RC02_03, + F0RC04_05, + F0RC06_07, + F0RC08_09, + F0RC0A_0B, + F0RC0C_0D, + F0RC0E_0F, + F0RC1x, + F0RC2x, + F0RC3x, + F0RC4x, + F0RC5x, + F0RC6x, + F0RC7x, + F0RC8x, + F0RC9x, + F0RCAx, + F0RCBx, + F0RCCx, + F0RCDx, + F0RCEx, + F0RCFx, + RCW_ALL, /* Total num of bytes */ + RCW_ALL_ALIGNED /* Total num of bytes after aligning to 4B */ +}; + +_Static_assert(RCW_ALL_ALIGNED % sizeof(uint32_t) == 0, + "RCW_ALL_ALIGNED is not aligned"); + +int rcd_write_reg(unsigned int bus, uint8_t slave, enum rcw_idx reg, + uint8_t data); +int rcd_write_32b(unsigned int bus, uint8_t slave, enum rcw_idx reg, + le_uint32_t data); + + +void dump_rcd(unsigned int bus, uint8_t addr); + +#endif /* DEVICE_DRAM_RCD_H */ From aca79345d811a757b591b07a40a214ad1f87777a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 9 Oct 2020 19:11:05 +0200 Subject: [PATCH 014/213] arch/ppc64/rom_media.c: move to mainboard/emulation/qemu-power* CBFS location in memory is 
different than on the real hardware. Change-Id: Icd806a57f449042c883b624056c05c1ff7e4c17e Signed-off-by: Krystian Hebel --- src/arch/ppc64/Makefile.inc | 3 --- src/mainboard/emulation/qemu-power8/Makefile.inc | 3 +++ .../emulation/qemu-power8}/rom_media.c | 0 src/mainboard/emulation/qemu-power9/Makefile.inc | 4 ++++ src/mainboard/emulation/qemu-power9/rom_media.c | 12 ++++++++++++ 5 files changed, 19 insertions(+), 3 deletions(-) rename src/{arch/ppc64 => mainboard/emulation/qemu-power8}/rom_media.c (100%) create mode 100644 src/mainboard/emulation/qemu-power9/rom_media.c diff --git a/src/arch/ppc64/Makefile.inc b/src/arch/ppc64/Makefile.inc index 8ccd62bfab5..92b1e399d67 100644 --- a/src/arch/ppc64/Makefile.inc +++ b/src/arch/ppc64/Makefile.inc @@ -12,7 +12,6 @@ ifeq ($(CONFIG_ARCH_BOOTBLOCK_PPC64),y) bootblock-y = bootblock_crt0.S bootblock-y += arch_timer.c bootblock-y += boot.c -bootblock-y += rom_media.c bootblock-y += \ $(top)/src/lib/memchr.c \ $(top)/src/lib/memcmp.c \ @@ -38,7 +37,6 @@ ifeq ($(CONFIG_ARCH_ROMSTAGE_PPC64),y) romstage-y += arch_timer.c romstage-y += boot.c romstage-y += stages.c -romstage-y += rom_media.c romstage-y += \ $(top)/src/lib/memchr.c \ $(top)/src/lib/memcmp.c \ @@ -64,7 +62,6 @@ endif ################################################################################ ifeq ($(CONFIG_ARCH_RAMSTAGE_PPC64),y) -ramstage-y += rom_media.c ramstage-y += stages.c ramstage-y += arch_timer.c ramstage-y += boot.c diff --git a/src/mainboard/emulation/qemu-power8/Makefile.inc b/src/mainboard/emulation/qemu-power8/Makefile.inc index 40119839232..c1484b96bcd 100644 --- a/src/mainboard/emulation/qemu-power8/Makefile.inc +++ b/src/mainboard/emulation/qemu-power8/Makefile.inc @@ -2,8 +2,11 @@ bootblock-y += bootblock.c bootblock-y += uart.c +bootblock-y += rom_media.c romstage-y += cbmem.c romstage-y += romstage.c ramstage-y += timer.c romstage-y += uart.c +romstage-y += rom_media.c ramstage-y += uart.c +ramstage-y += rom_media.c diff --git a/src/arch/ppc64/rom_media.c b/src/mainboard/emulation/qemu-power8/rom_media.c similarity index 100% rename from src/arch/ppc64/rom_media.c rename to src/mainboard/emulation/qemu-power8/rom_media.c diff --git a/src/mainboard/emulation/qemu-power9/Makefile.inc b/src/mainboard/emulation/qemu-power9/Makefile.inc index ace00a75b78..a71c20fe1aa 100644 --- a/src/mainboard/emulation/qemu-power9/Makefile.inc +++ b/src/mainboard/emulation/qemu-power9/Makefile.inc @@ -1,6 +1,10 @@ ## SPDX-License-Identifier: GPL-2.0-only +bootblock-y += rom_media.c + romstage-y += cbmem.c romstage-y += romstage.c +romstage-y += rom_media.c ramstage-y += ramstage.c +ramstage-y += rom_media.c diff --git a/src/mainboard/emulation/qemu-power9/rom_media.c b/src/mainboard/emulation/qemu-power9/rom_media.c new file mode 100644 index 00000000000..2fd47669a80 --- /dev/null +++ b/src/mainboard/emulation/qemu-power9/rom_media.c @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include + +static const struct mem_region_device boot_dev = + MEM_REGION_DEV_RO_INIT(FLASH_BASE_ADDR, CONFIG_ROM_SIZE); + +const struct region_device *boot_device_ro(void) +{ + return &boot_dev.rdev; +} From 5eac15e9c9a52f6a0820b6a147b5f2bca3b31ac6 Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Mon, 14 Dec 2020 14:52:50 +0100 Subject: [PATCH 015/213] soc/ibm/power9/*: add file structure for SOC Change-Id: Ie74b1e34f9aebe151d0fdb0e95c003510fd864c3 Signed-off-by: Krystian Hebel --- src/soc/ibm/Kconfig | 1 + src/soc/ibm/power9/Kconfig | 9 +++++++++ 
src/soc/ibm/power9/Makefile.inc | 10 ++++++++++ src/soc/ibm/power9/bootblock.c | 7 +++++++ src/soc/ibm/power9/chip.c | 7 +++++++ src/soc/ibm/power9/romstage.c | 10 ++++++++++ src/soc/ibm/power9/timer.c | 8 ++++++++ 7 files changed, 52 insertions(+) create mode 100644 src/soc/ibm/Kconfig create mode 100644 src/soc/ibm/power9/Kconfig create mode 100644 src/soc/ibm/power9/Makefile.inc create mode 100644 src/soc/ibm/power9/bootblock.c create mode 100644 src/soc/ibm/power9/chip.c create mode 100644 src/soc/ibm/power9/romstage.c create mode 100644 src/soc/ibm/power9/timer.c diff --git a/src/soc/ibm/Kconfig b/src/soc/ibm/Kconfig new file mode 100644 index 00000000000..40a09dc19f0 --- /dev/null +++ b/src/soc/ibm/Kconfig @@ -0,0 +1 @@ +source "src/soc/ibm/*/Kconfig" diff --git a/src/soc/ibm/power9/Kconfig b/src/soc/ibm/power9/Kconfig new file mode 100644 index 00000000000..a94c41d606c --- /dev/null +++ b/src/soc/ibm/power9/Kconfig @@ -0,0 +1,9 @@ +config CPU_IBM_POWER9 + bool + help + This SoC is the minimal template working on POWER9 + Talos II platform. + +if CPU_IBM_POWER9 + # nothing here yet +endif diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc new file mode 100644 index 00000000000..0e12bc76ff0 --- /dev/null +++ b/src/soc/ibm/power9/Makefile.inc @@ -0,0 +1,10 @@ +## SPDX-License-Identifier: GPL-2.0-only + +ifeq ($(CONFIG_CPU_IBM_POWER9),y) + +bootblock-y += bootblock.c +romstage-y += romstage.c +ramstage-y += chip.c +ramstage-y += timer.c + +endif diff --git a/src/soc/ibm/power9/bootblock.c b/src/soc/ibm/power9/bootblock.c new file mode 100644 index 00000000000..86217285b9a --- /dev/null +++ b/src/soc/ibm/power9/bootblock.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +void bootblock_soc_early_init(void) +{ +} diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c new file mode 100644 index 00000000000..dd320b441b2 --- /dev/null +++ b/src/soc/ibm/power9/chip.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +struct chip_operations soc_ibm_power9_ops = { + CHIP_NAME("POWER9") +}; diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c new file mode 100644 index 00000000000..c4123152554 --- /dev/null +++ b/src/soc/ibm/power9/romstage.c @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include + +void main(void) +{ + console_init(); + run_ramstage(); +} diff --git a/src/soc/ibm/power9/timer.c b/src/soc/ibm/power9/timer.c new file mode 100644 index 00000000000..2e0289c0451 --- /dev/null +++ b/src/soc/ibm/power9/timer.c @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +void init_timer(void) +{ + // no need to do anything here +} From 25eeba49d465dfaba536f7a7760c836eb54ea084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=C5=BBygowski?= Date: Tue, 15 Sep 2020 17:01:57 +0200 Subject: [PATCH 016/213] mb/raptor-cs/talos-2: add basic mainboard structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I0c4f74c7b27c8bb5599d68305adf369ddc6fcc70 Signed-off-by: Michał Żygowski Signed-off-by: Krystian Hebel --- src/arch/ppc64/Makefile.inc | 1 + src/mainboard/raptor-cs/Kconfig | 15 ++++++ src/mainboard/raptor-cs/Kconfig.name | 2 + src/mainboard/raptor-cs/talos-2/Kconfig | 53 +++++++++++++++++++ src/mainboard/raptor-cs/talos-2/Kconfig.name | 2 + src/mainboard/raptor-cs/talos-2/Makefile.inc | 3 ++ .../raptor-cs/talos-2/board_info.txt | 2 + src/mainboard/raptor-cs/talos-2/cbmem.c | 
11 ++++ src/mainboard/raptor-cs/talos-2/devicetree.cb | 5 ++ src/mainboard/raptor-cs/talos-2/mainboard.c | 19 +++++++ src/mainboard/raptor-cs/talos-2/memlayout.ld | 18 +++++++ 11 files changed, 131 insertions(+) create mode 100644 src/mainboard/raptor-cs/Kconfig create mode 100644 src/mainboard/raptor-cs/Kconfig.name create mode 100644 src/mainboard/raptor-cs/talos-2/Kconfig create mode 100644 src/mainboard/raptor-cs/talos-2/Kconfig.name create mode 100644 src/mainboard/raptor-cs/talos-2/Makefile.inc create mode 100644 src/mainboard/raptor-cs/talos-2/board_info.txt create mode 100644 src/mainboard/raptor-cs/talos-2/cbmem.c create mode 100644 src/mainboard/raptor-cs/talos-2/devicetree.cb create mode 100644 src/mainboard/raptor-cs/talos-2/mainboard.c create mode 100644 src/mainboard/raptor-cs/talos-2/memlayout.ld diff --git a/src/arch/ppc64/Makefile.inc b/src/arch/ppc64/Makefile.inc index 92b1e399d67..d47ec356535 100644 --- a/src/arch/ppc64/Makefile.inc +++ b/src/arch/ppc64/Makefile.inc @@ -62,6 +62,7 @@ endif ################################################################################ ifeq ($(CONFIG_ARCH_RAMSTAGE_PPC64),y) +ramstage-y += arch_timer.c ramstage-y += stages.c ramstage-y += arch_timer.c ramstage-y += boot.c diff --git a/src/mainboard/raptor-cs/Kconfig b/src/mainboard/raptor-cs/Kconfig new file mode 100644 index 00000000000..7f916bbaf37 --- /dev/null +++ b/src/mainboard/raptor-cs/Kconfig @@ -0,0 +1,15 @@ +if VENDOR_RAPTOR_CS + +choice + prompt "Mainboard model" + +source "src/mainboard/raptor-cs/*/Kconfig.name" + +endchoice + +source "src/mainboard/raptor-cs/*/Kconfig" + +config MAINBOARD_VENDOR + default "Raptor Computing Systems" + +endif # VENDOR_RAPTOR_CS diff --git a/src/mainboard/raptor-cs/Kconfig.name b/src/mainboard/raptor-cs/Kconfig.name new file mode 100644 index 00000000000..7f9d8d6b9bd --- /dev/null +++ b/src/mainboard/raptor-cs/Kconfig.name @@ -0,0 +1,2 @@ +config VENDOR_RAPTOR_CS + bool "Raptor Computing Systems" diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig new file mode 100644 index 00000000000..c3379bcb4ff --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -0,0 +1,53 @@ +## SPDX-License-Identifier: GPL-2.0-only + +if BOARD_RAPTOR_CS_TALOS_2 + +config BOARD_SPECIFIC_OPTIONS + def_bool y + select CPU_POWER9 + select CPU_IBM_POWER9 + select BOARD_ROMSIZE_KB_512 + select ARCH_BOOTBLOCK_PPC64 + select ARCH_VERSTAGE_PPC64 + select ARCH_ROMSTAGE_PPC64 + select ARCH_RAMSTAGE_PPC64 + select SUPERIO_ASPEED_AST2400 + select BOOT_DEVICE_NOT_SPI_FLASH + select MISSING_BOARD_RESET + select HAVE_DEBUG_RAM_SETUP + +config MEMLAYOUT_LD_FILE + string + default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/memlayout.ld" + +config MAINBOARD_DIR + string + default "raptor-cs/talos-2" + +config MAINBOARD_PART_NUMBER + string + default "Talos II" + +# I'm not sure how to deal with 2 CPUs with regard to DIMM count, but that's +# a problem for later. 
+config DIMM_MAX + int + default 8 + +config DIMM_SPD_SIZE + int + default 512 + +config MAX_CPUS + int + default 1 + +config MAINBOARD_VENDOR + string + default "Raptor CS" + +config DRAM_SIZE_MB + int + default 32768 + +endif # BOARD_RAPTOR_CS_TALOS_2 diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig.name b/src/mainboard/raptor-cs/talos-2/Kconfig.name new file mode 100644 index 00000000000..8a75417628d --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/Kconfig.name @@ -0,0 +1,2 @@ +config BOARD_RAPTOR_CS_TALOS_2 + bool "Talos II" diff --git a/src/mainboard/raptor-cs/talos-2/Makefile.inc b/src/mainboard/raptor-cs/talos-2/Makefile.inc new file mode 100644 index 00000000000..d55179f8d44 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/Makefile.inc @@ -0,0 +1,3 @@ +## SPDX-License-Identifier: GPL-2.0-only + +romstage-y += cbmem.c diff --git a/src/mainboard/raptor-cs/talos-2/board_info.txt b/src/mainboard/raptor-cs/talos-2/board_info.txt new file mode 100644 index 00000000000..aa2269185be --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/board_info.txt @@ -0,0 +1,2 @@ +Board name: Raptor CS Talos II +Category: desktop diff --git a/src/mainboard/raptor-cs/talos-2/cbmem.c b/src/mainboard/raptor-cs/talos-2/cbmem.c new file mode 100644 index 00000000000..15c20f8de42 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/cbmem.c @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +void *cbmem_top_chipset(void) +{ + /* Top of cbmem is at lowest usable DRAM address below 4GiB. */ + /* For now, last 1M of 4G */ + void *ptr = (void *) ((1ULL << 32) - 1048576); + return ptr; +} diff --git a/src/mainboard/raptor-cs/talos-2/devicetree.cb b/src/mainboard/raptor-cs/talos-2/devicetree.cb new file mode 100644 index 00000000000..85440064fa4 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/devicetree.cb @@ -0,0 +1,5 @@ +## SPDX-License-Identifier: GPL-2.0-only + +chip soc/ibm/power9 + device cpu_cluster 0 on end +end diff --git a/src/mainboard/raptor-cs/talos-2/mainboard.c b/src/mainboard/raptor-cs/talos-2/mainboard.c new file mode 100644 index 00000000000..b4d11efe4b4 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/mainboard.c @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +static void mainboard_enable(struct device *dev) +{ + if (!dev) + die("No dev0; die\n"); + + /* Where does RAM live? */ + ram_resource_kb(dev, 0, 2048, 32768); + cbmem_recovery(0); +} + +struct chip_operations mainboard_ops = { + .enable_dev = mainboard_enable, +}; diff --git a/src/mainboard/raptor-cs/talos-2/memlayout.ld b/src/mainboard/raptor-cs/talos-2/memlayout.ld new file mode 100644 index 00000000000..c5136b9d14e --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/memlayout.ld @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include + +// TODO: fill in these blanks for Power9. 
+SECTIONS +{ + DRAM_START(0x0) + BOOTBLOCK(0, 64K) + ROMSTAGE(0x120000, 128K) + STACK(0x140000, 0x3ff00) + PRERAM_CBMEM_CONSOLE(0x180000, 8K) + FMAP_CACHE(0x182000, 2K) + CBFS_MCACHE(0x182800, 8K) + RAMSTAGE(0x200000, 16M) +} From 40b0422aafca19d00f63a15862fafd8277877924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=C5=BBygowski?= Date: Thu, 24 Sep 2020 15:09:40 +0200 Subject: [PATCH 017/213] Makefile.inc: compile ECC tools and inject ECC to final image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I8541aa6f1429ed6143830ed11c47c150183ddf0d Signed-off-by: Michał Żygowski --- Makefile.inc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Makefile.inc b/Makefile.inc index 0dd4864e20d..e211b6ceedb 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -519,7 +519,7 @@ endif additional-dirs += $(objutil)/cbfstool $(objutil)/ifdtool \ $(objutil)/options $(objutil)/amdfwtool \ - $(objutil)/cbootimage + $(objutil)/cbootimage $(objutil)/ffs export $(COREBOOT_EXPORTS) @@ -561,6 +561,7 @@ IFITTOOL:=$(objutil)/cbfstool/ifittool AMDCOMPRESS:=$(objutil)/cbfstool/amdcompress CSE_FPT:=$(objutil)/cbfstool/cse_fpt CSE_SERGER:=$(objutil)/cbfstool/cse_serger +ECCTOOL:=$(objutil)/ffs/ecc/ecc $(obj)/cbfstool: $(CBFSTOOL) cp $< $@ @@ -601,6 +602,12 @@ IFDTOOL:=$(objutil)/ifdtool/ifdtool AMDFWTOOL:=$(objutil)/amdfwtool/amdfwtool +$(ECCTOOL): + @printf " Compile ECCTOOL\n" + cp -r $(top)/3rdparty/ffs $(objutil) + cd $(objutil)/ffs && autoreconf -i && ./configure + +$(MAKE) -C $(objutil)/ffs + APCB_EDIT_TOOL:=$(top)/util/apcb/apcb_edit.py APCB_V3_EDIT_TOOL:=$(top)/util/apcb/apcb_v3_edit.py @@ -701,7 +708,7 @@ install-git-commit-clangfmt: include util/crossgcc/Makefile.inc .PHONY: tools -tools: $(objutil)/kconfig/conf $(objutil)/kconfig/toada $(CBFSTOOL) $(objutil)/cbfstool/cbfs-compression-tool $(FMAPTOOL) $(RMODTOOL) $(IFWITOOL) $(objutil)/nvramtool/nvramtool $(objutil)/sconfig/sconfig $(IFDTOOL) $(CBOOTIMAGE) $(AMDFWTOOL) $(AMDCOMPRESS) $(FUTILITY) $(BINCFG) $(IFITTOOL) $(objutil)/supermicro/smcbiosinfo $(CSE_FPT) $(CSE_SERGER) +tools: $(objutil)/kconfig/conf $(objutil)/kconfig/toada $(CBFSTOOL) $(objutil)/cbfstool/cbfs-compression-tool $(FMAPTOOL) $(RMODTOOL) $(IFWITOOL) $(objutil)/nvramtool/nvramtool $(objutil)/sconfig/sconfig $(IFDTOOL) $(CBOOTIMAGE) $(AMDFWTOOL) $(AMDCOMPRESS) $(FUTILITY) $(BINCFG) $(IFITTOOL) $(objutil)/supermicro/smcbiosinfo $(CSE_FPT) $(CSE_SERGER) $(ECCTOOL) ########################################################################### # Common recipes for all stages @@ -1146,7 +1153,7 @@ add_intermediate = \ $(1): $(obj)/coreboot.pre $(2) | $(INTERMEDIATE) \ $(eval INTERMEDIATE+=$(1)) $(eval PHONY+=$(1)) -$(obj)/coreboot.rom: $(obj)/coreboot.pre $(CBFSTOOL) $(IFITTOOL) $$(INTERMEDIATE) +$(obj)/coreboot.rom: $(obj)/coreboot.pre $(CBFSTOOL) $(IFITTOOL) $(ECCTOOL) $$(INTERMEDIATE) @printf " CBFS $(subst $(obj)/,,$(@))\n" # The full ROM may be larger than the CBFS part, so create an empty # file (filled with \377 = 0xff) and copy the CBFS image over it. 
@@ -1173,6 +1180,9 @@ ifeq ($(CONFIG_CBFS_VERIFICATION),y) exit 1 ;\ fi endif # CONFIG_CBFS_VERIFICATION +ifeq ($(CONFIG_ARCH_PPC64),y) + $(ECCTOOL) --inject $@ --output $@.ecc --p8 +endif # CONFIG_ARCH_PPC64 cbfs-files-y += $(CONFIG_CBFS_PREFIX)/romstage $(CONFIG_CBFS_PREFIX)/romstage-file := $(objcbfs)/romstage.elf From e7f5f0bc2995040091960d433c68514b1500d222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=C5=BBygowski?= Date: Thu, 24 Sep 2020 15:46:55 +0200 Subject: [PATCH 018/213] 3rdparty/sb-signing-utils: add SecureBoot utility for OpenPOWER MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: Id88baef5ecb1f8ffd74a7f464bbbaaaea0ca643d Signed-off-by: Michał Żygowski --- .gitmodules | 3 +++ 3rdparty/sb-signing-utils | 1 + 2 files changed, 4 insertions(+) create mode 160000 3rdparty/sb-signing-utils diff --git a/.gitmodules b/.gitmodules index 6f62952f435..4d0a18d202e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -65,3 +65,6 @@ path = util/goswid url = ../goswid branch = trunk +[submodule "3rdparty/sb-signing-utils"] + path = 3rdparty/sb-signing-utils + url = https://github.com/open-power/sb-signing-utils.git diff --git a/3rdparty/sb-signing-utils b/3rdparty/sb-signing-utils new file mode 160000 index 00000000000..591c8f53482 --- /dev/null +++ b/3rdparty/sb-signing-utils @@ -0,0 +1 @@ +Subproject commit 591c8f53482243626901e1cc8a4ae321f314040d From 0cad01b904653050fd159bac24893639f741abd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=C5=BBygowski?= Date: Thu, 24 Sep 2020 16:17:25 +0200 Subject: [PATCH 019/213] Makefile.inc: add signing procedure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I1504b6e87e6a35d2339abd8d33a84d78e250e0f4 Signed-off-by: Michał Żygowski Signed-off-by: Krystian Hebel Signed-off-by: Sergii Dmytruk --- Makefile.inc | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/Makefile.inc b/Makefile.inc index e211b6ceedb..becd31d9373 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -562,6 +562,7 @@ AMDCOMPRESS:=$(objutil)/cbfstool/amdcompress CSE_FPT:=$(objutil)/cbfstool/cse_fpt CSE_SERGER:=$(objutil)/cbfstool/cse_serger ECCTOOL:=$(objutil)/ffs/ecc/ecc +SBSIGNTOOLS:=$(objutil)/sb-signing-utils/create-container $(obj)/cbfstool: $(CBFSTOOL) cp $< $@ @@ -608,6 +609,12 @@ $(ECCTOOL): cd $(objutil)/ffs && autoreconf -i && ./configure +$(MAKE) -C $(objutil)/ffs +$(SBSIGNTOOLS): + @printf " Compile SB SIGNING UTILS\n" + cp -r $(top)/3rdparty/sb-signing-utils $(objutil) + cd $(objutil)/sb-signing-utils && autoreconf -i -Wno-unsupported && ./configure + +$(MAKE) -C $(objutil)/sb-signing-utils + APCB_EDIT_TOOL:=$(top)/util/apcb/apcb_edit.py APCB_V3_EDIT_TOOL:=$(top)/util/apcb/apcb_v3_edit.py @@ -708,7 +715,7 @@ install-git-commit-clangfmt: include util/crossgcc/Makefile.inc .PHONY: tools -tools: $(objutil)/kconfig/conf $(objutil)/kconfig/toada $(CBFSTOOL) $(objutil)/cbfstool/cbfs-compression-tool $(FMAPTOOL) $(RMODTOOL) $(IFWITOOL) $(objutil)/nvramtool/nvramtool $(objutil)/sconfig/sconfig $(IFDTOOL) $(CBOOTIMAGE) $(AMDFWTOOL) $(AMDCOMPRESS) $(FUTILITY) $(BINCFG) $(IFITTOOL) $(objutil)/supermicro/smcbiosinfo $(CSE_FPT) $(CSE_SERGER) $(ECCTOOL) +tools: $(objutil)/kconfig/conf $(objutil)/kconfig/toada $(CBFSTOOL) $(objutil)/cbfstool/cbfs-compression-tool $(FMAPTOOL) $(RMODTOOL) $(IFWITOOL) $(objutil)/nvramtool/nvramtool $(objutil)/sconfig/sconfig $(IFDTOOL) $(CBOOTIMAGE) $(AMDFWTOOL) $(AMDCOMPRESS) $(FUTILITY) $(BINCFG) 
$(IFITTOOL) $(objutil)/supermicro/smcbiosinfo $(CSE_FPT) $(CSE_SERGER) $(ECCTOOL) $(SBSIGNTOOLS) ########################################################################### # Common recipes for all stages @@ -1153,7 +1160,9 @@ add_intermediate = \ $(1): $(obj)/coreboot.pre $(2) | $(INTERMEDIATE) \ $(eval INTERMEDIATE+=$(1)) $(eval PHONY+=$(1)) -$(obj)/coreboot.rom: $(obj)/coreboot.pre $(CBFSTOOL) $(IFITTOOL) $(ECCTOOL) $$(INTERMEDIATE) +KEYLOC?=/tmp/keys + +$(obj)/coreboot.rom: $(obj)/coreboot.pre $(CBFSTOOL) $(IFITTOOL) $(ECCTOOL) $(SBSIGNTOOLS) $$(INTERMEDIATE) @printf " CBFS $(subst $(obj)/,,$(@))\n" # The full ROM may be larger than the CBFS part, so create an empty # file (filled with \377 = 0xff) and copy the CBFS image over it. @@ -1181,7 +1190,20 @@ ifeq ($(CONFIG_CBFS_VERIFICATION),y) fi endif # CONFIG_CBFS_VERIFICATION ifeq ($(CONFIG_ARCH_PPC64),y) - $(ECCTOOL) --inject $@ --output $@.ecc --p8 + cp -r $(top)/3rdparty/sb-signing-utils/test/keys /tmp + @printf " SBSIGN $(subst $(obj)/,,$(@))\n" + $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ + -p $(KEYLOC)/hw_key_a.key --payload $(top)/$(objcbfs)/bootblock.bin \ + --imagefile $(top)/$(obj)/bootblock.signed + $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ + -p $(KEYLOC)/hw_key_a.key --payload $(top)/$@ --imagefile $(top)/$@.signed + @printf " ECC $(subst $(obj)/,,$(@))\n" + $(ECCTOOL) --inject $(top)/$@.signed --output $(top)/$@.signed.ecc --p8 + $(ECCTOOL) --inject $(top)/$@ --output $(top)/$@.ecc --p8 + @printf " ECC bootblock\n" + dd if=$(obj)/bootblock.signed of=$(obj)/bootblock.signed.pad ibs=25486 conv=sync 2> /dev/null + $(ECCTOOL) --inject $(obj)/bootblock.signed.pad --output $(obj)/bootblock.signed.ecc --p8 + rm $(obj)/bootblock.signed $(obj)/bootblock.signed.pad endif # CONFIG_ARCH_PPC64 cbfs-files-y += $(CONFIG_CBFS_PREFIX)/romstage From 6da70343a5abbb242c6aa999f1254649d902825a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 16 Feb 2021 18:22:22 +0100 Subject: [PATCH 020/213] ppc64: Kconfig switch for bootblock in SEEPROM, zero HRMOR On PPC64 each address is logically OR'ed with HRMOR (Hypervisor Real Mode Offset Register) before it is dispatched to the underlying memory, meaning that memory space overlaps at the least significant bit set in HRMOR. coreboot is entered with HRMOR = 4GB-128MB both on hardware (when started by hostboot bootloader) and in Qemu in hb-mode. This means that memory overlaps every 128MB in this particular case. HRMOR can be explicitly ignored when MSB of an address is set, but this would require using different memory model for linking. If we zero HRMOR in bootblock, linking can be done against real address. This greatly simplifies memory layout and allows to forget about HRMOR from that point on. 
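A worked illustration of the aliasing described above (only a sketch to
make the arithmetic concrete; hrmor_effective() is a hypothetical helper,
not something added by this patch):

    /* How a real-mode access resolves, per the description above:
     * MSB set means "ignore HRMOR" (the trick used by bootblock_crt0.S),
     * otherwise HRMOR is ORed into the address. */
    static inline uint64_t hrmor_effective(uint64_t addr, uint64_t hrmor)
    {
            if (addr & (1ull << 63))
                    return addr & ~(1ull << 63);
            return addr | hrmor;
    }

    /* With HRMOR = 0xF8000000 (least significant set bit = 128M):
     *   hrmor_effective(0x00000000, 0xF8000000) == 0xF8000000
     *   hrmor_effective(0x08000000, 0xF8000000) == 0xF8000000
     * i.e. the two addresses alias, so memory repeats every 128M.
     * With HRMOR = 0xF8200000 the period shrinks to 2M. */

Once the bootblock clears HRMOR, hrmor_effective() degenerates to the
identity mapping, which is what allows linking against real addresses.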
Signed-off-by: Krystian Hebel Signed-off-by: Maciej Pijanowski Change-Id: I0170463968c91b943c4b0dc15fe73fa616a164da --- Makefile.inc | 12 +- src/arch/ppc64/Makefile.inc | 2 +- src/arch/ppc64/bootblock_crt0.S | 112 +++++++++++++++--- .../emulation/qemu-power8/memlayout.ld | 10 ++ .../emulation/qemu-power9/memlayout.ld | 83 +++++++++---- src/mainboard/raptor-cs/talos-2/Kconfig | 14 +++ .../talos-2/board-bootblock-in-seeprom.fmd | 15 +++ src/mainboard/raptor-cs/talos-2/board.fmd | 18 +++ src/mainboard/raptor-cs/talos-2/memlayout.ld | 75 ++++++++++-- 9 files changed, 290 insertions(+), 51 deletions(-) create mode 100644 src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd create mode 100644 src/mainboard/raptor-cs/talos-2/board.fmd diff --git a/Makefile.inc b/Makefile.inc index becd31d9373..ca285bfe4b0 100644 --- a/Makefile.inc +++ b/Makefile.inc @@ -1192,18 +1192,24 @@ endif # CONFIG_CBFS_VERIFICATION ifeq ($(CONFIG_ARCH_PPC64),y) cp -r $(top)/3rdparty/sb-signing-utils/test/keys /tmp @printf " SBSIGN $(subst $(obj)/,,$(@))\n" - $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ - -p $(KEYLOC)/hw_key_a.key --payload $(top)/$(objcbfs)/bootblock.bin \ - --imagefile $(top)/$(obj)/bootblock.signed $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ -p $(KEYLOC)/hw_key_a.key --payload $(top)/$@ --imagefile $(top)/$@.signed @printf " ECC $(subst $(obj)/,,$(@))\n" $(ECCTOOL) --inject $(top)/$@.signed --output $(top)/$@.signed.ecc --p8 +ifeq ($(CONFIG_BOOTBLOCK_IN_SEEPROM),y) + @printf " ECC bootblock\n" + $(ECCTOOL) --inject $(top)/$(objcbfs)/bootblock.bin --output $(obj)/bootblock.ecc --p8 +else + @printf " SBSIGN bootblock\n" + $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ + -p $(KEYLOC)/hw_key_a.key --payload $(top)/$(objcbfs)/bootblock.bin \ + --imagefile $(top)/$(obj)/bootblock.signed $(ECCTOOL) --inject $(top)/$@ --output $(top)/$@.ecc --p8 @printf " ECC bootblock\n" dd if=$(obj)/bootblock.signed of=$(obj)/bootblock.signed.pad ibs=25486 conv=sync 2> /dev/null $(ECCTOOL) --inject $(obj)/bootblock.signed.pad --output $(obj)/bootblock.signed.ecc --p8 rm $(obj)/bootblock.signed $(obj)/bootblock.signed.pad +endif # CONFIG_BOOTBLOCK_IN_SEEPROM endif # CONFIG_ARCH_PPC64 cbfs-files-y += $(CONFIG_CBFS_PREFIX)/romstage diff --git a/src/arch/ppc64/Makefile.inc b/src/arch/ppc64/Makefile.inc index d47ec356535..8300ad693dc 100644 --- a/src/arch/ppc64/Makefile.inc +++ b/src/arch/ppc64/Makefile.inc @@ -1,6 +1,6 @@ ## SPDX-License-Identifier: GPL-2.0-only -ppc64_flags = -I$(src)/arch/ppc64/ -mbig-endian -mcpu=power8 -mtune=power8 +ppc64_flags = -I$(src)/arch/ppc64/ -mbig-endian -mcpu=power8 -mtune=power8 -mno-pointers-to-nested-functions ppc64_asm_flags = diff --git a/src/arch/ppc64/bootblock_crt0.S b/src/arch/ppc64/bootblock_crt0.S index 5a9496024e2..4254a191eba 100644 --- a/src/arch/ppc64/bootblock_crt0.S +++ b/src/arch/ppc64/bootblock_crt0.S @@ -2,6 +2,7 @@ /* * Early initialization code for POWER8/POWER9. */ +#include #include @@ -27,15 +28,67 @@ oris r,r, (e)@h; \ ori r,r, (e)@l; +/* Load an immediate 32-bit value into a register */ +#define LOAD_IMM32(r, e) \ + li r, 0; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + +/* + * On POWER, 0 is wherever HRMOR points to rather than physical DRAM start. + * HRMOR is ORed with address, not added to it, meaning that memory space + * overlaps after 2^(least significant set bit of HRMOR). 
This becomes + * chaotic when nonconsecutive bits are set... + * + * Two and a half possible cases: + * 0. bootblock started with QEMU in hb-mode + * - NIA = 0x10 (bug?) + * - HRMOR = 0x08000000 (128M) + * - no physical memory to enable/train, everything accessible from start + * 1. bootblock loaded by HBBL + * - NIA = 0 + * - HRMOR = 0xF8000000 (4G - 128M) + * - initialized L3 = 0x400000 (4M) + * - top address before RAM = 0xF8400000 + * 2. bootblock in SEEPROM, loaded by SBE + * - NIA = 0x3000 (placeholder for int. vectors) + * - HRMOR = 0xF8200000 (4G - 128 M + 2 M) + * - initialized L3 = 0x8000 (bootblock/HBBL size = 32K) + * - no way 32K will be enough, must initialize more L3 in bootblock + * - HRMOR still applies, so memory overlaps every 2M + * + * Common subset (assuming 2. initializes as much memory as possible) is + * 0xF8200000-0xF8400000. 2M should be more than enough for pre-RAM code, + * but it isn't enough to load ramstage. We could implement postcar stage, + * but KISS: initialize L3 from _ebootblock to 0xF8980000: up to 9.5M into + * cache, leaving bottom 2M (0xF8000000-0xF8200000) either uninitialized + * (when started from SEEPROM) or just unused for anything but bootblock + * (loaded by HBBL). Last 0.5M of L3 cache is left for interrupt vectors + * normally located at address 0. + * + * Set HRMOR to 0 before jumping to C code in bootblock and forget it even + * exists. + * + * For QEMU s/0xF8/0x08/ in above description but code remains the same. + * L3 initialization is unnecessary in this case but won't break anything. + * + * TODO: there is a structure with SBE->HBBL data at 0 in 2nd option. It + * holds some useful data like XSCOM BAR and LPC BAR. If, for any reason, + * these addresses are different than default, they should be used instead + * of predefined values. + */ + .section ".text._start", "ax", %progbits .globl _start _start: /* QEMU with hb-mode=on starts at address 0x10, while hardware at 0x0 */ +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) nop nop nop nop FIXUP_ENDIAN +#endif /* Store FDT address provided by QEMU in %r3 to pass it later to * payload */ @@ -44,32 +97,54 @@ _start: /* Set program priority to medium */ or %r2, %r2, %r2 - /* Stack */ - lis %r1, _estack@ha - addi %r1, %r1, _estack@l + li %r10, 1 + rotldi %r10, %r10, 63 /* MSB is "ignore HRMOR" */ - /* Clear .bss section */ - /* Currently not needed, .bss is zeroed in the file. If it were to be - * used, make sure that .bss is 128B aligned (size of cache line), - * otherwise dcbz will clear (part of) .opd section! */ -/* - lis %r5, _bss@ha - addi %r5, %r5, _bss@l - lis %r6, _ebss@ha - addi %r6, %r6, _ebss@l + /* Assumption: we are linked at address that isn't changed by HRMOR */ + LOAD_IMM32(%r7, ignoreHRMOR) + or %r9, %r7, %r10 + + mtlr %r9 + blr + +ignoreHRMOR: + /* Now we are at 0x8000000000000000 | linked address */ + li %r0, 0 + mtspr SPR_HRMOR, %r0 /* Clear HRMOR */ + isync + + /* We can't just "b stopIgnoringHRMOR", it would use relative offset */ + addi %r9, %r7, stopIgnoringHRMOR - ignoreHRMOR + mtlr %r9 + blr + +stopIgnoringHRMOR: + /* Now we are at linked address */ + slbia 7 + sync + isync + + /* + * When coming from SBE, L3 cache is invalid except for [2M, end of HBBL] + * range. Make the rest of it valid, or embrace the checkstops. 
+ */ + + /* Validate and initialize to zeroes [end of HBBL, 9.5M] range */ + LOAD_IMM32(%r5, _ebootblock) /* Assume it is at least 128B aligned */ + LOAD_IMM32(%r6, _epreram_cbfs_cache) /* Same */ addi %r6, %r6, -1 1: dcbz 0, %r5 addi %r5, %r5, 128 cmpld cr7, %r5, %r6 blt cr7, 1b -*/ + + /* Stack */ + LOAD_IMM32(%r1, _estack) /* This is tested by checkstack() just before jumping to payload */ LOAD_IMM64(%r3, 0xDEADBEEFDEADBEEF) - lis %r5, _stack@ha - addi %r5, %r5, _stack@l - subi %r5, %r5, 8 + LOAD_IMM32(%r5, _stack - 8) sub %r4, %r1, %r5 sradi %r4, %r4, 3 /* Divide by 8 */ mtctr %r4 @@ -87,11 +162,10 @@ _start: mfmsr %r3 ori %r3, %r3, 0x2000 /* FP = 1 */ oris %r3, %r3, 0x0280 /* VEC = 1, VSX = 1 */ - mtmsr %r3 + mtmsrd %r3 /* Load official procedure descriptor address for main() */ - lis %r12, main@ha - addi %r12, %r12, main@l + LOAD_IMM32(%r12, main) /* Load TOC pointer and jump to main() */ ld %r2, 8(%r12) diff --git a/src/mainboard/emulation/qemu-power8/memlayout.ld b/src/mainboard/emulation/qemu-power8/memlayout.ld index 66f2c7577b3..993ee5fadfe 100644 --- a/src/mainboard/emulation/qemu-power8/memlayout.ld +++ b/src/mainboard/emulation/qemu-power8/memlayout.ld @@ -10,9 +10,19 @@ SECTIONS DRAM_START(0x0) BOOTBLOCK(0x100, 64K) ROMSTAGE(0x20000, 128K) + +#if !RAMSTAGE STACK(0x40000, 0x3ff00) +#endif + PRERAM_CBMEM_CONSOLE(0x80000, 8K) FMAP_CACHE(0x82000, 2K) CBFS_MCACHE(0x82800, 8K) + +#if !ENV_RAMSTAGE RAMSTAGE(0x100000, 16M) +#else + STACK(0xf8040000, 0x3ff00) + RAMSTAGE(0xf8100000, 16M) +#endif } diff --git a/src/mainboard/emulation/qemu-power9/memlayout.ld b/src/mainboard/emulation/qemu-power9/memlayout.ld index 8209433020e..5aca5408b10 100644 --- a/src/mainboard/emulation/qemu-power9/memlayout.ld +++ b/src/mainboard/emulation/qemu-power9/memlayout.ld @@ -8,27 +8,70 @@ SECTIONS { DRAM_START(0x0) - BOOTBLOCK(0, 32K) + /* + * On POWER, 0 is wherever HRMOR points to rather than physical DRAM start. + * HRMOR is ORed with address, not added to it, meaning that memory space + * overlaps after 2^(least significant set bit of HRMOR). This becomes + * chaotic when nonconsecutive bits are set... + * + * Two and a half possible cases: + * 0. bootblock started with QEMU in hb-mode + * - NIA = 0x10 (bug?) + * - HRMOR = 0x08000000 (128M) + * - no physical memory to enable/train, everything accessible from start + * 1. bootblock loaded by HBBL + * - NIA = 0 + * - HRMOR = 0xF8000000 (4G - 128M) + * - initialized L3 = 0x400000 (4M) + * - top address before RAM = 0xF8400000 + * 2. bootblock in SEEPROM, loaded by SBE + * - NIA = 0x3000 (placeholder for int. vectors) + * - HRMOR = 0xF8200000 (4G - 128 M + 2 M) + * - initialized L3 = 0x8000 (bootblock/HBBL size = 32K) + * - no way 32K will be enough, must initialize more L3 in bootblock + * - HRMOR still applies, so memory overlaps every 2M + * + * Common subset (assuming 2. initializes as much memory as possible) is + * 0xF8200000-0xF8400000. 2M should be more than enough for pre-RAM code, + * but it isn't enough to load ramstage. We could implement postcar stage, + * but KISS: initialize L3 from _ebootblock to 0xF8980000: up to 9.5M into + * cache, leaving bottom 2M (0xF8000000-0xF8200000) either uninitialized + * (when started from SEEPROM) or just unused for anything but bootblock + * (loaded by HBBL). Last 0.5M of L3 cache is left for interrupt vectors + * normally located at address 0. + * + * Set HRMOR to 0 before jumping to C code in bootblock and forget it even + * exists. 
+ * + * For QEMU s/0xF8/0x08/ in above description but code remains the same. + * L3 initialization is unnecessary in this case but won't break anything. + * + * TODO: there is a structure with SBE->HBBL data at 0 in 2nd option. It + * holds some useful data like XSCOM BAR and LPC BAR. If, for any reason, + * these addresses are different than default, they should be used instead + * of predefined values. + */ - ROMSTAGE(0x1f00000, 1M) - -#if !ENV_RAMSTAGE - STACK(0x2000000, 32K) +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + BOOTBLOCK( 0x08000000, 32K) +#else + BOOTBLOCK( 0x08203000, 20K) #endif - FMAP_CACHE(0x2108000, 4K) - CBFS_MCACHE(0x2109000, 8K) - TIMESTAMP(0x210b000, 4K) - CBFS_CACHE(0x210c000, 512K) - PRERAM_CBMEM_CONSOLE(0x218c000, 128K) - - /* By default all memory addresses are affected by the value of HRMOR - * (Hypervisor Real Mode Offset Register) which is ORed to them. HRMOR - * has initial value of 0x8000000 in QEMU and is changed to 0 in - * ramstage. This means that before ramstage 0 actually points to - * 0x8000000. */ -#if ENV_RAMSTAGE - STACK(0xa000000, 32K) -#endif - RAMSTAGE(0xa008000, 1M) + STACK( 0x08208000, 32K) + PRERAM_CBMEM_CONSOLE(0x08210000, 128K) + FMAP_CACHE( 0x08230000, 4K) + CBFS_MCACHE( 0x08231000, 8K) + TIMESTAMP( 0x08233000, 4K) + + ROMSTAGE( 0x08240000, 256K) + + /* + * bootblock_crt0.S assumes this is the last part of L3, leaving for + * interrupt vectors at least 0.5M because of cache associativity. If + * more CBFS_CACHE is needed, split this into pre-/postram caches. + */ + CBFS_CACHE( 0x08280000, 7M) + + RAMSTAGE( 0x09000000, 2M) } diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index c3379bcb4ff..54c763131d0 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -2,6 +2,14 @@ if BOARD_RAPTOR_CS_TALOS_2 +config BOOTBLOCK_IN_SEEPROM + bool "Bootblock in SEEPROM" + default n + help + Enable this option if coreboot shall build image with separate + bootblock (i.e. not in coreboot.rom) to be put into SEEPROM + directly. 
+ config BOARD_SPECIFIC_OPTIONS def_bool y select CPU_POWER9 @@ -20,6 +28,12 @@ config MEMLAYOUT_LD_FILE string default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/memlayout.ld" +config FMDFILE + string + default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/board-bootblock-in-seeprom.fmd" if BOOTBLOCK_IN_SEEPROM + default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/board.fmd" if !BOOTBLOCK_IN_SEEPROM + + config MAINBOARD_DIR string default "raptor-cs/talos-2" diff --git a/src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd b/src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd new file mode 100644 index 00000000000..8884751120f --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd @@ -0,0 +1,15 @@ +# layout for firmware when flash address space matches used address layout +# +-------------+ <-- 0 +# | unspecified | +# +-------------+ <-- BIOS_BASE +# | FMAP | +# +-------------+ <-- BIOS_BASE + 128K + FMAP_SIZE +# | CBFS | +# +-------------+ <-- ROM_SIZE + +FLASH@0 CONFIG_ROM_SIZE { + BIOS@0 CONFIG_ROM_SIZE { + FMAP 0x200 + COREBOOT(CBFS) + } +} diff --git a/src/mainboard/raptor-cs/talos-2/board.fmd b/src/mainboard/raptor-cs/talos-2/board.fmd new file mode 100644 index 00000000000..462cdea7512 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/board.fmd @@ -0,0 +1,18 @@ +# layout for firmware when flash address space matches used address layout +# +-------------+ <-- 0 +# | unspecified | +# +-------------+ <-- BIOS_BASE +# | bootblock | +# +-------------+ <-- BIOS_BASE + 128K +# | FMAP | +# +-------------+ <-- BIOS_BASE + 128K + FMAP_SIZE +# | CBFS | +# +-------------+ <-- ROM_SIZE + +FLASH@0 CONFIG_ROM_SIZE { + BIOS@0 CONFIG_ROM_SIZE { + BOOTBLOCK 128K + FMAP 0x200 + COREBOOT(CBFS) + } +} diff --git a/src/mainboard/raptor-cs/talos-2/memlayout.ld b/src/mainboard/raptor-cs/talos-2/memlayout.ld index c5136b9d14e..c9e7c9502e9 100644 --- a/src/mainboard/raptor-cs/talos-2/memlayout.ld +++ b/src/mainboard/raptor-cs/talos-2/memlayout.ld @@ -4,15 +4,74 @@ #include -// TODO: fill in these blanks for Power9. SECTIONS { DRAM_START(0x0) - BOOTBLOCK(0, 64K) - ROMSTAGE(0x120000, 128K) - STACK(0x140000, 0x3ff00) - PRERAM_CBMEM_CONSOLE(0x180000, 8K) - FMAP_CACHE(0x182000, 2K) - CBFS_MCACHE(0x182800, 8K) - RAMSTAGE(0x200000, 16M) + + /* + * On POWER, 0 is wherever HRMOR points to rather than physical DRAM start. + * HRMOR is ORed with address, not added to it, meaning that memory space + * overlaps after 2^(least significant set bit of HRMOR). This becomes + * chaotic when nonconsecutive bits are set... + * + * Two and a half possible cases: + * 0. bootblock started with QEMU in hb-mode + * - NIA = 0x10 (bug?) + * - HRMOR = 0x08000000 (128M) + * - no physical memory to enable/train, everything accessible from start + * 1. bootblock loaded by HBBL + * - NIA = 0 + * - HRMOR = 0xF8000000 (4G - 128M) + * - initialized L3 = 0x400000 (4M) + * - top address before RAM = 0xF8400000 + * 2. bootblock in SEEPROM, loaded by SBE + * - NIA = 0x3000 (placeholder for int. vectors) + * - HRMOR = 0xF8200000 (4G - 128 M + 2 M) + * - initialized L3 = 0x8000 (bootblock/HBBL size = 32K) + * - no way 32K will be enough, must initialize more L3 in bootblock + * - HRMOR still applies, so memory overlaps every 2M + * + * Common subset (assuming 2. initializes as much memory as possible) is + * 0xF8200000-0xF8400000. 2M should be more than enough for pre-RAM code, + * but it isn't enough to load ramstage. 
We could implement postcar stage, + * but KISS: initialize L3 from _ebootblock to 0xF8980000: up to 9.5M into + * cache, leaving bottom 2M (0xF8000000-0xF8200000) either uninitialized + * (when started from SEEPROM) or just unused for anything but bootblock + * (loaded by HBBL). Last 0.5M of L3 cache is left for interrupt vectors + * normally located at address 0. + * + * Set HRMOR to 0 before jumping to C code in bootblock and forget it even + * exists. + * + * For QEMU s/0xF8/0x08/ in above description but code remains the same. + * L3 initialization is unnecessary in this case but won't break anything. + * + * TODO: there is a structure with SBE->HBBL data at 0 in 2nd option. It + * holds some useful data like XSCOM BAR and LPC BAR. If, for any reason, + * these addresses are different than default, they should be used instead + * of predefined values. + */ + +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + BOOTBLOCK( 0xF8000000, 32K) +#else + BOOTBLOCK( 0xF8203000, 20K) +#endif + + STACK( 0xF8208000, 32K) + PRERAM_CBMEM_CONSOLE(0xF8210000, 128K) + FMAP_CACHE( 0xF8230000, 4K) + CBFS_MCACHE( 0xF8231000, 8K) + TIMESTAMP( 0xF8233000, 4K) + + ROMSTAGE( 0xF8240000, 256K) + + /* + * bootblock_crt0.S assumes this is the last part of L3, leaving for + * interrupt vectors at least 0.5M because of cache associativity. If + * more CBFS_CACHE is needed, split this into pre-/postram caches. + */ + CBFS_CACHE( 0xF8280000, 7M) + + RAMSTAGE( 0xF9000000, 2M) } From 4af3227314029e7da32ebe42bb72858e34623971 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 9 Oct 2020 19:13:03 +0200 Subject: [PATCH 021/213] soc/power9/rom_media.c: find CBFS in PNOR Tested on QEMU with ECC. Use mmap_helper to handle loading of compressed ramstage. Bootblock fits in SEEPROM with both console and LZ4 romstage compression, but not with verbose CBFS debug messages. 
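For context on the ECC handling (a sketch of the layout assumed by
remove_ecc()/ecc_readat() below, not part of the change itself): an
ECC-protected PNOR partition stores one ECC byte after every eight data
bytes, so logical offsets and sizes scale by 9/8 when touching raw flash.
The ECC_RAW_OFFSET/ECC_DATA_SIZE names are made up here for illustration
only:

    /* 8 data bytes + 1 ECC byte per group, as stripped by remove_ecc(). */
    #define ECC_RAW_OFFSET(off)  (((off) * 9) / 8)  /* logical offset -> flash offset */
    #define ECC_DATA_SIZE(raw)   ((raw) / 9 * 8)    /* flash size -> usable payload size */

    /* Example: a 64 MiB ECC partition carries 64 MiB * 8/9, roughly 56.9 MiB,
     * of actual payload; ecc_readat() applies the same scaling per access. */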
Signed-off-by: Krystian Hebel Change-Id: I91c72c52849eb1e3fafe43390351537d04382e46 --- src/arch/ppc64/include/arch/io.h | 2 + src/soc/ibm/power9/Makefile.inc | 3 + src/soc/ibm/power9/rom_media.c | 451 +++++++++++++++++++++++++++++++ 3 files changed, 456 insertions(+) create mode 100644 src/soc/ibm/power9/rom_media.c diff --git a/src/arch/ppc64/include/arch/io.h b/src/arch/ppc64/include/arch/io.h index cfaae33f600..bc122f12018 100644 --- a/src/arch/ppc64/include/arch/io.h +++ b/src/arch/ppc64/include/arch/io.h @@ -8,7 +8,9 @@ /* Set MSB to 1 to ignore HRMOR */ #define MMIO_GROUP0_CHIP0_LPC_BASE_ADDR 0x8006030000000000 #define LPCHC_IO_SPACE 0xD0010000 +#define LPCHC_FW_SPACE 0xF0000000 #define FLASH_IO_SPACE 0xFC000000 +#define FW_SPACE_SIZE 0x10000000 #define LPC_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_IO_SPACE) #define FLASH_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + FLASH_IO_SPACE) #define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000 diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 0e12bc76ff0..0145762960c 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -3,8 +3,11 @@ ifeq ($(CONFIG_CPU_IBM_POWER9),y) bootblock-y += bootblock.c +bootblock-y += rom_media.c +romstage-y += rom_media.c romstage-y += romstage.c ramstage-y += chip.c +ramstage-y += rom_media.c ramstage-y += timer.c endif diff --git a/src/soc/ibm/power9/rom_media.c b/src/soc/ibm/power9/rom_media.c new file mode 100644 index 00000000000..dabc3a0a20a --- /dev/null +++ b/src/soc/ibm/power9/rom_media.c @@ -0,0 +1,451 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include "../../../../3rdparty/ffs/ffs/ffs.h" + +#define LPC_FLASH_MIN (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_FW_SPACE) +#define LPC_FLASH_TOP (LPC_FLASH_MIN + FW_SPACE_SIZE) + +#define CBFS_PARTITION_NAME "HBI" + +/* ffs_entry is not complete in included ffs.h, it lacks user data layout. + * See https://github.com/open-power/skiboot/blob/master/libflash/ffs.h */ + +/* Data integrity flags */ +#define FFS_ENRY_INTEG_ECC 0x8000 + +/* Version Checking : 1 byte */ +#define FFS_VERS_SHA512 0x80 + +enum ecc_status { + CLEAN=0, //< No ECC Error was detected. + CORRECTED=1, //< ECC error detected and corrected. + UNCORRECTABLE=2 //< ECC error detected and uncorrectable. +}; +typedef enum ecc_status ecc_status_t; + +enum ecc_bitfields { + GD = 0xff, //< Good, ECC matches. + UE = 0xfe, //< Uncorrectable. 
+ E0 = 71, //< Error in ECC bit 0 + E1 = 70, //< Error in ECC bit 1 + E2 = 69, //< Error in ECC bit 2 + E3 = 68, //< Error in ECC bit 3 + E4 = 67, //< Error in ECC bit 4 + E5 = 66, //< Error in ECC bit 5 + E6 = 65, //< Error in ECC bit 6 + E7 = 64 //< Error in ECC bit 7 +}; + +/* +static uint64_t ecc_matrix[] = { + //0000000000000000111010000100001000111100000011111001100111111111 + 0x0000e8423c0f99ff, + //0000000011101000010000100011110000001111100110011111111100000000 + 0x00e8423c0f99ff00, + //1110100001000010001111000000111110011001111111110000000000000000 + 0xe8423c0f99ff0000, + //0100001000111100000011111001100111111111000000000000000011101000 + 0x423c0f99ff0000e8, + //0011110000001111100110011111111100000000000000001110100001000010 + 0x3c0f99ff0000e842, + //0000111110011001111111110000000000000000111010000100001000111100 + 0x0f99ff0000e8423c, + //1001100111111111000000000000000011101000010000100011110000001111 + 0x99ff0000e8423c0f, + //1111111100000000000000001110100001000010001111000000111110011001 + 0xff0000e8423c0f99 +}; +*/ + +/* + * Compressed version of table above, saves 48 bytes. Rotating value in register + * results in exactly the same size as full table, due to cost of loading values + * into registers. + */ +static uint8_t ecc_matrix[] = { + 0x00, 0x00, 0xe8, 0x42, 0x3c, 0x0f, 0x99, 0xff, + 0x00, 0x00, 0xe8, 0x42, 0x3c, 0x0f, 0x99 +}; + +static uint8_t syndrome_matrix[] = { + GD, E7, E6, UE, E5, UE, UE, 47, E4, UE, UE, 37, UE, 35, 39, UE, + E3, UE, UE, 48, UE, 30, 29, UE, UE, 57, 27, UE, 31, UE, UE, UE, + E2, UE, UE, 17, UE, 18, 40, UE, UE, 58, 22, UE, 21, UE, UE, UE, + UE, 16, 49, UE, 19, UE, UE, UE, 23, UE, UE, UE, UE, 20, UE, UE, + E1, UE, UE, 51, UE, 46, 9, UE, UE, 34, 10, UE, 32, UE, UE, 36, + UE, 62, 50, UE, 14, UE, UE, UE, 13, UE, UE, UE, UE, UE, UE, UE, + UE, 61, 8, UE, 41, UE, UE, UE, 11, UE, UE, UE, UE, UE, UE, UE, + 15, UE, UE, UE, UE, UE, UE, UE, UE, UE, 12, UE, UE, UE, UE, UE, + E0, UE, UE, 55, UE, 45, 43, UE, UE, 56, 38, UE, 1, UE, UE, UE, + UE, 25, 26, UE, 2, UE, UE, UE, 24, UE, UE, UE, UE, UE, 28, UE, + UE, 59, 54, UE, 42, UE, UE, 44, 6, UE, UE, UE, UE, UE, UE, UE, + 5, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, + UE, 63, 53, UE, 0, UE, UE, UE, 33, UE, UE, UE, UE, UE, UE, UE, + 3, UE, UE, 52, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, + 7, UE, UE, UE, UE, UE, UE, UE, UE, 60, UE, UE, UE, UE, UE, UE, + UE, UE, UE, UE, 4, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, +}; + +static inline uint32_t rd32(void *addr) +{ + uint64_t ret; + + /* Cache-inhibited load word */ + asm volatile("lwzcix %0, 0, %1" + : "=r" (ret) + : "r" (addr) + : ); + + return ret; +} + +static uint64_t rd64_unaligned(void *addr, int first_read) +{ + static uint64_t tmp1; /* static is used to reduce number of PNOR reads */ + uint64_t tmp2; + uint64_t ret; + uint64_t addr_aligned = ALIGN_DOWN((uint64_t)addr, 8); + unsigned shift = 8 * ((uint64_t) addr - addr_aligned); + + if (shift == 0 /* Previous tmp2 ended with ECC byte */ + || first_read) { /* or it is the first invocation from remove_ecc */ + asm volatile("ldcix %0, 0, %1" + : "=r" (tmp1) + : "r" (addr_aligned) + : ); + } + + asm volatile("ldcix %0, 0, %1" + : "=r" (tmp2) + : "r" (addr_aligned+8) + : ); + + ret = (tmp1 << shift) | (tmp2 >> (64 - shift)); + tmp1 = tmp2; + + return ret; +} + +/* + * memcpy from cache-inhibited source + * + * Assume src is 8B-aligned and does not overlap with dest. Copies ALIGN(n,8) + * bytes, make sure dest is big enough. 
+ */ +static inline void memcpy_ci_src(void *dest, const void *src, size_t n) +{ + int i; + uint64_t tmp; + for (i = 0; i < n; i+=8) { + asm volatile("ldcix %0, %1, %2" + : "=r" (tmp) + : "b"(src), "r" (i)); + asm volatile("stdx %0, %1, %2" + :: "r" (tmp), "b"(dest), "r" (i) + : "memory"); + } +} + +static uint8_t generate_ecc(uint64_t i_data) +{ + uint8_t result = 0; + + for (int i = 0; i < 8; i++) { + result |= __builtin_parityll((*(uint64_t *)&ecc_matrix[i]) & i_data) << i; + } + return result; +} + +static uint8_t verify_ecc(uint64_t i_data, uint8_t i_ecc) +{ + return syndrome_matrix[generate_ecc(i_data) ^ i_ecc ]; +} + +static uint8_t correct_ecc(uint64_t *io_data, uint8_t *io_ecc) +{ + uint8_t bad_bit = verify_ecc(*io_data, *io_ecc); + + if ((bad_bit != GD) && (bad_bit != UE)) { /* Good is done, UE is hopeless */ + /* Determine if the ECC or data part is bad, do bit flip. */ + if (bad_bit >= E7) { + *io_ecc ^= (1 << (bad_bit - E7)); + } else { + *io_data ^=(1ull << (63 - bad_bit)); + } + } + return bad_bit; +} + +static ecc_status_t remove_ecc(uint8_t* io_src, size_t i_srcSz, + uint8_t* o_dst, size_t i_dstSz) +{ + ecc_status_t rc = CLEAN; + int first_read = 1; + + for(size_t i = 0, o = 0; i < i_srcSz; + i += sizeof(uint64_t) + sizeof(uint8_t), o += sizeof(uint64_t)) { + /* + * Read data and ECC parts. Reads from cache-inhibited storage always + * have to be aligned! + */ + uint64_t data = rd64_unaligned(&io_src[i], first_read); + first_read = 0; + + uint8_t ecc = io_src[i + sizeof(uint64_t)]; + + /* Calculate failing bit and fix data */ + uint8_t bad_bit = correct_ecc(&data, &ecc); + + /* Perform correction and status update */ + if (bad_bit == UE) + { + rc = UNCORRECTABLE; + } + /* Unused, our source is not writable */ + /* + else if (bad_bit != GD) + { + if (rc != UNCORRECTABLE) + { + rc = CORRECTED; + } + + *(uint64_t*)(&io_src[i]) = data; + io_src[i + sizeof(uint64_t)] = ecc; + } + */ + + /* Copy fixed data to destination buffer */ + *(uint64_t*)(&o_dst[o]) = data; + } + return rc; +} + +static char *pnor_base; + +/* + * PNOR has to be accessed with Cache Inhibited forms of instructions, and they + * require that the address is aligned, so we can just memcpy the data. 
+ */ +static ssize_t no_ecc_readat(const struct region_device *rd, void *b, + size_t offset, size_t size) +{ + uint8_t tmp[8]; + offset -= rd->region.offset; + size_t off_a = ALIGN_DOWN(offset, 8); + size_t size_left = size; + char *part_base = pnor_base + rd->region.offset; + + /* If offset is not 8B-aligned */ + if (offset & 0x7) { + int i; + memcpy_ci_src(tmp, &part_base[off_a], 8); + for (i = 8 - (offset & 7); i < 8; i++) { + *((uint8_t *)(b++)) = tmp[i]; + if (!--size_left) + return size; + } + off_a += 8; + } + + /* Align down size_left to 8B */ + memcpy_ci_src(b, &part_base[off_a], ALIGN_DOWN(size_left, 8)); + + /* Copy the rest of requested unaligned data, if any */ + if (size_left & 7) { + off_a += ALIGN_DOWN(size_left, 8); + b += ALIGN_DOWN(size_left, 8); + int i; + memcpy_ci_src(tmp, &part_base[off_a], 8); + for (i = 0; i < (size_left & 7); i++) { + *((uint8_t *)(b++)) = tmp[i]; + } + } + + return size; +} + +static ssize_t ecc_readat(const struct region_device *rd, void *b, + size_t offset, size_t size) +{ + uint8_t tmp[8]; + offset -= rd->region.offset; + size_t off_a = ALIGN_DOWN(offset, 8); + size_t size_left = size; + char *part_base = pnor_base + rd->region.offset; + + /* If offset is not 8B-aligned */ + if (offset & 0x7) { + int i; + remove_ecc((uint8_t *) &part_base[(off_a * 9)/8], 9, tmp, 8); + for (i = 8 - (offset & 7); i < 8; i++) { + *((uint8_t *)(b++)) = tmp[i]; + if (!--size_left) + return size; + } + off_a += 8; + } + + /* Align down size_left to 8B */ + remove_ecc((uint8_t *) &part_base[(off_a * 9)/8], + (ALIGN_DOWN(size_left, 8) * 9) / 8, + b, + ALIGN_DOWN(size_left, 8)); + + /* Copy the rest of requested unaligned data, if any */ + if (size_left & 7) { + off_a += ALIGN_DOWN(size_left, 8); + b += ALIGN_DOWN(size_left, 8); + int i; + remove_ecc((uint8_t *) &part_base[(off_a * 9)/8], 9, tmp, 8); + for (i = 0; i < (size_left & 7); i++) { + *((uint8_t *)(b++)) = tmp[i]; + } + } + + return size; +} + +struct region_device_ops no_ecc_rdev_ops = { + .mmap = mmap_helper_rdev_mmap, + .munmap = mmap_helper_rdev_munmap, + .readat = no_ecc_readat, +}; + +struct region_device_ops ecc_rdev_ops = { + .mmap = mmap_helper_rdev_mmap, + .munmap = mmap_helper_rdev_munmap, + .readat = ecc_readat, +}; + +static void mount_part_from_pnor(const char *part_name, + struct mmap_helper_region_device *mdev) +{ + size_t base, size; + unsigned int i, block_size, entry_count = 0; + struct ffs_hdr *hdr_pnor = (struct ffs_hdr *)LPC_FLASH_TOP; + + /* This loop could be skipped if we may assume that PNOR is always 64M */ + while (hdr_pnor > (struct ffs_hdr *)LPC_FLASH_MIN) { + uint32_t csum = 0; + /* Size is aligned up to 8 because of how memcpy_ci_src works */ + uint8_t buffer[ALIGN(FFS_HDR_SIZE, 8)]; + struct ffs_hdr *hdr = (struct ffs_hdr *)buffer; + + /* Assume block_size = 4K */ + hdr_pnor = (struct ffs_hdr *)(((char *)hdr_pnor) - 0x1000); + + if (FFS_MAGIC != rd32(&hdr_pnor->magic)) + continue; + + if (FFS_VERSION_1 != rd32(&hdr_pnor->version)) + continue; + + /* Copy the header so we won't have to rd32() for further accesses */ + memcpy_ci_src(buffer, hdr_pnor, FFS_HDR_SIZE); + csum = hdr->magic ^ hdr->version ^ hdr->size ^ hdr->entry_size ^ + hdr->entry_count ^ hdr->block_size ^ hdr->block_count ^ + hdr->resvd[0] ^ hdr->resvd[1] ^ hdr->resvd[2] ^ hdr->resvd[3] ^ + hdr->checksum; + if (csum != 0) continue; + + pnor_base = (char *) LPC_FLASH_TOP - hdr->block_size * hdr->block_count; + entry_count = hdr->entry_count; + block_size = hdr->block_size; + + /* Every byte counts when building for 
SEEPROM */ +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + printk(BIOS_DEBUG, "FFS header at %p\n", hdr_pnor); + printk(BIOS_SPEW, " size %x\n", hdr->size); + printk(BIOS_SPEW, " entry_size %x\n", hdr->entry_size); + printk(BIOS_SPEW, " entry_count %x\n", hdr->entry_count); + printk(BIOS_SPEW, " block_size %x\n", hdr->block_size); + printk(BIOS_SPEW, " block_count %x\n", hdr->block_count); + printk(BIOS_DEBUG, "PNOR base at %p\n", pnor_base); +#endif + + break; + } + + if (hdr_pnor <= (struct ffs_hdr *)LPC_FLASH_MIN) + die("FFS header not found!\n"); + + for (i = 0; i < entry_count; i++) { + uint32_t *val, csum = 0; + int j; + /* Size is aligned up to 8 because of how memcpy_ci_src works */ + uint8_t buffer[ALIGN(FFS_ENTRY_SIZE, 8)]; + struct ffs_entry *e = (struct ffs_entry *)buffer; + + /* Copy the entry so we won't have to rd32() for further accesses */ + memcpy_ci_src(buffer, &hdr_pnor->entries[i], FFS_ENTRY_SIZE); + + /* Every byte counts when building for SEEPROM */ +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + printk(BIOS_SPEW, "%s: base %x, size %x (%x)\n\t type %x, flags %x\n", + e->name, e->base, e->size, e->actual, e->type, e->flags); +#endif + + if (strcmp(e->name, part_name) != 0) + continue; + + val = (uint32_t *) e; + for (j = 0; j < (FFS_ENTRY_SIZE / sizeof(uint32_t)); j++) + csum ^= val[j]; + + if (csum != 0) + continue; + + base = block_size * e->base; + /* This is size of the partition, it does not include header or ECC */ + size = e->actual; + + mdev->rdev.ops = &no_ecc_rdev_ops; + + if (e->user.data[0] & FFS_ENRY_INTEG_ECC) { + printk(BIOS_DEBUG, "%s partition has ECC\n", part_name); + mdev->rdev.ops = &ecc_rdev_ops; + size = size / 9 * 8; + } + + if ((e->user.data[1] >> 24) & FFS_VERS_SHA512) { + /* Skip PNOR partition header */ + base += 0x1000; + + /* Possibly skip ECC of the header */ + if (e->user.data[0] & FFS_ENRY_INTEG_ECC) + base += 0x200; + } + + mdev->rdev.region.offset = base; + mdev->rdev.region.size = size; + + break; + } +} + +static struct mmap_helper_region_device boot_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void boot_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(CBFS_PARTITION_NAME, &boot_mdev); + + init_done = 1; +} + +const struct region_device *boot_device_ro(void) +{ + return &boot_mdev.rdev; +} From 47386865da47175dac4696b989ce8005809c1ce6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20=C5=BBygowski?= Date: Mon, 12 Oct 2020 17:06:24 +0200 Subject: [PATCH 022/213] configs/config.raptor-cs-talos-2: add config for op-build Signed-off-by: Igor Bagnucki Change-Id: I49ad6094acaccf731ab6d6b45ac103d485a3179c --- configs/config.raptor-cs-talos-2 | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 configs/config.raptor-cs-talos-2 diff --git a/configs/config.raptor-cs-talos-2 b/configs/config.raptor-cs-talos-2 new file mode 100644 index 00000000000..800300f3869 --- /dev/null +++ b/configs/config.raptor-cs-talos-2 @@ -0,0 +1,2 @@ +CONFIG_VENDOR_RAPTOR_CS=y +CONFIG_PAYLOAD_SKIBOOT=y From 204bec78cd6e27f0c4030d549bed3dabcb8b56a1 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 11 Apr 2021 20:55:25 +0300 Subject: [PATCH 023/213] soc/power9/rom_media.c: add RO region device for MEMD partition Signed-off-by: Sergii Dmytruk Change-Id: I2c7f43adc04937cb1d041c24073ecb70298cae82 --- src/include/cpu/power/memd.h | 14 ++++++++++++++ src/soc/ibm/power9/rom_media.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 
src/include/cpu/power/memd.h diff --git a/src/include/cpu/power/memd.h b/src/include/cpu/power/memd.h new file mode 100644 index 00000000000..9c26b4bc5f6 --- /dev/null +++ b/src/include/cpu/power/memd.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_MEMD_H +#define CPU_PPC64_MEMD_H + +struct region_device; + +void memd_device_init(void); + +void memd_device_unmount(void); + +const struct region_device *memd_device_ro(void); + +#endif /* CPU_PPC64_MEMD_H */ diff --git a/src/soc/ibm/power9/rom_media.c b/src/soc/ibm/power9/rom_media.c index dabc3a0a20a..41c110f5deb 100644 --- a/src/soc/ibm/power9/rom_media.c +++ b/src/soc/ibm/power9/rom_media.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,8 @@ #define CBFS_PARTITION_NAME "HBI" +#define MEMD_PARTITION_NAME "MEMD" + /* ffs_entry is not complete in included ffs.h, it lacks user data layout. * See https://github.com/open-power/skiboot/blob/master/libflash/ffs.h */ @@ -313,6 +316,8 @@ static ssize_t ecc_readat(const struct region_device *rd, void *b, return size; } +struct region_device_ops no_rdev_ops = {}; + struct region_device_ops no_ecc_rdev_ops = { .mmap = mmap_helper_rdev_mmap, .munmap = mmap_helper_rdev_munmap, @@ -431,6 +436,30 @@ static void mount_part_from_pnor(const char *part_name, } } +static struct mmap_helper_region_device memd_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void memd_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(MEMD_PARTITION_NAME, &memd_mdev); + + init_done = 1; +} + +void memd_device_unmount(void) +{ + memd_mdev.rdev.ops = &no_rdev_ops; +} + +const struct region_device *memd_device_ro(void) +{ + return &memd_mdev.rdev; +} + static struct mmap_helper_region_device boot_mdev = MMAP_HELPER_DEV_INIT( &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); From e2e4a4357a6801bcee1624429c41bd7f92b1caf7 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 11 Apr 2021 20:56:56 +0300 Subject: [PATCH 024/213] soc/power9/: parse VPD data for memory attributes in romstage romstage needs to know values of attributes that are required for configuring RAM. These values are stored inside MEMD partition of PNOR in binary form that's not readily usable (need to do multiple look ups followed by data extraction). 
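As a rough outline (the helper below is illustrative only and mirrors,
rather than replaces, the mapping_lookup() added in vpd.c), the lookup
chain is: find the MR/MT mapping keyword in the MEMD record, match its
entries against the MCS index, DIMM rank configuration and frequency,
then fetch the J#/X# keyword named by the matching entry and copy its
fixed-offset fields into the ATTR_MSS_VPD_* variables:

    #include <stdint.h>

    /* Sketch of the per-entry match done by mapping_lookup(); the entry layout
     * (2B MCS mask, 2B rank mask, 1B frequency mask, 1B keyword suffix) follows
     * the comment block in vpd.c. */
    static int vpd_entry_matches(const uint8_t *entry, int mcs_i,
                                 int dimm0_rank, int dimm1_rank, uint8_t freq_mask)
    {
            uint16_t mcs_mask  = 0x8000 >> mcs_i;                         /* MCS #0 -> 0x8000 */
            uint16_t rank_mask = 0x8000 >> (dimm0_rank * 4 + dimm1_rank);
            uint16_t mcs_val   = (entry[0] << 8) | entry[1];
            uint16_t rank_val  = (entry[2] << 8) | entry[3];

            return entry[5] != 0x00 &&                  /* 0x00 marks the end of the table */
                   (mcs_val & mcs_mask) == mcs_mask &&
                   (rank_val & rank_mask) == rank_mask &&
                   (entry[4] & freq_mask) == freq_mask;
    }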
Signed-off-by: Sergii Dmytruk Change-Id: I6b744cd66384074a12ff764e9e3a990c3c2e8426 --- src/include/cpu/power/vpd.h | 12 + src/include/cpu/power/vpd_data.h | 149 ++++++++ src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/romstage.c | 4 + src/soc/ibm/power9/vpd.c | 628 +++++++++++++++++++++++++++++++ 5 files changed, 794 insertions(+) create mode 100644 src/include/cpu/power/vpd.h create mode 100644 src/include/cpu/power/vpd_data.h create mode 100644 src/soc/ibm/power9/vpd.c diff --git a/src/include/cpu/power/vpd.h b/src/include/cpu/power/vpd.h new file mode 100644 index 00000000000..d25e2c62dd1 --- /dev/null +++ b/src/include/cpu/power/vpd.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_VPD_H +#define CPU_PPC64_VPD_H + +#define VPD_RECORD_NAME_LEN 4 +#define VPD_RECORD_SIZE_LEN 2 +#define VPD_KWD_NAME_LEN 2 + +void vpd_pnor_main(void); + +#endif /* CPU_PPC64_VPD_H */ diff --git a/src/include/cpu/power/vpd_data.h b/src/include/cpu/power/vpd_data.h new file mode 100644 index 00000000000..20fd8f3bb47 --- /dev/null +++ b/src/include/cpu/power/vpd_data.h @@ -0,0 +1,149 @@ +#ifndef CPU_PPC64_VPD_DATA_H +#define CPU_PPC64_VPD_DATA_H + +/* Memory rotator data */ + +/* FIXME: these can be updated by MVPD in istep 7.5. Values below (from MEMD) + * are different than in documentation. */ +extern uint8_t ATTR_MSS_VPD_MR_TSYS_ADR[4]; +extern uint8_t ATTR_MSS_VPD_MR_TSYS_DATA[4]; + +/* This data is the same for all configurations */ +extern uint8_t ATTR_MSS_VPD_MR_DPHY_GPO; +extern uint8_t ATTR_MSS_VPD_MR_DPHY_RLO; +extern uint8_t ATTR_MSS_VPD_MR_DPHY_WLO; +extern uint8_t ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET; + +/* + * 43 tables for 43 signals. These probably are platform specific so in the + * final version we should read this from VPD partition. Hardcoding it will make + * one less possible fault point. + * + * Also, VPD layout may change. Right npw Talos uses first version of layout, + * but there is a newer version with one additional field __in the middle__ of + * the structure. + * + * Order: + * - J0 - PROC 0 MCS 0, 1 DIMM, 1866 MT/s + * - J1 - PROC 0 MCS 1, 1 DIMM, 1866 MT/s + * - J2 - PROC 1 MCS 0, 1 DIMM, 1866 MT/s + * - J3 - PROC 1 MCS 1, 1 DIMM, 1866 MT/s + * - J4 - PROC 0 MCS 0, 2 DIMMs, 1866 MT/s + * - J5 + * - J6 + * - J7 - PROC 1 MCS 1, 2 DIMMs, 1866 MT/s + * - J8 - PROC 0 MCS 0, 1 DIMM, 2133 MT/s + * - J9 + * - JA + * - JB - PROC 1 MCS 1, 1 DIMM, 2133 MT/s + * - JC - PROC 0 MCS 0, 2 DIMMs, 2133 MT/s + * - JD + * - JE + * - JF - PROC 1 MCS 1, 2 DIMMs, 2133 MT/s + * - JG - PROC 0 MCS 0, 1 DIMM, 2400 MT/s + * - JH + * - JI + * - JJ - PROC 1 MCS 1, 1 DIMM, 2400 MT/s + * - JK - PROC 0 MCS 0, 2 DIMMs, 2400 MT/s + * - JL + * - JM + * - JN - PROC 1 MCS 1, 2 DIMMs, 2400 MT/s + * - JO - PROC 0 MCS 0, 1 DIMM, 2666 MT/s + * - JP + * - JQ + * - JR - PROC 1 MCS 1, 1 DIMM, 2666 MT/s + * + * 2 DIMMs, 2666 MT/s is not supported (this is ensured by prepare_dimm_data()). 
+ */ +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[28][MCA_PER_MCS]; + +/* End of rotator data */ + +/* Memory terminator data */ + +/* + * VPD has per rank settings, but both ranks (if present) are the same. 
Order: + * - 1R in DIMM0 and no DIMM1 + * - 1R in both DIMMs + * - 2R in DIMM0 and no DIMM1 + * - 2R in both DIMMs + */ +extern uint32_t ATTR_MSS_VPD_MT_VREF_MC_RD[4]; +extern uint8_t ATTR_MSS_VPD_MT_VREF_DRAM_WR[4]; +extern uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_PARK[4]; +extern uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_WR[4]; +extern uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_NOM[4]; + +/* + * Warning: this is not a 1:1 copy from VPD. + * + * VPD uses uint8_t [2][2][4] table, indexed as [MCA][DIMM][RANK]. It tries to + * be generic, but for RDIMMs only 2 ranks are supported. This format also + * allows for different settings across MCAs, but in Talos they are identical. + * + * Tables below are uint8_t [4][2][2], indexed as [rank config.][DIMM][RANK]. + * + * There are 4 rank configurations, see comments in ATTR_MSS_VPD_MT_VREF_MC_RD. + */ +extern uint8_t ATTR_MSS_VPD_MT_ODT_RD[4][2][2]; +extern uint8_t ATTR_MSS_VPD_MT_ODT_WR[4][2][2]; + +/* This data is the same for all configurations */ +extern uint8_t ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS; +extern uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP; +extern uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN; +extern uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP; +extern uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP; +extern uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS; +extern uint8_t ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS; +extern uint8_t ATTR_MSS_VPD_MT_PREAMBLE; +extern uint16_t ATTR_MSS_VPD_MT_WINDAGE_RD_CTR; + +#endif /* CPU_PPC64_VPD_DATA_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 0145762960c..71f330aef28 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -6,6 +6,7 @@ bootblock-y += bootblock.c bootblock-y += rom_media.c romstage-y += rom_media.c romstage-y += romstage.c +romstage-y += vpd.c ramstage-y += chip.c ramstage-y += rom_media.c ramstage-y += timer.c diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index c4123152554..5738002cb83 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -1,10 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include void main(void) { console_init(); + + vpd_pnor_main(); + run_ramstage(); } diff --git a/src/soc/ibm/power9/vpd.c b/src/soc/ibm/power9/vpd.c new file mode 100644 index 00000000000..ecd687cbe59 --- /dev/null +++ b/src/soc/ibm/power9/vpd.c @@ -0,0 +1,628 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../../../../3rdparty/ffs/ffs/ffs.h" + +/* Properly rounded size of MEMD header */ +#define MEMD_HDR_SIZE ALIGN(sizeof(struct memd_hdr), 16) +/* Divisor used for section size in MEMD header */ +#define MEMD_SECTION_ROUNDING_DIVISOR 1000 + +/* Supported mapping layout version */ +#define VPD_MAPPING_VERSION 1 +/* Size of entries in MR and MT mappings */ +#define VPD_MAPPING_ENTRY_SIZE 6 + +/* + * Structure of nesting: + * - MEMD + * - VPD blob + * - VPD keyword (VPD name, keyword mapping, attributes, something else) + * + * Either part of VDP record or of MEMD header (depending on the source): + * - 11 bytes -- ECC (unimplemented, should be ignored) + * - 0x84 byte -- resource type (this byte is missing from PNOR image) (opt) + * + * VPD Record 
(this is a part of binary VPD which is stored in .rvpd-files): + * - 2 bytes -- size of the record's data in LE (>= 40) + * - RT keyword -- always the first record with 4 bytes of data + * - other keywords (as many as data size allows) + * - PF keyword -- padding, always present + * - 0x78 byte -- closing resource type + * + * Keyword: + * - 2 bytes -- keyword name + * - 1 or 2 bytes -- keyword's data size (little endian) + * (2 bytes if first char of keyword name is #) + * - N bytes -- N == data size + * + * Minimal record size is 40 bytes. If it exceeds that, padding aligned on word + * boundary (where a word is 4 bytes long). + * + * Format of MR, MT, Q0 and CK keywords that provide mapping: + * - Header: + * - 1 byte -- version + * - 1 byte -- entry count + * - 1 byte -- entry size in bytes (only for Q0 and CK keywords) + * - 1 byte -- reserved + * - Entry (0xff value in first 5 fields means "matches everything"): + * - 1 byte -- mcs mask (high byte) + * - 1 byte -- mcs mask (low byte) + * - 1 byte -- rank mask (high byte) + * - 1 byte -- rank mask (low byte) + * - 1 byte -- frequency mask + * 0x80 - 1866 + * 0x40 - 2133 + * 0x20 - 2400 + * 0x10 - 2666 + * - 1 byte -- kw ([0-9A-Z]) + * 0x00 for row after last + * 0xff for unsupported configuration + * + * Glossary: + * - MR keyword -- the mapping + * - MR configuration -- one of J#, X#, etc. + * + * See the following sources in talos-hostboot: + * - src/import/chips/p9/procedures/hwp/accessors/p9_get_mem_vpd_keyword.C + * - src/import/chips/p9/procedures/hwp/accessors/p9_get_mem_vpd_keyword.H + * - src/import/chips/p9/procedures/hwp/memory/lib/dimm/eff_dimm.C + * - src/import/chips/p9/procedures/hwp/memory/lib/mss_vpd_decoder.H + * - src/usr/fapi2/test/getVpdTest.C + */ + +/* Size of this structure should be rounded to 16 bytes */ +struct memd_hdr { + char eyecatch[4]; // Magic number to determine validity "OKOK" + char header_version[4]; // Version of this header + char memd_version[4]; // Version of the MEMD payload + uint32_t section_size; // / 1000 + 1 + uint16_t section_count; // Number of MEMD instances + char reserved[8]; // Reserved bytes +} __attribute__((packed)); + +/* Combines pointer to VPD area with configuration information */ +struct vpd_info { + const uint8_t *data; // VPD area pointer + int mcs_i; // MCS position (spans CPUs) + int freq; // Frequency in MHz + int dimm0_rank; + int dimm1_rank; +}; + +/* Memory terminator data */ + +uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_NOM[4]; +uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_PARK[4]; +uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_WR[4]; + +uint8_t ATTR_MSS_VPD_MT_ODT_RD[4][2][2]; +uint8_t ATTR_MSS_VPD_MT_ODT_WR[4][2][2]; + +uint8_t ATTR_MSS_VPD_MT_VREF_DRAM_WR[4]; +uint32_t ATTR_MSS_VPD_MT_VREF_MC_RD[4]; + +uint8_t ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS; +uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP; +uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN; +uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP; +uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP; +uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS; +uint8_t ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS; +uint8_t ATTR_MSS_VPD_MT_PREAMBLE; +uint16_t ATTR_MSS_VPD_MT_WINDAGE_RD_CTR; + +/* End of terminator data */ + +/* Memory rotator data */ + +uint8_t ATTR_MSS_VPD_MR_DPHY_GPO; +uint8_t ATTR_MSS_VPD_MR_DPHY_RLO; +uint8_t ATTR_MSS_VPD_MR_DPHY_WLO; +uint8_t 
ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[28][2]; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[28][2]; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[28][2]; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[28][2]; + +uint8_t ATTR_MSS_VPD_MR_TSYS_ADR[4]; +uint8_t ATTR_MSS_VPD_MR_TSYS_DATA[4]; + +/* End of rotator data */ + +/* Looks up an entry matching specified configuration in an MT or MR mapping. + Returns a character or '\0' on lookup failure. */ +static char mapping_lookup(const struct vpd_info *vpd, const uint8_t *mapping, size_t size) +{ + int i = 0; + int entry_count = 0; + int offset = 0; + uint16_t mcs_mask = 0; + uint16_t freq_mask = 0; + uint16_t rank_mask = 0; + + /* Mapping header size */ + if (size < 3) + die("Mapping is too small!\n"); + offset = 3; + + if (mapping[0] != VPD_MAPPING_VERSION) + die("Unsupported mapping version!\n"); + + /* 0x8000 is a mask for MCS #0 */ + assert(vpd->mcs_i >= 0 && vpd->mcs_i <= 15); + mcs_mask = 0x8000 >> vpd->mcs_i; + + /* (0, 0) -> 0x8000; (0, 1) -> 0x4000; ...; (1, 0) -> 0x0800; ... 
*/ + assert(vpd->dimm0_rank >= 0 && vpd->dimm0_rank <= 3); + assert(vpd->dimm1_rank >= 0 && vpd->dimm1_rank <= 3); + rank_mask = 0x8000 >> (vpd->dimm0_rank*4 + vpd->dimm1_rank); + + switch (vpd->freq) { + case 1866: + freq_mask = 0x80; + break; + case 2133: + freq_mask = 0x40; + break; + case 2400: + freq_mask = 0x20; + break; + case 2666: + freq_mask = 0x10; + break; + default: + die("Unhandled frequency value: %d\n", vpd->freq); + break; + } + + entry_count = mapping[1]; + for (i = 0; i < entry_count; ++i, offset += VPD_MAPPING_ENTRY_SIZE) { + const uint16_t mcs_mask_value = + (mapping[offset + 0] << 8) | mapping[offset + 1]; + const uint16_t rank_mask_value = + (mapping[offset + 2] << 8) | mapping[offset + 3]; + + /* Data ended sooner than expected */ + if (mapping[offset + 5] == 0x00) + continue; + + if ((mcs_mask_value & mcs_mask) != mcs_mask) + continue; + if ((rank_mask_value & rank_mask) != rank_mask) + continue; + if ((mapping[offset + 4] & freq_mask) != freq_mask) + continue; + + return mapping[offset + 5]; + } + + return '\0'; +} + +/* Finds a keyword by its name. Retrieves its size too. Returns NULL on + * failure. */ +static const uint8_t *find_vpd_kwd(const struct vpd_info *vpd, const char *name, + size_t *size) +{ + const uint8_t *data = vpd->data; + + size_t offset = 0; + uint16_t record_size = 0; + + if (strlen(name) != VPD_KWD_NAME_LEN) + die("Keyword name has wrong length!\n"); + + memcpy(&record_size, &data[offset], sizeof(record_size)); + offset += VPD_RECORD_SIZE_LEN; + record_size = le16toh(record_size); + + /* Skip mandatory "RT" and one byte of record size (always 4) */ + offset += VPD_KWD_NAME_LEN + 1; + + if (memcmp(&data[offset], "MEMD", VPD_RECORD_NAME_LEN)) + die("Failed to find MEMD record!\n"); + offset += VPD_RECORD_NAME_LEN; + + while (offset < record_size) { + uint16_t kwd_size = 0; + bool match = false; + const int two_byte_size = (data[offset] == '#'); + + /* This is always the last keyword */ + if (!memcmp(&data[offset], "PF", VPD_KWD_NAME_LEN)) + break; + + match = !memcmp(&data[offset], name, VPD_KWD_NAME_LEN); + + offset += VPD_KWD_NAME_LEN; + + if (two_byte_size) { + memcpy(&kwd_size, &data[offset], sizeof(kwd_size)); + kwd_size = le16toh(kwd_size); + offset += 2; + } else { + kwd_size = data[offset]; + offset += 1; + } + + if (match) { + *size = kwd_size; + return &data[offset]; + } + + offset += kwd_size; + } + + return NULL; +} + +/* Looks up configuration in specified mapping and loads it or dies */ +static const uint8_t *find_vpd_conf(const struct vpd_info *vpd, const char *mapping_name, + size_t *size) +{ + const uint8_t *mapping = NULL; + const uint8_t *conf = NULL; + size_t kwd_size = 0; + + char conf_name[3] = {}; + + if (!strcmp(mapping_name, "MR")) + conf_name[0] = 'J'; + else if (!strcmp(mapping_name, "MT")) + conf_name[0] = 'X'; + else + die("Unsupported %s mapping type\n", mapping_name); + + mapping = find_vpd_kwd(vpd, mapping_name, &kwd_size); + if (!mapping) + die("VPD is missing %s keyword!\n", mapping_name); + + conf_name[1] = mapping_lookup(vpd, mapping, kwd_size); + if (!conf_name[1]) + die("Failed to find matching %s configuration!\n", mapping_name); + + conf = find_vpd_kwd(vpd, conf_name, &kwd_size); + if (!conf) + die("Failed to read %s configuration!\n", mapping_name); + + *size = kwd_size; + return conf; +} + +static void load_mt_attrs(const uint8_t *mt_conf, size_t size, int vpd_idx) +{ + uint8_t version_layout; + uint8_t version_data; + + if (size < 2) + die("MT configuration is way too small!\n"); + + version_layout = 
mt_conf[0]; + version_data = mt_conf[1]; + + if (version_layout > 1) + die("Unsupported layout of MT configuration!\n"); + + if (size < 218) + die("MT configuration is smaller than expected!\n"); + + ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx] = mt_conf[38]; + ATTR_MSS_VPD_MT_DRAM_RTT_PARK[vpd_idx] = mt_conf[54]; + ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx] = mt_conf[70]; + + switch (version_layout) { + case 0: + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0], &mt_conf[170], + 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0], &mt_conf[174], + 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0], &mt_conf[186], + 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0], &mt_conf[190], + 2); + ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] = mt_conf[204]; + memcpy(&ATTR_MSS_VPD_MT_VREF_MC_RD[vpd_idx], &mt_conf[206], 4); + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) { + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP, &mt_conf[86], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN, &mt_conf[94], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP, &mt_conf[102], 4); + + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP, &mt_conf[110], 8); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES, &mt_conf[126], 8); + + ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK = mt_conf[142]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR = mt_conf[144]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL = mt_conf[146]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID = mt_conf[148]; + + ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS = mt_conf[150]; + ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS = mt_conf[160]; + + memcpy(&ATTR_MSS_VPD_MT_PREAMBLE, &mt_conf[202], 2); + + memcpy(&ATTR_MSS_VPD_MT_WINDAGE_RD_CTR, &mt_conf[214], 2); + } + break; + case 1: + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0], &mt_conf[172], 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0], &mt_conf[176], 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0], &mt_conf[188], 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0], &mt_conf[192], 2); + ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] = mt_conf[206]; + memcpy(&ATTR_MSS_VPD_MT_VREF_MC_RD[vpd_idx], &mt_conf[208], 4); + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) { + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP, &mt_conf[88], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN, &mt_conf[96], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP, &mt_conf[104], 4); + + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP, &mt_conf[112], 8); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES, &mt_conf[128], 8); + + ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK = mt_conf[144]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR = mt_conf[146]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL = mt_conf[148]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID = mt_conf[150]; + + ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS = mt_conf[152]; + ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS = mt_conf[162]; + + ATTR_MSS_VPD_MT_PREAMBLE = mt_conf[204]; + + memcpy(&ATTR_MSS_VPD_MT_WINDAGE_RD_CTR, &mt_conf[216], 2); + } + break; + } + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) + ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS = mt_conf[22]; +} + +static void load_mt(const uint8_t *vpd_data) +{ + int vpd_idx = 0; + + /* Assuming that data differs only per DIMM pairs */ + for (vpd_idx = 0; vpd_idx < 4; ++vpd_idx) { + const int dimm0_rank = 1 + vpd_idx / 2; + const int dimm1_rank = (vpd_idx % 2 ? 
dimm0_rank : 0); + + struct vpd_info vpd = { + .data = vpd_data, + .mcs_i = 0, + .freq = 1866, + .dimm0_rank = dimm0_rank, + .dimm1_rank = dimm1_rank, + }; + + const uint8_t *mt_conf = NULL; + size_t size = 0; + + mt_conf = find_vpd_conf(&vpd, "MT", &size); + if (!mt_conf) + die("Failed to read MT configuration!\n"); + + load_mt_attrs(mt_conf, size, vpd_idx); + } +} + +static void load_mr_attrs(const uint8_t *mr_conf, size_t size, int vpd_idx) +{ + uint8_t version_layout; + uint8_t version_data; + + if (size < 2) + die("MR configuration is way too small!\n"); + + version_layout = mr_conf[0]; + version_data = mr_conf[1]; + + if (version_layout != 0) + die("Unsupported layout of MR configuration!\n"); + + if (size < 101) + die("MR configuration is smaller than expected!\n"); + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) { + ATTR_MSS_VPD_MR_DPHY_GPO = mr_conf[6]; + ATTR_MSS_VPD_MR_DPHY_RLO = mr_conf[8]; + ATTR_MSS_VPD_MR_DPHY_WLO = mr_conf[10]; + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN, &mr_conf[58], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP, &mr_conf[56], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN, &mr_conf[62], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP, &mr_conf[60], 2); + + ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET = mr_conf[98]; + } + + /* The following data changes per frequency */ + if (vpd_idx % 8 == 0) { + const int freq_i = vpd_idx / 8; + ATTR_MSS_VPD_MR_TSYS_ADR[freq_i] = mr_conf[99]; + ATTR_MSS_VPD_MR_TSYS_DATA[freq_i] = mr_conf[100]; + } + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx], &mr_conf[12], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx], &mr_conf[14], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx], &mr_conf[16], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx], &mr_conf[18], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx], &mr_conf[20], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx], &mr_conf[22], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx], &mr_conf[24], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx], &mr_conf[26], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx], &mr_conf[28], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx], &mr_conf[30], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx], &mr_conf[32], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx], &mr_conf[34], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx], &mr_conf[36], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx], &mr_conf[38], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx], &mr_conf[40], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx], &mr_conf[42], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx], &mr_conf[44], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx], &mr_conf[46], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx], &mr_conf[48], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx], &mr_conf[50], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx], &mr_conf[52], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx], &mr_conf[54], 2); + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx], &mr_conf[64], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx], &mr_conf[66], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx], &mr_conf[68], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx], &mr_conf[70], 2); + 
memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx], &mr_conf[72], 2); + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx], &mr_conf[74], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx], &mr_conf[76], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx], &mr_conf[82], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx], &mr_conf[84], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx], &mr_conf[90], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx], &mr_conf[92], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx], &mr_conf[78], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx], &mr_conf[80], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx], &mr_conf[86], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx], &mr_conf[88], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx], &mr_conf[94], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx], &mr_conf[96], 2); +} + +static void load_mr(const uint8_t *vpd_data) +{ + const int freqs[] = { 1866, 2133, 2400, 2666 }; + int vpd_idx = 0; + + /* Index matches indexing of ATTR_MSS_VPD_MR_MC_PHASE_ROT_* data */ + for (vpd_idx = 0; vpd_idx < 28; ++vpd_idx) { + const int freq = freqs[vpd_idx / 8]; + const int mcs_i = vpd_idx % 4; + const int dimm1_rank = (vpd_idx % 8 >= 4 ? 1 : 0); + + struct vpd_info v = { + .data = vpd_data, + .mcs_i = mcs_i, + .freq = freq, + /* Configurations differ only per DIMM presence */ + .dimm0_rank = 1, + .dimm1_rank = dimm1_rank, + }; + + const uint8_t *mr_conf = NULL; + size_t size = 0; + + mr_conf = find_vpd_conf(&v, "MR", &size); + if (!mr_conf) + die("Failed to read MT configuration!\n"); + + load_mr_attrs(mr_conf, size, vpd_idx); + } +} + +static void load_vpd_attrs(const uint8_t *vpd_data) +{ + load_mr(vpd_data); + load_mt(vpd_data); +} + +void vpd_pnor_main(void) +{ + const struct region_device *memd_device = NULL; + + uint8_t buf[MEMD_HDR_SIZE]; + struct memd_hdr *hdr_memd = (struct memd_hdr *)buf; + + const uint8_t *vpd_data = NULL; + size_t vpd_size = 0; + + memd_device_init(); + memd_device = memd_device_ro(); + + /* Copy all header at once */ + if (rdev_readat(memd_device, buf, 0, sizeof(buf)) != sizeof(buf)) + die("Failed to read MEMD header!\n"); + + if (memcmp(hdr_memd->eyecatch, "OKOK", 4)) + die("Invalid MEMD header!\n"); + if (memcmp(hdr_memd->header_version, "01.0", 4)) + die("Unsupported MEMD header version!\n"); + if (memcmp(hdr_memd->memd_version, "01.0", 4)) + die("Unsupported MEMD version!\n"); + + /* We don't loop over sections */ + if (hdr_memd->section_count != 1) + die("Failed to map VPD data!\n"); + + vpd_size = hdr_memd->section_size*MEMD_SECTION_ROUNDING_DIVISOR; + vpd_data = rdev_mmap(memd_device, MEMD_HDR_SIZE, vpd_size); + if (!vpd_data) + die("Failed to map VPD data!\n"); + + load_vpd_attrs(vpd_data); + + if (rdev_munmap(memd_device, (void *)vpd_data)) + die("Failed to unmap VPD data!\n"); + + memd_device_unmount(); +} From 738302abe9eae7c6c0033b50553e9396fb0b9f28 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 22 Feb 2021 21:00:56 +0100 Subject: [PATCH 025/213] soc/power9/: add I2C and SPD functions Signed-off-by: Krystian Hebel Signed-off-by: Sergii Dmytruk Change-Id: I415d9e785d5bf395c2872990a80f7dec73664f58 --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/i2c.c | 193 ++++++++++++++++++++++++++++++++ 2 files changed, 194 insertions(+) create mode 100644 src/soc/ibm/power9/i2c.c diff --git 
a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 71f330aef28..67e280f2841 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -7,6 +7,7 @@ bootblock-y += rom_media.c romstage-y += rom_media.c romstage-y += romstage.c romstage-y += vpd.c +romstage-y += i2c.c ramstage-y += chip.c ramstage-y += rom_media.c ramstage-y += timer.c diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c new file mode 100644 index 00000000000..db190a1b522 --- /dev/null +++ b/src/soc/ibm/power9/i2c.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#define FIFO_REG(bus) (0xA0004 | ((bus) << 12)) +#define CMD_REG(bus) (0xA0005 | ((bus) << 12)) +#define MODE_REG(bus) (0xA0006 | ((bus) << 12)) +#define STATUS_REG(bus) (0xA000B | ((bus) << 12)) +#define RES_ERR_REG(bus) (0xA000C | ((bus) << 12)) + +// CMD register +#define LEN_SHIFT(x) PPC_SHIFT((x), 31) +#define ADDR_SHIFT(x) PPC_SHIFT((x), 14) +#define READ_NOT_WRITE 0x0001000000000000 +#define START 0x8000000000000000 +#define WITH_ADDR 0x4000000000000000 +#define READ_CONT 0x2000000000000000 +#define STOP 0x1000000000000000 + +// STATUS register +#define DATA_REQUEST 0x0200000000000000 +#define CMD_COMPLETE 0x0100000000000000 +#define FIFO_COUNT_FLD 0x0000000F00000000 +#define BUSY 0x0000030000000000 +#define UNRECOVERABLE 0xFC80000000000000 + +#define CLEAR_ERR 0x8000000000000000 + +#define I2C_MAX_FIFO_CAPACITY 8 +#define SPD_I2C_BUS 3 + +/* return -1 if SMBus errors otherwise return 0 */ +static int get_spd(u8 *spd, u8 addr) +{ + /* + * Second half of DIMMs is on the second I2C port. platform_i2c_transfer() + * changes this automatically for SPD and RCD, but not for SPD page select. + * For those commands, set MSB that is later masked out. + */ + uint8_t fix = addr & 0x80; + + if (i2c_read_bytes(SPD_I2C_BUS, addr, 0, spd, SPD_PAGE_LEN) < 0) { + printk(BIOS_INFO, "No memory DIMM at address %02X\n", addr); + return -1; + } + + /* DDR4 spd is 512 byte. Switch to page 1 */ + i2c_writeb(SPD_I2C_BUS, SPD_PAGE_1 | fix, 0, 0); + + /* No need to check again if DIMM is present */ + i2c_read_bytes(SPD_I2C_BUS, addr, 0, spd + SPD_PAGE_LEN, SPD_PAGE_LEN); + /* Restore to page 0 */ + i2c_writeb(SPD_I2C_BUS, SPD_PAGE_0 | fix, 0, 0); + + return 0; +} + +static u8 spd_data[CONFIG_DIMM_MAX * CONFIG_DIMM_SPD_SIZE]; + +void get_spd_smbus(struct spd_block *blk) +{ + u8 i; + + for (i = 0 ; i < CONFIG_DIMM_MAX; i++) { + if (blk->addr_map[i] == 0) { + blk->spd_array[i] = NULL; + continue; + } + + if (get_spd(&spd_data[i * CONFIG_DIMM_SPD_SIZE], blk->addr_map[i]) == 0) + blk->spd_array[i] = &spd_data[i * CONFIG_DIMM_SPD_SIZE]; + else + blk->spd_array[i] = NULL; + } + + blk->len = SPD_PAGE_LEN_DDR4; +} + +int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, + int seg_count) +{ + int i; + uint64_t r; + + if (bus > 3) { + printk(BIOS_ERR, "I2C bus out of range (%d)\n", bus); + return -1; + } + + /* + * Divisor fields in this register are poorly documented: + * + * Bits SCOM Field Mnemonic: Description + * 0:7 RWX BIT_RATE_DIVISOR_3: Decides the speed on the I2C bus. + * 8:9 RWX BIT_RATE_DIVISOR_3: Decides the speed on the I2C bus. + * 10:15 RWX BIT_RATE_DIVISOR_3: Decides the speed on the I2C bus. + * + * After issuing a fast command (SCOM A3000) they change like this: + * - 100 kHz - previous value is not changed + * - 50 kHz - 0x000B + * - 3400 kHz - 0x005E + * - 400 kHz - 0x0177 + * + * Use value for 400 kHz as it is the one used by Hostboot. 
+ */ + uint16_t bit_rate_div = 0x0177; // 400kHz by default + if (bus == 2) { + /* + * Skiboot computes the value as: + * + * (((clock-frequency / bus-frequency) - 1) / 4) + * + * Frequencies are specified in the corresponding device tree entries. + * clock-frequency is from I2C master and bus-frequency is from I2C bus. + * + * At least for TPM default value doesn't work (results in NACK error) for + * bus #2. + */ + bit_rate_div = 0x0048; + } + + write_scom(RES_ERR_REG(bus), CLEAR_ERR); + + for (i = 0; i < seg_count; i++) { + unsigned int len; + uint64_t read_not_write, stop, read_cont, port; + + /* Only read for now, implement different flags when needed */ + if (segment[i].flags & ~I2C_M_RD) { + printk(BIOS_ERR, "Unsupported I2C flags (0x%4.4x)\n", segment[i].flags); + return -1; + } + + read_not_write = (segment[i].flags & I2C_M_RD) ? READ_NOT_WRITE : 0; + stop = (i == seg_count - 1) ? STOP : 0; + read_cont = (!stop && !read_not_write) ? READ_CONT : 0; + port = segment[i].slave & 0x80 ? 1 : 0; + + write_scom(MODE_REG(bus), + PPC_SHIFT(bit_rate_div, 15) | PPC_SHIFT(port, 21)); + write_scom(RES_ERR_REG(bus), CLEAR_ERR); + write_scom(CMD_REG(bus), START | stop | WITH_ADDR | read_not_write | read_cont | + ADDR_SHIFT(segment[i].slave & 0x7F) | + LEN_SHIFT(segment[i].len)); + + for (len = 0; len < segment[i].len; len++) { + r = read_scom(STATUS_REG(bus)); + + if (read_not_write) { + /* Read */ + while ((r & (DATA_REQUEST | FIFO_COUNT_FLD)) == 0) { + if (r & UNRECOVERABLE) { + /* This may be DIMM not present so use low verbosity */ + printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); + return -1; + } + r = read_scom(STATUS_REG(bus)); + } + + r = read_scom(FIFO_REG(bus)); + segment[i].buf[len] = r >> 56; + } + else + { + /* Write */ + while ((r & DATA_REQUEST) == 0) { + if (r & UNRECOVERABLE) { + printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); + return -1; + } + r = read_scom(STATUS_REG(bus)); + } + + write_scom(FIFO_REG(bus), (uint64_t) segment[i].buf[len] << 56); + } + } + + r = read_scom(STATUS_REG(bus)); + while ((r & CMD_COMPLETE) == 0) { + if (r & UNRECOVERABLE) { + printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); + return -1; + } + r = read_scom(STATUS_REG(bus)); + } + + } + + return 0; +} From 7314025ffc064b70cc38d895822794023964a724 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 14 May 2021 10:43:04 +0200 Subject: [PATCH 026/213] soc/power9: add header file with SCOM addresses used in RAM init Signed-off-by: Krystian Hebel Change-Id: Ifd0c1e4ace3ea916f575f10857e14e56233004ec --- src/soc/ibm/power9/istep_13_scom.h | 820 +++++++++++++++++++++++++++++ 1 file changed, 820 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_scom.h diff --git a/src/soc/ibm/power9/istep_13_scom.h b/src/soc/ibm/power9/istep_13_scom.h new file mode 100644 index 00000000000..2f09f6b8c28 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_scom.h @@ -0,0 +1,820 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef ISTEP_13_SCOM_H +#define ISTEP_13_SCOM_H + +#include + +#define PSU_HOST_SBE_MBOX0_REG 0x000D0050 +#define PSU_HOST_SBE_MBOX1_REG 0x000D0051 + +#define PSU_SBE_DOORBELL_REG 0x000D0060 +#define PSU_SBE_DOORBELL_REG_WAND 0x000D0061 +#define PSU_SBE_DOORBELL_REG_WOR 0x000D0062 +#define PSU_HOST_DOORBELL_REG 0x000D0063 +#define PSU_HOST_DOORBELL_REG_WAND 0x000D0064 +#define PSU_HOST_DOORBELL_REG_WOR 0x000D0065 + +#define MCD0_FIR_MASK_REG 0x03011003 +#define MCD1_FIR_MASK_REG 0x03011403 + +#define NEST_CPLT_CTRL1 0x05000001 + +#define MCSLOW_CPLT_CTRL0 
0x07000000 +#define MCSLOW_CPLT_CTRL0_WOR 0x07000010 +#define MCSLOW_CPLT_CTRL0_WCLEAR 0x07000020 +#define MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC 0 +#define MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC 1 +#define MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC 2 +#define MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC 3 + +#define MCSLOW_CPLT_CTRL1 0x07000001 +#define MCSLOW_CPLT_CTRL1_WOR 0x07000011 +#define MCSLOW_CPLT_CTRL1_WCLEAR 0x07000021 + +#define MCSLOW_CPLT_CONF0 0x07000008 + +#define MCSLOW_CPLT_STAT0 0x07000100 +#define MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC 8 +#define MCSLOW_CPLT_STAT0_CC_CTRL_CHIPLET_IS_ALIGNED_DC 9 + +#define MBACALFIR 0x07010900 +#define MBACALFIR_MASK 0x07010903 +#define MBACALFIR_ACTION0 0x07010906 +#define MBACALFIR_ACTION1 0x07010907 +#define MBACALFIR_MBA_RECOVERABLE_ERROR 0 +#define MBACALFIR_MBA_NONRECOVERABLE_ERROR 1 +#define MBACALFIR_REFRESH_OVERRUN 2 +#define MBACALFIR_RCD_PARITY_ERROR 4 +#define MBACALFIR_DDR_CAL_TIMEOUT_ERR 5 +#define MBACALFIR_DDR_CAL_RESET_TIMEOUT 7 +#define MBACALFIR_DDR_MBA_EVENT_N 8 +#define MBACALFIR_WRQ_RRQ_HANG_ERR 9 +#define MBACALFIR_SM_1HOT_ERR 10 +#define MBACALFIR_ASYNC_IF_ERROR 11 +#define MBACALFIR_CMD_PARITY_ERROR 12 +#define MBACALFIR_PORT_FAIL 13 +#define MBACALFIR_RCD_CAL_PARITY_ERROR 14 + +#define MBA_DSM0Q 0x0701090A +#define MBA_DSM0Q_CFG_RODT_START_DLY 5 +#define MBA_DSM0Q_CFG_RODT_END_DLY 11 +#define MBA_DSM0Q_CFG_WODT_START_DLY 17 +#define MBA_DSM0Q_CFG_WODT_END_DLY 23 +#define MBA_DSM0Q_CFG_WRDONE_DLY 29 +#define MBA_DSM0Q_CFG_WRDATA_DLY 35 +#define MBA_DSM0Q_CFG_RDTAG_DLY 41 + +#define MBA_TMR0Q 0x0701090B +#define MBA_TMR0Q_RRDM_DLY 3 +#define MBA_TMR0Q_RRSMSR_DLY 7 +#define MBA_TMR0Q_RRSMDR_DLY 11 +#define MBA_TMR0Q_RROP_DLY 15 +#define MBA_TMR0Q_WWDM_DLY 19 +#define MBA_TMR0Q_WWSMSR_DLY 23 +#define MBA_TMR0Q_WWSMDR_DLY 27 +#define MBA_TMR0Q_WWOP_DLY 31 +#define MBA_TMR0Q_RWDM_DLY 36 +#define MBA_TMR0Q_RWSMSR_DLY 41 +#define MBA_TMR0Q_RWSMDR_DLY 46 +#define MBA_TMR0Q_WRDM_DLY 50 +#define MBA_TMR0Q_WRSMSR_DLY 56 +#define MBA_TMR0Q_WRSMDR_DLY 62 + +#define MBA_TMR1Q 0x0701090C +#define MBA_TMR1Q_RRSBG_DLY 3 +#define MBA_TMR1Q_WRSBG_DLY 9 +#define MBA_TMR1Q_CFG_TFAW 15 +#define MBA_TMR1Q_CFG_TRCD 20 +#define MBA_TMR1Q_CFG_TRP 25 +#define MBA_TMR1Q_CFG_TRAS 31 +#define MBA_TMR1Q_CFG_WR2PRE 47 +#define MBA_TMR1Q_CFG_RD2PRE 51 +#define MBA_TMR1Q_TRRD 55 +#define MBA_TMR1Q_TRRD_SBG 59 +#define MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY 63 + +#define MBA_WRQ0Q 0x0701090D +#define MBA_WRQ0Q_CFG_WRQ_FIFO_MODE 5 +#define MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE 6 +#define MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING 58 + +#define MBA_RRQ0Q 0x0701090E +#define MBA_RRQ0Q_CFG_RRQ_FIFO_MODE 6 +#define MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING 60 + +#define MBA_CAL0Q 0x0701090F +#define MBA_CAL0Q_RESET_RECOVER 57 + +#define MBA_CAL3Q 0x07010912 + +#define MBA_FARB0Q 0x07010913 +#define MBA_FARB0Q_CFG_2N_ADDR 17 +#define MBA_FARB0Q_CFG_PARITY_AFTER_CMD 38 +#define MBA_FARB0Q_CFG_RCD_PROTECTION_TIME 53 +#define MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY 54 +#define MBA_FARB0Q_CFG_OE_ALWAYS_ON 55 +#define MBA_FARB0Q_CFG_PORT_FAIL_DISABLE 57 +#define MBA_FARB0Q_CFG_OPT_RD_SIZE 63 + +#define MBA_FARB1Q 0x07010914 +#define MBA_FARB1Q_CFG_SLOT0_S0_CID 2 +#define MBA_FARB1Q_CFG_SLOT0_S1_CID 5 +#define MBA_FARB1Q_CFG_SLOT0_S2_CID 8 +#define MBA_FARB1Q_CFG_SLOT0_S3_CID 11 +#define MBA_FARB1Q_CFG_SLOT0_S4_CID 14 +#define MBA_FARB1Q_CFG_SLOT0_S5_CID 17 +#define MBA_FARB1Q_CFG_SLOT0_S6_CID 20 +#define MBA_FARB1Q_CFG_SLOT0_S7_CID 23 +#define 
MBA_FARB1Q_CFG_SLOT1_S0_CID 26 +#define MBA_FARB1Q_CFG_SLOT1_S1_CID 29 +#define MBA_FARB1Q_CFG_SLOT1_S2_CID 32 +#define MBA_FARB1Q_CFG_SLOT1_S3_CID 35 +#define MBA_FARB1Q_CFG_SLOT1_S4_CID 38 +#define MBA_FARB1Q_CFG_SLOT1_S5_CID 41 +#define MBA_FARB1Q_CFG_SLOT1_S6_CID 44 +#define MBA_FARB1Q_CFG_SLOT1_S7_CID 47 + +#define MBA_FARB2Q 0x07010915 +#define MBA_FARB2Q_CFG_RANK0_RD_ODT 3 +#define MBA_FARB2Q_CFG_RANK1_RD_ODT 7 +#define MBA_FARB2Q_CFG_RANK2_RD_ODT 11 +#define MBA_FARB2Q_CFG_RANK3_RD_ODT 15 +#define MBA_FARB2Q_CFG_RANK4_RD_ODT 19 +#define MBA_FARB2Q_CFG_RANK5_RD_ODT 23 +#define MBA_FARB2Q_CFG_RANK6_RD_ODT 27 +#define MBA_FARB2Q_CFG_RANK7_RD_ODT 31 +#define MBA_FARB2Q_CFG_RANK0_WR_ODT 35 +#define MBA_FARB2Q_CFG_RANK1_WR_ODT 39 +#define MBA_FARB2Q_CFG_RANK2_WR_ODT 43 +#define MBA_FARB2Q_CFG_RANK3_WR_ODT 47 +#define MBA_FARB2Q_CFG_RANK4_WR_ODT 51 +#define MBA_FARB2Q_CFG_RANK5_WR_ODT 55 +#define MBA_FARB2Q_CFG_RANK6_WR_ODT 59 +#define MBA_FARB2Q_CFG_RANK7_WR_ODT 63 + +#define MBA_FARB3Q 0x07010916 +#define MBA_FARB3Q_CFG_NM_N_PER_SLOT 14 +#define MBA_FARB3Q_CFG_NM_N_PER_PORT 30 +#define MBA_FARB3Q_CFG_NM_M 44 +#define MBA_FARB3Q_CFG_NM_RAS_WEIGHT 47 +#define MBA_FARB3Q_CFG_NM_CAS_WEIGHT 50 +#define MBA_FARB3Q_CFG_NM_CHANGE_AFTER_SYNC 53 + +#define MBA_FARB4Q 0x07010917 +#define MBA_FARB4Q_EMERGENCY_N 41 +#define MBA_FARB4Q_EMERGENCY_M 55 + +#define MBA_FARB5Q 0x07010918 +#define MBA_FARB5Q_CFG_DDR_DPHY_NCLK 1 +#define MBA_FARB5Q_CFG_DDR_DPHY_PCLK 3 +#define MBA_FARB5Q_CFG_DDR_RESETN 4 +#define MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL 5 +#define MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE 6 +#define MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N 8 + +#define MBAREF0Q 0x07010932 +#define MBAREF0Q_CFG_REFRESH_ENABLE 0 +#define MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD 7 +#define MBAREF0Q_CFG_REFRESH_INTERVAL 18 +#define MBAREF0Q_CFG_TRFC 39 +#define MBAREF0Q_CFG_REFR_TSV_STACK 49 +#define MBAREF0Q_CFG_REFR_CHECK_INTERVAL 60 + +#define MBARPC0Q 0x07010934 +#define MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE 2 +#define MBARPC0Q_CFG_MIN_MAX_DOMAINS 5 +#define MBARPC0Q_CFG_PUP_AVAIL 10 +#define MBARPC0Q_CFG_PDN_PUP 15 +#define MBARPC0Q_CFG_PUP_PDN 20 +#define MBARPC0Q_RESERVED_21 21 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_ENABLE 22 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME 32 + +#define MBASTR0Q 0x07010935 +#define MBASTR0Q_CFG_STR_ENABLE 0 +#define MBASTR0Q_CFG_ENTER_STR_TIME 11 +#define MBASTR0Q_CFG_TCKESR 16 +#define MBASTR0Q_CFG_TCKSRE 21 +#define MBASTR0Q_CFG_TCKSRX 26 +#define MBASTR0Q_CFG_TXSDLL 37 +#define MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL 56 + +#define ECC_FIR_MASK 0x07010A03 +#define ECC_FIR_ACTION0 0x07010A06 +#define ECC_FIR_ACTION1 0x07010A07 +#define FIR_MAINLINE_AUE 13 +#define FIR_MAINLINE_UE 14 +#define FIR_MAINLINE_RCD 15 +#define FIR_MAINLINE_IAUE 16 +#define FIR_MAINLINE_IUE 17 +#define ECC_FIR_MAINTENANCE_AUE 33 +#define ECC_FIR_MAINTENANCE_IAUE 36 +#define MCA_FIR_MAINTENANCE_IUE 37 +#define ECC_FIR_SCOM_PARITY_CLASS_STATUS 41 +#define ECC_FIR_SCOM_PARITY_CLASS_RECOVERABLE 42 +#define ECC_FIR_WRITE_RMW_CE 45 + +#define RECR 0x07010A0A +#define MBSECCQ_DISABLE_MEMORY_ECC_CHECK_CORRECT 0 +#define MBSECCQ_DISABLE_MEMORY_ECC_CORRECT 1 +#define MBSECCQ_READ_POINTER_DELAY 8 +#define MBSECCQ_VAL_TO_DATA_DELAY 18 +#define MBSECCQ_DELAY_VALID_1X 19 +#define MBSECCQ_NEST_VAL_TO_DATA_DELAY 21 +#define MBSECCQ_DELAY_NONBYPASS 22 +#define MBSECCQ_ENABLE_UE_NOISE_WINDOW 26 +#define MBSECCQ_ENABLE_TCE_CORRECTION 27 +#define MBSECCQ_USE_ADDRESS_HASH 29 +#define MBSECCQ_DATA_INVERSION 31 +#define MBSECCQ_RESERVED_40 40 
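+
+/*
+ * Note on conventions: the small numbers after the field names above and
+ * below are bit positions in IBM numbering, where bit 0 is the most
+ * significant bit of the 64-bit SCOM register and the given number marks the
+ * field's last (least significant) bit. They are meant to be paired with the
+ * PPC_BIT()/PPC_SHIFT() helpers used elsewhere in this series, roughly like
+ * this (illustrative usage only, not code taken from this series):
+ *
+ *   reg |= PPC_BIT(MBAREF0Q_CFG_REFRESH_ENABLE);               // one-bit flag
+ *   reg |= PPC_SHIFT(interval, MBAREF0Q_CFG_REFRESH_INTERVAL); // multi-bit field
+ */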
+ +#define DBGR 0x07010A0B +#define DBGR_ECC_WAT_ACTION_SELECT 9 +#define DBGR_ECC_WAT_SOURCE 11 + +#define FWMS0 0x07010A18 + +#define AACR 0x07010A29 +#define AACR_ADDRESS 9 +#define AACR_AUTOINC 10 +#define AACR_ECCGEN 11 + +#define AADR 0x07010A2A +#define AAER 0x07010A2B + +#define WRTCFG 0x07010A38 + +#define IOM_PHY0_DDRPHY_FIR_REG 0x07011000 +#define IOM_PHY0_DDRPHY_FIR_MASK_REG 0x07011003 +#define IOM_PHY0_DDRPHY_FIR_ACTION0_REG 0x07011006 +#define IOM_PHY0_DDRPHY_FIR_ACTION1_REG 0x07011007 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 54 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 55 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 56 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 57 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 58 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 59 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 60 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 61 + +#define MCBISTFIR 0x07012300 +#define MCBISTFIRMASK 0x07012303 +#define MCBISTFIRACT0 0x07012306 +#define MCBISTFIRACT1 0x07012307 +#define MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT 1 +#define MCBISTFIRQ_INTERNAL_FSM_ERROR 2 +#define MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC 3 +#define MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE 10 +#define MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE 13 +#define MCBISTFIRQ_SCOM_FATAL_REG_PE 14 + +#define CCS_INST_ARR0_00 0x07012315 +#define CCS_INST_ARR0_00_CCS_DDR_ACTN 20 +#define CCS_INST_ARR0_00_CCS_DDR_CKE 27 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 33 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 37 +#define CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE 59 + +#define CCS_INST_ARR1_00 0x07012335 +#define CCS_INST_ARR1_00_IDLES 15 +#define CCS_INST_ARR1_00_DDR_CAL_RANK 56 +#define CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE 57 +#define CCS_INST_ARR1_00_CCS_END 58 +#define CCS_INST_ARR1_00_GOTO_CMD 63 + +#define MBSEC0Q 0x07012355 +#define MBSEC1Q 0x07012356 + +#define MBSTRQ 0x07012357 +#define MBSTRQ_CFG_PAUSE_ON_MPE 34 +#define MBSTRQ_CFG_PAUSE_ON_UE 35 +#define MBSTRQ_CFG_PAUSE_ON_AUE 37 +#define MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE 55 +#define MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE 56 +#define MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE 57 + +#define MCBSTATQ 0x07012366 + +#define WATCFG0AQ 0x07012380 +#define WATCFG0AQ_CFG_WAT_EVENT_SEL 47 + +#define WATCFG0BQ 0x07012381 +#define WATCFG0BQ_CFG_WAT_MSKA 43 +#define WATCFG0BQ_CFG_WAT_CNTL 60 + +#define WATCFG0DQ 0x07012383 +#define WATCFG0DQ_CFG_WAT_PATA 43 + +#define WATCFG3AQ 0x0701238F +#define WATCFG3AQ_CFG_WAT_EVENT_SEL 47 + +#define WATCFG3BQ 0x07012390 +#define WATCFG3BQ_CFG_WAT_MSKA 43 +#define WATCFG3BQ_CFG_WAT_CNTL 60 + +#define CCS_CNTLQ 0x070123A5 +#define CCS_CNTLQ_CCS_START 0 +#define CCS_CNTLQ_CCS_STOP 1 + +#define CCS_STATQ 0x070123A6 +#define CCS_STATQ_CCS_IP 0 +#define CCS_STATQ_CCS_DONE 1 + +#define CCS_MODEQ 0x070123A7 +#define CCS_MODEQ_CCS_STOP_ON_ERR 0 +#define CCS_MODEQ_CCS_UE_DISABLE 1 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT 23 +#define CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD 24 +#define CCS_MODEQ_COPY_CKE_TO_SPARE_CKE 26 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT 31 + +#define MCBMR0Q 0x070123A8 + +#define MCBPARMQ 0x070123AF + +#define MCBDRSRQ 0x070123BC +#define MCBDRCRQ 0x070123BD +#define MCBFD0Q 0x070123BE + +#define MCBSA0Q 0x070123CC +#define MCBEA0Q 0x070123CE + +#define MCBAGRAQ 0x070123D6 +#define MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN 10 +#define MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES 12 + +#define MCB_CNTLQ 0x070123DB +#define MCB_CNTLQ_MCB_START 0 + +#define MCB_CNTLSTATQ 0x070123DC +#define 
MCB_CNTLSTATQ_MCB_IP 0 +#define MCB_CNTLSTATQ_MCB_DONE 1 +#define MCB_CNTLSTATQ_MCB_FAIL 2 + +#define MCBCFGQ 0x070123E0 +#define MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE 36 +#define MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE 58 + +#define DBGCFG0Q 0x070123E8 +#define DBGCFG0Q_CFG_DBG_ENABLE 0 +#define DBGCFG0Q_CFG_DBG_PICK_MCBIST01 33 + +#define DBGCFG1Q 0x070123E9 +#define DBGCFG1Q_CFG_WAT_ENABLE 0 + +#define DBGCFG2Q 0x070123EA +#define DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL 19 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL 39 + +#define DBGCFG3Q 0x070123EB +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL 22 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL 25 +#define DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE 40 + +#define MCSLOW_SYNC_CONFIG 0x07030000 +#define MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS 4 +#define MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED 7 + +#define MCSLOW_OPCG_ALIGN 0x07030001 +#define MCSLOW_OPCG_ALIGN_INOP_ALIGN 3 +#define MCSLOW_OPCG_ALIGN_INOP_WAIT 19 +#define MCSLOW_OPCG_ALIGN_SCAN_RATIO 51 +#define MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES 63 + +#define MCSLOW_OPCG_REG0 0x07030002 +#define MCSLOW_OPCG_RUNN_MODE 0 +#define MCSLOW_OPCG_RUN_SCAN0 2 + +#define MCSLOW_SCAN_REGION_TYPE 0x07030005 +#define MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10 14 +#define MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY 56 + +#define MCSLOW_CLK_REGION 0x07030006 +#define MCSLOW_CLK_REGION_CLOCK_CMD 1 +#define MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10 14 +#define MCSLOW_CLK_REGION_SEL_THOLD_SL 48 +#define MCSLOW_CLK_REGION_SEL_THOLD_NSL 49 +#define MCSLOW_CLK_REGION_SEL_THOLD_ARY 50 + +#define MCSLOW_CLOCK_STAT_SL 0x07030008 +#define MCSLOW_CLOCK_STAT_NSL 0x07030009 +#define MCSLOW_CLOCK_STAT_ARY 0x0703000A + +#define MCSLOW_XFIR 0x07040000 +#define MCSLOW_FIR_MASK 0x07040002 +#define MCSLOW_LOCAL_FIR 0x0704000A +#define MCSLOW_LOCAL_FIR_MASK 0x0704000D +#define MCSLOW_LOCAL_FIR_ACTION0 0x07040010 +#define MCSLOW_LOCAL_FIR_ACTION1 0x07040011 + +#define PCBSLMC01_MULTICAST_GROUP_1 0x070F0001 +#define PCBSLMC01_MULTICAST_GROUP_2 0x070F0002 + +#define PCBSLMC01_PLL_LOCK_REG 0x070F0019 + +#define PCBSLMC01_SLAVE_CONFIG_REG 0x070F001E + +#define PCBSLMC01_ERROR_REG 0x070F001F + +#define PCBSLMC01_NET_CTRL0 0x070F0040 +#define PCBSLMC01_NET_CTRL0_WAND 0x070F0041 +#define PCBSLMC01_NET_CTRL0_WOR 0x070F0042 +#define PCBSLMC01_NET_CTRL0_PCB_EP_RESET 1 +#define PCBSLMC01_NET_CTRL0_PLL_TEST_EN 3 +#define PCBSLMC01_NET_CTRL0_PLL_RESET 4 +#define PCBSLMC01_NET_CTRL0_PLL_BYPASS 5 +#define PCBSLMC01_NET_CTRL0_FENCE_EN 18 + +#define PCBSLMC01_NET_CTRL1 0x070F0044 +#define PCBSLMC01_NET_CTRL1_WAND 0x070F0045 +#define PCBSLMC01_NET_CTRL1_WOR 0x070F0046 +#define PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN 1 +#define PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN 2 + +#define DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0 0x800000000701103F +#define DDRPHY_DP16_DFT_PDA_CONTROL_P0_0 0x800000010701103F + +#define DDRPHY_DP16_CONFIG0_P0_0 0x800000030701103F +#define DP16_CONFIG0_FLUSH 51 +#define DP16_CONFIG0_INIT_IO 54 +#define DP16_CONFIG0_ADVANCE_PING_PONG 55 +#define DP16_CONFIG0_DELAY_PING_PONG_HALF 58 + +/* Names come from documentation, they are inconsistent there too. 
*/ +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_0 0x800000040701103F +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR1_P0_0 0x800001040701103F +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR2_P0_0 0x800002040701103F +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR3_P0_0 0x800003040701103F +#define DDRPHY_DP16_WRCLK_EN_RP0_P0_0 0x800000050701103F +#define DDRPHY_DP16_WRCLK_EN_RP1_P0_0 0x800001050701103F +#define DDRPHY_DP16_WRCLK_EN_RP2_P0_0 0x800002050701103F +#define DDRPHY_DP16_WRCLK_EN_RP3_P0_0 0x800003050701103F + +#define DDRPHY_DP16_SYSCLK_PR0_P0_0 0x800000070701103F +#define DDRPHY_DP16_SYSCLK_PR1_P0_0 0x8000007F0701103F + +#define DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 0x800000090701103F + +#define DDRPHY_DP16_DRIFT_LIMITS_P0_0 0x8000000A0701103F +#define DD2_BLUE_EXTEND_RANGE 49 + +#define DDRPHY_DP16_RD_LVL_STATUS0_P0_0 0x8000000E0701103F +#define DDRPHY_DP16_RD_LVL_STATUS2_P0_0 0x800000100701103F + +#define DDRPHY_DP16_RD_DIA_CONFIG5_P0_0 0x800000120701103F +#define DYN_MCTERM_CNTL_EN 49 +#define PER_CAL_UPDATE_DISABLE 52 +#define PERCAL_PWR_DIS 59 + +#define DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_0 0x800000130701103F +#define DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_0 0x800001130701103F +#define DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_0 0x800002130701103F +#define DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_0 0x800003130701103F + +#define DDRPHY_DP16_RD_STATUS0_P0_0 0x800000140701103F + +#define DDRPHY_DP16_RD_VREF_DAC_0_P0_0 0x800000160701103F +#define DDRPHY_DP16_RD_VREF_DAC_1_P0_0 0x8000001F0701103F +#define DDRPHY_DP16_RD_VREF_DAC_2_P0_0 0x800000C00701103F +#define DDRPHY_DP16_RD_VREF_DAC_3_P0_0 0x800000C10701103F +#define DDRPHY_DP16_RD_VREF_DAC_4_P0_0 0x800000C20701103F +#define DDRPHY_DP16_RD_VREF_DAC_5_P0_0 0x800000C30701103F +#define DDRPHY_DP16_RD_VREF_DAC_6_P0_0 0x800000C40701103F +#define DDRPHY_DP16_RD_VREF_DAC_7_P0_0 0x800000C50701103F +#define BIT0_VREF_DAC 55 +#define BIT1_VREF_DAC 63 + +#define DDRPHY_DP16_WR_ERROR0_P0_0 0x8000001B0701103F + +#define DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0 0x800000200701103F +#define NIB_0_DQSEL_CAP 49 +#define NIB_0_DQSEL_RES 55 +#define NIB_1_DQSEL_CAP 57 +#define NIB_1_DQSEL_RES 63 + +#define DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0 0x800000210701103F +#define NIB_2_DQSEL_CAP 49 +#define NIB_2_DQSEL_RES 55 +#define NIB_3_DQSEL_CAP 57 +#define NIB_3_DQSEL_RES 63 + +#define DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0 0x800000220701103F +#define S0ACENSLICENDRV_DC 50 +#define S0ACENSLICEPDRV_DC 53 +#define S0ACENSLICEPTERM_DC 56 + +#define DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0 0x800000230701103F +#define S1ACENSLICENDRV_DC 50 +#define S1ACENSLICEPDRV_DC 53 +#define S1ACENSLICEPTERM_DC 56 + +#define DDRPHY_DP16_DLL_CNTL0_P0_0 0x800000240701103F +#define DDRPHY_DP16_DLL_CNTL1_P0_0 0x800000250701103F +#define INIT_RXDLL_CAL_RESET 48 + +#define DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0 0x8000002A0701103F +#define DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0 0x8000002B0701103F +#define RXREG_VREG_COMPCON_DC 50 +#define RXREG_VREG_DRVCON_DC 55 +#define RXREG_VREG_REF_SEL_DC 58 + +#define DDRPHY_DP16_DLL_VREG_COARSE0_P0_0 0x8000002C0701103F +#define DDRPHY_DP16_DLL_VREG_COARSE1_P0_0 0x8000002D0701103F + +#define DDRPHY_DP16_WR_VREF_STATUS0_P0_0 0x8000002E0701103F +#define DDRPHY_DP16_WR_VREF_STATUS1_P0_0 0x8000002F0701103F + +#define DDRPHY_DP16_DQSCLK_OFFSET_P0_0 0x800000370701103F +#define DQS_OFFSET 55 + +#define DDRPHY_DP16_WR_DELAY_VALUE_0_RP0_REG_P0_0 0x800000380701103F + +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR0_P0_0 0x8000005E0701103F +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR1_P0_0 0x8000015E0701103F 
+#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR2_P0_0 0x8000025E0701103F +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR3_P0_0 0x8000035E0701103F +#define WR_VREF_RANGE_DRAM0 49 +#define WR_VREF_VALUE_DRAM0 55 +#define WR_VREF_RANGE_DRAM1 57 +#define WR_VREF_VALUE_DRAM1 63 + +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR0_P0_0 0x8000005F0701103F +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR1_P0_0 0x8000015F0701103F +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR2_P0_0 0x8000025F0701103F +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR3_P0_0 0x8000035F0701103F +#define WR_VREF_RANGE_DRAM2 49 +#define WR_VREF_VALUE_DRAM2 55 +#define WR_VREF_RANGE_DRAM3 57 +#define WR_VREF_VALUE_DRAM3 63 + +#define DDRPHY_DP16_WR_VREF_CONFIG0_P0_0 0x8000006C0701103F +#define WR_CTR_1D_MODE_SWITCH 48 +#define WR_CTR_RUN_FULL_1D 49 +#define WR_CTR_2D_SMALL_STEP_VAL 52 +#define WR_CTR_2D_BIG_STEP_VAL 56 +#define WR_CTR_NUM_BITS_TO_SKIP 59 +#define WR_CTR_NUM_NO_INC_VREF_COMP 62 + +#define DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0 0x800000730701103F +#define BB_LOCK0 48 +#define BB_LOCK1 56 + +#define DDRPHY_DP16_WRCLK_PR_P0_0 0x800000740701103F +#define TSYS_WRCLK 55 + +#define DDRPHY_DP16_IO_TX_CONFIG0_P0_0 0x800000750701103F +#define DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH 51 + +#define DDRPHY_DP16_RD_VREF_CAL_EN_P0_0 0x800000760701103F + +#define DDRPHY_DP16_DLL_CONFIG1_P0_0 0x800000770701103F +#define S0INSDLYTAP 61 +#define S1INSDLYTAP 62 + +#define DDRPHY_DP16_IO_TX_FET_SLICE_P0_0 0x800000780701103F +#define EN_SLICE_N_WR 55 +#define EN_SLICE_P_WR 63 + +#define DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0 0x8000007A0701103F + +#define DDRPHY_DP16_IO_TX_PFET_TERM_P0_0 0x8000007B0701103F + +#define DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0 0x8000007C0701103F +#define DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0 0x8000007D0701103F + +#define DDRPHY_DP16_WR_VREF_ERROR0_P0_0 0x800000AE0701103F +#define DDRPHY_DP16_WR_VREF_ERROR1_P0_0 0x800000AF0701103F + +#define DDRPHY_DP16_WR_VREF_CONFIG1_P0_0 0x800000EC0701103F +#define WR_CTR_VREF_RANGE_SELECT 48 +#define WR_CTR_VREF_RANGE_CROSSOVER 55 +#define WR_CTR_VREF_SINGLE_RANGE_MAX 62 + +/* Yes, MASK1 is supposed to be before MASK0. 
*/ +#define DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0 0x800000FA0701103F +#define DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0 0x800000FB0701103F + +#define DDRPHY_ADR_BIT_ENABLE_P0_ADR0 0x800040000701103F +#define DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1 0x800044010701103F + +#define DDRPHY_ADR_DELAY0_P0_ADR0 0x800040040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR0 0x800040050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR0 0x800040060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR0 0x800040070701103F +#define DDRPHY_ADR_DELAY4_P0_ADR0 0x800040080701103F +#define DDRPHY_ADR_DELAY5_P0_ADR0 0x800040090701103F +#define DDRPHY_ADR_DELAY0_P0_ADR1 0x800044040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR1 0x800044050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR1 0x800044060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR1 0x800044070701103F +#define DDRPHY_ADR_DELAY4_P0_ADR1 0x800044080701103F +#define DDRPHY_ADR_DELAY5_P0_ADR1 0x800044090701103F +#define DDRPHY_ADR_DELAY0_P0_ADR2 0x800048040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR2 0x800048050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR2 0x800048060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR2 0x800048070701103F +#define DDRPHY_ADR_DELAY4_P0_ADR2 0x800048080701103F +#define DDRPHY_ADR_DELAY5_P0_ADR2 0x800048090701103F +#define DDRPHY_ADR_DELAY0_P0_ADR3 0x80004C040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR3 0x80004C050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR3 0x80004C060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR3 0x80004C070701103F +#define ADR_DELAY_EVEN 55 +#define ADR_DELAY_ODD 63 + +#define DDRPHY_ADR_DELAY1_P0_ADR1 0x800044050701103F +#define DDRPHY_ADR_DELAY3_P0_ADR1 0x800044070701103F + +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 0x800040200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0 0x800040210701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 0x800044200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 0x800044210701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 0x800048200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2 0x800048210701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 0x80004C200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR3 0x80004C210701103F +#define SLICE_SEL0 48 +#define SLICE_SEL1 50 +#define SLICE_SEL2 52 +#define SLICE_SEL3 54 +#define SLICE_SEL4 56 +#define SLICE_SEL5 58 +#define SLICE_SEL6 60 +#define SLICE_SEL7 62 + +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0 0x800080310701103F +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH 62 + +#define ADR_SYSCLK_CNTRL_PR_P0_ADR32S0 0x800080320701103F + +#define DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0 0x800080330701103F + +#define ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0 0x800080340701103F + +#define DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0 0x800080350701103F +#define FLUSH 48 +#define INIT_IO 50 + +#define DDRPHY_ADR_DLL_CNTL_P0_ADR32S0 0x8000803A0701103F +#define INIT_RXDLL_CAL_RESET 48 + +#define DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0 0x8000803D0701103F + +#define DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0 0x8000803E0701103F + +#define DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 0x8000C0000701103F +#define DP_DLL_CAL_GOOD 48 +#define DP_DLL_CAL_ERROR 49 +#define DP_DLL_CAL_ERROR_FINE 50 +#define ADR_DLL_CAL_GOOD 51 +#define ADR_DLL_CAL_ERROR 52 +#define ADR_DLL_CAL_ERROR_FINE 53 +#define ZCAL_DONE 63 + +#define DDRPHY_PC_RANK_PAIR0_P0 0x8000C0020701103F +#define DDRPHY_PC_RANK_PAIR1_P0 0x8000C0030701103F +#define DDRPHY_PC_RANK_PAIR2_P0 0x8000C0300701103F +#define DDRPHY_PC_RANK_PAIR3_P0 0x8000C0310701103F + +#define DDRPHY_PC_CONFIG0_P0 0x8000C00C0701103F +#define 
DDR4_CMD_SIG_REDUCTION 54 +#define DDR4_VLEVEL_BANK_GROUP 62 + +#define DDRPHY_PC_CONFIG1_P0 0x8000C00D0701103F +#define WRITE_LATENCY_OFFSET 51 +#define READ_LATENCY_OFFSET 55 +#define MEMORY_TYPE 61 +#define DDR4_LATENCY_SW 62 + +#define DDRPHY_PC_RESETS_P0 0x8000C00E0701103F +#define SYSCLK_RESET 49 +#define ENABLE_ZCAL 51 + +#define DDRPHY_PC_MIRROR_CONFIG_P0 0x8000C0110701103F +#define ADDR_MIRROR_RP1_PRI 50 +#define ADDR_MIRROR_RP1_SEC 51 +#define ADDR_MIRROR_RP3_PRI 54 +#define ADDR_MIRROR_RP3_SEC 55 + +#define DDRPHY_PC_ERROR_STATUS0_P0 0x8000C0120701103F + +#define DDRPHY_PC_INIT_CAL_CONFIG0_P0 0x8000C0160701103F +#define ABORT_ON_CAL_ERROR 58 +#define ENA_RANK_PAIR_MSB 60 + +#define DDRPHY_PC_INIT_CAL_CONFIG1_P0 0x8000C0170701103F +#define REFRESH_COUNT 51 +#define REFRESH_CONTROL 53 +#define REFRESH_ALL_RANKS 54 +#define CMD_SNOOP_DIS 55 +#define REFRESH_INTERVAL 63 + +#define DDRPHY_PC_INIT_CAL_ERROR_P0 0x8000C0180701103F + +#define DDRPHY_PC_INIT_CAL_STATUS_P0 0x8000C0190701103F + +#define DDRPHY_PC_CSID_CFG_P0 0x8000C0330701103F + +#define DDRPHY_SEQ_CONFIG0_P0 0x8000C4020701103F +#define TWO_CYCLE_ADDR_EN 49 +#define DELAYED_PAR 54 +#define PAR_A17_MASK 62 + +#define DDRPHY_SEQ_ODT_WR_CONFIG0_P0 0x8000C40A0701103F +#define DDRPHY_SEQ_ODT_RD_CONFIG1_P0 0x8000C40F0701103F +#define ODT_RD_VALUES0 51 +#define ODT_RD_VALUES1 59 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 0x8000C4120701103F +#define TMOD_CYCLES 51 +#define TRCD_CYCLES 55 +#define TRP_CYCLES 59 +#define TRFC_CYCLES 63 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 0x8000C4130701103F +#define TZQINIT_CYCLES 51 +#define TZQCS_CYCLES 55 +#define TWLDQSEN_CYCLES 59 +#define TWRMRD_CYCLES 63 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 0x8000C4140701103F +#define TODTLON_OFF_CYCLES 51 + +#define DDRPHY_SEQ_RD_WR_DATA0_P0 0x8000C4000701103F +#define DDRPHY_SEQ_RD_WR_DATA1_P0 0x8000C4010701103F +#define RD_RW_DATA_REG0 63 +#define RD_RW_DATA_REG1 63 + +#define DDRPHY_SEQ_ERROR_STATUS0_P0 0x8000C4080701103F + +#define DDRPHY_SEQ_ODT_WR_CONFIG0_P0 0x8000C40A0701103F +#define ODT_WR_VALUES0 51 +#define ODT_WR_VALUES1 59 + +#define DDRPHY_SEQ_ODT_WR_CONFIG1_P0 0x8000C40B0701103F +#define ODT_WR_VALUES2 51 +#define ODT_WR_VALUES3 59 + +#define DDRPHY_SEQ_ODT_RD_CONFIG0_P0 0x8000C40E0701103F +#define ODT_RD_VALUES0 51 +#define ODT_RD_VALUES1 59 + +#define DDRPHY_SEQ_ODT_RD_CONFIG1_P0 0x8000C40F0701103F +#define ODT_RD_VALUES2 51 +#define ODT_RD_VALUES3 59 + +#define DDRPHY_RC_CONFIG0_P0 0x8000C8000701103F +#define GLOBAL_PHY_OFFSET 51 +#define PERFORM_RDCLK_ALIGN 62 + +#define DDRPHY_RC_CONFIG1_P0 0x8000C8010701103F + +#define DDRPHY_RC_CONFIG2_P0 0x8000C8020701103F +#define CONSEC_PASS 52 + +#define DDRPHY_RC_ERROR_STATUS0_P0 0x8000C8050701103F + +#define DDRPHY_RC_CONFIG3_P0 0x8000C8070701103F +#define COARSE_CAL_STEP_SIZE 54 + +#define DDRPHY_RC_RDVREF_CONFIG0_P0 0x8000C8090701103F + +#define DDRPHY_RC_RDVREF_CONFIG1_P0 0x8000C80A0701103F +#define CMD_PRECEDE_TIME 55 +#define MPR_LOCATION 59 +#define CALIBRATION_ENABLE 60 +#define SKIP_RDCENTERING 61 + +#define DDRPHY_WC_CONFIG0_P0 0x8000CC000701103F +#define TWLO_TWLOE 55 +#define WL_ONE_DQS_PULSE 56 +#define FW_WR_RD 62 +#define CUSTOM_INIT_WRITE 63 + +#define DDRPHY_WC_CONFIG1_P0 0x8000CC010701103F +#define BIG_STEP 51 +#define SMALL_STEP 54 +#define WR_PRE_DLY 60 + +#define DDRPHY_WC_CONFIG2_P0 0x8000CC020701103F +#define NUM_VALID_SAMPLES 51 +#define FW_RD_WR 57 +#define IPW_WR_WR 61 + +#define DDRPHY_WC_ERROR_STATUS0_P0 0x8000CC030701103F + +#define DDRPHY_WC_CONFIG3_P0 
0x8000CC050701103F +#define MRS_CMD_DQ_OFF 60 + +#define DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 0x8000CC060701103F +#define WL_ENABLE_RTT_SWAP 48 +#define WR_CTR_ENABLE_RTT_SWAP 49 +#define WR_CTR_VREF_COUNTER_RESET_VAL 59 + +#define DDRPHY_APB_CONFIG0_P0 0x8000D0000701103F +#define RESET_ERR_RPT 49 + +#define DDRPHY_APB_ERROR_STATUS0_P0 0x8000D0010701103F + +#endif // ISTEP_13_SCOM_H From f4d13464bd0b29c330dc02e015921d11c3fd0284 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 23 Feb 2021 12:33:32 +0100 Subject: [PATCH 027/213] soc/power9/: read and calculate required data from SPD This fill mem_data global variable with information about memory hardware. Signed-off-by: Krystian Hebel Change-Id: I6f537ff225387a29d5d609c05729e1ab993fbd37 --- src/include/cpu/power/istep_13.h | 196 ++++++++++++++++++ src/soc/ibm/power9/romstage.c | 333 +++++++++++++++++++++++++++++++ 2 files changed, 529 insertions(+) create mode 100644 src/include/cpu/power/istep_13.h diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h new file mode 100644 index 00000000000..ae3556dc77a --- /dev/null +++ b/src/include/cpu/power/istep_13.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#define MCS_PER_PROC 2 +#define MCA_PER_MCS 2 +#define MCA_PER_PROC (MCA_PER_MCS * MCS_PER_PROC) +#define DIMMS_PER_MCA 2 +#define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) + +/* These should be in one of the SPD headers. */ +#define WIDTH_x4 0 +#define WIDTH_x8 1 + +#define DENSITY_256Mb 0 +#define DENSITY_512Mb 1 +#define DENSITY_1Gb 2 +#define DENSITY_2Gb 3 +#define DENSITY_4Gb 4 +#define DENSITY_8Gb 5 +#define DENSITY_16Gb 6 +#define DENSITY_32Gb 7 + +#define PSEC_PER_NSEC 1000 +#define PSEC_PER_USEC 1000000 + +typedef struct { + bool present; + uint8_t mranks; + uint8_t log_ranks; + uint8_t width; + uint8_t density; + uint8_t *spd; + uint8_t rcd_i2c_addr; + uint16_t size_gb; // 2S8Rx4 8Gb DIMMs are 256GB +} rdimm_data_t; + +typedef struct { + bool functional; + rdimm_data_t dimm[DIMMS_PER_MCA]; + + /* + * The following fields are read and/or calculated from SPD obtained + * from DIMMs, but they are here because we can only set them per + * MCA/port/channel and not per DIMM. All units are clock cycles, + * absolute time values are rarely used. + */ + uint16_t nfaw; + uint16_t nras; + uint16_t nrfc; + uint16_t nrfc_dlr; // nRFC for Different Logical Rank (3DS only) + uint8_t cl; + uint8_t nccd_l; + uint8_t nwtr_s; + uint8_t nwtr_l; + uint8_t nrcd; + uint8_t nrp; + uint8_t nwr; + uint8_t nrrd_s; + uint8_t nrrd_l; +} mca_data_t; + +typedef struct { + bool functional; + mca_data_t mca[MCA_PER_MCS]; +} mcs_data_t; + +typedef struct { + /* Do we need 'bool functional' here as well? */ + mcs_data_t mcs[MCS_PER_PROC]; + + /* + * Unclear whether we can have different speeds between MCSs. + * Documentation says we can, but ring ID in 13.3 is sent per MCBIST. + * ATTR_MSS_FREQ is defined for SYSTEM target type, implying only one + * speed for whole platform. + * + * FIXME: maybe these should be in mcs_data_t and 13.3 should send + * a second Ring ID for the second MCS. How to test it? + */ + uint16_t speed; // MT/s + /* + * These depend just on memory frequency (and specification), and even + * though they describe DRAM/DIMM/MCA settings, there is no need to have + * multiple copies of identical data. 
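+	 * For example, at 2400 MT/s tCK(avg) is about 833 ps, so the 7.8 us
+	 * refresh interval kept in nrefi below works out to roughly 9360
+	 * clock cycles.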
+ */ + uint16_t nrefi; // 7.8 us in normal temperature range (0-85 deg Celsius) + uint8_t cwl; + uint8_t nrtp; // max(4 nCK, 7.5 ns) = 7.5 ns for every supported speed +} mcbist_data_t; + +extern mcbist_data_t mem_data; +static const chiplet_id_t mcs_ids[MCS_PER_PROC] = {MC01_CHIPLET_ID, MC23_CHIPLET_ID}; + +/* + * All time conversion functions assume that both MCSs have the same frequency. + * Change it if proven otherwise by adding a second argument - memory speed or + * MCS index. + * + * These functions should not be used before setting mem_data.speed to a valid + * non-0 value. + */ +static inline uint64_t tck_in_ps(void) +{ + /* + * Speed is in MT/s, we need to divide it by 2 to get MHz. + * tCK(avg) should be rounded down to the next valid speed bin, which + * corresponds to value obtained by using standardized MT/s values. + */ + return 1000000 / (mem_data.speed / 2); +} + +static inline uint64_t ps_to_nck(uint64_t ps) +{ + /* Algorithm taken from JEDEC Standard No. 21-C */ + return ((ps * 1000 / tck_in_ps()) + 974) / 1000; +} + +static inline uint64_t mtb_ftb_to_nck(uint64_t mtb, int8_t ftb) +{ + /* ftb is signed (always byte?) */ + return ps_to_nck(mtb * 125 + ftb); +} + +static inline uint64_t ns_to_nck(uint64_t ns) +{ + return ps_to_nck(ns * PSEC_PER_NSEC); +} + +static inline uint64_t nck_to_ps(uint64_t nck) +{ + return nck * tck_in_ps(); +} + +/* + * To be used in delays, so always round up. + * + * Microsecond is the best precision exposed by coreboot API. tCK is somewhere + * around 1 ns, so most smaller delays will be rounded up to 1 us. For better + * resolution we would have to read TBR (Time Base Register) directly. + */ +static inline uint64_t nck_to_us(uint64_t nck) +{ + return (nck_to_ps(nck) + PSEC_PER_USEC - 1) / PSEC_PER_USEC; +} + +static inline void delay_nck(uint64_t nck) +{ + udelay(nck_to_us(nck)); +} + +/* TODO: discover which MCAs are used on second MCS (0,1,6,7? 0,1,4,5?) */ +/* TODO: consider non-RMW variants */ +static inline void mca_and_or(chiplet_id_t mcs, int mca, uint64_t scom, + uint64_t and, uint64_t or) +{ + /* + * Indirect registers have different stride than the direct ones in + * general, except for (only?) direct PHY registers. + */ + unsigned mul = (scom & PPC_BIT(0) || + (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; + scom_and_or_for_chiplet(mcs, scom + mca * mul, and, or); +} + +static inline void dp_mca_and_or(chiplet_id_t mcs, int dp, int mca, + uint64_t scom, uint64_t and, uint64_t or) +{ + mca_and_or(mcs, mca, scom + dp * 0x40000000000, and, or); +} + +static inline uint64_t mca_read(chiplet_id_t mcs, int mca, uint64_t scom) +{ + /* Indirect registers have different stride than the direct ones in + * general, except for (only?) direct PHY registers. */ + unsigned mul = (scom & PPC_BIT(0) || + (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; + return read_scom_for_chiplet(mcs, scom + mca * mul); +} + +static inline void mca_write(chiplet_id_t mcs, int mca, uint64_t scom, uint64_t val) +{ + /* Indirect registers have different stride than the direct ones in + * general, except for (only?) direct PHY registers. */ + unsigned mul = (scom & PPC_BIT(0) || + (scom & 0xFFFFF000) == 0x07011000) ? 
0x400 : 0x40; + write_scom_for_chiplet(mcs, scom + mca * mul, val); +} +static inline uint64_t dp_mca_read(chiplet_id_t mcs, int dp, int mca, uint64_t scom) +{ + return mca_read(mcs, mca, scom + dp * 0x40000000000); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 5738002cb83..21c1afe9142 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -2,7 +2,328 @@ #include #include +#include #include +#include +#include + +/* DIMM SPD addresses */ +#define DIMM0 0x50 +#define DIMM1 0x51 +#define DIMM2 0x52 +#define DIMM3 0x53 +#define DIMM4 0x54 +#define DIMM5 0x55 +#define DIMM6 0x56 +#define DIMM7 0x57 + +mcbist_data_t mem_data; + +static void dump_mca_data(mca_data_t *mca) +{ + printk(BIOS_SPEW, "\tCL = %d\n", mca->cl); + printk(BIOS_SPEW, "\tCCD_L = %d\n", mca->nccd_l); + printk(BIOS_SPEW, "\tWTR_S = %d\n", mca->nwtr_s); + printk(BIOS_SPEW, "\tWTR_L = %d\n", mca->nwtr_l); + printk(BIOS_SPEW, "\tFAW = %d\n", mca->nfaw); + printk(BIOS_SPEW, "\tRCD = %d\n", mca->nrcd); + printk(BIOS_SPEW, "\tRP = %d\n", mca->nrp); + printk(BIOS_SPEW, "\tRAS = %d\n", mca->nras); + printk(BIOS_SPEW, "\tWR = %d\n", mca->nwr); + printk(BIOS_SPEW, "\tRRD_S = %d\n", mca->nrrd_s); + printk(BIOS_SPEW, "\tRRD_L = %d\n", mca->nrrd_l); + printk(BIOS_SPEW, "\tRFC = %d\n", mca->nrfc); + printk(BIOS_SPEW, "\tRFC_DLR = %d\n", mca->nrfc_dlr); + + int i; + for (i = 0; i < 2; i++) { + if (mca->dimm[i].present) { + printk(BIOS_SPEW, "\tDIMM%d: %dRx%d ", i, mca->dimm[i].mranks, + (mca->dimm[i].width + 1) * 4); + + if (mca->dimm[i].log_ranks != mca->dimm[i].mranks) + printk(BIOS_SPEW, "%dH 3DS ", mca->dimm[i].log_ranks / mca->dimm[i].mranks); + + printk(BIOS_SPEW, "%dGB\n", mca->dimm[i].size_gb); + } else { + printk(BIOS_SPEW, "\tDIMM%d: not installed\n", i); + } + } +} + +/* TODO: add checks for same ranks configuration for both DIMMs under one MCA */ +static inline bool is_proper_dimm(spd_raw_data spd, int slot) +{ + struct dimm_attr_ddr4_st attr; + if (spd == NULL) + return false; + + if (spd_decode_ddr4(&attr, spd) != SPD_STATUS_OK) { + printk(BIOS_ERR, "Malformed SPD for slot %d\n", slot); + return false; + } + + if (attr.dram_type != SPD_MEMORY_TYPE_DDR4_SDRAM || + attr.dimm_type != SPD_DDR4_DIMM_TYPE_RDIMM || + !attr.ecc_extension) { + printk(BIOS_ERR, "Bad DIMM type in slot %d\n", slot); + return false; + } + + return true; +} + +static void mark_nonfunctional(int mcs, int mca) +{ + mem_data.mcs[mcs].mca[mca].functional = false; + + /* Propagate upwards */ + if (!mem_data.mcs[mcs].mca[mca ^ 1].functional) { + mem_data.mcs[mcs].functional = false; + if (!mem_data.mcs[mcs ^ 1].functional) + die("No functional MCS left"); + } +} + +static uint64_t find_min_mtb_ftb(rdimm_data_t *dimm, int mtb_idx, int ftb_idx) +{ + uint64_t val0 = 0, val1 = 0; + + if (dimm[0].present) + val0 = mtb_ftb_to_nck(dimm[0].spd[mtb_idx], (int8_t)dimm[0].spd[ftb_idx]); + if (dimm[1].present) + val1 = mtb_ftb_to_nck(dimm[1].spd[mtb_idx], (int8_t)dimm[1].spd[ftb_idx]); + + return (val0 < val1) ? val1 : val0; +} + +static uint64_t find_min_multi_mtb(rdimm_data_t *dimm, int mtb_l, int mtb_h, uint8_t mask, int shift) +{ + uint64_t val0 = 0, val1 = 0; + + if (dimm[0].present) + val0 = dimm[0].spd[mtb_l] | ((dimm[0].spd[mtb_h] & mask) << shift); + if (dimm[1].present) + val1 = dimm[1].spd[mtb_l] | ((dimm[1].spd[mtb_h] & mask) << shift); + + return (val0 < val1) ? 
mtb_ftb_to_nck(val1, 0) : mtb_ftb_to_nck(val0, 0); +} + +/* This is most of step 7 condensed into one function */ +static void prepare_dimm_data(void) +{ + int i, mcs, mca; + int tckmin = 0x06; // Platform limit + + /* + * DIMMs 4-7 are under a different port. This is not the same as bus, but we + * need to pass that information to I2C function. As there is no easier way, + * use MSB of address and mask it out at the receiving side. This will print + * wrong addresses in dump_spd_info(), but that is small price to pay. + */ + struct spd_block blk = { + .addr_map = { DIMM0, DIMM1, DIMM2, DIMM3, + DIMM4 | 0x80, DIMM5 | 0x80, DIMM6 | 0x80, DIMM7 | 0x80 }, + }; + + get_spd_smbus(&blk); + dump_spd_info(&blk); + + /* + * We need to find the highest common (for all DIMMs and the platform) + * supported frequency, meaning we need to compare minimum clock cycle times + * and choose the highest value. For the range supported by the platform we + * can check MTB only. + * + * TODO: check if we can have different frequencies across MCSs. + */ + for (i = 0; i < CONFIG_DIMM_MAX; i++) { + if (is_proper_dimm(blk.spd_array[i], i)) { + mcs = i / DIMMS_PER_MCS; + mca = (i % DIMMS_PER_MCS) / MCA_PER_MCS; + int dimm_idx = i % 2; // (i % DIMMS_PER_MCS) % MCA_PER_MCS + + + /* Maximum for 2 DIMMs on one port (channel, MCA) is 2400 MT/s */ + if (tckmin < 0x07 && mem_data.mcs[mcs].mca[mca].functional) + tckmin = 0x07; + + mem_data.mcs[mcs].functional = true; + mem_data.mcs[mcs].mca[mca].functional = true; + + rdimm_data_t *dimm = &mem_data.mcs[mcs].mca[mca].dimm[dimm_idx]; + + dimm->present = true; + dimm->spd = blk.spd_array[i]; + /* RCD address is the same as SPD, with one additional bit set */ + dimm->rcd_i2c_addr = blk.addr_map[i] | 0x08; + /* + * SPD fields in spd.h are not compatible with DDR4 and those in + * spd_bin.h are just a few of all required. + * + * TODO: add fields that are lacking to either of those files or + * add a file specific to DDR4 SPD. + */ + dimm->width = blk.spd_array[i][12] & 7; + dimm->mranks = ((blk.spd_array[i][12] >> 3) & 0x7) + 1; + dimm->log_ranks = dimm->mranks * (((blk.spd_array[i][6] >> 4) & 0x7) + 1); + dimm->density = blk.spd_array[i][4] & 0xF; + dimm->size_gb = (1 << (dimm->density - 2)) * (2 - dimm->width) * + dimm->log_ranks; + + if ((blk.spd_array[i][5] & 0x38) == 0x30) + die("DIMMs with 18 row address bits are not supported\n"); + + if (blk.spd_array[i][18] > tckmin) + tckmin = blk.spd_array[i][18]; + } + } + + /* + * There is one (?) MCBIST per CPU. Fail if there are no supported DIMMs + * connected, otherwise assume it is functional. There is no reason to redo + * this test in the rest of isteps. + * + * TODO: 2 CPUs with one DIMM (in total) will not work with this code. 
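+	 *
+	 * For reference, tckmin compared in the switch below is SPD byte 18,
+	 * i.e. the minimum clock period in 125 ps medium-timebase units: a
+	 * 2666 MT/s part encodes 6 (750 ps) and a 2400 MT/s part typically 7
+	 * (875 ps, with a fine-timebase correction that can be ignored here
+	 * because the MTB value alone already identifies the speed bin).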
+ */ + if (!mem_data.mcs[0].functional && !mem_data.mcs[1].functional) + die("No DIMMs detected, aborting\n"); + + switch (tckmin) { + /* For CWL assume 1tCK write preamble */ + case 0x06: + mem_data.speed = 2666; + mem_data.cwl = 14; + break; + case 0x07: + mem_data.speed = 2400; + mem_data.cwl = 12; + break; + case 0x08: + mem_data.speed = 2133; + mem_data.cwl = 11; + break; + case 0x09: + mem_data.speed = 1866; + mem_data.cwl = 10; + break; + default: + die("Unsupported tCKmin: %d ps (+/- 125)\n", tckmin * 125); + } + + /* Now that we know our speed, we can calculate the rest of the data */ + mem_data.nrefi = ns_to_nck(7800); + mem_data.nrtp = ps_to_nck(7500); + printk(BIOS_SPEW, "Common memory parameters:\n" + "\tspeed =\t%d MT/s\n" + "\tREFI =\t%d clock cycles\n" + "\tCWL =\t%d clock cycles\n" + "\tRTP =\t%d clock cycles\n", + mem_data.speed, mem_data.nrefi, mem_data.cwl, mem_data.nrtp); + + for (mcs = 0; mcs < MCS_PER_PROC; mcs++) { + if (!mem_data.mcs[mcs].functional) continue; + for (mca = 0; mca < MCA_PER_MCS; mca++) { + if (!mem_data.mcs[mcs].mca[mca].functional) continue; + + rdimm_data_t *dimm = mem_data.mcs[mcs].mca[mca].dimm; + uint32_t val0, val1, common; + int min; /* Minimum compatible with both DIMMs is the bigger value */ + + /* CAS Latency */ + val0 = dimm[0].present ? le32_to_cpu(*(uint32_t *)&dimm[0].spd[20]) : -1; + val1 = dimm[1].present ? le32_to_cpu(*(uint32_t *)&dimm[1].spd[20]) : -1; + /* Assuming both DIMMs are in low CL range, true for all DDR4 speed bins */ + common = val0 & val1; + + /* tAAmin - minimum CAS latency time */ + min = find_min_mtb_ftb(dimm, 24, 123); + while (min <= 36 && ((common >> (min - 7)) & 1) == 0) + min++; + + if (min > 36) { + /* Maybe just die() instead? */ + printk(BIOS_WARNING, "Cannot find CL supported by all DIMMs under MCS%d, MCA%d." + " Marking as nonfunctional.\n", mcs, mca); + mark_nonfunctional(mcs, mca); + continue; + } + + mem_data.mcs[mcs].mca[mca].cl = min; + + /* + * There are also minimal values in Table 170 of JEDEC Standard No. 79-4C which + * probably should also be honored. Some of them (e.g. RRD) depend on the page + * size, which depends on DRAM width. On tested DIMM they are just right - it is + * either minimal legal value or rounded up to whole clock cycle. Can we rely on + * vendors to put sane values in SPD or do we have to check them for validity? 
+ */ + + /* Minimum CAS to CAS Delay Time, Same Bank Group */ + mem_data.mcs[mcs].mca[mca].nccd_l = find_min_mtb_ftb(dimm, 40, 117); + + /* Minimum Write to Read Time, Different Bank Group */ + mem_data.mcs[mcs].mca[mca].nwtr_s = find_min_multi_mtb(dimm, 44, 43, 0x0F, 8); + + /* Minimum Write to Read Time, Same Bank Group */ + mem_data.mcs[mcs].mca[mca].nwtr_l = find_min_multi_mtb(dimm, 45, 43, 0xF0, 4); + + /* Minimum Four Activate Window Delay Time */ + mem_data.mcs[mcs].mca[mca].nfaw = find_min_multi_mtb(dimm, 37, 36, 0x0F, 8); + + /* Minimum RAS to CAS Delay Time */ + mem_data.mcs[mcs].mca[mca].nrcd = find_min_mtb_ftb(dimm, 25, 122); + + /* Minimum Row Precharge Delay Time */ + mem_data.mcs[mcs].mca[mca].nrp = find_min_mtb_ftb(dimm, 26, 121); + + /* Minimum Active to Precharge Delay Time */ + mem_data.mcs[mcs].mca[mca].nras = find_min_multi_mtb(dimm, 28, 27, 0x0F, 8); + + /* Minimum Write Recovery Time */ + mem_data.mcs[mcs].mca[mca].nwr = find_min_multi_mtb(dimm, 42, 41, 0x0F, 8); + + /* Minimum Activate to Activate Delay Time, Different Bank Group */ + mem_data.mcs[mcs].mca[mca].nrrd_s = find_min_mtb_ftb(dimm, 38, 119); + + /* Minimum Activate to Activate Delay Time, Same Bank Group */ + mem_data.mcs[mcs].mca[mca].nrrd_l = find_min_mtb_ftb(dimm, 39, 118); + + /* Minimum Refresh Recovery Delay Time */ + /* Assuming no fine refresh mode. */ + mem_data.mcs[mcs].mca[mca].nrfc = find_min_multi_mtb(dimm, 30, 31, 0xFF, 8); + + /* Minimum Refresh Recovery Delay Time for Different Logical Rank (3DS only) */ + /* + * This one is set per MCA, but it depends on DRAM density, which can be + * mixed between DIMMs under the same channel. We need to choose the bigger + * minimum time, which corresponds to higher density. + * + * Assuming no fine refresh mode. + */ + val0 = dimm[0].present ? dimm[0].spd[4] & 0xF : 0; + val1 = dimm[1].present ? dimm[1].spd[4] & 0xF : 0; + min = (val0 < val1) ? val1 : val0; + + switch (min) { + case 0x4: + mem_data.mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(90); + break; + case 0x5: + mem_data.mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(120); + break; + case 0x6: + mem_data.mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(185); + break; + default: + die("Unsupported DRAM density\n"); + } + + printk(BIOS_SPEW, "MCS%d, MCA%d times (in clock cycles):\n", mcs, mca); + dump_mca_data(&mem_data.mcs[mcs].mca[mca]); + } + } +} void main(void) { @@ -10,5 +331,17 @@ void main(void) vpd_pnor_main(); + prepare_dimm_data(); + + /* Test if SCOM still works. Maybe should check also indirect access? */ + printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); + + /* + * Halt to give a chance to inspect FIRs, otherwise checkstops from + * ramstage may cover up the failure in romstage. + */ + if (read_scom(0xF000F) == 0xFFFFFFFFFFFFFFFF) + die("SCOM stopped working, check FIRs, halting now\n"); + run_ramstage(); } From e4a1b311b90382c80d8760d86fccac47a2e35220 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 21 May 2021 12:27:53 +0200 Subject: [PATCH 028/213] soc/power9/ccs.c: implementation CCS is configured command sequencer, which is used during DRAM training. It's loaded with a program which it then executes. 
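
A minimal usage sketch (illustration only, based on the declarations this
patch adds to istep_13.h; the chiplet id taken from mcs_ids[0], the MRS
command value and the use of tMRD as the idle time are assumptions made for
the example, not values taken from Hostboot):

	/* `mrs` is an mrs_cmd_t prepared by the caller. Queue it for both
	 * ranks of DIMM 0, mirroring address lines on the odd rank, then
	 * let the CCS engine run the queued program against the first MCA
	 * of this MCS. */
	ccs_add_mrs(mcs_ids[0], mrs, DIMM0_ALL_RANKS, /*mirror=*/1, tMRD);
	ccs_execute(mcs_ids[0], /*mca_i=*/0);

ccs_execute() appends the final DES instruction and resets the instruction
counter, so the queue can be refilled for the next step.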
Signed-off-by: Krystian Hebel Change-Id: Iea137843c9085d34a2d34eb9273dea5daa174417 --- src/include/cpu/power/istep_13.h | 40 +++- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/ccs.c | 346 +++++++++++++++++++++++++++++++ 3 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 src/soc/ibm/power9/ccs.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index ae3556dc77a..a4b33e33cdc 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -28,10 +28,15 @@ #define PSEC_PER_NSEC 1000 #define PSEC_PER_USEC 1000000 +/* Values are the same across all supported speed bins */ +static const int tMRD = 8; +static const int tMOD = 24; +static const int tZQinit = 1024; + typedef struct { bool present; uint8_t mranks; - uint8_t log_ranks; + uint8_t log_ranks; // In total, not per mrank uint8_t width; uint8_t density; uint8_t *spd; @@ -194,3 +199,36 @@ static inline uint64_t dp_mca_read(chiplet_id_t mcs, int dp, int mca, uint64_t s { return mca_read(mcs, mca, scom + dp * 0x40000000000); } + +enum rank_selection { + NO_RANKS = 0, + DIMM0_RANK0 = 1 << 0, + DIMM0_RANK1 = 1 << 1, + DIMM0_ALL_RANKS = DIMM0_RANK0 | DIMM0_RANK1, + DIMM1_RANK0 = 1 << 2, + DIMM1_RANK1 = 1 << 3, + DIMM1_ALL_RANKS = DIMM1_RANK0 | DIMM1_RANK1, + BOTH_DIMMS_1R = DIMM0_RANK0 | DIMM1_RANK0, + BOTH_DIMMS_2R = DIMM0_ALL_RANKS | DIMM1_ALL_RANKS +}; + +enum cal_config { + CAL_WR_LEVEL = PPC_BIT(48), + CAL_INITIAL_PAT_WR = PPC_BIT(49), + CAL_DQS_ALIGN = PPC_BIT(50), + CAL_RDCLK_ALIGN = PPC_BIT(51), + CAL_READ_CTR = PPC_BIT(52), + CAL_WRITE_CTR = PPC_BIT(53), + CAL_INITIAL_COARSE_WR = PPC_BIT(54), + CAL_COARSE_RD = PPC_BIT(55), + CAL_CUSTOM_RD = PPC_BIT(56), + CAL_CUSTOM_WR = PPC_BIT(57) +}; + +void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, + uint8_t cke, uint16_t idles); +void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, + int mirror, uint16_t idles); +void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, + uint64_t step_cycles); +void ccs_execute(chiplet_id_t id, int mca_i); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 67e280f2841..586444f5add 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -8,6 +8,7 @@ romstage-y += rom_media.c romstage-y += romstage.c romstage-y += vpd.c romstage-y += i2c.c +romstage-y += ccs.c ramstage-y += chip.c ramstage-y += rom_media.c ramstage-y += timer.c diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c new file mode 100644 index 00000000000..81d81a4802a --- /dev/null +++ b/src/soc/ibm/power9/ccs.c @@ -0,0 +1,346 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +static inline uint64_t reverse_bits(uint64_t x) +{ + x = swab64(x); // reverse bytes + x = (x & 0xF0F0F0F0F0F0F0F0) >> 4 | + (x & 0x0F0F0F0F0F0F0F0F) << 4; // reverse nibbles in bytes + x = (x & 0x1111111111111111) << 3 | + (x & 0x2222222222222222) << 1 | + (x & 0x4444444444444444) >> 1 | + (x & 0x8888888888888888) >> 3; // reverse bits in nibbles + + return x; +} + +/* 32 total, but last one is reserved for ccs_execute() */ +#define MAX_CCS_INSTR 31 + +static unsigned instr; +static uint64_t total_cycles; + +/* TODO: 4R, CID? 
 */
+void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn,
+                         uint8_t cke, uint16_t idles)
+{
+	/*
+	 * CCS_INST_ARR0_n layout (bits from MRS):
+	 * [0-13]  A0-A13
+	 * [14]    A17
+	 * [15]    BG1
+	 * [17-18] BA0-1
+	 * [19]    BG0
+	 * [21]    A16
+	 * [22]    A15
+	 * [23]    A14
+	 */
+	uint64_t mrs64 = (reverse_bits(mrs) & PPC_BITMASK(0, 13)) |	/* A0-A13 */
+	                 PPC_SHIFT(mrs & (1<<14), 23 + 14) |		/* A14 */
+	                 PPC_SHIFT(mrs & (1<<15), 22 + 15) |		/* A15 */
+	                 PPC_SHIFT(mrs & (1<<16), 21 + 16) |		/* A16 */
+	                 PPC_SHIFT(mrs & (1<<17), 14 + 17) |		/* A17 */
+	                 PPC_SHIFT(mrs & (1<<20), 17 + 20) |		/* BA0 */
+	                 PPC_SHIFT(mrs & (1<<21), 18 + 21) |		/* BA1 */
+	                 PPC_SHIFT(mrs & (1<<22), 19 + 22) |		/* BG0 */
+	                 PPC_SHIFT(mrs & (1<<23), 15 + 23);		/* BG1 */
+
+	/* MC01.MCBIST.CCS.CCS_INST_ARR0_n
+	   [all] 0
+	   // "ACT is high. It's a no-care in the spec but it seems to raise
+	   // questions when people look at the trace, so lets set it high."
+	   [20] CCS_INST_ARR0_00_CCS_DDR_ACTN = 1
+	   // "CKE is high Note: P8 set all 4 of these high - not sure if that's
+	   // correct. BRS"
+	   [24-27] CCS_INST_ARR0_00_CCS_DDR_CKE = cke
+	   [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = csn[0:1]
+	   [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = csn[2:3]
+	*/
+	write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr,
+	                       mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) |
+	                       PPC_SHIFT(cke & 0xF, CCS_INST_ARR0_00_CCS_DDR_CKE) |
+	                       PPC_SHIFT((csn >> 2) & 3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1) |
+	                       PPC_SHIFT(csn & 3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3));
+
+	/* MC01.MCBIST.CCS.CCS_INST_ARR1_n
+	   [all] 0
+	   [0-15] CCS_INST_ARR1_00_IDLES = idles
+	   [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1
+	*/
+	write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr,
+	                       PPC_SHIFT(idles, CCS_INST_ARR1_00_IDLES) |
+	                       PPC_SHIFT(instr + 1, CCS_INST_ARR1_00_GOTO_CMD));
+
+	/*
+	 * For the last instruction in the stream we could decrease it by one (final
+	 * DES added in ccs_execute()), but subtracting it would take longer than
+	 * that one cycle, so leave it.
+	 */
+	total_cycles += idles;
+	instr++;
+
+	if (instr >= MAX_CCS_INSTR) {
+		/* Maybe call ccs_execute() here? Would need mca_i... */
+		die("CCS instructions overflowed\n");
+	}
+}
+
+/* This isn't useful for anything but calibration steps, do we want it?
*/ +static void dump_cal_errors(chiplet_id_t id, int mca_i) +{ + /* Stop CCS so it won't mess up with the values */ + write_scom_for_chiplet(id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + +#if CONFIG(DEBUG_RAM_SETUP) + int dp; + + for (dp = 0; dp < 5; dp++) { + printk(BIOS_ERR, "DP %d\n", dp); + printk(BIOS_ERR, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - WR_ERROR0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - RD_STATUS0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS2\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR1\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); + } + + printk(BIOS_ERR, "%#16.16llx - APB_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - RC_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - SEQ_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - WC_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - PC_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_ERROR\n", + mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); + + printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_STATUS\n", + mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + + printk(BIOS_ERR, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", + mca_read(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); +#endif + die("CCS execution timeout\n"); +} + +void ccs_execute(chiplet_id_t id, int mca_i) +{ + uint64_t poll_timeout; + long time; + + /* + * Polling parameters: initial delay is total_cycles/8, no delay between + * polls (coreboot API checks in a busy loop, but there is nothing else to + * do than wait), poll count is whatever it takes to get to total_cycles + * times 4 just in case (won't hurt unless calibration fails anyway). + */ + if (total_cycles < 8) + total_cycles = 8; + poll_timeout = nck_to_us((total_cycles * 7 * 4) / 8); + + write_scom_for_chiplet(id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + time = wait_us(1, !(read_scom_for_chiplet(id, CCS_STATQ) & + PPC_BIT(CCS_STATQ_CCS_IP))); + + /* Is it always as described below (CKE, CSN) or is it a copy of last instr? */ + /* Final DES - CCS does not wait for IDLES for the last command before + * clearing IP (in progress) bit, so we must use one separate DES + * instruction at the end. 
+ MC01.MCBIST.CCS.CCS_INST_ARR0_n + [all] 0 + [20] CCS_INST_ARR0_00_CCS_DDR_ACTN = 1 + [24-27] CCS_INST_ARR0_00_CCS_DDR_CKE = 0xf + [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = 3 + [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = 3 + MC01.MCBIST.CCS.CCS_INST_ARR1_n + [all] 0 + [58] CCS_INST_ARR1_00_CCS_END = 1 + */ + write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, + PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | + PPC_SHIFT(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE) | + PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1) | + PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3)); + write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, + PPC_BIT(CCS_INST_ARR1_00_CCS_END)); + + /* Select ports + MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ + // Broadcast mode is not supported, set only one bit at a time + [2-5] MCB_CNTLQ_MCBCNTL_PORT_SEL = bitmap with MCA index + */ + scom_and_or_for_chiplet(id, MCB_CNTLQ, ~PPC_BITMASK(2, 5), PPC_BIT(2 + mca_i)); + + /* Lets go */ + write_scom_for_chiplet(id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START)); + + /* With microsecond resolution we are probably wasting a lot of time here. */ + delay_nck(total_cycles/8); + + /* timeout(50*10ns): + if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ[0] (CCS_STATQ_CCS_IP) != 1: break + delay(10ns) + if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ != 0x40..00: report failure // only [1] set, others 0 + */ + time = wait_us(poll_timeout, (udelay(1), !(read_scom_for_chiplet(id, CCS_STATQ) & + PPC_BIT(CCS_STATQ_CCS_IP)))); + + /* This isn't useful for anything but calibration steps, do we want it? */ + if (!time) + dump_cal_errors(id, mca_i); + + printk(BIOS_DEBUG, "CCS took %lld us (%lld us timeout), %d instruction(s)\n", + time + nck_to_us(total_cycles/8), + poll_timeout + nck_to_us(total_cycles/8), instr); + + if (read_scom_for_chiplet(id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE)) + die("(%#16.16llx) CCS execution error\n", read_scom_for_chiplet(id, CCS_STATQ)); + + instr = 0; + total_cycles = 0; + + /* LRDIMM only */ + // cleanup_from_execute(); +} + +/* + * Constant to invert A3-A9, A11, A13, BA0-1, BG0-1. This also changes BG1 to 1, + * which automatically selects B-side. Note that A17 is not included here. + */ +static const mrs_cmd_t invert = 0xF02BF8; + +/* + * Procedure for sending MRS through CCS + * + * We need to remember about two things here: + * - RDIMM has A-side and B-side, some address bits are inverted for B-side; + * side is selected by DBG1 (when mirroring is enabled DBG0 is used for odd + * ranks to select side, instead of DBG1) + * - odd ranks may or may not have mirrored lines, depending on SPD[136]. + * + * Because of those two reasons we cannot simply repeat MRS data for all sides + * and ranks, we have to do some juggling instead. Inverting is easy, we just + * have to XOR with appropriate mask (special case for A17, it is not inverted + * if it isn't used). Mirroring will require manual bit manipulations. + * + * There are no signals that are mirrored but not inverted, which means that + * the order of those operations doesn't matter. + */ +/* TODO: add support for A17. For now it is blocked in initial SPD parsing. */ +void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, + int mirror, uint16_t idles) +{ + if (ranks & DIMM0_RANK0) { + /* DIMM 0, rank 0, side A */ + /* + * "Not sure if we can get tricky here and only delay after the b-side MR. + * The question is whether the delay is needed/assumed by the register or is + * purely a DRAM mandated delay. 
We know we can't go wrong having both + * delays but if we can ever confirm that we only need one we can fix this. + * BRS" + */ + ccs_add_instruction(id, mrs, 0x7, 0xF, idles); + + /* DIMM 0, rank 0, side B - invert A3-A9, A11, A13, A17 (TODO), BA0-1, BG0-1 */ + ccs_add_instruction(id, mrs ^ invert, 0x7, 0xF, idles); + } + + if (ranks & DIMM0_RANK1) { + /* DIMM 0, rank 1, side A, mirror if needed */ + if (mirror) + mrs = ddr4_mrs_mirror_pins(mrs); + + ccs_add_instruction(id, mrs, 0xB, 0xF, idles); + + /* DIMM 0, rank 1, side B - MRS is already mirrored, just invert it */ + ccs_add_instruction(id, mrs ^ invert, 0xB, 0xF, idles); + } + + if (ranks & DIMM1_RANK0) { + /* DIMM 1, rank 0, side A */ + ccs_add_instruction(id, mrs, 0xD, 0xF, idles); + + /* DIMM 1, rank 0, side B - invert A3-A9, A11, A13, A17 (TODO), BA0-1, BG0-1 */ + ccs_add_instruction(id, mrs ^ invert, 0xD, 0xF, idles); + } + + if (ranks & DIMM1_RANK1) { + /* DIMM 1, rank 1, side A, mirror if needed */ + if (mirror) + mrs = ddr4_mrs_mirror_pins(mrs); + + ccs_add_instruction(id, mrs, 0xE, 0xF, idles); + + /* DIMM 1, rank 1, side B - MRS is already mirrored, just invert it */ + ccs_add_instruction(id, mrs ^ invert, 0xE, 0xF, idles); + } +} + +void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, + uint64_t step_cycles) +{ + /* MC01.MCBIST.CCS.CCS_INST_ARR0_n + [all] 0 + // "CKE is high Note: P8 set all 4 of these high - not sure if that's correct. BRS" + [24-27] CCS_INST_ARR0_00_CCS_DDR_CKE = 0xf + [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = 3 // Not used by the engine for calibration? + [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = 3 // Not used by the engine for calibration? + [56-59] CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE = 0xc + */ + write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, + PPC_SHIFT(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE) | + PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1) | + PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3) | + PPC_SHIFT(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE)); + + /* MC01.MCBIST.CCS.CCS_INST_ARR1_n + [all] 0 + [53-56] CCS_INST_ARR1_00_DDR_CAL_RANK = rp + [57] CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE = 1 + [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 + */ + write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, + PPC_SHIFT(rp, CCS_INST_ARR1_00_DDR_CAL_RANK) | + PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | + PPC_SHIFT(instr + 1, CCS_INST_ARR1_00_GOTO_CMD)); + + total_cycles += step_cycles; + instr++; + + /* Setup calibration config + IOM0.DDRPHY_PC_INIT_CAL_CONFIG0_P0 + [48-57] i_cal_config // cal_config is already encoded, don't shift + [58] ABORT_ON_CAL_ERROR = 0 + [60+rp] ENA_RANK_PAIR = 1 // So, rp must be [0-3] + */ + mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG0_P0, + ~(PPC_BITMASK(48, 58) | PPC_BITMASK(60, 63)), + conf | PPC_BIT(ENA_RANK_PAIR_MSB + rp)); + + ccs_execute(id, mca_i); +} From a37bca1f1a9f631cb6dd1450e496c11f7f99fbad Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 23 Feb 2021 16:56:16 +0100 Subject: [PATCH 029/213] soc/power9/: implement istep 13.2 13.2 mem_pll_reset: Reset PLL for MCAs in async a) p9_mem_pll_reset.C (proc chip) - This step is a no-op on cumulus as the centaur is already has its PLLs setup in step 11 - This step is a no-op if memory is running in synchronous mode since the MCAs are using the nest PLL, HWP detect and exits - If in async mode then this HWP will put the PLL into bypass, reset mode - Disable listen_to_sync for MEM chiplet, whenever MEM is not in sync to NEST Signed-off-by: Krystian Hebel Change-Id: 
I7cf4d5a566d428c3326af26a720905096b97b3c8 --- src/include/cpu/power/istep_13.h | 2 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_2.c | 172 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 3 + 4 files changed, 178 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_2.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index a4b33e33cdc..60789a609e9 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -232,3 +232,5 @@ void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, uint64_t step_cycles); void ccs_execute(chiplet_id_t id, int mca_i); + +void istep_13_2(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 586444f5add..c2076d4f336 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -7,6 +7,7 @@ bootblock-y += rom_media.c romstage-y += rom_media.c romstage-y += romstage.c romstage-y += vpd.c +romstage-y += istep_13_2.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_2.c b/src/soc/ibm/power9/istep_13_2.c new file mode 100644 index 00000000000..a92998eabd1 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_2.c @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.2 mem_pll_reset: Reset PLL for MCAs in async + * + * a) p9_mem_pll_reset.C (proc chip) + * - This step is a no-op on cumulus as the centaur is already has its PLLs + * setup in step 11 + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - If in async mode then this HWP will put the PLL into bypass, reset mode + * - Disable listen_to_sync for MEM chiplet, whenever MEM is not in sync to + * NEST + */ +void istep_13_2(void) +{ + printk(BIOS_EMERG, "starting istep 13.2\n"); + int i; + long time_elapsed = 0; + + report_istep(13, 2); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (i = 0; i < MCS_PER_PROC; i++) { + // Assert endpoint reset + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [1] PCB_EP_RESET = 1 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + + // Mask PLL unlock error in PCB slave + /* + TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG + [12] (part of) ERROR_MASK = 1 + */ + scom_or_for_chiplet(mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, + PPC_BIT(12)); + + // Move MC PLL into reset state (3 separate writes, no delays between them) + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [5] PLL_BYPASS = 1 + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [4] PLL_RESET = 1 + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [3] PLL_TEST_EN = 1 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + + // Assert MEM PLDY and DCC bypass + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL1 (WOR) + [all] 0 + [1] CLK_DCC_BYPASS_EN = 1 + [2] CLK_PDLY_BYPASS_EN = 1 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL1_WOR, + 
PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN) | + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + + // Drop endpoint reset + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [1] PCB_EP_RESET = 0 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + + // Disable listen to sync pulse to MC chiplet, when MEM is not in sync to nest + /* + TP.TCMC01.MCSLOW.SYNC_CONFIG + [4] LISTEN_TO_SYNC_PULSE_DIS = 1 + */ + scom_or_for_chiplet(mcs_ids[i], MCSLOW_SYNC_CONFIG, + PPC_BIT(MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS)); + + // Initialize OPCG_ALIGN register + /* + TP.TCMC01.MCSLOW.OPCG_ALIGN + [all] 0 + [0-3] INOP_ALIGN = 5 // 8:1 + [12-19] INOP_WAIT = 0 + [47-51] SCAN_RATIO = 0 // 1:1 + [52-63] OPCG_WAIT_CYCLES = 0x20 + */ + write_scom_for_chiplet(mcs_ids[i], MCSLOW_OPCG_ALIGN, + PPC_SHIFT(5, MCSLOW_OPCG_ALIGN_INOP_ALIGN) | + PPC_SHIFT(0x20, MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES)); + + // scan0 flush PLL boundary ring + /* + TP.TCMC01.MCSLOW.CLK_REGION + [all] 0 + [14] CLOCK_REGION_UNIT10 = 1 + [48] SEL_THOLD_SL = 1 + [49] SEL_THOLD_NSL = 1 + [50] SEL_THOLD_ARY = 1 + TP.TCMC01.MCSLOW.SCAN_REGION_TYPE + [all] 0 + [14] SCAN_REGION_UNIT10 = 1 + [56] SCAN_TYPE_BNDY = 1 + TP.TCMC01.MCSLOW.OPCG_REG0 + [0] RUNN_MODE = 0 + // Separate write, but don't have to read again + TP.TCMC01.MCSLOW.OPCG_REG0 + [2] RUN_SCAN0 = 1 + */ + write_scom_for_chiplet(mcs_ids[i], MCSLOW_CLK_REGION, + PPC_BIT(MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY)); + write_scom_for_chiplet(mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10) | + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY)); + scom_and_for_chiplet(mcs_ids[i], MCSLOW_OPCG_REG0, + ~PPC_BIT(MCSLOW_OPCG_RUNN_MODE)); + scom_or_for_chiplet(mcs_ids[i], MCSLOW_OPCG_REG0, + PPC_BIT(MCSLOW_OPCG_RUN_SCAN0)); + } + + /* Separate loop so we won't have to wait for timeout twice */ + for (i = 0; i < MCS_PER_PROC; i++) { + /* FIXME: previous one didn't skip nonfunctional, should this one? */ + //~ if (!mem_data.mcs[i].functional) + //~ continue; + + /* + timeout(200 * 16us): + TP.TCMC01.MCSLOW.CPLT_STAT0 + if (([8] CC_CTRL_OPCG_DONE_DC) == 1) break + delay(16us) + */ + time_elapsed = wait_us(200 * 16 - time_elapsed, + read_scom_for_chiplet(mcs_ids[i], MCSLOW_CPLT_STAT0) & + PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC)); + + if (!time_elapsed) + die("Timed out while waiting for PLL boundary ring flush\n"); + + // Cleanup + /* + TP.TCMC01.MCSLOW.CLK_REGION + [all] 0 + TP.TCMC01.MCSLOW.SCAN_REGION_TYPE + [all] 0 + */ + write_scom_for_chiplet(mcs_ids[i], MCSLOW_CLK_REGION, 0); + write_scom_for_chiplet(mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, 0); + } + + printk(BIOS_EMERG, "ending istep 13.2\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 21c1afe9142..406f0cfcbc6 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -333,6 +333,9 @@ void main(void) prepare_dimm_data(); + report_istep(13, 1); // no-op + istep_13_2(); + /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 09684c865f237ff3655aae28f32570d59a7f156f Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 24 Feb 2021 13:35:36 +0100 Subject: [PATCH 030/213] soc/power9/: implement istep 13.3 13.3 mem_pll_initf: PLL Initfile for MBAs a) p9_mem_pll_initf.C (proc chip) - This step is a no-op on cumulus - This step is a no-op if memory is running in synchronous mode since the MCAs are using the nest PLL, HWP detect and exits - MCA PLL setup - Note that Hostboot doesn't support twiddling bits, Looks up which "bucket" (ring) to use from attributes set during mss_freq - Then request the SBE to scan ringId with setPulse - SBE needs to support 5 RS4 images - Data is stored as a ring image in the SBE that is frequency specific - 5 different frequencies (1866, 2133, 2400, 2667, EXP) Signed-off-by: Krystian Hebel Change-Id: Ie9ab183812750b52a87ec552033388a2a2f0a96f --- src/include/cpu/power/istep_13.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_3.c | 121 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 124 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_3.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 60789a609e9..856bdf42963 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -234,3 +234,4 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, void ccs_execute(chiplet_id_t id, int mca_i); void istep_13_2(void); +void istep_13_3(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index c2076d4f336..19215fcf434 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -8,6 +8,7 @@ romstage-y += rom_media.c romstage-y += romstage.c romstage-y += vpd.c romstage-y += istep_13_2.c +romstage-y += istep_13_3.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c new file mode 100644 index 00000000000..cae3035b7d8 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_3.c @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +#define RING_ID_1866 0x6B +#define RING_ID_2133 0x6C +#define RING_ID_2400 0x6D +#define RING_ID_2666 0x6E + +/* + * 13.3 mem_pll_initf: PLL Initfile for MBAs + * + * a) p9_mem_pll_initf.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - MCA PLL setup + * - Note that Hostboot doesn't support twiddling bits, Looks up which + * "bucket" (ring) to use from attributes set during mss_freq + * - Then request the SBE to scan ringId with setPulse + * - SBE needs to support 5 RS4 images + * - Data is stored as a ring image in the SBE that is frequency specific + * - 5 different frequencies (1866, 2133, 2400, 2667, EXP) + */ +void istep_13_3(void) +{ + printk(BIOS_EMERG, "starting istep 13.3\n"); + uint64_t ring_id; + int mcs_i; + + report_istep(13, 3); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + switch (mem_data.speed) { + case 2666: + ring_id = RING_ID_2666; + break; + case 2400: + ring_id = RING_ID_2400; + break; + case 2133: + ring_id = RING_ID_2133; + break; + case 1866: + ring_id = RING_ID_1866; + break; + default: + die("Unsupported memory speed (%d MT/s)\n", 
mem_data.speed); + } + + /* + * This is the only place where Hostboot does `putRing()` on Nimbus, but + * because Hostboot tries to be as generic as possible, there are many tests + * and safeties in place. We do not have to worry about another threads or + * out of order command/response pair. Just fill a buffer, send it and make + * sure the receiver (SBE) gets it. If you still want to know the details, + * start digging here: https://github.com/open-power/hostboot/blob/master/src/usr/scan/scandd.C#L169 + */ + // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG + if (read_scom(PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) + die("MBOX to SBE busy, this should not happen\n"); + + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + long time; + + if (!mem_data.mcs[mcs_i].functional) + continue; + + /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ + // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG + /* REQUIRE_RESPONSE, PSU_PUT_RING_FROM_IMAGE_CMD, CMD_CONTROL_PUTRING */ + /* + * TODO: there is also a sequence ID (bits 32-47) which should be unique. It + * has a value of 9 at this point in Hostboot logs, meaning there were + * probably earlier messages to SBE. In that case, we may also need a static + * variable for it, which probably implies wrapping this into a function and + * moving it to separate file. + */ + write_scom(PSU_HOST_SBE_MBOX0_REG, 0x000001000000D301); + + // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG + /* TARGET_TYPE_PERV, chiplet ID = 0x07, ring ID, RING_MODE_SET_PULSE_NSL */ + write_scom(PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | PPC_SHIFT(ring_id, 47) | + PPC_SHIFT(mcs_ids[mcs_i], 31)); + + // Ring the host->SBE doorbell + // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR + write_scom(PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0)); + + // Wait for response + /* + * Hostboot registers an interrupt handler in a thread that is demonized. We + * do not want nor need to implement a whole OS just for this purpose, we + * can just busy-wait here, there isn't anything better to do anyway. + * + * The original timeout is 90 seconds, but that seems like eternity. After + * thorough testing we probably should trim it. + */ + // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG + time = wait_ms(90 * MSECS_PER_SEC, read_scom(PSU_HOST_DOORBELL_REG) & PPC_BIT(0)); + + if (!time) + die("Timed out while waiting for SBE response\n"); + + /* This may depend on the requested frequency, but for current setup in our + * lab this is ~3ms both for coreboot and Hostboot. */ + printk(BIOS_EMERG, "putRing took %ld ms\n", time); + + // Clear SBE->host doorbell + // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND + write_scom(PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0)); + } + + printk(BIOS_EMERG, "ending istep 13.3\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 406f0cfcbc6..2ca94962127 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -335,6 +335,7 @@ void main(void) report_istep(13, 1); // no-op istep_13_2(); + istep_13_3(); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 4e5198a201d91e88a3ca921952cea5bfa00f3e2f Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 24 Feb 2021 15:35:18 +0100 Subject: [PATCH 031/213] soc/power9/: implement istep 13.4 13.4 mem_pll_setup: Setup PLL for MBAs a) p9_mem_pll_setup.C (proc chip) - This step is a no-op on cumulus - This step is a no-op if memory is running in synchronous mode since the MCAs are using the nest PLL, HWP detect and exits - MCA PLL setup - Moved PLL out of bypass (just DDR) - Performs PLL checking Signed-off-by: Krystian Hebel Change-Id: I7173e707f625936be471c4d55d1ecd8048e8a545 --- src/include/cpu/power/istep_13.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_4.c | 119 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 122 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_4.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 856bdf42963..19c22596b74 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -235,3 +235,4 @@ void ccs_execute(chiplet_id_t id, int mca_i); void istep_13_2(void); void istep_13_3(void); +void istep_13_4(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 19215fcf434..23e22bf7e8a 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -9,6 +9,7 @@ romstage-y += romstage.c romstage-y += vpd.c romstage-y += istep_13_2.c romstage-y += istep_13_3.c +romstage-y += istep_13_4.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_4.c b/src/soc/ibm/power9/istep_13_4.c new file mode 100644 index 00000000000..fed7db98dd3 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_4.c @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.4 mem_pll_setup: Setup PLL for MBAs + * + * a) p9_mem_pll_setup.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - MCA PLL setup + * - Moved PLL out of bypass (just DDR) + * - Performs PLL checking + */ +void istep_13_4(void) +{ + printk(BIOS_EMERG, "starting istep 13.4\n"); + int i; + + report_istep(13, 4); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (i = 0; i < MCS_PER_PROC; i++) { + // Drop PLDY bypass of Progdelay logic + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL1 (WAND) + [all] 1 + [2] CLK_PDLY_BYPASS_EN = 0 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + + // Drop DCC bypass of DCC logic + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL1 (WAND) + [all] 1 + [1] CLK_DCC_BYPASS_EN = 0 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN)); + + // ATTR_NEST_MEM_X_O_PCI_BYPASS is set to 0 in talos.xml. 
+ // > if (ATTR_NEST_MEM_X_O_PCI_BYPASS == 0) + + // Drop PLL test enable + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [3] PLL_TEST_EN = 0 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + + // Drop PLL reset + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [4] PLL_RESET = 0 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + + /* + * TODO: This is how Hosboot does it, maybe it would be better to use + * wait_ms and a separate loop to have only one timeout. On the other + * hand, it is possible that MCS will stop responding to SCOM accesses + * after PLL reset so we wouldn't be able to read the status. + */ + mdelay(5); + + // Check PLL lock + /* + TP.TPCHIP.NET.PCBSLMC01.PLL_LOCK_REG + assert([0] (reserved) == 1) + */ + if (!(read_scom_for_chiplet(mcs_ids[i], PCBSLMC01_PLL_LOCK_REG) & PPC_BIT(0))) + die("MCS%d PLL not locked\n", i); + + // Drop PLL Bypass + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [5] PLL_BYPASS = 0 + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + + // Set scan ratio to 4:1 + /* + TP.TCMC01.MCSLOW.OPCG_ALIGN + [47-51] SCAN_RATIO = 3 // 4:1 + */ + scom_and_or_for_chiplet(mcs_ids[i], MCSLOW_OPCG_ALIGN, ~PPC_BITMASK(47,51), + PPC_SHIFT(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO)); + + // > end if + + // Reset PCB Slave error register + /* + TP.TPCHIP.NET.PCBSLMC01.ERROR_REG + [all] 1 // Write 1 to clear + */ + write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_ERROR_REG, ~0); + + // Unmask PLL unlock error in PCB slave + /* + TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG + [12] (part of) ERROR_MASK = 0 + */ + scom_and_for_chiplet(mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, ~PPC_BIT(12)); + } + + printk(BIOS_EMERG, "ending istep 13.4\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 2ca94962127..999292ee950 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -336,6 +336,7 @@ void main(void) report_istep(13, 1); // no-op istep_13_2(); istep_13_3(); + istep_13_4(); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 9458f79e9b7a5f6e02abc7d11ed36fc325207ffc Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 25 Feb 2021 17:09:39 +0100 Subject: [PATCH 032/213] soc/power9/: implement istep 13.6 13.6 mem_startclocks: Start clocks on MBA/MCAs a) p9_mem_startclocks.C (proc chip) - This step is a no-op on cumulus - This step is a no-op if memory is running in synchronous mode since the MCAs are using the nest PLL, HWP detect and exits - Drop fences and tholds on MBA/MCAs to start the functional clocks Signed-off-by: Krystian Hebel Change-Id: I1e2fdf6a3113a3f23fc8af441cbfac0b3320972b --- src/include/cpu/power/istep_13.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_6.c | 332 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 2 + 4 files changed, 336 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_6.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 19c22596b74..de0bf351adc 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -236,3 +236,4 @@ void ccs_execute(chiplet_id_t id, int mca_i); void istep_13_2(void); void istep_13_3(void); void istep_13_4(void); +void istep_13_6(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 23e22bf7e8a..5090bef8f31 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -10,6 +10,7 @@ romstage-y += vpd.c romstage-y += istep_13_2.c romstage-y += istep_13_3.c romstage-y += istep_13_4.c +romstage-y += istep_13_6.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c new file mode 100644 index 00000000000..2f0b80080a3 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_6.c @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * FIXME: ATTR_PG value should come from MEMD partition, but it is empty after + * build. Default value from talos.xml (5 for all chiplets) probably never makes + * sense. Value read from already booted MVPD is 0xE0 (?) for both MCSs. We can + * either add functions to read and parse MVPD or just hardcode the values. So + * far I haven't found the code that writes to MVPD in Hostboot, other than for + * PDI keyword (PG keyword should be used here). + * + * Value below comes from a log of booting Hostboot. It isn't even remotely + * similar to values mentioned above. It touches bits marked as reserved in the + * documentation, so we can't rely on specification to be up to date. + * + * As this describes whether clocks on second MCS should be started or not, this + * definitely will be different when more DIMMs are installed. 
+ */ +#define ATTR_PG 0xE1FC000000000000ull + +static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id) +{ + // Drop partial good fences + /* + TP.TCMC01.MCSLOW.CPLT_CTRL1 (WO_CLEAR) + [all] 0 + [3] TC_VITL_REGION_FENCE = ~ATTR_PG[3] + [4-14] TC_REGION{1-3}_FENCE, UNUSED_{8-14}B = ~ATTR_PG[4-14] + */ + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL1_WCLEAR, ~ATTR_PG & PPC_BITMASK(3, 14)); + + // Reset abistclk_muxsel and syncclk_muxsel + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_CLEAR) + [all] 0 + [0] CTRL_CC_ABSTCLK_MUXSEL_DC = 1 + [1] TC_UNIT_SYNCCLK_MUXSEL_DC = 1 + */ + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC) | + PPC_BIT(MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC)); + +} + +static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) +{ + // Exit flush + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_OR) + [all] 0 + [2] CTRL_CC_FLUSHMODE_INH_DC = 1 + */ + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + + // Enable alignement + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_OR) + [all] 0 + [3] CTRL_CC_FORCE_ALIGN_DC = 1 + */ + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); + + // Clear chiplet is aligned + /* + TP.TCMC01.MCSLOW.SYNC_CONFIG + [7] CLEAR_CHIPLET_IS_ALIGNED = 1 + */ + scom_or_for_chiplet(id, MCSLOW_SYNC_CONFIG, + PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); + + // Unset Clear chiplet is aligned + /* + TP.TCMC01.MCSLOW.SYNC_CONFIG + [7] CLEAR_CHIPLET_IS_ALIGNED = 0 + */ + scom_and_for_chiplet(id, MCSLOW_SYNC_CONFIG, + ~PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); + + udelay(100); + + // Poll aligned bit + /* + timeout(10*100us): + TP.TCMC01.MCSLOW.CPLT_STAT0 + if (([9] CC_CTRL_CHIPLET_IS_ALIGNED_DC) == 1) break + delay(100us) + */ + if (!wait_us(10 * 100, read_scom_for_chiplet(id, MCSLOW_CPLT_STAT0) & + PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_CHIPLET_IS_ALIGNED_DC))) + die("Timeout while waiting for chiplet alignment\n"); + + // Disable alignment + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_CLEAR) + [all] 0 + [3] CTRL_CC_FORCE_ALIGN_DC = 1 + */ + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); +} + +static void p9_sbe_common_clock_start_stop(chiplet_id_t id) +{ + // Chiplet exit flush + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_OR) + [all] 0 + [2] CTRL_CC_FLUSHMODE_INH_DC = 1 + */ + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + + // Clear Scan region type register + /* + TP.TCMC01.MCSLOW.SCAN_REGION_TYPE + [all] 0 + */ + write_scom_for_chiplet(id, MCSLOW_SCAN_REGION_TYPE, 0); + + // Setup all Clock Domains and Clock Types + /* + TP.TCMC01.MCSLOW.CLK_REGION + [0-1] CLOCK_CMD = 1 // start + [2] SLAVE_MODE = 0 + [3] MASTER_MODE = 0 + [4-14] CLOCK_REGION_* = (((~ATTR_PG[4-14]) >> 1) & 0x07FE) << 1 = + ~ATTR_PG[4-14] & 0x0FFC = + ~ATTR_PG[4-13] // Hostboot tends to complicate + [48] SEL_THOLD_SL = 1 + [49] SEL_THOLD_NSL = 1 + [50] SEL_THOLD_ARY = 1 + */ + scom_and_or_for_chiplet(id, MCSLOW_CLK_REGION, + ~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)), + PPC_SHIFT(1, MCSLOW_CLK_REGION_CLOCK_CMD) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | + (~ATTR_PG & PPC_BITMASK(4, 13))); + + // Poll OPCG done bit to check for completeness + /* + timeout(10*100us): + TP.TCMC01.MCSLOW.CPLT_STAT0 + if (([8] 
CC_CTRL_OPCG_DONE_DC) == 1) break + delay(100us) + */ + if (!wait_us(10 * 100, read_scom_for_chiplet(id, MCSLOW_CPLT_STAT0) & + PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC))) + die("Timeout while waiting for OPCG done bit\n"); + + /* + * Here Hostboot calculates what is expected clock status, based on previous + * values and requested command. It is done by generic functions, but + * because we know exactly which clocks were to be started, we can test just + * for those. + */ + /* + TP.TCMC01.MCSLOW.CLOCK_STAT_SL + TP.TCMC01.MCSLOW.CLOCK_STAT_NSL + TP.TCMC01.MCSLOW.CLOCK_STAT_ARY + assert(([4-14] & ATTR_PG[4-14]) == ATTR_PG[4-14]) + */ + uint64_t mask = ATTR_PG & PPC_BITMASK(4, 13); + if ((read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_SL) & PPC_BITMASK(4, 13)) != mask || + (read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_NSL) & PPC_BITMASK(4, 13)) != mask || + (read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_ARY) & PPC_BITMASK(4, 13)) != mask) + die("Unexpected clock status\n"); +} + +static inline void p9_mem_startclocks_fence_setup_function(chiplet_id_t id) +{ + /* + * Hostboot does it based on pg_vector. It seems to check for Nest IDs to + * which MCs are connected, but I'm not sure if this is the case. I also + * don't know if it is possible to have a functional MCBIST for which we + * don't want to drop the fence (functional MCBIST with nonfunctional NEST?) + * + * Most likely this will need to be fixed for populated second MCS. + */ + + /* + * if ((MC.ATTR_CHIP_UNIT_POS == 0x07 && pg_vector[5]) || + * (MC.ATTR_CHIP_UNIT_POS == 0x08 && pg_vector[3])) + *{ + */ + + // Drop chiplet fence + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [18] FENCE_EN = 0 + */ + write_scom_for_chiplet(id, PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_FENCE_EN)); + + /* }*/ +} + +static void p9_sbe_common_configure_chiplet_FIR(chiplet_id_t id) +{ + // reset pervasive FIR + /* + TP.TCMC01.MCSLOW.LOCAL_FIR + [all] 0 + */ + write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR, 0); + + // configure pervasive FIR action/mask + /* + TP.TCMC01.MCSLOW.LOCAL_FIR_ACTION0 + [all] 0 + TP.TCMC01.MCSLOW.LOCAL_FIR_ACTION1 + [all] 0 + [0-3] 0xF + TP.TCMC01.MCSLOW.LOCAL_FIR_MASK + [all] 0 + [4-41] 0x3FFFFFFFFF (every bit set) + */ + write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR_ACTION0, 0); + write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR_ACTION1, PPC_BITMASK(0, 3)); + write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR_MASK, PPC_BITMASK(4, 41)); + + // reset XFIR + /* + TP.TCMC01.MCSLOW.XFIR + [all] 0 + */ + write_scom_for_chiplet(id, MCSLOW_XFIR, 0); + + // configure XFIR mask + /* + TP.TCMC01.MCSLOW.FIR_MASK + [all] 0 + */ + write_scom_for_chiplet(id, MCSLOW_FIR_MASK, 0); +} + +/* + * 13.6 mem_startclocks: Start clocks on MBA/MCAs + * + * a) p9_mem_startclocks.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - Drop fences and tholds on MBA/MCAs to start the functional clocks + */ +void istep_13_6(void) +{ + printk(BIOS_EMERG, "starting istep 13.6\n"); + int i; + + report_istep(13, 6); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (i = 0; i < MCS_PER_PROC; i++) { + /* According to logs, Hostboot does it also for the second MCS */ + //~ if (!mem_data.mcs[i].functional) + //~ continue; + + // Call p9_mem_startclocks_cplt_ctrl_action_function for Mc chiplets + p9_mem_startclocks_cplt_ctrl_action_function(mcs_ids[i]); + + // Call module align 
chiplets for Mc chiplets + p9_sbe_common_align_chiplets(mcs_ids[i]); + + // Call module clock start stop for MC01, MC23 + p9_sbe_common_clock_start_stop(mcs_ids[i]); + + // Call p9_mem_startclocks_fence_setup_function for Mc chiplets + p9_mem_startclocks_fence_setup_function(mcs_ids[i]); + + // Clear flush_inhibit to go in to flush mode + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_CLEAR) + [all] 0 + [2] CTRL_CC_FLUSHMODE_INH_DC = 1 + */ + write_scom_for_chiplet(mcs_ids[i], MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + + // Call p9_sbe_common_configure_chiplet_FIR for MC chiplets + p9_sbe_common_configure_chiplet_FIR(mcs_ids[i]); + + // Reset FBC chiplet configuration + /* + TP.TCMC01.MCSLOW.CPLT_CONF0 + [48-51] TC_UNIT_GROUP_ID_DC = ATTR_PROC_FABRIC_GROUP_ID // Where do these come from? + [52-54] TC_UNIT_CHIP_ID_DC = ATTR_PROC_FABRIC_CHIP_ID + [56-60] TC_UNIT_SYS_ID_DC = ATTR_PROC_FABRIC_SYSTEM_ID // 0 in talos.xml + */ + /* + * Take 0 for all values - assuming ATTR_PROC_FABRIC_GROUP_ID is + * ATTR_FABRIC_GROUP_ID of parent PROC (same for CHIP_ID). Only + * SYSTEM_ID is present in talos.xml with full name. + */ + scom_and_for_chiplet(mcs_ids[i], MCSLOW_CPLT_CONF0, + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(56, 60))); + + // Add to Multicast Group + /* Avoid setting if register is already set, i.e. [3-5] != 7 */ + /* + TP.TPCHIP.NET.PCBSLMC01.MULTICAST_GROUP_1 + [3-5] MULTICAST1_GROUP: if 7 then set to 0 + [16-23] (not described): if [3-5] == 7 then set to 0x1C // No clue why Hostboot modifies these bits + TP.TPCHIP.NET.PCBSLMC01.MULTICAST_GROUP_2 + [3-5] MULTICAST1_GROUP: if 7 then set to 2 + [16-23] (not described): if [3-5] == 7 then set to 0x1C + */ + if ((read_scom_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1) & PPC_BITMASK(3, 5)) + == PPC_BITMASK(3, 5)) + scom_and_or_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1, + ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), + PPC_BITMASK(19, 21)); + + if ((read_scom_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2) & PPC_BITMASK(3, 5)) + == PPC_BITMASK(3, 5)) + scom_and_or_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2, + ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), + PPC_BIT(4) | PPC_BITMASK(19, 21)); + } + + printk(BIOS_EMERG, "ending istep 13.6\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 999292ee950..4dd9abae248 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -337,6 +337,8 @@ void main(void) istep_13_2(); istep_13_3(); istep_13_4(); + report_istep(13, 5); // no-op + istep_13_6(); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 98e514ea5907688e8571d9caaed92fc7b921c39a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 4 Mar 2021 18:29:05 +0100 Subject: [PATCH 033/213] soc/power9/istep_13_8.c: implementation Signed-off-by: Krystian Hebel Change-Id: I08c87266e45b7547bf0f7be3de988607edfb4561 --- src/include/cpu/power/istep_13.h | 1 + src/include/cpu/power/scom.h | 6 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_8.c | 2227 ++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 2 + 5 files changed, 2237 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_8.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index de0bf351adc..74431bdd2e5 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -237,3 +237,4 @@ void istep_13_2(void); void istep_13_3(void); void istep_13_4(void); void istep_13_6(void); +void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index 42eacc62483..00f62b28b83 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -85,6 +85,12 @@ typedef enum { EC23_CHIPLET_ID = 0x37 ///< Core23 chiplet (Quad5, EX11, C1) } chiplet_id_t; +static const chiplet_id_t mcs_to_nest[] = +{ + [MC01_CHIPLET_ID] = N3_CHIPLET_ID, + [MC23_CHIPLET_ID] = N1_CHIPLET_ID, +}; + uint64_t read_scom_direct(uint64_t reg_address); void write_scom_direct(uint64_t reg_address, uint64_t data); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 5090bef8f31..77561ce721b 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -11,6 +11,7 @@ romstage-y += istep_13_2.c romstage-y += istep_13_3.c romstage-y += istep_13_4.c romstage-y += istep_13_6.c +romstage-y += istep_13_8.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c new file mode 100644 index 00000000000..836bbf11e20 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_8.c @@ -0,0 +1,2227 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +#define ATTR_PG 0xE000000000000000ull +#define FREQ_PB_MHZ 1866 + +/* + * This function was generated from initfiles. Some of the registers used here + * are not documented, except for occasional name of a constant written to it. + * They also access registers at addresses for chiplet ID = 5 (Nest west), even + * though the specified target is MCA. It is not clear if MCA offset has to be + * added to SCOM address for those registers or not. Even logs from debug + * version of Hostboot don't list the addresses explicitly, but by comparing + * them with values read with 'pdbg' it seems that they use a stride of 0x10. + * + * Undocumented registers are marked with (?) in the comments. + */ +static void p9n_mca_scom(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + const int mca_mul = 0x10; + /* + * Mixing rules: + * - rank configurations are the same for both DIMMs + * - fields for unpopulated DIMMs are initialized to all 0 + * + * With those two assumptions values can be logically ORed to produce a + * common value without conditionals. + */ + int n_dimms = (mca->dimm[0].present && mca->dimm[1].present) ? 
2 : 1; + int mranks = mca->dimm[0].mranks | mca->dimm[1].mranks; + int log_ranks = mca->dimm[0].log_ranks | mca->dimm[1].log_ranks; + bool is_8H = (log_ranks / mranks) == 8; + chiplet_id_t nest = mcs_to_nest[id]; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + /* P9N2_MCS_PORT02_MCPERF0 (?) + [22-27] = 0x20 // AMO_LIMIT + */ + scom_and_or_for_chiplet(nest, 0x05010823 + mca_i * mca_mul, + ~PPC_BITMASK(22, 27), PPC_SHIFT(0x20, 27)); + + /* P9N2_MCS_PORT02_MCPERF2 (?) + [0-2] = 1 // PF_DROP_VALUE0 + [3-5] = 3 // PF_DROP_VALUE1 + [6-8] = 5 // PF_DROP_VALUE2 + [9-11] = 7 // PF_DROP_VALUE3 + [13-15] = // REFRESH_BLOCK_CONFIG + if has only one DIMM in MCA: + 0b000 : if master ranks = 1 + 0b001 : if master ranks = 2 + 0b100 : if master ranks = 4 + // Per allowable DIMM mixing rules, we cannot mix different number of ranks on any single port + if has both DIMMs in MCA: + 0b010 : if master ranks = 1 + 0b011 : if master ranks = 2 + 0b100 : if master ranks = 4 // 4 mranks is the same for one and two DIMMs in MCA + [16] = // ENABLE_REFRESH_BLOCK_SQ + [17] = // ENABLE_REFRESH_BLOCK_NSQ, always the same value as [16] + 1 : if (1 < (DIMM0 + DIMM1 logical ranks) <= 8 && not (one DIMM, 4 mranks, 2H 3DS) + 0 : otherwise + [18] = 0 // ENABLE_REFRESH_BLOCK_DISP + [28-31] = 0b0100 // SQ_LFSR_CNTL + [50-54] = 0b11100 // NUM_RMW_BUF + [61] = ATTR_ENABLE_MEM_EARLY_DATA_SCOM // EN_ALT_ECR_ERR, 0? + */ + uint64_t ref_blk_cfg = mranks == 4 ? 0x4 : + mranks == 2 ? (n_dimms == 1 ? 0x1 : 0x3) : + n_dimms == 1 ? 0x0 : 0x2; + uint64_t en_ref_blk = (log_ranks <= 1 || log_ranks > 8) ? 0 : + (n_dimms == 1 && mranks == 4 && log_ranks == 8) ? 0 : 3; + + scom_and_or_for_chiplet(nest, 0x05010824 + mca_i * mca_mul, + /* and */ + ~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31) + | PPC_BITMASK(28, 31) | PPC_BITMASK(50, 54) | PPC_BIT(61)), + /* or */ + PPC_SHIFT(1, 2) | PPC_SHIFT(3, 5) | PPC_SHIFT(5, 8) + | PPC_SHIFT(7, 11) /* PF_DROP_VALUEs */ + | PPC_SHIFT(ref_blk_cfg, 15) | PPC_SHIFT(en_ref_blk, 17) + | PPC_SHIFT(0x4, 31) | PPC_SHIFT(0x1C, 54)); + + /* P9N2_MCS_PORT02_MCAMOC (?) + [1] = 0 // FORCE_PF_DROP0 + [4-28] = 0x19fffff // WRTO_AMO_COLLISION_RULES + [29-31] = 1 // AMO_SIZE_SELECT, 128B_RW_64B_DATA + */ + scom_and_or_for_chiplet(nest, 0x05010825 + mca_i * mca_mul, + ~(PPC_BIT(1) | PPC_BITMASK(4, 31)), + PPC_SHIFT(0x19FFFFF, 28) | PPC_SHIFT(1, 31)); + + /* P9N2_MCS_PORT02_MCEPSQ (?) + [0-7] = 1 // JITTER_EPSILON + // ATTR_PROC_EPS_READ_CYCLES_T* are calculated in 8.6 + // Rounded up? + [8-15] = (ATTR_PROC_EPS_READ_CYCLES_T0 + 6) / 4 // LOCAL_NODE_EPSILON + [16-23] = (ATTR_PROC_EPS_READ_CYCLES_T1 + 6) / 4 // NEAR_NODAL_EPSILON + [24-31] = (ATTR_PROC_EPS_READ_CYCLES_T1 + 6) / 4 // GROUP_EPSILON + [32-39] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4 // REMOTE_NODAL_EPSILON + [40-47] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4 // VECTOR_GROUP_EPSILON + */ + scom_and_or_for_chiplet(nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), + PPC_SHIFT(1, 7) /* FIXME: fill the rest with non-hardcoded values*/ + | PPC_SHIFT(4, 15) | PPC_SHIFT(4, 23) | PPC_SHIFT(4, 31) + | PPC_SHIFT(0x19, 39) | PPC_SHIFT(0x19, 47)); +//~ static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; // T0, T1 +//~ static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; // T2 + + /* P9N2_MCS_PORT02_MCBUSYQ (?) 
+ [0] = 1 // ENABLE_BUSY_COUNTERS + [1-3] = 1 // BUSY_COUNTER_WINDOW_SELECT, 1024 cycles + [4-13] = 38 // BUSY_COUNTER_THRESHOLD0 + [14-23] = 51 // BUSY_COUNTER_THRESHOLD1 + [24-33] = 64 // BUSY_COUNTER_THRESHOLD2 + */ + scom_and_or_for_chiplet(nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), + PPC_BIT(0) | PPC_SHIFT(1, 3) | PPC_SHIFT(38, 13) + | PPC_SHIFT(51, 23) | PPC_SHIFT(64, 33)); + + /* P9N2_MCS_PORT02_MCPERF3 (?) + [31] = 1 // ENABLE_CL0 + [41] = 1 // ENABLE_AMO_MSI_RMW_ONLY + [43] = !ATTR_ENABLE_MEM_EARLY_DATA_SCOM // ENABLE_CP_M_MDI0_LOCAL_ONLY, !0 = 1? + [44] = 1 // DISABLE_WRTO_IG + [45] = 1 // AMO_LIMIT_SEL + */ + scom_or_for_chiplet(nest, 0x0501082B + mca_i * mca_mul, + PPC_BIT(31) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(44) + | PPC_BIT(45)); + + /* MC01.PORT0.SRQ.MBA_DSM0Q = + // These are set per port so all latencies should be calculated from both DIMMs (if present) + [0-5] MBA_DSM0Q_CFG_RODT_START_DLY = ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + [6-11] MBA_DSM0Q_CFG_RODT_END_DLY = ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 5 + [12-17] MBA_DSM0Q_CFG_WODT_START_DLY = 0 + [18-23] MBA_DSM0Q_CFG_WODT_END_DLY = 5 + [24-29] MBA_DSM0Q_CFG_WRDONE_DLY = 24 + [30-35] MBA_DSM0Q_CFG_WRDATA_DLY = ATTR_EFF_DRAM_CWL + ATTR_MSS_EFF_DPHY_WLO - 8 + // Assume RDIMM, non-NVDIMM only + [36-41] MBA_DSM0Q_CFG_RDTAG_DLY = + MSS_FREQ_EQ_1866: ATTR_EFF_DRAM_CL + 7 + MSS_FREQ_EQ_2133: ATTR_EFF_DRAM_CL + 7 + MSS_FREQ_EQ_2400: ATTR_EFF_DRAM_CL + 8 + MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CL + 9 + */ + /* ATTR_MSS_EFF_DPHY_WLO = 1 from VPD, 3 from dump? */ + uint64_t rdtag_dly = mem_data.speed == 2666 ? 9 : + mem_data.speed == 2400 ? 8 : 7; + mca_and_or(id, mca_i, MBA_DSM0Q, ~PPC_BITMASK(0, 41), + PPC_SHIFT(mca->cl - mem_data.cwl, MBA_DSM0Q_CFG_RODT_START_DLY) | + PPC_SHIFT(mca->cl - mem_data.cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY) | + PPC_SHIFT(5, MBA_DSM0Q_CFG_WODT_END_DLY) | + PPC_SHIFT(24, MBA_DSM0Q_CFG_WRDONE_DLY) | + PPC_SHIFT(mem_data.cwl + /* 1 */ 3 - 8, MBA_DSM0Q_CFG_WRDATA_DLY) | + PPC_SHIFT(mca->cl + rdtag_dly, MBA_DSM0Q_CFG_RDTAG_DLY)); + + /* MC01.PORT0.SRQ.MBA_TMR0Q = + [0-3] MBA_TMR0Q_RRDM_DLY = + MSS_FREQ_EQ_1866: 8 + MSS_FREQ_EQ_2133: 9 + MSS_FREQ_EQ_2400: 10 + MSS_FREQ_EQ_2666: 11 + [4-7] MBA_TMR0Q_RRSMSR_DLY = 4 + [8-11] MBA_TMR0Q_RRSMDR_DLY = 4 + [12-15] MBA_TMR0Q_RROP_DLY = ATTR_EFF_DRAM_TCCD_L + [16-19] MBA_TMR0Q_WWDM_DLY = + MSS_FREQ_EQ_1866: 8 + MSS_FREQ_EQ_2133: 9 + MSS_FREQ_EQ_2400: 10 + MSS_FREQ_EQ_2666: 11 + [20-23] MBA_TMR0Q_WWSMSR_DLY = 4 + [24-27] MBA_TMR0Q_WWSMDR_DLY = 4 + [28-31] MBA_TMR0Q_WWOP_DLY = ATTR_EFF_DRAM_TCCD_L + [32-36] MBA_TMR0Q_RWDM_DLY = // same as below + [37-41] MBA_TMR0Q_RWSMSR_DLY = // same as below + [42-46] MBA_TMR0Q_RWSMDR_DLY = + MSS_FREQ_EQ_1866: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 8 + MSS_FREQ_EQ_2133: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 9 + MSS_FREQ_EQ_2400: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 10 + MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 11 + [47-50] MBA_TMR0Q_WRDM_DLY = + MSS_FREQ_EQ_1866: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 8 + MSS_FREQ_EQ_2133: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 9 + MSS_FREQ_EQ_2400: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 10 + MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 11 + [51-56] MBA_TMR0Q_WRSMSR_DLY = // same as below + [57-62] MBA_TMR0Q_WRSMDR_DLY = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWTR_S + 4 + */ + uint64_t var_dly = mem_data.speed == 2666 ? 11 : + mem_data.speed == 2400 ? 10 : + mem_data.speed == 2133 ? 
9 : 8; + mca_and_or(id, mca_i, MBA_TMR0Q, PPC_BIT(63), + PPC_SHIFT(var_dly, MBA_TMR0Q_RRDM_DLY) | + PPC_SHIFT(4, MBA_TMR0Q_RRSMSR_DLY) | + PPC_SHIFT(4, MBA_TMR0Q_RRSMDR_DLY) | + PPC_SHIFT(mca->nccd_l, MBA_TMR0Q_RROP_DLY) | + PPC_SHIFT(var_dly, MBA_TMR0Q_WWDM_DLY) | + PPC_SHIFT(4, MBA_TMR0Q_WWSMSR_DLY) | + PPC_SHIFT(4, MBA_TMR0Q_WWSMDR_DLY) | + PPC_SHIFT(mca->nccd_l, MBA_TMR0Q_WWOP_DLY) | + PPC_SHIFT(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWDM_DLY) | + PPC_SHIFT(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMSR_DLY) | + PPC_SHIFT(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMDR_DLY) | + PPC_SHIFT(mem_data.cwl - mca->cl + var_dly, MBA_TMR0Q_WRDM_DLY) | + PPC_SHIFT(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMSR_DLY) | + PPC_SHIFT(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMDR_DLY)); + + /* MC01.PORT0.SRQ.MBA_TMR1Q = + [0-3] MBA_TMR1Q_RRSBG_DLY = ATTR_EFF_DRAM_TCCD_L + [4-9] MBA_TMR1Q_WRSBG_DLY = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWTR_L + 4 + [10-15] MBA_TMR1Q_CFG_TFAW = ATTR_EFF_DRAM_TFAW + [16-20] MBA_TMR1Q_CFG_TRCD = ATTR_EFF_DRAM_TRCD + [21-25] MBA_TMR1Q_CFG_TRP = ATTR_EFF_DRAM_TRP + [26-31] MBA_TMR1Q_CFG_TRAS = ATTR_EFF_DRAM_TRAS + [41-47] MBA_TMR1Q_CFG_WR2PRE = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWR + 4 + [48-51] MBA_TMR1Q_CFG_RD2PRE = ATTR_EFF_DRAM_TRTP + [52-55] MBA_TMR1Q_TRRD = ATTR_EFF_DRAM_TRRD_S + [56-59] MBA_TMR1Q_TRRD_SBG = ATTR_EFF_DRAM_TRRD_L + [60-63] MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY = // var_dly from above + MSS_FREQ_EQ_1866: 8 + MSS_FREQ_EQ_2133: 9 + MSS_FREQ_EQ_2400: 10 + MSS_FREQ_EQ_2666: 11 + */ + mca_and_or(id, mca_i, MBA_TMR1Q, 0, + PPC_SHIFT(mca->nccd_l, MBA_TMR1Q_RRSBG_DLY) | + PPC_SHIFT(mem_data.cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY) | + PPC_SHIFT(mca->nfaw, MBA_TMR1Q_CFG_TFAW) | + PPC_SHIFT(mca->nrcd, MBA_TMR1Q_CFG_TRCD) | + PPC_SHIFT(mca->nrp, MBA_TMR1Q_CFG_TRP) | + PPC_SHIFT(mca->nras, MBA_TMR1Q_CFG_TRAS) | + PPC_SHIFT(mem_data.cwl + mca->nwr + 4, MBA_TMR1Q_CFG_WR2PRE) | + PPC_SHIFT(mem_data.nrtp, MBA_TMR1Q_CFG_RD2PRE) | + PPC_SHIFT(mca->nrrd_s, MBA_TMR1Q_TRRD) | + PPC_SHIFT(mca->nrrd_l, MBA_TMR1Q_TRRD_SBG) | + PPC_SHIFT(var_dly, MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY)); + + /* MC01.PORT0.SRQ.MBA_WRQ0Q = + [5] MBA_WRQ0Q_CFG_WRQ_FIFO_MODE = 0 // ATTR_MSS_REORDER_QUEUE_SETTING, 0 = reorder + [6] MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE = 1 + [55-58] MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING = 8 + */ + mca_and_or(id, mca_i, MBA_WRQ0Q, + ~(PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE) | + PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | + PPC_BITMASK(55, 58)), + PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | + PPC_SHIFT(8, MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING)); + + /* MC01.PORT0.SRQ.MBA_RRQ0Q = + [6] MBA_RRQ0Q_CFG_RRQ_FIFO_MODE = 0 // ATTR_MSS_REORDER_QUEUE_SETTING + [57-60] MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING = 8 + */ + mca_and_or(id, mca_i, MBA_RRQ0Q, + ~(PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE) | PPC_BITMASK(57, 60)), + PPC_SHIFT(8, MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING)); + + /* MC01.PORT0.SRQ.MBA_FARB0Q = + if (l_TGT3_ATTR_MSS_MRW_DRAM_2N_MODE == 0x02 || (l_TGT3_ATTR_MSS_MRW_DRAM_2N_MODE == 0x00 && l_TGT2_ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET == 0x02)) + [17] MBA_FARB0Q_CFG_2N_ADDR = 1 // Default is auto for mode, 1N from VPD, so [17] = 0 + [38] MBA_FARB0Q_CFG_PARITY_AFTER_CMD = 1 + [61-63] MBA_FARB0Q_CFG_OPT_RD_SIZE = 3 + */ + mca_and_or(id, mca_i, MBA_FARB0Q, + ~(PPC_BIT(MBA_FARB0Q_CFG_2N_ADDR) | + PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) | + PPC_BITMASK(61, 63)), + PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) | + PPC_SHIFT(3, MBA_FARB0Q_CFG_OPT_RD_SIZE)); + + /* 
MC01.PORT0.SRQ.MBA_FARB1Q = + [0-2] MBA_FARB1Q_CFG_SLOT0_S0_CID = 0 + [3-5] MBA_FARB1Q_CFG_SLOT0_S1_CID = 4 + [6-8] MBA_FARB1Q_CFG_SLOT0_S2_CID = 2 + [9-11] MBA_FARB1Q_CFG_SLOT0_S3_CID = 6 + if (DIMM0 is 8H 3DS) + [12-14] MBA_FARB1Q_CFG_SLOT0_S4_CID = 1 + [15-17] MBA_FARB1Q_CFG_SLOT0_S5_CID = 5 + [18-20] MBA_FARB1Q_CFG_SLOT0_S6_CID = 3 + [21-23] MBA_FARB1Q_CFG_SLOT0_S7_CID = 7 + else + [12-14] MBA_FARB1Q_CFG_SLOT0_S4_CID = 0 + [15-17] MBA_FARB1Q_CFG_SLOT0_S5_CID = 4 + [18-20] MBA_FARB1Q_CFG_SLOT0_S6_CID = 2 + [21-23] MBA_FARB1Q_CFG_SLOT0_S7_CID = 6 + if (DIMM0 has 4 master ranks) + [12-14] MBA_FARB1Q_CFG_SLOT0_S4_CID = 4 // TODO: test if all slots with 4R DIMMs works with that + [24-26] MBA_FARB1Q_CFG_SLOT1_S0_CID = 0 + [27-29] MBA_FARB1Q_CFG_SLOT1_S1_CID = 4 + [30-32] MBA_FARB1Q_CFG_SLOT1_S2_CID = 2 + [33-35] MBA_FARB1Q_CFG_SLOT1_S3_CID = 6 + if (DIMM1 is 8H 3DS) + [36-38] MBA_FARB1Q_CFG_SLOT1_S4_CID = 1 + [39-41] MBA_FARB1Q_CFG_SLOT1_S5_CID = 5 + [42-44] MBA_FARB1Q_CFG_SLOT1_S6_CID = 3 + [45-47] MBA_FARB1Q_CFG_SLOT1_S7_CID = 7 + else + [36-38] MBA_FARB1Q_CFG_SLOT1_S4_CID = 0 + [39-41] MBA_FARB1Q_CFG_SLOT1_S5_CID = 4 + [42-44] MBA_FARB1Q_CFG_SLOT1_S6_CID = 2 + [45-47] MBA_FARB1Q_CFG_SLOT1_S7_CID = 6 + if (DIMM1 has 4 master ranks) + [36-38] MBA_FARB1Q_CFG_SLOT1_S4_CID = 4 // TODO: test if all slots with 4R DIMMs works with that + */ + /* Due to allowable DIMM mixing rules, ranks of both DIMMs are the same */ + uint64_t cids_even = (0 << 9) | (4 << 6) | (2 << 3) | (6 << 0); + uint64_t cids_odd = (1 << 9) | (5 << 6) | (3 << 3) | (7 << 0); + uint64_t cids_4_7 = is_8H ? cids_odd : cids_even; + /* Not sure if this is even supported, there is no MT VPD data for this case */ + if (mranks == 4) + cids_4_7 = (cids_4_7 & ~(7ull << 9)) | (4 << 9); + + mca_and_or(id, mca_i, MBA_FARB1Q, ~PPC_BITMASK(0, 47), + PPC_SHIFT(cids_even, MBA_FARB1Q_CFG_SLOT0_S3_CID) | + PPC_SHIFT(cids_4_7, MBA_FARB1Q_CFG_SLOT0_S7_CID) | + PPC_SHIFT(cids_even, MBA_FARB1Q_CFG_SLOT1_S3_CID) | + PPC_SHIFT(cids_4_7, MBA_FARB1Q_CFG_SLOT1_S7_CID)); + + /* MC01.PORT0.SRQ.MBA_FARB2Q = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of uint8_t X, big endian numbering + [0-3] MBA_FARB2Q_CFG_RANK0_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][0]) + [4-7] MBA_FARB2Q_CFG_RANK1_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][1]) + [8-11] MBA_FARB2Q_CFG_RANK2_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][2]) // always 0 + [12-15] MBA_FARB2Q_CFG_RANK3_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][3]) // always 0 + [16-19] MBA_FARB2Q_CFG_RANK4_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][0]) + [20-23] MBA_FARB2Q_CFG_RANK5_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][1]) + [24-27] MBA_FARB2Q_CFG_RANK6_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][2]) // always 0 + [28-31] MBA_FARB2Q_CFG_RANK7_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][3]) // always 0 + [32-35] MBA_FARB2Q_CFG_RANK0_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][0]) + [36-39] MBA_FARB2Q_CFG_RANK1_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][1]) + [40-43] MBA_FARB2Q_CFG_RANK2_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][2]) // always 0 + [44-47] MBA_FARB2Q_CFG_RANK3_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][3]) // always 0 + [48-51] MBA_FARB2Q_CFG_RANK4_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][0]) + [52-55] MBA_FARB2Q_CFG_RANK5_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][1]) + [56-59] MBA_FARB2Q_CFG_RANK6_WR_ODT = 
F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][2]) // always 0 + [60-63] MBA_FARB2Q_CFG_RANK7_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][3]) // always 0 + */ + #define F(X) ((((X) >> 4) & 0xc) | (((X) >> 2) & 0x3)) + mca_and_or(id, mca_i, MBA_FARB2Q, 0, + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), MBA_FARB2Q_CFG_RANK0_RD_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), MBA_FARB2Q_CFG_RANK1_RD_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), MBA_FARB2Q_CFG_RANK4_RD_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), MBA_FARB2Q_CFG_RANK5_RD_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), MBA_FARB2Q_CFG_RANK0_WR_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), MBA_FARB2Q_CFG_RANK1_WR_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), MBA_FARB2Q_CFG_RANK4_WR_ODT) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), MBA_FARB2Q_CFG_RANK5_WR_ODT) ); + #undef F + + /* MC01.PORT0.SRQ.PC.MBAREF0Q = + [5-7] MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD = 3 + [8-18] MBAREF0Q_CFG_REFRESH_INTERVAL = ATTR_EFF_DRAM_TREFI / (8 * (DIMM0 + DIMM1 logical ranks)) + [30-39] MBAREF0Q_CFG_TRFC = ATTR_EFF_DRAM_TRFC + [40-49] MBAREF0Q_CFG_REFR_TSV_STACK = ATTR_EFF_DRAM_TRFC_DLR + [50-60] MBAREF0Q_CFG_REFR_CHECK_INTERVAL = ((ATTR_EFF_DRAM_TREFI / 8) * 6) / 5 + */ + /* + * Hostboot writes slightly lower REFR_CHECK_INTERVAL, 1544 vs 1560, because + * it uses 99% of tREFI in 7.4 in eff_dimm::dram_trefi(). If this causes any + * issues we can do the same, but for now let's try to avoid floating point + * arithmetic. + */ + mca_and_or(id, mca_i, MBAREF0Q, ~(PPC_BITMASK(5, 18) | PPC_BITMASK(30, 60)), + PPC_SHIFT(3, MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD) | + PPC_SHIFT(mem_data.nrefi / (8 * 2 * log_ranks), MBAREF0Q_CFG_REFRESH_INTERVAL) | + PPC_SHIFT(mca->nrfc, MBAREF0Q_CFG_TRFC) | + PPC_SHIFT(mca->nrfc_dlr, MBAREF0Q_CFG_REFR_TSV_STACK) | + PPC_SHIFT(((mem_data.nrefi / 8) * 6) / 5, MBAREF0Q_CFG_REFR_CHECK_INTERVAL)); + + /* MC01.PORT0.SRQ.PC.MBARPC0Q = + [6-10] MBARPC0Q_CFG_PUP_AVAIL = + MSS_FREQ_EQ_1866: 6 + MSS_FREQ_EQ_2133: 7 + MSS_FREQ_EQ_2400: 8 + MSS_FREQ_EQ_2666: 9 + [11-15] MBARPC0Q_CFG_PDN_PUP = + MSS_FREQ_EQ_1866: 5 + MSS_FREQ_EQ_2133: 6 + MSS_FREQ_EQ_2400: 6 + MSS_FREQ_EQ_2666: 7 + [16-20] MBARPC0Q_CFG_PUP_PDN = + MSS_FREQ_EQ_1866: 5 + MSS_FREQ_EQ_2133: 6 + MSS_FREQ_EQ_2400: 6 + MSS_FREQ_EQ_2666: 7 + [21] MBARPC0Q_RESERVED_21 = // MCP_PORT0_SRQ_PC_MBARPC0Q_CFG_QUAD_RANK_ENC + (l_def_MASTER_RANKS_DIMM0 == 4): 1 + (l_def_MASTER_RANKS_DIMM0 != 4): 0 + */ + /* Perhaps these can be done by ns_to_nck(), but Hostboot used a forest of ifs */ + uint64_t pup_avail = mem_data.speed == 1866 ? 6 : + mem_data.speed == 2133 ? 7 : + mem_data.speed == 2400 ? 8 : 9; + uint64_t p_up_dn = mem_data.speed == 1866 ? 5 : + mem_data.speed == 2666 ? 7 : 6; + mca_and_or(id, mca_i, MBARPC0Q, ~PPC_BITMASK(6, 21), + PPC_SHIFT(pup_avail, MBARPC0Q_CFG_PUP_AVAIL) | + PPC_SHIFT(p_up_dn, MBARPC0Q_CFG_PDN_PUP) | + PPC_SHIFT(p_up_dn, MBARPC0Q_CFG_PUP_PDN) | + (mranks == 4 ? 
PPC_BIT(MBARPC0Q_RESERVED_21) : 0)); + + /* MC01.PORT0.SRQ.PC.MBASTR0Q = + [12-16] MBASTR0Q_CFG_TCKESR = 5 + [17-21] MBASTR0Q_CFG_TCKSRE = + MSS_FREQ_EQ_1866: 10 + MSS_FREQ_EQ_2133: 11 + MSS_FREQ_EQ_2400: 12 + MSS_FREQ_EQ_2666: 14 + [22-26] MBASTR0Q_CFG_TCKSRX = + MSS_FREQ_EQ_1866: 10 + MSS_FREQ_EQ_2133: 11 + MSS_FREQ_EQ_2400: 12 + MSS_FREQ_EQ_2666: 14 + [27-37] MBASTR0Q_CFG_TXSDLL = + MSS_FREQ_EQ_1866: 597 + MSS_FREQ_EQ_2133: 768 + MSS_FREQ_EQ_2400: 768 + MSS_FREQ_EQ_2666: 939 + [46-56] MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL = ATTR_EFF_DRAM_TREFI / (8 * (DIMM0 + DIMM1 logical ranks)) + */ + uint64_t tcksr_ex = mem_data.speed == 1866 ? 10 : + mem_data.speed == 2133 ? 11 : + mem_data.speed == 2400 ? 12 : 14; + uint64_t txsdll = mem_data.speed == 1866 ? 597 : + mem_data.speed == 2666 ? 939 : 768; + mca_and_or(id, mca_i, MBASTR0Q, ~(PPC_BITMASK(12, 37) | PPC_BITMASK(46, 56)), + PPC_SHIFT(5, MBASTR0Q_CFG_TCKESR) | + PPC_SHIFT(tcksr_ex, MBASTR0Q_CFG_TCKSRE) | + PPC_SHIFT(tcksr_ex, MBASTR0Q_CFG_TCKSRX) | + PPC_SHIFT(txsdll, MBASTR0Q_CFG_TXSDLL) | + PPC_SHIFT(mem_data.nrefi / + (8 * (mca->dimm[0].log_ranks + mca->dimm[1].log_ranks)), + MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL)); + + /* MC01.PORT0.ECC64.SCOM.RECR = + [16-18] MBSECCQ_VAL_TO_DATA_DELAY = + l_TGT4_ATTR_MC_SYNC_MODE == 1: 5 + l_def_mn_freq_ratio < 915: 3 + l_def_mn_freq_ratio < 1150: 4 + l_def_mn_freq_ratio < 1300: 5 + l_def_mn_freq_ratio >= 1300: 6 + [19] MBSECCQ_DELAY_VALID_1X = 0 + [20-21] MBSECCQ_NEST_VAL_TO_DATA_DELAY = + l_TGT4_ATTR_MC_SYNC_MODE == 1: 1 + l_def_mn_freq_ratio < 1040: 1 + l_def_mn_freq_ratio < 1150: 0 + l_def_mn_freq_ratio < 1215: 1 + l_def_mn_freq_ratio < 1300: 0 + l_def_mn_freq_ratio < 1400: 1 + l_def_mn_freq_ratio >= 1400: 0 + [22] MBSECCQ_DELAY_NONBYPASS = + l_TGT4_ATTR_MC_SYNC_MODE == 1: 0 + l_def_mn_freq_ratio < 1215: 0 + l_def_mn_freq_ratio >= 1215: 1 + [40] MBSECCQ_RESERVED_36_43 = // MCP_PORT0_ECC64_ECC_SCOM_MBSECCQ_BYPASS_TENURE_3 + l_TGT4_ATTR_MC_SYNC_MODE == 1: 0 + l_TGT4_ATTR_MC_SYNC_MODE == 0: 1 + */ + /* Assume asynchronous mode */ + /* + * From Hostboot: + * l_def_mn_freq_ratio = 1000 * ATTR_MSS_FREQ / ATTR_FREQ_PB_MHZ; + * ATTR_MSS_FREQ is in MT/s (sigh), ATTR_FREQ_PB_MHZ is 1866 MHz (from talos.xml). + */ + uint64_t mn_freq_ratio = 1000 * mem_data.speed / FREQ_PB_MHZ; + uint64_t val_to_data = mn_freq_ratio < 915 ? 3 : + mn_freq_ratio < 1150 ? 4 : + mn_freq_ratio < 1300 ? 5 : 6; + uint64_t nest_val_to_data = mn_freq_ratio < 1040 ? 1 : + mn_freq_ratio < 1150 ? 0 : + mn_freq_ratio < 1215 ? 1 : + mn_freq_ratio < 1300 ? 0 : + mn_freq_ratio < 1400 ? 1 : 0; + mca_and_or(id, mca_i, RECR, ~(PPC_BITMASK(16, 22) | PPC_BIT(MBSECCQ_RESERVED_40)), + PPC_SHIFT(val_to_data, MBSECCQ_VAL_TO_DATA_DELAY) | + PPC_SHIFT(nest_val_to_data, MBSECCQ_NEST_VAL_TO_DATA_DELAY) | + (mn_freq_ratio < 1215 ? 0 : PPC_BIT(MBSECCQ_DELAY_NONBYPASS)) | + PPC_BIT(MBSECCQ_RESERVED_40)); + + /* MC01.PORT0.ECC64.SCOM.DBGR = + [9] DBGR_ECC_WAT_ACTION_SELECT = 0 + [10-11] DBGR_ECC_WAT_SOURCE = 0 + */ + mca_and_or(id, mca_i, DBGR, ~PPC_BITMASK(9, 11), 0); + + /* MC01.PORT0.WRITE.WRTCFG = + [9] = 1 // MCP_PORT0_WRITE_NEW_WRITE_64B_MODE this is marked as RO const 0 for bits 8-63 in docs! 
+ */ + mca_and_or(id, mca_i, WRTCFG, ~0ull, PPC_BIT(9)); +} + +static void thermal_throttle_scominit(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* Set power control register */ + /* MC01.PORT0.SRQ.PC.MBARPC0Q = + [3-5] MBARPC0Q_CFG_MIN_MAX_DOMAINS = 0 + [22] MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_ENABLE = + if ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_OFF: 0 // default + else: 1 + [23-32] MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME = 959 + */ + mca_and_or(id, mca_i, MBARPC0Q, ~(PPC_BITMASK(3, 5) | PPC_BITMASK(22, 32)), + PPC_SHIFT(959, MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME)); + + /* Set STR register */ + /* MC01.PORT0.SRQ.PC.MBASTR0Q = + [0] MBASTR0Q_CFG_STR_ENABLE = + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR: 1 + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_CLK_STOP: 1 + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == POWER_DOWN: 0 + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_OFF: 0 // default + [2-11] MBASTR0Q_CFG_ENTER_STR_TIME = 1023 + */ + mca_and_or(id, mca_i, MBASTR0Q, ~(PPC_BIT(0) | PPC_BITMASK(2, 11)), + PPC_SHIFT(1023, MBASTR0Q_CFG_ENTER_STR_TIME)); + + /* Set N/M throttling control register */ + /* MC01.PORT0.SRQ.MBA_FARB3Q = + [0-14] MBA_FARB3Q_CFG_NM_N_PER_SLOT = ATTR_MSS_RUNTIME_MEM_THROTTLED_N_COMMANDS_PER_SLOT[mss::index(MCA)] + [15-30] MBA_FARB3Q_CFG_NM_N_PER_PORT = ATTR_MSS_RUNTIME_MEM_THROTTLED_N_COMMANDS_PER_PORT[mss::index(MCA)] + [31-44] MBA_FARB3Q_CFG_NM_M = ATTR_MSS_MRW_MEM_M_DRAM_CLOCKS // default 0x200 + [45-47] MBA_FARB3Q_CFG_NM_RAS_WEIGHT = 0 + [48-50] MBA_FARB3Q_CFG_NM_CAS_WEIGHT = 1 + // Set to disable permanently due to hardware design bug (HW403028) that won't be changed + [53] MBA_FARB3Q_CFG_NM_CHANGE_AFTER_SYNC = 0 + */ + /* + * Values of m_dram_clocks and nm_throttled_n_per_port come from talos.xml + * nm_n_per_slot and nm_n_per_port are derived from values in talos.xml + * + * All of them may be different for other platforms + */ + uint64_t nm_n_per_slot = 0x80; + uint64_t nm_n_per_port = 0x80; + uint64_t m_dram_clocks = 0x200; + mca_and_or(id, mca_i, MBA_FARB3Q, ~(PPC_BITMASK(0, 50) | PPC_BIT(53)), + PPC_SHIFT(nm_n_per_slot, MBA_FARB3Q_CFG_NM_N_PER_SLOT) | + PPC_SHIFT(nm_n_per_port, MBA_FARB3Q_CFG_NM_N_PER_PORT) | + PPC_SHIFT(m_dram_clocks, MBA_FARB3Q_CFG_NM_M) | + PPC_SHIFT(1, MBA_FARB3Q_CFG_NM_CAS_WEIGHT)); + + /* Set safemode throttles */ + /* MC01.PORT0.SRQ.MBA_FARB4Q = + [27-41] MBA_FARB4Q_EMERGENCY_N = ATTR_MSS_MRW_SAFEMODE_MEM_THROTTLED_N_COMMANDS_PER_PORT[mss::index(MCA)] + [42-55] MBA_FARB4Q_EMERGENCY_M = ATTR_MSS_MRW_MEM_M_DRAM_CLOCKS + */ + uint64_t nm_throttled_n_per_port = 0x20; + mca_and_or(id, mca_i, MBA_FARB4Q, ~PPC_BITMASK(27, 55), + PPC_SHIFT(nm_throttled_n_per_port, MBA_FARB4Q_EMERGENCY_N) | + PPC_SHIFT(m_dram_clocks, MBA_FARB4Q_EMERGENCY_M)); +} + +/* + * Values set in this function are mostly for magic MCA, other (functional) MCAs + * are set later. If all of these registers are later written with proper values + * for functional MCAs, maybe this can be called just for magic, non-functional + * ones to save time, but for now do it in a way the Hostboot does it. + */ +static void p9n_ddrphy_scom(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + /* + * Hostboot sets this to proper value in phy_scominit(), but I don't see + * why. Speed is the same for whole MCBIST anyway. + */ + uint64_t strength = mem_data.speed == 1866 ? 1 : + mem_data.speed == 2133 ? 2 : + mem_data.speed == 2400 ? 
4 : 8; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_DLL_VREG_CONTROL0_P0_{0,1,2,3,4} = + [48-50] RXREG_VREG_COMPCON_DC = 3 + [52-59] = 0x74: + [53-55] RXREG_VREG_DRVCON_DC = 0x7 + [56-58] RXREG_VREG_REF_SEL_DC = 0x2 + [62-63] = 0: + [62] DLL_DRVREN_MODE = POWER8 mode (thermometer style, enabling all drivers up to the one that is used) + [63] DLL_CAL_CKTS_ACTIVE = After VREG calibration, some analog circuits are powered down + */ + /* Same as default value after reset? */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0, + ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), + PPC_SHIFT(3, RXREG_VREG_COMPCON_DC) | + PPC_SHIFT(7, RXREG_VREG_DRVCON_DC) | + PPC_SHIFT(2, RXREG_VREG_REF_SEL_DC)); + + /* IOM0.DDRPHY_DP16_DLL_VREG_CONTROL1_P0_{0,1,2,3,4} = + [48-50] RXREG_VREG_COMPCON_DC = 3 + [52-59] = 0x74: + [53-55] RXREG_VREG_DRVCON_DC = 0x7 + [56-58] RXREG_VREG_REF_SEL_DC = 0x2 + [62-63] = 0: + [62] DLL_DRVREN_MODE = POWER8 mode (thermometer style, enabling all drivers up to the one that is used) + [63] DLL_CAL_CKTS_ACTIVE = After VREG calibration, some analog circuits are powered down + */ + /* Same as default value after reset? */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0, + ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), + PPC_SHIFT(3, RXREG_VREG_COMPCON_DC) | + PPC_SHIFT(7, RXREG_VREG_DRVCON_DC) | + PPC_SHIFT(2, RXREG_VREG_REF_SEL_DC)); + + /* IOM0.DDRPHY_DP16_WRCLK_PR_P0_{0,1,2,3,4} = + // For zero delay simulations, or simulations where the delay of the SysClk tree and the WrClk tree are equal, + // set this field to 60h + [49-55] TSYS_WRCLK = 0x60 + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, + ~PPC_BITMASK(49, 55), PPC_SHIFT(0x60, TSYS_WRCLK)); + + /* IOM0.DDRPHY_DP16_IO_TX_CONFIG0_P0_{0,1,2,3,4} = + [48-51] STRENGTH = 0x4 // 2400 MT/s + [52] DD2_RESET_READ_FIX_DISABLE = 0 // Enable the DD2 function to remove the register reset on read feature + // on status registers + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_CONFIG0_P0_0, + ~PPC_BITMASK(48, 52), + PPC_SHIFT(strength, DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH)); + + /* IOM0.DDRPHY_DP16_DLL_CONFIG1_P0_{0,1,2,3,4} = + [48-63] = 0x0006: + [48-51] HS_DLLMUX_SEL_0_0_3 = 0 + [53-56] HS_DLLMUX_SEL_1_0_3 = 0 + [61] S0INSDLYTAP = 1 // For proper functional operation, this bit must be 0b + [62] S1INSDLYTAP = 1 // For proper functional operation, this bit must be 0b + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_CONFIG1_P0_0, + ~(PPC_BITMASK(48, 63)), + PPC_BIT(S0INSDLYTAP) | PPC_BIT(S1INSDLYTAP)); + + /* IOM0.DDRPHY_DP16_IO_TX_FET_SLICE_P0_{0,1,2,3,4} = + [48-63] = 0x7f7f: + [59-55] EN_SLICE_N_WR = 0x7f + [57-63] EN_SLICE_P_WR = 0x7f + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, + ~PPC_BITMASK(48, 63), + PPC_SHIFT(0x7F, EN_SLICE_N_WR) | + PPC_SHIFT(0x7F, EN_SLICE_P_WR)); + } + + for (dp = 0; dp < 4; dp++) { + /* IOM0.DDRPHY_ADR_BIT_ENABLE_P0_ADR{0,1,2,3} = + [48-63] = 0xffff + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_BIT_ENABLE_P0_ADR0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0xFFFF, 63)); + } + + /* IOM0.DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1 = + [48-63] = 0x5000: + [49] DI_ADR2_ADR3: 1 = Lanes 2 and 3 are a differential clock pair + [51] DI_ADR6_ADR7: 1 = Lanes 6 and 7 are a differential clock pair + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x5000, 63)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = + [48-63] = 0x4040: + [49-55] ADR_DELAY2 = 0x40 + [57-63] 
ADR_DELAY3 = 0x40 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x4040, 63)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = + [48-63] = 0x4040: + [49-55] ADR_DELAY6 = 0x40 + [57-63] ADR_DELAY7 = 0x40 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x4040, 63)); + + for (dp = 0; dp < 2; dp ++) { + /* IOM0.DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S{0,1} = + [48-63] = 0x0008: + [48-51] HS_DLLMUX_SEL_0_3 = 0 + [59-62] STRENGTH = 4 // 2400 MT/s + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0, + ~PPC_BITMASK(48, 63), + PPC_SHIFT(strength, DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH)); + + /* IOM0.DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S{0,1} = + [48-63] = 0x6000 + // For zero delay simulations, or simulations where the delay of the + // SysClk tree and the WrClk tree are equal, set this field to 60h + [49-55] TSYS_WRCLK = 0x60 + */ + dp_mca_and_or(id, dp, mca_i, + DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + ~PPC_BITMASK(48, 63), + PPC_SHIFT(0x60, TSYS_WRCLK)); + + /* IOM0.DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S{0,1} = + [48-50] RXREG_VREG_COMPCON_DC = 3 + [52-59] = 0x74: + [53-55] RXREG_VREG_DRVCON_DC = 0x7 + [56-58] RXREG_VREG_REF_SEL_DC = 0x2 + [63] DLL_CAL_CKTS_ACTIVE = 0 // After VREG calibration, some analog circuits are powered down + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0, + ~PPC_BITMASK(48, 63), + PPC_SHIFT(3, RXREG_VREG_COMPCON_DC) | + PPC_SHIFT(7, RXREG_VREG_DRVCON_DC) | + PPC_SHIFT(2, RXREG_VREG_REF_SEL_DC)); + } + + /* IOM0.DDRPHY_PC_CONFIG0_P0 = + [48-63] = 0x0202: + [48-51] PDA_ENABLE_OVERRIDE = 0 + [52] 2TCK_PREAMBLE_ENABLE = 0 + [53] PBA_ENABLE = 0 + [54] DDR4_CMD_SIG_REDUCTION = 1 + [55] SYSCLK_2X_MEMINTCLKO = 0 + [56] RANK_OVERRIDE = 0 + [57-59] RANK_OVERRIDE_VALUE = 0 + [60] LOW_LATENCY = 0 + [61] DDR4_IPW_LOOP_DIS = 0 + [62] DDR4_VLEVEL_BANK_GROUP = 1 + [63] VPROTH_PSEL_MODE = 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_CONFIG0_P0, ~PPC_BITMASK(48, 63), + PPC_BIT(DDR4_CMD_SIG_REDUCTION) | + PPC_BIT(DDR4_VLEVEL_BANK_GROUP)); +} + +static void p9n_mcbist_scom(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0AQ = + [0-47] WATCFG0AQ_CFG_WAT_EVENT_SEL = 0x400000000000 + */ + scom_and_or_for_chiplet(id, WATCFG0AQ, ~PPC_BITMASK(0, 47), + PPC_SHIFT(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0BQ = + [0-43] WATCFG0BQ_CFG_WAT_MSKA = 0x3fbfff + [44-60] WATCFG0BQ_CFG_WAT_CNTL = 0x10000 + */ + scom_and_or_for_chiplet(id, WATCFG0BQ, ~PPC_BITMASK(0, 60), + PPC_SHIFT(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA) | + PPC_SHIFT(0x10000, WATCFG0BQ_CFG_WAT_CNTL)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0DQ = + [0-43] WATCFG0DQ_CFG_WAT_PATA = 0x80200004000 + */ + scom_and_or_for_chiplet(id, WATCFG0DQ, ~PPC_BITMASK(0, 43), + PPC_SHIFT(0x80200004000, WATCFG0DQ_CFG_WAT_PATA)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3AQ = + [0-47] WATCFG3AQ_CFG_WAT_EVENT_SEL = 0x800000000000 + */ + scom_and_or_for_chiplet(id, WATCFG3AQ, ~PPC_BITMASK(0, 47), + PPC_SHIFT(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3BQ = + [0-43] WATCFG3BQ_CFG_WAT_MSKA = 0xfffffffffff + [44-60] WATCFG3BQ_CFG_WAT_CNTL = 0x10400 + */ + scom_and_or_for_chiplet(id, WATCFG3BQ, ~PPC_BITMASK(0, 60), + PPC_SHIFT(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA) | + PPC_SHIFT(0x10400, WATCFG3BQ_CFG_WAT_CNTL)); + + /* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ = + [36] MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE = 0 + */ + 
scom_and_for_chiplet(id, MCBCFGQ, ~PPC_BIT(MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG0Q = + [0] DBGCFG0Q_CFG_DBG_ENABLE = 1 + [23-33] DBGCFG0Q_CFG_DBG_PICK_MCBIST01 = 0x780 + */ + scom_and_or_for_chiplet(id, DBGCFG0Q, ~PPC_BITMASK(23, 33), + PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | + PPC_SHIFT(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG1Q = + [0] DBGCFG1Q_CFG_WAT_ENABLE = 1 + */ + scom_or_for_chiplet(id, DBGCFG1Q, PPC_BIT(DBGCFG1Q_CFG_WAT_ENABLE)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG2Q = + [0-19] DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL = 0x10000 + [20-39] DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL = 0x08000 + */ + scom_and_or_for_chiplet(id, DBGCFG2Q, ~PPC_BITMASK(0, 39), + PPC_SHIFT(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL) | + PPC_SHIFT(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG3Q = + [20-22] DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL = 0x4 + [23-25] DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL = 0x4 + [37-40] DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE = 0x4 + */ + scom_and_or_for_chiplet(id, DBGCFG3Q, ~(PPC_BITMASK(20, 25) | PPC_BITMASK(37, 40)), + PPC_SHIFT(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL) | + PPC_SHIFT(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL) | + PPC_SHIFT(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE)); +} + +static void set_rank_pairs(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + /* + * Assumptions: + * - non-LR DIMMs (platform wiki), + * - no ATTR_EFF_RANK_GROUP_OVERRIDE, + * - mixing rules followed - the same rank configuration for both DIMMs. + * + * Because rank pairs are defined for each MCA, we can only have up to two + * 2R DIMMs. For such configurations, RP0 primary is rank 0 on DIMM 0, + * RP1 primary - rank 1 DIMM 0, RP2 primary - rank 0 DIMM 1, + * RP3 primary - rank 1 DIMM 1. There are no secondary (this is true for + * RDIMM only), tertiary or quaternary rank pairs. + */ + + static const uint16_t F[] = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff}; + + /* TODO: can we mix mirrored and non-mirrored 2R DIMMs in one port? */ + + /* IOM0.DDRPHY_PC_RANK_PAIR0_P0 = + // rank_countX is the number of master ranks on DIMM X. 
+ [48-63] = 0x1537 & F[rank_count0]: // F = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff} + [48-50] RANK_PAIR0_PRI = 0 + [51] RANK_PAIR0_PRI_V = 1: if (rank_count0 >= 1) + [52-54] RANK_PAIR0_SEC = 2 + [55] RANK_PAIR0_SEC_V = 1: if (rank_count0 >= 3) + [56-58] RANK_PAIR1_PRI = 1 + [59] RANK_PAIR1_PRI_V = 1: if (rank_count0 >= 2) + [60-62] RANK_PAIR1_SEC = 3 + [63] RANK_PAIR1_SEC_V = 1: if (rank_count0 == 4) + */ + mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR0_P0, ~PPC_BITMASK(48, 63), + PPC_SHIFT(0x1537 & F[mca->dimm[0].mranks], 63)); + + /* IOM0.DDRPHY_PC_RANK_PAIR1_P0 = + [48-63] = 0x1537 & F[rank_count1]: // F = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff} + [48-50] RANK_PAIR2_PRI = 0 + [51] RANK_PAIR2_PRI_V = 1: if (rank_count1 >= 1) + [52-54] RANK_PAIR2_SEC = 2 + [55] RANK_PAIR2_SEC_V = 1: if (rank_count1 >= 3) + [56-58] RANK_PAIR3_PRI = 1 + [59] RANK_PAIR3_PRI_V = 1: if (rank_count1 >= 2) + [60-62] RANK_PAIR3_SEC = 3 + [63] RANK_PAIR3_SEC_V = 1: if (rank_count1 == 4) + */ + mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR1_P0, ~PPC_BITMASK(48, 63), + PPC_SHIFT(0x1537 & F[mca->dimm[1].mranks], 63)); + + /* IOM0.DDRPHY_PC_RANK_PAIR2_P0 = + [48-63] = 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR2_P0, ~PPC_BITMASK(48, 63), 0); + + /* IOM0.DDRPHY_PC_RANK_PAIR3_P0 = + [48-63] = 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR3_P0, ~PPC_BITMASK(48, 63), 0); + + /* IOM0.DDRPHY_PC_CSID_CFG_P0 = + [0-63] 0xf000: + [48] CS0_INIT_CAL_VALUE = 1 + [49] CS1_INIT_CAL_VALUE = 1 + [50] CS2_INIT_CAL_VALUE = 1 + [51] CS3_INIT_CAL_VALUE = 1 + */ + mca_and_or(id, mca_i, DDRPHY_PC_CSID_CFG_P0, ~PPC_BITMASK(48, 63), + PPC_SHIFT(0xF000, 63)); + + /* IOM0.DDRPHY_PC_MIRROR_CONFIG_P0 = + [all] = 0 + // A rank is mirrored if all are true: + // - the rank is valid (RANK_PAIRn_XXX_V == 1) + // - the rank is odd (RANK_PAIRn_XXX % 2 == 1) + // - the mirror mode attribute is set for the rank's DIMM (SPD[136]) + // - We are not in quad encoded mode (so master ranks <= 2) + [48] ADDR_MIRROR_RP0_PRI + ... + [55] ADDR_MIRROR_RP3_SEC + [58] ADDR_MIRROR_A3_A4 = 1 + [59] ADDR_MIRROR_A5_A6 = 1 + [60] ADDR_MIRROR_A7_A8 = 1 + [61] ADDR_MIRROR_A11_A13 = 1 + [62] ADDR_MIRROR_BA0_BA1 = 1 + [63] ADDR_MIRROR_BG0_BG1 = 1 + */ + /* + * Assumptions: + * - primary and secondary have the same evenness, + * - RP1 and RP3 have odd ranks, + * - both DIMMs have SPD[136] set or both have it unset, no mixing allowed, + * - when rank is not valid, it doesn't matter if it is mirrored, + * - no quad encoded mode - no data for it in MT VPD anyway. + * + * With all of the above, ADDR_MIRROR_RP{1,3}_{PRI,SEC} = SPD[136]. + */ + uint64_t mirr = mca->dimm[0].present ? mca->dimm[0].spd[136] : + mca->dimm[1].spd[136]; + mca_and_or(id, mca_i, DDRPHY_PC_MIRROR_CONFIG_P0, ~PPC_BITMASK(48, 63), + PPC_SHIFT(mirr, ADDR_MIRROR_RP1_PRI) | + PPC_SHIFT(mirr, ADDR_MIRROR_RP1_SEC) | + PPC_SHIFT(mirr, ADDR_MIRROR_RP3_PRI) | + PPC_SHIFT(mirr, ADDR_MIRROR_RP3_SEC) | + PPC_BITMASK(58, 63)); + + /* IOM0.DDRPHY_PC_RANK_GROUP_EXT_P0 = // 0x8000C0350701103F + [all] = 0 + // Same rules as above + [48] ADDR_MIRROR_RP0_TER + ... + [55] ADDR_MIRROR_RP3_QUA + */ + /* These are not valid anyway, so don't bother setting anything. 
*/ +} + +static void reset_data_bit_enable(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + for (dp = 0; dp < 4; dp++) { + /* IOM0.DDRPHY_DP16_DQ_BIT_ENABLE0_P0_{0,1,2,3} = + [all] = 0 + [48-63] DATA_BIT_ENABLE_0_15 = 0xffff + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFFFF); + } + + /* IOM0.DDRPHY_DP16_DQ_BIT_ENABLE0_P0_4 = + [all] = 0 + [48-63] DATA_BIT_ENABLE_0_15 = 0xff00 + */ + dp_mca_and_or(id, 4, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFF00); + + /* IOM0.DDRPHY_DP16_DFT_PDA_CONTROL_P0_{0,1,2,3,4} = + // This reg is named MCA_DDRPHY_DP16_DATA_BIT_ENABLE1_P0_n in the code. + // Probably the address changed for DD2 but the documentation did not. + [all] = 0 + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DFT_PDA_CONTROL_P0_0, 0, 0); + } +} + +/* 5 DP16, 8 MCA */ +/* + * These tables specify which clock/strobes pins (16-23) of DP16 are used to + * capture outgoing/incoming data on which data pins (0-16). Those will + * eventually arrive to DIMM as DQS and DQ, respectively. The mapping must be + * the same for write and read, but for some reason HW has two separate sets of + * registers. + */ +/* + * TODO: after we know how MCAs are numbered we can drop half of x8 table. + * I'm 90% sure it is 0,1,4,5, but for now I'll leave the rest in comments. + */ +static const uint16_t x4_clk[5] = {0x8640, 0x8640, 0x8640, 0x8640, 0x8400}; +static const uint16_t x8_clk[8][5] = { + {0x0CC0, 0xC0C0, 0x0CC0, 0x0F00, 0x0C00}, /* Port 0 */ + {0xC0C0, 0x0F00, 0x0CC0, 0xC300, 0x0C00}, /* Port 1 */ +// {0xC300, 0xC0C0, 0xC0C0, 0x0F00, 0x0C00}, /* Port 2 */ +// {0x0F00, 0x0F00, 0xC300, 0xC300, 0xC000}, /* Port 3 */ + {0x0CC0, 0xC0C0, 0x0F00, 0x0F00, 0xC000}, /* Port 4 */ + {0xC300, 0x0CC0, 0x0CC0, 0xC300, 0xC000}, /* Port 5 */ +// {0x0CC0, 0x0CC0, 0x0CC0, 0xC0C0, 0x0C00}, /* Port 6 */ +// {0x0CC0, 0xC0C0, 0x0F00, 0xC300, 0xC000}, /* Port 7 */ +}; + +static void reset_clock_enable(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + /* Assume the same rank configuration for both DIMMs */ + int dp; + int width = mca->dimm[0].present ? mca->dimm[0].width : + mca->dimm[1].width; + int mranks[2] = {mca->dimm[0].mranks, mca->dimm[1].mranks}; + /* Index for x8_clk depends on how MCAs are numbered... */ + const uint16_t *clk = width == WIDTH_x4 ? 
x4_clk : + x8_clk[mcs_i * MCA_PER_MCS + mca_i]; + + /* IOM0.DDRPHY_DP16_WRCLK_EN_RP0_P0_{0,1,2,3,4} + [all] = 0 + [48-63] QUADn_CLKxx + */ + /* IOM0.DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_{0,1,2,3,4} + [all] = 0 + [48-63] QUADn_CLKxx + */ + for (dp = 0; dp < 5; dp++) { + /* Note that these correspond to valid rank pairs */ + if (mranks[0] > 0) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP0_P0_0, + 0, clk[dp]); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_0, + 0, clk[dp]); + } + + if (mranks[0] > 1) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP1_P0_0, + 0, clk[dp]); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR1_P0_0, + 0, clk[dp]); + } + + if (mranks[1] > 0) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP2_P0_0, + 0, clk[dp]); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR2_P0_0, + 0, clk[dp]); + } + + if (mranks[1] > 1) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP3_P0_0, + 0, clk[dp]); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR3_P0_0, + 0, clk[dp]); + } + } +} + +static void reset_rd_vref(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + int dp; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + /* RD_VREF_DVDD * (100000 - ATTR_MSS_VPD_MT_VREF_MC_RD) / RD_VREF_DAC_STEP + vref_bf = 12 * (100000 - ATTR_MSS_VPD_MT_VREF_MC_RD) / 6500 + IOM0.DDRPHY_DP16_RD_VREF_DAC_{0-7}_P0_{0-3}, + IOM0.DDRPHY_DP16_RD_VREF_DAC_{0-3}_P0_4 = // only half of last DP16 is used + [49-55] BIT0_VREF_DAC = vref_bf + [57-63] BIT1_VREF_DAC = vref_bf + */ + const uint64_t vref_bf = 12 * (100000 - ATTR_MSS_VPD_MT_VREF_MC_RD[vpd_idx]) / 6500; + for (dp = 0; dp < 5; dp++) { + + /* SCOM addresses are not regular for DAC, so no inner loop. 
*/ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_0_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_1_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_2_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_3_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + if (dp == 4) break; + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_4_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_5_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_6_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_7_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | + PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + } + + /* IOM0.DDRPHY_DP16_RD_VREF_CAL_EN_P0_{0-4} + [48-63] VREF_CAL_EN = 0xffff // enable = 0xffff, disable = 0x0000 + */ + for (dp = 0; dp < 5; dp++) { + /* Is it safe to set this before VREF_DAC? If yes, may use one loop for both */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, + 0, PPC_BITMASK(48, 63)); + } +} + +static void pc_reset(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* These are from VPD */ + /* + uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data.speed == 1866 ? 1 : 2; + uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data.speed == 1866 ? 4 : + mem_data.speed == 2133 ? 5 : + mem_data.speed == 2400 ? 6 : 7; + */ + + /* IOM0.DDRPHY_PC_CONFIG0_P0 has been reset in p9n_ddrphy_scom() */ + + /* IOM0.DDRPHY_PC_CONFIG1_P0 = + [48-51] WRITE_LATENCY_OFFSET = ATTR_MSS_EFF_DPHY_WLO + [52-55] READ_LATENCY_OFFSET = ATTR_MSS_EFF_DPHY_RLO + +1: if 2N mode (ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET, ATTR_MSS_MRW_DRAM_2N_MODE) // Gear-down mode in JEDEC + // Assume no LRDIMM + [59-61] MEMORY_TYPE = 0x5 // 0x7 for LRDIMM + [62] DDR4_LATENCY_SW = 1 + */ + /* + * FIXME: I have no idea where Hostboot gets these values from, they should + * be the same as in VPD, yet WLO is 3 and RLO is 5 when written to SCOM... + * + * These are from VPD: + * uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data.speed == 1866 ? 1 : 2; + * uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data.speed == 1866 ? 4 : + * mem_data.speed == 2133 ? 5 : + * mem_data.speed == 2400 ? 
6 : 7; + */ + mca_and_or(id, mca_i, DDRPHY_PC_CONFIG1_P0, + ~(PPC_BITMASK(48, 55) | PPC_BITMASK(59, 62)), + PPC_SHIFT(/* ATTR_MSS_EFF_DPHY_WLO */ 3, WRITE_LATENCY_OFFSET) | + PPC_SHIFT(/* ATTR_MSS_EFF_DPHY_RLO */ 5, READ_LATENCY_OFFSET) | + PPC_SHIFT(0x5, MEMORY_TYPE) | PPC_BIT(DDR4_LATENCY_SW)); + + /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); +} + +static void wc_reset(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + /* IOM0.DDRPHY_WC_CONFIG0_P0 = + [all] 0 + // BUG? Mismatch between comment (-,-), code (+,+) and docs (-,+) for operations inside 'max' + [48-55] TWLO_TWLOE = 12 + max((twldqsen - tmod), (twlo + twlow)) + + longest DQS delay in clocks (rounded up) + longest DQ delay in clocks (rounded up) + [56] WL_ONE_DQS_PULSE = 1 + [57-62] FW_WR_RD = 0x20 // "# dd0 = 17 clocks, now 32 from SWyatt" + [63] CUSTOM_INIT_WRITE = 1 // set to a 1 to get proper values for RD VREF + */ + /* + * tMOD = max(24 nCK, 15 ns) = 24 nCK for all supported speed bins + * tWLDQSEN >= 25 nCK + * tWLDQSEN > tMOD + ODTLon + tADC + * 0.3 tCK <= tADC <= 0.7 tCK, round to 1 + * ODTLon = WL - 2 = CWL + AL + PL - 2; AL = 0, PL = 0 + * tWLDQSEN = max(25, tMOD + CWL - 2 + 1) = CWL + 23 + * tWLO = 0 - 9.5 ns, Hostboot uses ATTR_MSS_EFF_DPHY_WLO + * tWLOE = 0 - 2 ns, Hostboot uses 2 ns + * Longest DQ and DQS delays are both equal 1 nCK. + */ + /* + * FIXME: again, tWLO = 3 in Hostboot. Why? + * This is still much smaller than tWLDQSEN so leave it, for now. + */ + uint64_t tWLO = mem_data.speed == 1866 ? 1 : 2; + uint64_t tWLOE = ns_to_nck(2); + uint64_t tWLDQSEN = MAX(25, tMOD + (mem_data.cwl - 2) + 1); + /* + * Use the version from the code, it may be longer than necessary but it + * works. Note that MAX() always expands to CWL + 23 + 24 = 47 + CWL, which + * means that we can just write 'tWLO_tWLOE = 61 + CWL'. Leaving full + * version below, it will be easier to fix. + */ + /* + * FIXME: relative to Hostboot, we are 2 nCK short for tWLDQSEN (37 vs 39). + * It doesn't have '- 2' in its calculations (timing.H). However, this is + * JEDEC way of doing it so it _should_ work. + */ + uint64_t tWLO_tWLOE = 12 + MAX((tWLDQSEN + tMOD), (tWLO + tWLOE)) + 1 + 1; + mca_and_or(id, mca_i, DDRPHY_WC_CONFIG0_P0, 0, + PPC_SHIFT(tWLO_tWLOE, TWLO_TWLOE) | PPC_BIT(WL_ONE_DQS_PULSE) | + PPC_SHIFT(0x20, FW_WR_RD) | PPC_BIT(CUSTOM_INIT_WRITE)); + + /* IOM0.DDRPHY_WC_CONFIG1_P0 = + [all] 0 + [48-51] BIG_STEP = 7 + [52-54] SMALL_STEP = 0 + [55-60] WR_PRE_DLY = 0x2a (42) + */ + mca_and_or(id, mca_i, DDRPHY_WC_CONFIG1_P0, 0, + PPC_SHIFT(7, BIG_STEP) | PPC_SHIFT(0x2A, WR_PRE_DLY)); + + /* IOM0.DDRPHY_WC_CONFIG2_P0 = + [all] 0 + [48-51] NUM_VALID_SAMPLES = 5 + [52-57] FW_RD_WR = max(tWTR_S + 11, AL + tRTP + 3) + [58-61] IPW_WR_WR = 5 // results in 24 clock cycles + */ + /* There is no Additive Latency. 
*/ + mca_and_or(id, mca_i, DDRPHY_WC_CONFIG2_P0, 0, + PPC_SHIFT(5, NUM_VALID_SAMPLES) | + PPC_SHIFT(MAX(mca->nwtr_s + 11, mem_data.nrtp + 3), FW_RD_WR) | + PPC_SHIFT(5, IPW_WR_WR)); + + /* IOM0.DDRPHY_WC_CONFIG3_P0 = + [all] 0 + [55-60] MRS_CMD_DQ_OFF = 0x3f + */ + mca_and_or(id, mca_i, DDRPHY_WC_CONFIG3_P0, 0, PPC_SHIFT(0x3F, MRS_CMD_DQ_OFF)); + + /* IOM0.DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 + [48] WL_ENABLE_RTT_SWAP = 0 + [49] WR_CTR_ENABLE_RTT_SWAP = 0 + [50-59] WR_CTR_VREF_COUNTER_RESET_VAL = 150ns in clock cycles // JESD79-4C Table 67 + */ + mca_and_or(id, mca_i, DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0, ~PPC_BITMASK(48, 59), + PPC_SHIFT(ns_to_nck(150), WR_CTR_VREF_COUNTER_RESET_VAL)); +} + +static void rc_reset(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + /* IOM0.DDRPHY_RC_CONFIG0_P0 + [all] 0 + [48-51] GLOBAL_PHY_OFFSET = 0x5 // ATTR_MSS_VPD_MR_DPHY_GPO + [62] PERFORM_RDCLK_ALIGN = 1 + */ + mca_and_or(id, mca_i, DDRPHY_RC_CONFIG0_P0, 0, + PPC_SHIFT(0x5, GLOBAL_PHY_OFFSET) | PPC_BIT(PERFORM_RDCLK_ALIGN)); + + /* IOM0.DDRPHY_RC_CONFIG1_P0 + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_RC_CONFIG1_P0, 0, 0); + + /* IOM0.DDRPHY_RC_CONFIG2_P0 + [all] 0 + [48-52] CONSEC_PASS = 8 + [57-58] 3 // not documented, BURST_WINDOW? + */ + mca_and_or(id, mca_i, DDRPHY_RC_CONFIG2_P0, 0, + PPC_SHIFT(8, CONSEC_PASS) | PPC_SHIFT(3, 58)); + + /* IOM0.DDRPHY_RC_CONFIG3_P0 + [all] 0 + [51-54] COARSE_CAL_STEP_SIZE = 4 // 5/128 + */ + mca_and_or(id, mca_i, DDRPHY_RC_CONFIG3_P0, 0, + PPC_SHIFT(4, COARSE_CAL_STEP_SIZE)); + + /* IOM0.DDRPHY_RC_RDVREF_CONFIG0_P0 = + [all] 0 + [48-63] WAIT_TIME = + 0xffff // as slow as possible, or use calculation from vref_guess_time(), or: + MSS_FREQ_EQ_1866: 0x0804 + MSS_FREQ_EQ_2133: 0x092a + MSS_FREQ_EQ_2400: 0x0a50 + MSS_FREQ_EQ_2666: 0x0b74 // use this value for all freqs maybe? + */ + uint64_t wait_time = mem_data.speed == 1866 ? 0x0804 : + mem_data.speed == 2133 ? 0x092A : + mem_data.speed == 2400 ? 0x0A50 : 0x0B74; + mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, 0, PPC_SHIFT(wait_time, 63)); + + /* IOM0.DDRPHY_RC_RDVREF_CONFIG1_P0 = + [all] 0 + [48-55] CMD_PRECEDE_TIME = (AL + CL + 15) + [56-59] MPR_LOCATION = 4 // "From R. King." + */ + mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, 0, + PPC_SHIFT(mca->cl + 15, CMD_PRECEDE_TIME) | + PPC_SHIFT(4, MPR_LOCATION)); +} + +static inline int log2_up(uint32_t x) +{ + int lz; + asm("cntlzd %0, %1" : "=r"(lz) : "r"((x << 1) - 1)); + return 63 - lz; +} + +static void seq_reset(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + /* IOM0.DDRPHY_SEQ_CONFIG0_P0 = + [all] 0 + [49] TWO_CYCLE_ADDR_EN = + 2N mode: 1 + else: 0 + [54] DELAYED_PAR = 1 + [62] PAR_A17_MASK = + 16Gb x4 configuration: 0 + else: 1 + */ + uint64_t par_a17_mask = PPC_BIT(PAR_A17_MASK); + if ((mca->dimm[0].width == WIDTH_x4 && mca->dimm[0].density == DENSITY_16Gb) || + (mca->dimm[1].width == WIDTH_x4 && mca->dimm[1].density == DENSITY_16Gb)) + par_a17_mask = 0; + + mca_and_or(id, mca_i, DDRPHY_SEQ_CONFIG0_P0, 0, + PPC_BIT(DELAYED_PAR) | par_a17_mask); + + /* All log2 values in timing registers are rounded up. 
*/ + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 = + [all] 0 + [48-51] TMOD_CYCLES = 5 // log2(max(tMRD, tMOD)) = log2(24), JEDEC tables 169 and 170 and section 13.5 + [52-55] TRCD_CYCLES = log2(tRCD) + [56-59] TRP_CYCLES = log2(tRP) + [60-63] TRFC_CYCLES = log2(tRFC) + */ + /* + * FIXME or FIXHOSTBOOT: due to a bug in Hostboot TRFC_CYCLES is always 0. + * A loop searches for a minimum for all MCAs, but minimum that values are + * compared to is initially set to 0. This is a clear violation of RFC + * timing. It is fixed later in dqs_align_turn_on_refresh() in 13.11, but + * that may not have been necessary if it were written here properly. + * + * https://github.com/open-power/hostboot/blob/master/src/import/chips/p9/procedures/hwp/memory/lib/phy/seq.C#L142 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, 0, + PPC_SHIFT(5, TMOD_CYCLES) | + PPC_SHIFT(log2_up(mca->nrcd), TRCD_CYCLES) | + PPC_SHIFT(log2_up(mca->nrp), TRP_CYCLES) | + PPC_SHIFT(log2_up(mca->nrfc), TRFC_CYCLES)); + + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 = + [all] 0 + [48-51] TZQINIT_CYCLES = 10 // log2(1024), JEDEC tables 169 and 170 + [52-55] TZQCS_CYCLES = 7 // log2(128), JEDEC tables 169 and 170 + [56-59] TWLDQSEN_CYCLES = 6 // log2(37) rounded up, JEDEC tables 169 and 170 + [60-63] TWRMRD_CYCLES = 6 // log2(40) rounded up, JEDEC tables 169 and 170 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM1_P0, 0, + PPC_SHIFT(10, TZQINIT_CYCLES) | PPC_SHIFT(7, TZQCS_CYCLES) | + PPC_SHIFT(6, TWLDQSEN_CYCLES) | PPC_SHIFT(6, TWRMRD_CYCLES)); + + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 = + [all] 0 + [48-51] TODTLON_OFF_CYCLES = log2(CWL + AL + PL - 2) + [52-63] reserved = 0x777 // "Reset value of SEQ_TIMING2 is lucky 7's" + */ + /* AL and PL are disabled (0) */ + mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM2_P0, 0, + PPC_SHIFT(log2_up(mem_data.cwl - 2), TODTLON_OFF_CYCLES) | + PPC_SHIFT(0x777, 63)); + + /* IOM0.DDRPHY_SEQ_RD_WR_DATA0_P0 = + [all] 0 + [48-63] RD_RW_DATA_REG0 = 0xaa00 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_RD_WR_DATA0_P0, 0, + PPC_SHIFT(0xAA00, RD_RW_DATA_REG0)); + + /* IOM0.DDRPHY_SEQ_RD_WR_DATA1_P0 = + [all] 0 + [48-63] RD_RW_DATA_REG1 = 0x00aa + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_RD_WR_DATA1_P0, 0, + PPC_SHIFT(0x00AA, RD_RW_DATA_REG1)); + + /* + * For all registers below, assume RDIMM (max 2 ranks). + * + * Remember that VPD data layout is different, code will be slightly + * different than the comments. 
+ */ +#define F(x) (((x >> 4) & 0xC) | ((x >> 2) & 0x3)) + + /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG0_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_RD_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][0]) + [56-59] ODT_RD_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][1]) + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG0_P0, 0, + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), ODT_RD_VALUES0) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), ODT_RD_VALUES1)); + + /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG1_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_RD_VALUES2 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][0]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][2]) + [56-59] ODT_RD_VALUES3 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][1]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][3]) + */ + /* 2 DIMMs -> odd vpd_idx */ + uint64_t val = 0; + if (vpd_idx % 2) + val = PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES2) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES3); + + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); + + + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_WR_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][0]) + [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_WR_VALUES0) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_WR_VALUES1)); + + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_WR_VALUES2 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][1][0]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][2]) + [56-59] ODT_WR_VALUES3 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][1][1]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][3]) + */ + val = 0; + if (vpd_idx % 2) + val = PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), ODT_WR_VALUES2) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), ODT_WR_VALUES3); + + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, 0, val); +#undef F +} + +static void reset_ac_boost_cntl(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* IOM0.DDRPHY_DP16_ACBOOST_CTL_BYTE{0,1}_P0_{0,1,2,3,4} = + // For all of the AC Boost attributes, they're laid out in the uint32_t as such: + // Bit 0-2 = DP16 Block 0 (DQ Bits 0-7) BYTE0_P0_0 + // Bit 3-5 = DP16 Block 0 (DQ Bits 8-15) BYTE1_P0_0 + // Bit 6-8 = DP16 Block 1 (DQ Bits 0-7) BYTE0_P0_1 + // Bit 9-11 = DP16 Block 1 (DQ Bits 8-15) BYTE1_P0_1 + // Bit 12-14 = DP16 Block 2 (DQ Bits 0-7) BYTE0_P0_2 + // Bit 15-17 = DP16 Block 2 (DQ Bits 8-15) BYTE1_P0_2 + // Bit 18-20 = DP16 Block 3 (DQ Bits 0-7) BYTE0_P0_3 + // Bit 21-23 = DP16 Block 3 (DQ Bits 8-15) BYTE1_P0_3 + // Bit 24-26 = DP16 Block 4 (DQ Bits 0-7) BYTE0_P0_4 + // Bit 27-29 = DP16 Block 4 (DQ Bits 8-15) BYTE1_P0_4 + [all] 0? // function does read prev values from SCOM but then overwrites all non-const-0 fields. Why bother? 
+ [48-50] S{0,1}ACENSLICENDRV_DC = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN + [51-53] S{0,1}ACENSLICEPDRV_DC = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP + [54-56] S{0,1}ACENSLICEPTERM_DC = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP + */ + /* + * Both ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_* have a value of 0x24924924 + * for all rank configurations (two copies for two MCA indices to be exact), + * meaning that all 3b fields are 0b001. ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP + * equals 0. Last DP16 doesn't require special handling, all DQ bits are + * configured. + * + * Write these fields explicitly instead of shifting and masking for better + * readability. + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0, + ~PPC_BITMASK(48, 56), + PPC_SHIFT(1, S0ACENSLICENDRV_DC) | + PPC_SHIFT(1, S0ACENSLICEPDRV_DC)); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0, + ~PPC_BITMASK(48, 56), + PPC_SHIFT(1, S1ACENSLICENDRV_DC) | + PPC_SHIFT(1, S1ACENSLICEPDRV_DC)); + } +} + +static void reset_ctle_cntl(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* IOM0.DDRPHY_DP16_CTLE_CTL_BYTE{0,1}_P0_{0,1,2,3,4} = + // For the capacitance CTLE attributes, they're laid out in the uint64_t as such. The resitance + // attributes are the same, but 3 bits long. Notice that DP Block X Nibble 0 is DQ0:3, + // Nibble 1 is DQ4:7, Nibble 2 is DQ8:11 and 3 is DQ12:15. + // Bit 0-1 = DP16 Block 0 Nibble 0 Bit 16-17 = DP16 Block 2 Nibble 0 Bit 32-33 = DP16 Block 4 Nibble 0 + // Bit 2-3 = DP16 Block 0 Nibble 1 Bit 18-19 = DP16 Block 2 Nibble 1 Bit 34-35 = DP16 Block 4 Nibble 1 + // Bit 4-5 = DP16 Block 0 Nibble 2 Bit 20-21 = DP16 Block 2 Nibble 2 Bit 36-37 = DP16 Block 4 Nibble 2 + // Bit 6-7 = DP16 Block 0 Nibble 3 Bit 22-23 = DP16 Block 2 Nibble 3 Bit 38-39 = DP16 Block 4 Nibble 3 + // Bit 8-9 = DP16 Block 1 Nibble 0 Bit 24-25 = DP16 Block 3 Nibble 0 + // Bit 10-11 = DP16 Block 1 Nibble 1 Bit 26-27 = DP16 Block 3 Nibble 1 + // Bit 12-13 = DP16 Block 1 Nibble 2 Bit 28-29 = DP16 Block 3 Nibble 2 + // Bit 14-15 = DP16 Block 1 Nibble 3 Bit 30-31 = DP16 Block 3 Nibble 3 + [48-49] NIB_{0,2}_DQSEL_CAP = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP + [53-55] NIB_{0,2}_DQSEL_RES = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES + [56-57] NIB_{1,3}_DQSEL_CAP = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP + [61-63] NIB_{1,3}_DQSEL_RES = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES + */ + /* + * For all rank configurations and both MCAs, ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP + * is 0x5555555555000000 (so every 2b field is 0b01) and *_RES equals + * 0xb6db6db6db6db6d0 (every 3b field is 0b101 = 5). 
+ */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), + PPC_SHIFT(1, NIB_0_DQSEL_CAP) | PPC_SHIFT(5, NIB_0_DQSEL_RES) | + PPC_SHIFT(1, NIB_1_DQSEL_CAP) | PPC_SHIFT(5, NIB_1_DQSEL_RES)); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), + PPC_SHIFT(1, NIB_2_DQSEL_CAP) | PPC_SHIFT(5, NIB_2_DQSEL_RES) | + PPC_SHIFT(1, NIB_3_DQSEL_CAP) | PPC_SHIFT(5, NIB_3_DQSEL_RES)); + } +} + +static void reset_delay(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + /* See comments in ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0 for layout */ + int speed_idx = mem_data.speed == 1866 ? 0 : + mem_data.speed == 2133 ? 8 : + mem_data.speed == 2400 ? 16 : 24; + int dimm_idx = (mca->dimm[0].present && mca->dimm[1].present) ? 4 : 0; + /* TODO: second CPU not supported */ + int vpd_idx = speed_idx + dimm_idx + mcs_i; + + /* + * From documentation: + * "If the reset value is not sufficient for the given system, these + * registers must be set via the programming interface." + * + * Unsure if this is the case. Hostboot sets it, so lets do it too. + */ + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0 + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1 + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1 + [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00 + [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0 + [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15 + */ + 
mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx][mca_i], ADR_DELAY_ODD)); + + + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13 + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR1, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR1, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN + [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2 + [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR1, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02 + [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR1, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx][mca_i], ADR_DELAY_ODD)); + + + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0 + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR2, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08 + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR2, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY4 = 
ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR2, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04 + [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR2, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09 + [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR2, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1 + [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR2, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx][mca_i], ADR_DELAY_ODD)); + + + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR3, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0 + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR3, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR3, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx][mca_i], ADR_DELAY_EVEN) | + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx][mca_i], ADR_DELAY_ODD)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR3, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx][mca_i], ADR_DELAY_EVEN)); + +} + +static void reset_tsys_adr(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int i = mem_data.speed == 1866 ? 0 : + mem_data.speed == 2133 ? 1 : + mem_data.speed == 2400 ? 2 : 3; + + /* IOM0.DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S{0,1} = + [all] 0 + [49-55] TSYS_WRCLK = ATTR_MSS_VPD_MR_TSYS_ADR + // From regs spec: + // Set to '19'h for 2666 MT/s. + // Set to '17'h for 2400 MT/s. + // Set to '14'h for 2133 MT/s. + // Set to '12'h for 1866 MT/s. + */ + /* Has the same stride as DP16. 
*/ + dp_mca_and_or(id, 0, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + 0, PPC_SHIFT(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK)); + dp_mca_and_or(id, 1, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + 0, PPC_SHIFT(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK)); +} + +static void reset_tsys_data(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int i = mem_data.speed == 1866 ? 0 : + mem_data.speed == 2133 ? 1 : + mem_data.speed == 2400 ? 2 : 3; + int dp; + + /* IOM0.DDRPHY_DP16_WRCLK_PR_P0_{0,1,2,3,4} = + [all] 0 + [49-55] TSYS_WRCLK = ATTR_MSS_VPD_MR_TSYS_DATA + // From regs spec: + // Set to '12'h for 2666 MT/s. + // Set to '10'h for 2400 MT/s. + // Set to '0F'h for 2133 MT/s. + // Set to '0D'h for 1866 MT/s. + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, 0, + PPC_SHIFT(ATTR_MSS_VPD_MR_TSYS_DATA[i], TSYS_WRCLK)); + } +} + +static void reset_io_impedances(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_IO_TX_FET_SLICE_P0_{0,1,2,3,4} = + [all] 0 + // 0 - Hi-Z, otherwise impedance = 240/ Ohms + [49-55] EN_SLICE_N_WR = ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS[{0,1,2,3,4}] + [57-63] EN_SLICE_P_WR = ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS[{0,1,2,3,4}] + */ + /* + * For all rank configurations and MCAs, ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS + * is 34 Ohms. 240/34 = 7 bits set. According to documentation this is the + * default value, but set it just to be safe. + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, 0, + PPC_SHIFT(0x7F, EN_SLICE_N_WR) | + PPC_SHIFT(0x7F, EN_SLICE_P_WR)); + + /* IOM0.DDRPHY_DP16_IO_TX_PFET_TERM_P0_{0,1,2,3,4} = + [all] 0 + // 0 - Hi-Z, otherwise impedance = 240/ Ohms + [49-55] EN_SLICE_N_WR = ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS[{0,1,2,3,4}] + */ + /* 60 Ohms for all configurations, 240/60 = 4 bits set. */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_PFET_TERM_P0_0, 0, + PPC_SHIFT(0x0F, EN_SLICE_N_WR)); + } + + /* IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // yes, ADR1 + // These are RMW one at a time. I don't see why not all at once, or at least in pairs (P and N of the same clocks) + if (ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK_OHM30): + [54,52,62,60] SLICE_SELn = 1 // CLK00 P, CLK00 N, CLK01 P, CLK01 N + else + [54,52,62,60] = 0 + */ + /* 30 Ohms for all configurations. */ + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL3) | + PPC_BIT(SLICE_SEL6) | PPC_BIT(SLICE_SEL7)); + + /* + * Following are reordered to minimalize number of register reads/writes + ------------------------------------------------------------------------ + val = (ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR_OHM30) ? 
1 : 0 + // val = 30 for all VPD configurations + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 = + [50,56,58,62] = val // ADDR14/WEN, BA1, ADDR10, BA0 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0 = + [48,54] = val // ADDR0, ADDR15/CAS + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // same as CLK, however it uses different VPD + [48,56] = val // ADDR13, ADDR17/RAS + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 = + [52] = val // ADDR2 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 = + [50,52,54,56,58,60,62] = val // ADDR16/RAS, ADDR8, ADDR5, ADDR3, ADDR1, ADDR4, ADDR7 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2 = + [48,50,54] = val // ADDR9, ADDR6, ADDR12 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 = + [48,50,52,58] = val // ACT_N, ADDR11, BG0, BG1 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL5) | + PPC_BIT(SLICE_SEL7)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL3)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL4)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL2)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL3) | + PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL5) | PPC_BIT(SLICE_SEL6) | + PPC_BIT(SLICE_SEL7)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL3)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2) | + PPC_BIT(SLICE_SEL5)); + + /* + * Following are reordered to minimalize number of register reads/writes + ------------------------------------------------------------------------ + val = (ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL_OHM30) ? 1 : 0 + // val = 30 for all VPD sets + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 = // same as CMD/ADDR, however it uses different VPD + [52,60] = val // ODT3, ODT1 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0 = // same as CMD/ADDR, however it uses different VPD + [50,52] = val // ODT2, ODT0 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 = // same as CMD/ADDR, however it uses different VPD + [54] = val // PARITY + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2 = // same as CMD/ADDR, however it uses different VPD + [52] = val // CKE1 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 = // same as CMD/ADDR, however it uses different VPD + [54,56,60,62] = val // CKE0, CKE3, CKE2, RESET_N + */ + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL6)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL3)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL2)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, + PPC_BIT(SLICE_SEL3) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL6) | + PPC_BIT(SLICE_SEL7)); + + /* + * Following are reordered to minimalize number of register reads/writes + ------------------------------------------------------------------------ + val = (ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID_OHM30) ? 
1 : 0 + // val = 30 for all VPD sets + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 = // same as CMD/ADDR and CNTL, however it uses different VPD + [48,54] = val // CS0, CID0 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // same as CLK and CMD/ADDR, however it uses different VPD + [50,58] = val // CS1, CID1 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 = // same as CMD/ADDR and CNTL, however it uses different VPD + [48,50] = val // CS3, CID2 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 = // same as CMD/ADDR, however it uses different VPD + [48] = val // CS2 + */ + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL3)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL5)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1)); + mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL0)); + + /* + * IO impedance regs summary: lanes 9-15 have different possible settings (results in 15/30 vs 40/30 Ohm) + * MAP0_ADR0: all set MAP1_ADR0: lanes 12-15 not set + * MAP0_ADR1: all set MAP1_ADR1: lanes 12-15 not set + * MAP0_ADR2: all set MAP1_ADR2: lanes 12-15 not set + * MAP0_ADR3: all set MAP1_ADR3: not used + * This mapping is consistent with ADR_DELAYx_P0_ADRy settings. + */ +} + +static void reset_wr_vref_registers(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int dp; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_WR_VREF_CONFIG0_P0_{0,1,2,3,4} = + [all] 0 + [48] WR_CTR_1D_MODE_SWITCH = 0 // 1 for functional) + continue; + + /* Some registers cannot be initialized without data from SPD */ + if (mca->functional) { + /* Assume DIMM mixing rules are followed - same rank config on both DIMMs*/ + p9n_mca_scom(mcs_i, mca_i); + thermal_throttle_scominit(mcs_i, mca_i); + } + + /* The rest can and should be initialized also on magic port */ + p9n_ddrphy_scom(mcs_i, mca_i); + } + p9n_mcbist_scom(mcs_i); + } + + /* This double loop is a part of phy_scominit() in Hostboot, but this is simpler. */ + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + /* No magic for phy_scominit(). */ + if (mca->functional) + phy_scominit(mcs_i, mca_i); + + /* + * TODO: test this with DIMMs on both MCS. Maybe this has to be done + * in a separate loop, after phy_scominit()'s are done on both MCSs. + */ + if (mca_i == 0 || mca->functional) + fir_unmask(mcs_i, mca_i); + } + } + + printk(BIOS_EMERG, "ending istep 13.8\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 4dd9abae248..c811238a431 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -339,6 +339,8 @@ void main(void) istep_13_4(); report_istep(13, 5); // no-op istep_13_6(); + report_istep(13, 7); // no-op + istep_13_8(); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 633f1cb8a299d33f539f22ac446cb5c6ee88246d Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 17 Mar 2021 20:10:09 +0100 Subject: [PATCH 034/213] soc/power9/istep_13_9.c: istep implementation 13.9 mss_ddr_phy_reset: Soft reset of DDR PHY macros - Lock DDR DLLs - Already configured DDR DLL in scaninit - Sends Soft DDR Phy reset - Kick off internal ZQ Cal - Perform any config that wasn't scanned in (TBD) - Nothing known here Signed-off-by: Krystian Hebel Change-Id: I1ee4071a4ea887104c4a81f6dd266d3895ccb5ea --- src/include/cpu/power/istep_13.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_9.c | 793 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 796 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_9.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 74431bdd2e5..51aa416cd67 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -238,3 +238,4 @@ void istep_13_3(void); void istep_13_4(void); void istep_13_6(void); void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 +void istep_13_9(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 77561ce721b..3a96186002e 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -12,6 +12,7 @@ romstage-y += istep_13_3.c romstage-y += istep_13_4.c romstage-y += istep_13_6.c romstage-y += istep_13_8.c +romstage-y += istep_13_9.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c new file mode 100644 index 00000000000..dfce64bb82b --- /dev/null +++ b/src/soc/ibm/power9/istep_13_9.c @@ -0,0 +1,793 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +static int test_dll_calib_done(int mcs_i, int mca_i, bool *do_workaround) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + uint64_t status = mca_read(id, mca_i, DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0); + /* + if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 + [48] DP_DLL_CAL_GOOD == 1 + [49] DP_DLL_CAL_ERROR == 0 + [50] DP_DLL_CAL_ERROR_FINE == 0 + [51] ADR_DLL_CAL_GOOD == 1 + [52] ADR_DLL_CAL_ERROR == 0 + [53] ADR_DLL_CAL_ERROR_FINE == 0) break // success + if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 + [49] DP_DLL_CAL_ERROR == 1 | + [50] DP_DLL_CAL_ERROR_FINE == 1 | + [52] ADR_DLL_CAL_ERROR == 1 | + [53] ADR_DLL_CAL_ERROR_FINE == 1) break and do the workaround + */ + if ((status & PPC_BITMASK(48, 53)) == + (PPC_BIT(DP_DLL_CAL_GOOD) | PPC_BIT(ADR_DLL_CAL_GOOD))) { + /* DLL calibration finished without errors */ + return 1; + } + + if (status & (PPC_BIT(DP_DLL_CAL_ERROR) | PPC_BIT(DP_DLL_CAL_ERROR_FINE) | + PPC_BIT(ADR_DLL_CAL_ERROR) | PPC_BIT(ADR_DLL_CAL_ERROR_FINE))) { + /* DLL calibration finished, but with errors */ + *do_workaround = true; + return 1; + } + + /* Not done yet */ + return 0; +} + +static int test_bb_lock(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + uint64_t res = PPC_BIT(BB_LOCK0) | PPC_BIT(BB_LOCK1); + int mca_i, dp; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* + IOM0.DDRPHY_ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S{0,1} + [56] BB_LOCK & + IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_{0,1,2,3} + [48] BB_LOCK0 & + [56] BB_LOCK1 & + IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_4 + [48] BB_LOCK0 // last DP16 
uses only first half + if all bits listed above are set: success + */ + + /* ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S{0,1}, BB_LOCK0 doesn't matter */ + res &= dp_mca_read(id, 0, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | + PPC_BIT(BB_LOCK0); + res &= dp_mca_read(id, 1, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | + PPC_BIT(BB_LOCK0); + + /* IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_{0,1,2,3} */ + for (dp = 0; dp < 4; dp++) { + res &= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0); + } + + /* IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_4, BB_LOCK1 doesn't matter */ + res &= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0) | + PPC_BIT(BB_LOCK1); + + /* Do we want early return here? */ + } + + return res == (PPC_BIT(BB_LOCK0) | PPC_BIT(BB_LOCK1)); +} + +static void fix_bad_voltage_settings(int mcs_i) +{ + die("fix_bad_voltage_settings() required for MCS%d, but not implemented yet\n", mcs_i); + + /* TODO: implement if needed */ +/* + for each functional MCA + // Each MCA has 10 DLLs: ADR DLL0, DP0-4 DLL0, DP0-3 DLL1. Each of those can fail. For each DLL there are 5 registers + // used in this workaround, those are (see src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dll_workaround.C): + // - l_CNTRL: DP16 or ADR CNTRL register + // - l_COARSE_SAME: VREG_COARSE register for same DLL as CNTRL reg + // - l_COARSE_NEIGH: VREG_COARSE register for DLL neighbor for this workaround + // - l_DAC_LOWER: DLL DAC Lower register + // - l_DAC_UPPER: DLL DAC Upper register + // Warning: the last two have their descriptions swapped in dll_workaround.H + // It seems that the code expects that DLL neighbor is always good, what if it isn't? + // + // General flow, stripped from C++ bloating and repeated loops: + for each DLL // list in workarounds/dll_workaround.C + 1. check if this DLL failed, if not - skip to the next one + (l_CNTRL[62 | 63] | l_COARSE_SAME[56-62] == 1) -> failed + 2. set reset bit, set skip VREG bit, clear the error bits + l_CNTRL[48] = 1 + l_CNTRL[50-51] = 2 // REGS_RXDLL_CAL_SKIP, 2 - skip VREG calib., do coarse delay calib. only + l_CNTRL[62-63] = 0 + 3. clear DLL FIR (see "Do FIRry things" at the end of 13.8) // this was actually done for non-failed DLLs too, why? + IOM0.IOM_PHY0_DDRPHY_FIR_REG = // 0x07011000 // maybe use SCOM1 (AND) 0x07011001 + [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 = 0 // calibration errors + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 // DLL errors + 4. write the VREG DAC value found in neighbor (good) to the failing DLL VREG DAC + l_COARSE_SAME[56-62] = l_COARSE_NEIGH[56-62] + 5. reset the upper and lower fine calibration bits back to defaults + l_DAC_LOWER[56-63] = 0x8000 // Hard coded default values per Steve Wyatt for this workaround + l_DAC_UPPER[56-63] = 0xFFE0 + 6. run DLL Calibration again on failed DLLs + l_CNTRL[48] = 0 + // Wait for calibration to finish + delay(37,382 memclocks) // again, we could do better than this + + // Check if calibration succeeded (same tests as in 1 above, for all DLLs) + for each DLL + if (l_CNTRL[62 | 63] | l_COARSE_SAME[56-62] == 1): failed, assert and die? +*/ +} + +static void check_during_phy_reset(int mcs_i) +{ + /* + * Mostly FFDC, which to my current knowledge is just the error logging. If + * it does anything else, this whole function needs rechecking. + */ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + uint64_t val; + + /* If any of these bits is set, report error. Clear them unconditionally.
*/ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (mca_i != 0 && !mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* MC01.PORT0.SRQ.MBACALFIR + [0] MBACALFIR_MBA_RECOVERABLE_ERROR + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR + [10] MBACALFIR_SM_1HOT_ERR + */ + val = mca_read(id, mca_i, MBACALFIR); + if (val & (PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR))) { + /* No idea how severe that error is... */ + printk(BIOS_ERR, "Error detected in PORT%d.SRQ.MBACALFIR: %#llx\n", + mca_i, val); + } + + mca_and_or(id, mca_i, MBACALFIR, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + 0); + + /* IOM0.IOM_PHY0_DDRPHY_FIR_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 + [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 + */ + val = mca_read(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG); + if (val & PPC_BITMASK(54, 61)) { + /* No idea how severe that error is... */ + printk(BIOS_ERR, "Error detected in IOM_PHY%d_DDRPHY_FIR_REG: %#llx\n", + mca_i , val); + } + + mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, ~(PPC_BITMASK(54, 61)), 0); + } +} + +static void fir_unmask(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + + /* + * MASK must be always written last, otherwise we may get unintended + * actions. No need for explicit barrier here, SCOM functions do eieio. + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT0 + [2] MCBISTFIRQ_INTERNAL_FSM_ERROR = 0 + [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 0 + [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [2] MCBISTFIRQ_INTERNAL_FSM_ERROR = 0 + [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 1 + [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK + [2] MCBISTFIRQ_INTERNAL_FSM_ERROR = 0 // checkstop (0,0,0) + [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 0 // recoverable_error (0,1,0) + [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 // checkstop (0,0,0) + */ + scom_and_or_for_chiplet(id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); + scom_and_or_for_chiplet(id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE)); + scom_and_or_for_chiplet(id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* + MC01.PORT0.SRQ.MBACALFIR_ACTION0 + [0] MBACALFIR_MBA_RECOVERABLE_ERROR = 0 + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR = 0 + [4] MBACALFIR_RCD_PARITY_ERROR = 0 + [10] MBACALFIR_SM_1HOT_ERR = 0 + MC01.PORT0.SRQ.MBACALFIR_ACTION1 + [0] MBACALFIR_MBA_RECOVERABLE_ERROR = 1 + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR = 0 + [4] MBACALFIR_RCD_PARITY_ERROR = 1 + [10] MBACALFIR_SM_1HOT_ERR = 0 + MC01.PORT0.SRQ.MBACALFIR_MASK + [0] MBACALFIR_MBA_RECOVERABLE_ERROR = 0 // recoverable_error (0,1,0) + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR = 
0 // checkstop (0,0,0) + [4] MBACALFIR_RCD_PARITY_ERROR = 0 // recoverable_error (0,1,0) + [10] MBACALFIR_SM_1HOT_ERR = 0 // checkstop (0,0,0) + */ + mca_and_or(id, mca_i, MBACALFIR_ACTION0, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + 0); + mca_and_or(id, mca_i, MBACALFIR_ACTION1, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR)); + mca_and_or(id, mca_i, MBACALFIR_MASK, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + 0); + + /* + IOM0.IOM_PHY0_DDRPHY_FIR_ACTION0_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 = 0 + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 = 0 + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 = 0 // no ERROR_2! + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 = 0 + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 0 + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 0 + IOM0.IOM_PHY0_DDRPHY_FIR_ACTION1_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 = 1 + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 = 1 + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 = 1 + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 1 + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 = 1 + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 1 + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 1 + IOM0.IOM_PHY0_DDRPHY_FIR_MASK_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 = 0 // recoverable_error (0,1,0) + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 = 0 // recoverable_error (0,1,0) + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 = 0 // recoverable_error (0,1,0) + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 // recoverable_error (0,1,0) + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 = 0 // recoverable_error (0,1,0) + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 0 // recoverable_error (0,1,0) + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 0 // recoverable_error (0,1,0) + */ + mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION0_REG, + ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), + 0); + mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION1_REG, + ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), + PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)); + mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_MASK_REG, + ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), + 0); + } +} + +/* + * Can't protect with do..while, this macro is supposed to exit 'for' loop in + * which it is invoked. As a side effect, it is used without semicolon. + * + * "I want to break free" - Freddie Mercury + */ +#define TEST_VREF(dp, scom) \ +if ((dp_mca_read(mcs_ids[mcs_i], dp, mca_i, scom) & PPC_BITMASK(56, 62)) == \ + PPC_SHIFT(1,62)) { \ + need_dll_workaround = true; \ + break; \ +} + +/* + * 13.9 mss_ddr_phy_reset: Soft reset of DDR PHY macros + * + * - Lock DDR DLLs + * - Already configured DDR DLL in scaninit + * - Sends Soft DDR Phy reset + * - Kick off internal ZQ Cal + * - Perform any config that wasn't scanned in (TBD) + * - Nothing known here + */ +void istep_13_9(void) +{ + printk(BIOS_EMERG, "starting istep 13.9\n"); + int mcs_i, mca_i, dp; + long time; + bool need_dll_workaround; + + report_istep(13, 9); + + /* + * Most of this istep consists of: + * 1. 
asserting reset bit or starting calibration + * 2. delay + * 3. deasserting reset bit or checking the result of calibration + * + * These are done for each (functional and/or magic) MCA. Because the delay + * is required between points 1 and 3 for a given MCA, those delays are done + * outside of 'for each MCA' loops. They are still inside 'for each MCS' + * loop, unclear if we can break it into pieces too. + */ + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (mca_i != 0 && !mca->functional) + continue; + + /* MC01.PORT0.SRQ.MBA_FARB5Q = + [8] MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N = 0 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N), 0); + + /* Drive all control signals to their inactive/idle state, or + * inactive value + IOM0.DDRPHY_DP16_SYSCLK_PR0_P0_{0,1,2,3,4} = + IOM0.DDRPHY_DP16_SYSCLK_PR1_P0_{0,1,2,3,4} = + [all] 0 + [48] reserved = 1 // MCA_DDRPHY_DP16_SYSCLK_PR0_P0_0_01_ENABLE + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, + 0, PPC_BIT(48)); + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR1_P0_0, + 0, PPC_BIT(48)); + } + + /* Assert reset to PHY for 32 memory clocks + MC01.PORT0.SRQ.MBA_CAL0Q = + [57] MBA_CAL0Q_RESET_RECOVER = 1 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_CAL0Q, ~0, + PPC_BIT(MBA_CAL0Q_RESET_RECOVER)); + } + + delay_nck(32); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (mca_i != 0 && !mca->functional) + continue; + + /* Deassert reset_n + MC01.PORT0.SRQ.MBA_CAL0Q = + [57] MBA_CAL0Q_RESET_RECOVER = 0 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_CAL0Q, + ~PPC_BIT(MBA_CAL0Q_RESET_RECOVER), 0); + + /* Flush output drivers + IOM0.DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S{0,1} = + [all] 0 + [48] FLUSH = 1 + [50] INIT_IO = 1 + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, + PPC_BIT(FLUSH) | PPC_BIT(INIT_IO)); + dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, + PPC_BIT(FLUSH) | PPC_BIT(INIT_IO)); + + /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0,1,2,3,4} = + [all] 0 + [51] FLUSH = 1 + [54] INIT_IO = 1 + [55] ADVANCE_PING_PONG = 1 + [58] DELAY_PING_PONG_HALF = 1 + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, DDRPHY_DP16_CONFIG0_P0_0, 0, + PPC_BIT(DP16_CONFIG0_FLUSH) | + PPC_BIT(DP16_CONFIG0_INIT_IO) | + PPC_BIT(DP16_CONFIG0_ADVANCE_PING_PONG) | + PPC_BIT(DP16_CONFIG0_DELAY_PING_PONG_HALF)); + } + } + + delay_nck(32); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (mca_i != 0 && !mca->functional) + continue; + + /* IOM0.DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S{0,1} = + [all] 0 + [48] FLUSH = 0 + [50] INIT_IO = 0 + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, 0); + dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, 0); + + /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0,1,2,3,4} = + [all] 0 + [51] FLUSH = 0 + [54] INIT_IO = 0 + [55] ADVANCE_PING_PONG = 1 + [58] DELAY_PING_PONG_HALF = 1 + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, DDRPHY_DP16_CONFIG0_P0_0, 0, + 
PPC_BIT(DP16_CONFIG0_ADVANCE_PING_PONG) | + PPC_BIT(DP16_CONFIG0_DELAY_PING_PONG_HALF)); + } + } + + /* ZCTL Enable */ + /* + * In Hostboot this is 'for each magic MCA'. We know there is only one + * magic, and it has always the same index. + IOM0.DDRPHY_PC_RESETS_P0 = + // Yet another documentation error: all bits in this register are marked as read-only + [51] ENABLE_ZCAL = 1 + */ + mca_and_or(mcs_ids[mcs_i], 0, DDRPHY_PC_RESETS_P0, ~0, PPC_BIT(ENABLE_ZCAL)); + + /* Maybe it would be better to add another 1us later instead of this. */ + delay_nck(1024); + + /* for each magic MCA */ + /* 50*10ns, but we don't have such precision. */ + time = wait_us(1, mca_read(mcs_ids[mcs_i], 0, + DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0) & PPC_BIT(ZCAL_DONE)); + + if (!time) + die("ZQ calibration timeout\n"); + + /* DLL calibration */ + /* + * Here was an early return if no functional MCAs were found. Wouldn't + * that make whole MCBIST non-functional? + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* IOM0.DDRPHY_ADR_DLL_CNTL_P0_ADR32S{0,1} = + [48] INIT_RXDLL_CAL_RESET = 0 + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + + for (dp = 0; dp < 4; dp++) { + /* IOM0.DDRPHY_DP16_DLL_CNTL{0,1}_P0_{0,1,2,3} = + [48] INIT_RXDLL_CAL_RESET = 0 + */ + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL0_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL1_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + } + /* Last DP16 is different + IOM0.DDRPHY_DP16_DLL_CNTL0_P0_4 + [48] INIT_RXDLL_CAL_RESET = 0 + IOM0.DDRPHY_DP16_DLL_CNTL1_P0_4 + [48] INIT_RXDLL_CAL_RESET = 1 + */ + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL0_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL1_P0_0, + ~0, PPC_BIT(INIT_RXDLL_CAL_RESET)); + } + + /* From Hostboot's comments: + * 32,772 dphy_nclk cycles from Reset=0 to VREG Calibration to exhaust all values + * 37,382 dphy_nclk cycles for full calibration to start and fail ("worst case") + * + * Why assume worst case instead of making the next timeout bigger? + */ + delay_nck(37382); + + /* + * The comment before poll says: + * > To keep things simple, we'll poll for the change in one of the ports. + * > Once that's completed, we'll check the others. If any one has failed, + * > or isn't notifying complete, we'll pop out an error + * + * The issue is that it only tests the first of the functional ports. + * Other ports may or may not have failed. Even if this times out, the + * rest of the function continues normally, without throwing any error... + * + * For now, leave it as it was done in Hostboot. 
+ */ + /* timeout(50*10ns): + if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 + [48] DP_DLL_CAL_GOOD == 1 + [49] DP_DLL_CAL_ERROR == 0 + [50] DP_DLL_CAL_ERROR_FINE == 0 + [51] ADR_DLL_CAL_GOOD == 1 + [52] ADR_DLL_CAL_ERROR == 0 + [53] ADR_DLL_CAL_ERROR_FINE == 0) break // success + if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 + [49] DP_DLL_CAL_ERROR == 1 | + [50] DP_DLL_CAL_ERROR_FINE == 1 | + [52] ADR_DLL_CAL_ERROR == 1 | + [53] ADR_DLL_CAL_ERROR_FINE == 1) break and do the workaround + */ + need_dll_workaround = false; + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (mem_data.mcs[mcs_i].mca[mca_i].functional) + break; + } + /* 50*10ns, but we don't have such precision. */ + time = wait_us(1, test_dll_calib_done(mcs_i, mca_i, &need_dll_workaround)); + if (!time) + die("DLL calibration timeout\n"); + + /* + * Workaround is also required if any of coarse VREG has value 1 after + * calibration. Test from poll above is repeated here - this time for every + * MCA, but it doesn't wait until DLL gets calibrated if that is still in + * progress. The registers below (also used in the workaround) __must not__ + * be written to while hardware calibration is in progress. + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (need_dll_workaround) + break; + + if (!mca->functional) + continue; + + /* + * This assumes that by the time the first functional MCA completed + * successfully, all MCAs completed (with or without errors). If the + * first MCA failed then we won't even get here, we would bail earlier + * because need_dll_workaround == true in that case. + * + * This is not safe if DLL calibration takes more time for other MCAs, + * but this is the way Hostboot does it. + */ + test_dll_calib_done(mcs_i, mca_i, &need_dll_workaround); + + /* + if (IOM0.DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0 | + IOM0.DDRPHY_DP16_DLL_VREG_COARSE0_P0_{0,1,2,3,4} | + IOM0.DDRPHY_DP16_DLL_VREG_COARSE1_P0_{0,1,2,3} | + [56-62] REGS_RXDLL_VREG_DAC_COARSE = 1) // The same offset for ADR and DP16 + do the workaround + */ + TEST_VREF(0, DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0) + TEST_VREF(4, DDRPHY_DP16_DLL_VREG_COARSE0_P0_0) + for (dp = 0; dp < 4; dp++) { + TEST_VREF(dp, DDRPHY_DP16_DLL_VREG_COARSE0_P0_0) + TEST_VREF(dp, DDRPHY_DP16_DLL_VREG_COARSE1_P0_0) + } + } + + if (need_dll_workaround) + fix_bad_voltage_settings(mcs_i); + + /* Start bang-bang-lock */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* Take dphy_nclk/SysClk alignment circuits out of reset and put into + * continuous update mode + IOM0.DDRPHY_ADR_SYSCLK_CNTL_PR_P0_ADR32S{0,1} = + IOM0.DDRPHY_DP16_SYSCLK_PR0_P0_{0,1,2,3,4} = + IOM0.DDRPHY_DP16_SYSCLK_PR1_P0_{0,1,2,3} = + [all] 0 + [48-63] 0x8024 // From the DDR PHY workbook + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + + for (dp = 0; dp < 4; dp++) { + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR1_P0_0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + } + dp_mca_and_or(mcs_ids[mcs_i], 4, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + } 
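+		/*
+		 * Quick reference for the PPC_* helpers used above (definitions
+		 * assumed from their usage throughout this file; IBM bit numbering,
+		 * bit 0 being the MSB of a 64-bit register):
+		 *   PPC_BIT(b)        - single bit b, i.e. 1ull << (63 - b)
+		 *   PPC_BITMASK(a, b) - mask covering bits a through b inclusive
+		 *   PPC_SHIFT(v, lsb) - v placed so that its least significant bit
+		 *                       lands on bit lsb
+		 * For example, PPC_SHIFT(0x8024, 63) above fills bits [48-63] with
+		 * 0x8024, matching the "[48-63] 0x8024" layout from the DDR PHY
+		 * workbook.
+		 */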
+ + /* + * Wait at least 5932 dphy_nclk clock cycles to allow the dphy_nclk/SysClk + * alignment circuit to perform initial alignment. + */ + delay_nck(5932); + + /* Check for LOCK in {DP16,ADR}_SYSCLK_PR_VALUE */ + /* 50*10ns, but we don't have such precision. */ + /* + * FIXME: Hostboot uses the timeout mentioned above for each of + * the registers separately. It also checks them separately, + * meaning that they don't have to be locked at the same time. + * I am not sure if this is why the call below times out or if + * there is another reason. Can these locks be lost or should + * they hold until reset? + * + * Increasing the timeout helps (maybe that's just luck), but + * this probably isn't a proper way to do this. + */ + time = wait_ms(1000, test_bb_lock(mcs_i)); + if (!time) + die("BB lock timeout\n"); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* De-assert the SYSCLK_RESET + IOM0.DDRPHY_PC_RESETS_P0 = + [49] SYSCLK_RESET = 0 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, DDRPHY_PC_RESETS_P0, + ~PPC_BIT(SYSCLK_RESET), 0); + + /* Reset the windage registers */ + /* + * According to the PHY team, resetting the read delay offset must be + * done after SYSCLK_RESET. + * + * ATTR_MSS_VPD_MT_WINDAGE_RD_CTR holds (signed) value of offset in + * picoseconds. It must be converted to phase rotator ticks. There are + * 128 ticks per clock, and clock period depends on memory frequency. + * + * Result is rounded away from zero, so we have to add _or subtract_ + * half of tick. + * + * In some cases we can skip this (40 register writes per port), from + * documentation: + * + * "This register must not be set to a nonzero value unless detailed + * timing analysis shows that, for a particular configuration, the + * read-centering algorithm places the sampling point off from the eye + * center." + * + * ATTR_MSS_VPD_MT_WINDAGE_RD_CTR is outside of defined values for VPD + * for Talos, it is by default set to 0. Skipping this for now, but it + * may be needed for generalized code. 
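+			 *
+			 * If this conversion is ever needed, a rough sketch (illustrative
+			 * names only; tck_ps would be the memory clock period in
+			 * picoseconds, e.g. ~833 ps at 2400 MT/s):
+			 *
+			 *   offset_ticks = (offset_ps * 128
+			 *                   + (offset_ps < 0 ? -tck_ps : tck_ps) / 2) / tck_ps;
+			 *
+			 * i.e. scale to 1/128ths of a clock and add or subtract half a
+			 * tick before the truncating division so the result rounds away
+			 * from zero, as described above.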
+ * + // 0x80000{0,1,2,3}0C0701103F, +0x0400_0000_0000 + IOM0.DDRPHY_DP16_READ_DELAY_OFFSET0_RANK_PAIR{0,1,2,3}_P0_{0,1,2,3,4} = + // 0x80000{0,1,2,3}0D0701103F, +0x0400_0000_0000 + IOM0.DDRPHY_DP16_READ_DELAY_OFFSET1_RANK_PAIR{0,1,2,3}_P0_{0,1,2,3,4} = + [all] 0 + [49-55] OFFSET0 = offset_in_ticks_rounded + [57-63] OFFSET1 = offset_in_ticks_rounded + */ + + /* + * Take the dphy_nclk/SysClk alignment circuit out of the Continuous + * Update mode + IOM0.DDRPHY_ADR_SYSCLK_CNTL_PR_P0_ADR32S{0,1} = // 0x800080320701103F, +0x0400_0000_0000 + IOM0.DDRPHY_DP16_SYSCLK_PR0_P0_{0,1,2,3,4} = // 0x800000070701103F, +0x0400_0000_0000 + IOM0.DDRPHY_DP16_SYSCLK_PR1_P0_{0,1,2,3} = // 0x8000007F0701103F, +0x0400_0000_0000 + [all] 0 + [48-63] 0x8020 // From the DDR PHY workbook + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + + for (dp = 0; dp < 4; dp++) { + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR1_P0_0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + } + dp_mca_and_or(mcs_ids[mcs_i], 4, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + } + + /* Wait at least 32 dphy_nclk clock cycles */ + delay_nck(32); + /* Done bang-bang-lock */ + + /* Per J. Bialas, force_mclk_low can be dasserted */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* MC01.PORT0.SRQ.MBA_FARB5Q = + [8] MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N = 1 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, + PPC_BIT(MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N)); + + } + + /* Workarounds */ + /* + * Does not apply to DD2, but even then reads and writes back some + * registers without modifications. + */ + // mss::workarounds::dp16::after_phy_reset(); + + /* + * Comments from Hostboot: + * + * "New for Nimbus - perform duty cycle clock distortion calibration + * (DCD cal). + * Per PHY team's characterization, the DCD cal needs to be run after DLL + * calibration." + * + * However, it can be skipped based on ATTR_MSS_RUN_DCD_CALIBRATION, + * and by default it is skipped. + */ + // mss::adr32s::duty_cycle_distortion_calibration(); + + /* FIR */ + check_during_phy_reset(mcs_i); + fir_unmask(mcs_i); + } + + printk(BIOS_EMERG, "ending istep 13.9\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index c811238a431..4aa6d8d6abf 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -341,6 +341,7 @@ void main(void) istep_13_6(); report_istep(13, 7); // no-op istep_13_8(); + istep_13_9(); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From cf87ee20b71ca5db0e6c1f03fe3083579656cc73 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 25 Mar 2021 15:22:49 +0100 Subject: [PATCH 035/213] soc/power9/istep_13_10.c: implement DRAM initialization 13.10 mss_draminit: Dram initialize a) p9_mss_draminit.C (mcbist) -- Nimbus b) p9c_mss_draminit.C (mba) -- Cumulus - RCD parity errors are checked before logging other errors - HWP will exit with RC - De-assert dram reset - De-assert bit (Scom) that forces mem clock low - dram clocks start - Raise CKE - Load RCD Control Words - Load MRS - for each dimm pair/ports/rank - ODT Values - MR0-MR6 c) Check for attentions (even if HWP has error) - FW - Call PRD - If finds and error, commit HWP RC as informational - Else commit HWP RC as normal - Trigger reconfig loop is anything was deconfigured Signed-off-by: Krystian Hebel Change-Id: Ia705d6ff2e06ff48271fdcf529e36af6ecce595e --- src/include/cpu/power/istep_13.h | 64 ++++ src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_10.c | 521 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 587 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_10.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 51aa416cd67..1c12dbf434f 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -233,9 +233,73 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, uint64_t step_cycles); void ccs_execute(chiplet_id_t id, int mca_i); +static inline enum ddr4_mr5_rtt_park vpd_to_rtt_park(uint8_t vpd) +{ + /* Fun fact: this is 240/vpd with bit order reversed */ + switch (vpd) { + case 34: + return DDR4_MR5_RTT_PARK_RZQ_7; + case 40: + return DDR4_MR5_RTT_PARK_RZQ_6; + case 48: + return DDR4_MR5_RTT_PARK_RZQ_5; + case 60: + return DDR4_MR5_RTT_PARK_RZQ_4; + case 80: + return DDR4_MR5_RTT_PARK_RZQ_3; + case 120: + return DDR4_MR5_RTT_PARK_RZQ_2; + case 240: + return DDR4_MR5_RTT_PARK_RZQ_1; + default: + return DDR4_MR5_RTT_PARK_OFF; + } +} + +static inline enum ddr4_mr2_rtt_wr vpd_to_rtt_wr(uint8_t vpd) +{ + switch (vpd) { + case 0: + return DDR4_MR2_RTT_WR_OFF; + case 80: + return DDR4_MR2_RTT_WR_RZQ_3; + case 120: + return DDR4_MR2_RTT_WR_RZQ_2; + case 240: + return DDR4_MR2_RTT_WR_RZQ_1; + default: + /* High-Z is 1 in VPD */ + return DDR4_MR2_RTT_WR_HI_Z; + } +} + +static inline enum ddr4_mr1_rtt_nom vpd_to_rtt_nom(uint8_t vpd) +{ + /* Fun fact: this is 240/vpd with bit order reversed */ + switch (vpd) { + case 34: + return DDR4_MR1_RTT_NOM_RZQ_7; + case 40: + return DDR4_MR1_RTT_NOM_RZQ_6; + case 48: + return DDR4_MR1_RTT_NOM_RZQ_5; + case 60: + return DDR4_MR1_RTT_NOM_RZQ_4; + case 80: + return DDR4_MR1_RTT_NOM_RZQ_3; + case 120: + return DDR4_MR1_RTT_NOM_RZQ_2; + case 240: + return DDR4_MR1_RTT_NOM_RZQ_1; + default: + return DDR4_MR1_RTT_NOM_OFF; + } +} + void istep_13_2(void); void istep_13_3(void); void istep_13_4(void); void istep_13_6(void); void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 void istep_13_9(void); +void istep_13_10(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 3a96186002e..91cdc915d53 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -13,6 +13,7 @@ romstage-y += istep_13_4.c romstage-y += istep_13_6.c romstage-y += istep_13_8.c romstage-y += istep_13_9.c +romstage-y += istep_13_10.c romstage-y += i2c.c romstage-y += 
ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c new file mode 100644 index 00000000000..11d4a335a34 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_10.c @@ -0,0 +1,521 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "istep_13_scom.h" + +#define SPD_I2C_BUS 3 + +static void draminit_cke_helper(chiplet_id_t id, int mca_i) +{ + /* + * Hostboot stops CCS before sending new programs. I'm not sure that is + * necessary, unless there are infinite loops. Don't do it and see what happens. + MC01.MCBIST.MBA_SCOMFIR.CCS_CNTLQ // 0x070123A5 + [all] 0 + [1] CCS_CNTLQ_CCS_STOP = 1 + timeout(50*10ns): + if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ[0] (CCS_STATQ_CCS_IP) != 1: break // 0x070123A6 + delay(10ns) + */ + + ccs_add_instruction(id, 0, 0xF, 0xF, 400); + ccs_execute(id, mca_i); +} + +static void rcd_load(mca_data_t *mca, int d) +{ + uint8_t val; + rdimm_data_t *dimm = &mca->dimm[d]; + uint8_t *spd = dimm->spd; + + /* Raw card specifications are JEDEC documents MODULE4.20.28.x, where x is A-E */ + + /* + F0RC00 = 0x0 // Depends on reference raw card used, sometimes 0x2 (ref. A, B, C and custom?) + // Seems that 'custom' is used for > C, which means 0x2 is always set. + F0RC01 = 0x0 // Depends on reference raw card used, sometimes 0xC (ref. C?). + // JESD82-31: "The system must read the module SPD to determine + // which clock outputs are used by the module". R/C C and D use + // only Y0-Y1, other R/C use all 4 signals. + */ + /* + * F0RC01 is effectively based on dimm->mranks, but maybe future reference R/C + * will use different clocks than Y0-Y1, which technically is possible... + * + * (spd[131] & 0x1F) is 0x02 for C and 0x03 for D, this line tests for both + */ + val = ((spd[131] & 0x1E) == 0x02) ? 0xC2 : 0x02; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC00_01, val); + + /* + F0RC02 = + [0] = 1 if(!(16Gb density && x4 width)) // disable A17? // Why not use SPD[5]? + // Hostboot waits for tSTAB, however it is not necessary as long as bit 3 is not changed. + F0RC03 = + [0-1] SPD[137][4-5] // Address/Command drive strength + [2-3] SPD[137][6-7] // CS drive strength + // There is also a workaround for NVDIMM hybrids, not needed for plain RDIMM + */ + val = spd[137] & 0xF0; // F0RC03 + if (dimm->density != DENSITY_16Gb || dimm->width != WIDTH_x4) + val |= 1; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC02_03, val); + + /* + F0RC04 = + // BUG?
Hostboot reverses bitfields order for RC04, 05 + [0-1] SPD[137][2-3] // ODT drive strength + [2-3] SPD[137][0-1] // CKE drive strength + // There is also a workaround for NVDIMM hybrids, not needed for plain RDIMM + F0RC05 = + [0-1] SPD[138][2-3] // Clocks drive strength, A side (1,3) + [2-3] SPD[138][0-1] // Clocks drive strength, B side (0,2) + // There is also a workaround for NVDIMM hybrids, not needed for plain RDIMM + */ + /* First read both nibbles as they are in SPD, then swap pairs of bit fields */ + val = (spd[137] & 0x0F) | ((spd[138] & 0x0F) << 4); + val = ((val & 0x33) << 2) | ((val & 0xCC) >> 2); + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC04_05, val); + + /* + F0RC06 = 0xf // This is a command register, either don't touch it or use NOP (F) + F0RC07 = 0x0 // This is a command register, either don't touch it or use NOP (0) + */ + + /* + F0RC08 = + [0-1] = + 1 if master ranks == 4 (SPD[12]) // C0 and C1 enabled + 3 if not 3DS (check SPD[6] and SPD[10]) // all disabled + 2 if slave ranks <= 2 // C0 enabled + 1 if slave ranks <= 4 // C0 and C1 enabled + 0 otherwise (3DS with 5-8 slave ranks) // C0, C1 and C2 enabled + [3] = 1 if(!(16Gb density && x4 width)) // disable A17? // Why not use SPD[5]? + F0RC09 = + [2] = + // TODO: add test for it, write 1 for now + 0 if this DIMM's ODTs are used for writes or reads that target the other DIMM on the same port + 1 otherwise + [3] = 1 // Register CKE Power Down. CKE must be high at the moment of writing to this register and stay high. + // TODO: For how long? Indefinitely, tMRD, tInDIS, tFixedOutput or anything else? + */ + /* Assume no 4R */ + val = (dimm->mranks == dimm->log_ranks) ? 3 : + (2 - (dimm->log_ranks / dimm->mranks) / 4); + if (dimm->density != DENSITY_16Gb || dimm->width != WIDTH_x4) + val |= 8; + val |= 0xC0; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC08_09, val); + + /* + F0RC0A = + [0-2] = // There are other valid values not used by Hostboot + 1 if 1866 MT/s + 2 if 2133 MT/s + 3 if 2400 MT/s + 4 if 2666 MT/s + F0RC0B = 0xe // External VrefCA connected to QVrefCA and BVrefCA + */ + val = mem_data.speed == 1866 ? 1 : + mem_data.speed == 2133 ? 2 : + mem_data.speed == 2400 ? 3 : 4; + val |= 0xE0; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC0A_0B, val); + + /* + F0RC0C = 0 // Normal operating mode + F0RC0D = + [0-1] = // CS mode + 3 if master ranks == 4 (SPD[12]) // encoded QuadCS + 0 otherwise // direct DualCS + [2] = 1 // RDIMM + [3] = SPD[136] // Address mirroring for MRS commands + */ + /* Assume RDIMM and that there are no 4R configurations, add when needed */ + val = 0x40; + if (spd[136]) + val |= 0x80; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC0C_0D, val); + + /* + F0RC0E = 0xd // Parity enable, ALERT_n assertion and re-enable + F0RC0F = 0 // Normal mode + */ + val = 0x0D; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC0E_0F, val); + + /* + F0RC1x = 0 // Normal mode, VDD/2 + F0RC2x = 0 // Normal mode, all I2C accesses enabled + */ + + /* + F0RC3x = + 0x1f if 1866 MT/s + 0x2c if 2133 MT/s + 0x39 if 2400 MT/s + 0x47 if 2666 MT/s + */ + val = mem_data.speed == 1866 ? 0x1F : + mem_data.speed == 2133 ? 0x2C : + mem_data.speed == 2400 ? 
0x39 : 0x47; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC3x, val); + + /* + F0RC4x = 0 // Should not be touched at all, it is used to access different function spaces + F0RC5x = 0 // Should not be touched at all, it is used to access different function spaces + F0RC6x = 0 // Should not be touched at all, it is used to access different function spaces + F0RC7x = 0 // Value comes from VPD, 0 is default, it doesn't seem to be changed anywhere in the code... + F0RC8x = 0 // Default QxODT timing for reads and for writes + F0RC9x = 0 // QxODT not asserted during writes, all ranks + F0RCAx = 0 // QxODT not asserted during reads, all ranks + */ + + /* + F0RCBx = + [0-2] = // Note that only the first line is different than F0RC08 (C0 vs. C0 & C1) + 6 if master ranks == 4 (SPD[12]) // C0 enabled + 7 if not 3DS (check SPD[6] and SPD[10]) // all disabled + 6 if slave ranks <= 2 // C0 enabled + 4 if slave ranks <= 4 // C0 and C1 enabled + 0 otherwise (3DS with 5-8 slave ranks) // C0, C1 and C2 enabled + */ + /* Assume no 4R */ + val = (dimm->mranks == dimm->log_ranks) ? 7 : + (dimm->log_ranks / dimm->mranks) == 2 ? 6 : + (dimm->log_ranks / dimm->mranks) == 4 ? 4 : 0; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RCBx, val); + + /* + * After all RCWs are set, DRAM gets reset "to ensure it is reset properly". + * Comment: "Note: the minimum for a FORC06 soft reset is 32 cycles, but we + * empirically tested it at 8k cycles". Shouldn't we rather wait (again!) + * for periods defined in JESD79-4C (200us low and 500us high)? + * + * Do we even need it in the first place? + */ + /* + F0RC06 = 0x2 // Set QRST_n to active (low) + delay(8000 memclocks) + F0RC06 = 0x3 // Set QRST_n to inactive (high) + delay(8000 memclocks) + */ + val = 0x2; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); + delay_nck(8000); + val = 0x3; + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); + delay_nck(8000); + + /* + * Dumped values from currently installed DIMM, from Petitboot: + * 0xc7 0x18 0x42 0x00 0x00 0x00 0x00 0x00 VID[2], DID[2], RID[1], 3x reserved + * 0x02 0x01 0x00 0x03 0xcb 0xe4 0x40 0x0d F0RC00-0F (4b each) + * 0x00 0x00 0x47 0x00 0x00 0x00 0x00 0x00 F0RC1x-8x (8b each) + * 0x00 0x00 0x07 F0RC9x-Bx (8b each), then all zeroes (Error Log Registers) + * + * Below is a copy of above values, this also tests RCD/I2C API. Command + * register is changed to NOP (was "Clear DRAM Reset" in dump). + */ + /* + rcd_write_32b(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC00_01, 0x0201000f); + rcd_write_32b(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC08_09, 0xcbe4400d); + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC3x, 0x47); + rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RCBx, 0x07); + */ +} + +/* + * Programming the Mode Registers consists of entering special mode using MRS + * (Mode Register Set) command and sending MR# values, one at a time, in a + * specific order (3,6,5,4,2,1,0). Those values are sent using address lines, + * including bank and bank group lines, which select which MR to write to. + * One of the implications is that these values cannot be read back. PHY + * controller holds the mirrors of last written values in its registers, but the + * mapping of bits is not clear. This mirror is RW, so there is a possibility + * that the values are not the same as the real ones (but this would be a bad + * idea as these bits are used by a controller). 
It gets further complicated + * when PDA mode was used at any point, as there is just one mirror register per + * rank pair. + * + * We have to write a whole register even when changing just one bit, this means + * that we have to remember what was written, or be able to (re)generate valid + * data. For this platform we have CCS which can be programmed to push all MRs + * in one sequence of instructions, including all required timeouts. There are + * two main timeout parameters: tMRD (minimal amount of time between two MRS + * commands) and tMOD (time between MRS and non-MRS and non-DES command). For + * all Speed Bins tMRD = 8nCK, tMOD = max(24nCK, 15ns) = 24nCK. Exceptions to + * those are: + * - gear down mode + * - PDA mode + * - settings to command & address lines: C/A parity latency, CS to C/A latency + * (only tMRC doesn't apply) + * - VrefDQ training + * - DLL Enable, DLL Reset (only tMOD doesn't apply) + * - maximum power saving mode (only tMOD doesn't apply) + * + * MRS are written per rank usually, although most of them must have the same + * values across the DIMM or even port. There are some settings that apply to + * individual DRAMs instead of whole rank (e.g. Vref in MR6). Normally settings + * written to MR# are passed to each DRAM, if individual DRAM has to have its + * settings changed independently of others we must use Per DRAM Addressability + * (PDA) mode. PDA is possible only after write leveling was performed. + * + * CCS is per MCBIST, so we need at most 4 (ports) * 2 (DIMMs per port) * + * 2 (master ranks per DIMM) * 2 (A- and B-side) * + * (7 (# of MRS) + 1 (final DES)) = 256 instructions. CCS holds space for 32 + * instructions, so we have to divide it and send a set of instructions per DIMM + * or even smaller chunks. + * + * TODO: is 4 ranks on RDIMM possible/used? PHY supports two ranks per DIMM (see + * 2.1 in any of the volumes of the registers specification), but Hostboot has + * configurations even for RDIMMs with 4 master ranks (see xlate_map vector in + * src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C). Maybe those are + * counted in different places, i.e. before and after RCD, and thanks to Encoded + * QuadCS 4R DIMMs are visible to the PHY as 2R devices? + */ +static void mrs_load(int mcs_i, int mca_i, int d) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int mirrored = mca->dimm[d].spd[136] & 1; + mrs_cmd_t mrs; + int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); + enum rank_selection ranks; + + if (d == 0) { + if (mca->dimm[d].mranks == 2) + ranks = DIMM0_ALL_RANKS; + else + ranks = DIMM0_RANK0; + } + else { + if (mca->dimm[d].mranks == 2) + ranks = DIMM1_ALL_RANKS; + else + ranks = DIMM1_RANK0; + } + + /* + * If any of the following are changed, make sure to change istep 13.11 too, + * some of the pre-/post-workarounds are also writing to these registers. 
+ */ + + mrs = ddr4_get_mr3(DDR4_MR3_MPR_SERIAL, + DDR4_MR3_CRC_DM_5, + DDR4_MR3_FINE_GRAN_REF_NORMAL, + DDR4_MR3_TEMP_SENSOR_DISABLE, + DDR4_MR3_PDA_DISABLE, + DDR4_MR3_GEARDOWN_1_2_RATE, + DDR4_MR3_MPR_NORMAL, + 0); + ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr6(mca->nccd_l, + DDR4_MR6_VREFDQ_TRAINING_DISABLE, + DDR4_MR6_VREFDQ_TRAINING_RANGE_1, /* Don't care when disabled */ + 0); + ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr5(DDR4_MR5_RD_DBI_DISABLE, + DDR4_MR5_WR_DBI_DISABLE, + DDR4_MR5_DATA_MASK_DISABLE, + vpd_to_rtt_park(ATTR_MSS_VPD_MT_DRAM_RTT_PARK[vpd_idx]), + DDR4_MR5_ODT_PD_ACTIVADED, + DDR4_MR5_CA_PARITY_LAT_DISABLE); + ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr4(DDR4_MR4_HPPR_DISABLE, + DDR4_MR4_WR_PREAMBLE_1, /* ATTR_MSS_VPD_MT_PREAMBLE - always 0 */ + DDR4_MR4_RD_PREAMBLE_1, /* ATTR_MSS_VPD_MT_PREAMBLE - always 0 */ + DDR4_MR4_RD_PREAMBLE_TRAINING_DISABLE, + DDR4_MR4_SELF_REFRESH_ABORT_DISABLE, + DDR4_MR4_CS_TO_CMD_LAT_DISABLE, + DDR4_MR4_SPPR_DISABLE, + DDR4_MR4_INTERNAL_VREF_MON_DISABLE, + DDR4_MR4_TEMP_CONTROLLED_REFR_DISABLE, + DDR4_MR4_MAX_PD_MODE_DISABLE); + ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + + /* + * Regarding RTT_WR: OFF seems to be the safest option, but it is not always + * the case in VPD. + * See "Write leveling - pre-workaround" (and post-workaround) in 13.11, + * maybe write 0 here and don't do pre-? + */ + mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, /* ATTR_MSS_MRW_DRAM_WRITE_CRC, default 0 */ + vpd_to_rtt_wr(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), + /* ATTR_MSS_MRW_REFRESH_RATE_REQUEST, default DOUBLE. + * Do we need to half tREFI as well? */ + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, + mem_data.cwl); + ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, /* ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS, always 34 Ohms */ + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr0(mca->nwr, + DDR4_MR0_DLL_RESET_YES, + DDR4_MR0_MODE_NORMAL, + mca->cl, + DDR4_MR0_BURST_TYPE_SEQUENTIAL, + DDR4_MR0_BURST_LENGTH_FIXED_8); + ccs_add_mrs(id, mrs, ranks, mirrored, tMOD); + + ccs_execute(id, mca_i); +} + +/* + * 13.10 mss_draminit: Dram initialize + * + * a) p9_mss_draminit.C (mcbist) -- Nimbus + * b) p9c_mss_draminit.C (mba) -- Cumulus + * - RCD parity errors are checked before logging other errors - HWP will + * exit with RC + * - De-assert dram reset + * - De-assert bit (Scom) that forces mem clock low - dram clocks start + * - Raise CKE + * - Load RCD Control Words + * - Load MRS - for each dimm pair/ports/rank + * - ODT Values + * - MR0-MR6 + * c) Check for attentions (even if HWP has error) + * - FW + * - Call PRD + * - If finds and error, commit HWP RC as informational + * - Else commit HWP RC as normal + * - Trigger reconfig loop is anything was deconfigured + */ +void istep_13_10(void) +{ + printk(BIOS_EMERG, "starting istep 13.10\n"); + int mcs_i, mca_i, dimm; + + report_istep(13, 10); + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + /* MC01.MCBIST.MBA_SCOMFIR.CCS_MODEQ + // "It's unclear if we want to run with this true or false. Right now (10/15) this + // has to be false. 
Shelton was unclear if this should be on or off in general BRS" + [0] CCS_MODEQ_CCS_STOP_ON_ERR = 0 + [1] CCS_MODEQ_CCS_UE_DISABLE = 0 + [24] CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD = 1 + [26] CCS_MODEQ_COPY_CKE_TO_SPARE_CKE = 1 // Docs: "Does not apply for POWER9. No spare chips to copy to." + // The following are set in 13.11, but we can do it here, one less RMW + // "Hm. Centaur sets this up for the longest duration possible. Can we do better?" + // This is timeout so we should only hit it in the case of error. What is the unit of this field? Memclocks? + [8-23] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT = 0xffff + [30-31] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT = 3 + */ + scom_and_or_for_chiplet(mcs_ids[mcs_i], CCS_MODEQ, + ~(PPC_BIT(CCS_MODEQ_CCS_STOP_ON_ERR) | + PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), + PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | + PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | + PPC_SHIFT(0xFFFF, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT) | + PPC_SHIFT(3, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT)); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* MC01.PORT0.SRQ.MBA_FARB5Q // 0x07010918 + // RESET_N should stay low for at least 200us (JEDEC fig 7) for cold boot. Who and when sets it low? + // "Up, down P down, up N. Somewhat magic numbers - came from Centaur and proven to be the + // same on Nimbus. Why these are what they are might be lost to time ..." + [0-1] MBA_FARB5Q_CFG_DDR_DPHY_NCLK = 0x1 // 0b01 // 2nd RMW + [2-3] MBA_FARB5Q_CFG_DDR_DPHY_PCLK = 0x2 // 0b10 // 2nd RMW + [4] MBA_FARB5Q_CFG_DDR_RESETN = 1 // 3rd RMW (optional (?), only if changes) + [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 1 // 1st RMW (optional, only if changes) + [6] MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE = 0 // 1st RMW (optional, only if changes) + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE), + PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL)); + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BITMASK(0, 3), + PPC_SHIFT(0x6, MBA_FARB5Q_CFG_DDR_DPHY_PCLK)); + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, + PPC_BIT(MBA_FARB5Q_CFG_DDR_RESETN)); + + udelay(500); /* part of 3rd RMW, but delay is unconditional */ + } + + /* + * JEDEC, fig 7,8: delays above and below end at the same point, they + * are not consecutive. RDIMM spec says that clocks must be stable for + * 16nCK before RESET_n = 1. This is not explicitly ensured. + * + * Below seems unnecessary, we are starting clocks at the same time as + * deasserting reset (are we?) + */ + /* + * max(10ns, 5tCK), but for all DDR4 Speed Bins 10ns is bigger. + * coreboot API doesn't have enough precision anyway. + */ + udelay(1); + + /* + * draminit_cke_helper() is called only for the first functional MCA + * because CCS_ADDR_MUX_SEL is set. + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (mem_data.mcs[mcs_i].mca[mca_i].functional) + break; + } + draminit_cke_helper(mcs_ids[mcs_i], mca_i); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* + * "Per conversation with Shelton and Steve, turn off addr_mux_sel + * after the CKE CCS but before the RCD/MRS CCSs" + * + * Needs to be disabled for all MCAs before next instructions, hence + * separate loop. 
+ MC01.PORT0.SRQ.MBA_FARB5Q + [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 0 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL), 0); + } + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + rcd_load(mca, dimm); + // bcw_load(); /* LRDIMM only */ + mrs_load(mcs_i, mca_i, dimm); + dump_rcd(SPD_I2C_BUS, mca->dimm[dimm].rcd_i2c_addr); + } + } + } + + printk(BIOS_EMERG, "ending istep 13.10\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 4aa6d8d6abf..72e7103eb52 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -342,6 +342,7 @@ void main(void) report_istep(13, 7); // no-op istep_13_8(); istep_13_9(); + istep_13_10(); /* Test if SCOM still works. Maybe should check also indirect access? */ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 5e8748e0d7a5208c91f1e8787c011df6c7e4ad25 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 29 Mar 2021 17:23:24 +0200 Subject: [PATCH 036/213] soc/power9/istep_13_11.c: implementation 13.11 mss_draminit_training: Dram training a) p9_mss_draminit_training.C (mcbist) -- Nimbus b) p9c_mss_draminit_training.C (mba) -- Cumulus - Prior to running this procedure will apply known DQ bad bits to prevent them from participating in training. This information is extracted from the bad DQ attribute and applied to Hardware - Marks the calibration fail array - External ZQ Calibration - Execute initial dram calibration (7 step - handled by HW) - This procedure will update the bad DQ attribute for each dimm based on its findings Signed-off-by: Krystian Hebel Change-Id: Ia1250dd49c0f780c38da8ca264a984083fea7767 --- src/include/cpu/power/istep_13.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_11.c | 1347 ++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 1350 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_11.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 1c12dbf434f..205e51da87e 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -303,3 +303,4 @@ void istep_13_6(void); void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 void istep_13_9(void); void istep_13_10(void); +void istep_13_11(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 91cdc915d53..a6b068fe88d 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -14,6 +14,7 @@ romstage-y += istep_13_6.c romstage-y += istep_13_8.c romstage-y += istep_13_9.c romstage-y += istep_13_10.c +romstage-y += istep_13_11.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c new file mode 100644 index 00000000000..2558c14185a --- /dev/null +++ b/src/soc/ibm/power9/istep_13_11.c @@ -0,0 +1,1347 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "istep_13_scom.h" + +static void setup_and_execute_zqcal(int mcs_i, int mca_i, int d) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int mirrored = mca->dimm[d].spd[136] & 1; /* Maybe add this to mca_data_t? 
*/ + mrs_cmd_t cmd = ddr4_get_zqcal_cmd(DDR4_ZQCAL_LONG); + enum rank_selection ranks; + + if (d == 0) { + if (mca->dimm[d].mranks == 2) + ranks = DIMM0_ALL_RANKS; + else + ranks = DIMM0_RANK0; + } else { + if (mca->dimm[d].mranks == 2) + ranks = DIMM1_ALL_RANKS; + else + ranks = DIMM1_RANK0; + } + + /* + * JEDEC: "All banks must be precharged and tRP met before ZQCL or ZQCS + * commands are issued by the controller" - not sure if this is ensured. + * A refresh during the calibration probably would impact the results. Also, + * "No other activities should be performed on the DRAM channel by the + * controller for the duration of tZQinit, tZQoper, or tZQCS" - this means + * we have to insert a delay after every ZQCL, not only after the last one. + * As a possible improvement, perhaps we could reorder this step a bit and + * send ZQCL on all ports "simultaneously" (without delays) and add a delay + * just between different DIMMs/ranks, but those delays cannot be done by + * CCS and we don't have a timer with enough precision to make it worth the + * effort. + */ + ccs_add_mrs(id, cmd, ranks, mirrored, tZQinit); + ccs_execute(id, mca_i); +} + +static void clear_initial_cal_errors(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + for (dp = 0; dp < 5; dp++) { + /* Whole lot of zeroing + IOM0.DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_{0-4}, + IOM0.DDRPHY_DP16_WR_ERROR0_P0_{0-4}, + IOM0.DDRPHY_DP16_RD_STATUS0_P0_{0-4}, + IOM0.DDRPHY_DP16_RD_LVL_STATUS2_P0_{0-4}, + IOM0.DDRPHY_DP16_RD_LVL_STATUS0_P0_{0-4}, + IOM0.DDRPHY_DP16_WR_VREF_ERROR0_P0_{0-4}, + IOM0.DDRPHY_DP16_WR_VREF_ERROR1_P0_{0-4}, + [all] 0 + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0, 0, 0); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0, 0, 0); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0, 0, 0); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0, 0, 0); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0, 0, 0); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0, 0, 0); + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0, 0, 0); + } + + /* IOM0.DDRPHY_APB_CONFIG0_P0 = + [49] RESET_ERR_RPT = 1, then 0 + */ + mca_and_or(id, mca_i, DDRPHY_APB_CONFIG0_P0, ~0, PPC_BIT(RESET_ERR_RPT)); + mca_and_or(id, mca_i, DDRPHY_APB_CONFIG0_P0, ~PPC_BIT(RESET_ERR_RPT), 0); + + /* IOM0.DDRPHY_APB_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_RC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_SEQ_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_WC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 = + [all] 0 + */ + mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); + + /* IOM0.IOM_PHY0_DDRPHY_FIR_REG = + [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 = 0 + */ + mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, + ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), 0); +} + +static void dump_cal_errors(int mcs_i, int mca_i) +{ +#if CONFIG(DEBUG_RAM_SETUP) + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* + * Values are printed before names for two reasons: + * - it is easier to align, + * - BMC buffers host's serial output both in 'obmc-console-client' 
and in + * Serial over LAN and may not print few last characters. + */ + for (dp = 0; dp < 5; dp++) { + printk(BIOS_ERR, "DP %d\n", dp); + printk(BIOS_ERR, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - WR_ERROR0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - RD_STATUS0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS2\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR0\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); + printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR1\n", + dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); + } + + printk(BIOS_ERR, "%#16.16llx - APB_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - RC_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - SEQ_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - WC_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - PC_ERROR_STATUS0\n", + mca_read(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); + + printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_ERROR\n", + mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); + + /* 0x8000 on success for first rank, 0x4000 for second */ + printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_STATUS\n", + mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + + printk(BIOS_ERR, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", + mca_read(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); + + printk(BIOS_ERR, "%#16.16llx - MBACALFIRQ\n", + mca_read(id, mca_i, MBACALFIR)); +#endif +} + +/* Based on ATTR_MSS_MRW_RESET_DELAY_BEFORE_CAL, by default do it. */ +static void dp16_reset_delay_values(int mcs_i, int mca_i, enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* + * It iterates over enabled rank pairs. See 13.8 for where these "pairs" + * (which may have up to 4 elements) were set. 
+ */ + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_{0-4} = 0 */ + if (ranks_present & DIMM0_RANK0) + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_0, 0, 0); + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_{0-4} = 0 */ + if (ranks_present & DIMM0_RANK1) + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_0, 0, 0); + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_{0-4} = 0 */ + if (ranks_present & DIMM1_RANK0) + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_0, 0, 0); + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_{0-4} = 0 */ + if (ranks_present & DIMM1_RANK1) + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_0, 0, 0); + } +} + +static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 + // > May need to add freq/tRFI attr dependency later but for now use this value + // > Provided by Ryan King + [60-63] TRFC_CYCLES = 9 // tRFC = 2^9 = 512 memcycles + */ + /* See note in seq_reset() in 13.8. This may not be necessary. */ + mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, ~PPC_BITMASK(60, 63), + PPC_SHIFT(9, TRFC_CYCLES)); + + /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 + // > Hard coded settings provided by Ryan King for this workaround + [48-51] REFRESH_COUNT = 0xf + // TODO: see "Read clock align - pre-workaround" below. Why not 1 until + // calibration finishes? Does it pull in refresh commands? + [52-53] REFRESH_CONTROL = 3 // refresh commands may interrupt calibration routines + [54] REFRESH_ALL_RANKS = 1 + [55] CMD_SNOOP_DIS = 0 + [57-63] REFRESH_INTERVAL = 0x13 // Worst case: 6.08us for 1866 (max tCK). Must be not more than 7.8us + */ + mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, + ~(PPC_BITMASK(48, 55) | PPC_BITMASK(57, 63)), + PPC_SHIFT(0xF, REFRESH_COUNT) | PPC_SHIFT(3, REFRESH_CONTROL) | + PPC_BIT(REFRESH_ALL_RANKS) | PPC_SHIFT(0x13, REFRESH_INTERVAL)); +} + +static void wr_level_pre(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int d = rp / 2; + int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); + int mirrored = mca->dimm[d].spd[136] & 1; + mrs_cmd_t mrs; + enum rank_selection rank = 1 << rp; + int i; + + /* + * JEDEC specification requires disabling RTT_WR during WR_LEVEL, and + * enabling equivalent terminations. + */ + if (ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx] != 0) { + /* MR2 = // redo the rest of the bits + [A11-A9] 0 + */ + mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, + vpd_to_rtt_wr(0), + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, + mem_data.cwl); + ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + + /* MR1 = // redo the rest of the bits + // Write properly encoded RTT_WR value as RTT_NOM + [A8-A10] 240/ATTR_MSS_VPD_MT_DRAM_RTT_WR + */ + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Next command for this rank is REF, done by PHY hardware, so use tMOD. + * + * There are possible MRS commands to be send to other ranks, maybe we + * can subtract those. On the other hand, with microsecond precision for + * delays in ccs_execute(), this probably doesn't matter anyway. 
+ */ + ccs_add_mrs(id, mrs, rank, mirrored, tMOD); + + /* + * This block is done after MRS commands in Hostboot, but we do not call + * ccs_execute() until the end of this function anyway. It doesn't seem + * to make a difference. + */ + switch (rp) { + case 0: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + [48] = 1 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(48)); + break; + case 1: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + [57] = 1 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(57)); + break; + case 2: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = + [50] = 1 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(50)); + break; + case 3: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = + [59] = 1 + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(59)); + break; + } + + // mss::workarounds::seq::odt_config(); // Not needed on DD2 + + } + + /* Different workaround, executed even if RTT_WR == 0 */ + /* workarounds::wr_lvl::configure_non_calibrating_ranks() + for each rank on MCA except current primary rank: + MR1 = // redo the rest of the bits + [A7] = 1 // Write Leveling Enable + [A12] = 1 // Outputs disabled (DQ, DQS) + */ + for (i = 0; i < 4; i++) { + rank = 1 << i; + if (i == rp || !(ranks_present & rank)) + continue; + + /* + * VPD index stays the same (DIMM mixing rules), but I'm not sure about + * mirroring. Better safe than sorry, assume mirrored and non-mirrored + * DIMMs can be mixed. + */ + mirrored = mca->dimm[i/2].spd[136] & 1; + + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_DISABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_ENABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Delays apply to commands sent to the same rank, but we are changing + * ranks. Can we get away with 0 delay? Is it worth it? Remember that + * the same delay is currently used between sides of RCD. + */ + ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + } + + /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ + //ccs_execute(id, mca_i); +} + +static uint64_t wr_level_time(mca_data_t *mca) +{ + /* + * "Note: the following equation is taken from the PHY workbook - leaving + * the naked numbers in for parity to the workbook + * + * This step runs for approximately (80 + TWLO_TWLOE) x NUM_VALID_SAMPLES x + * (384/(BIG_STEP + 1) + (2 x (BIG_STEP + 1))/(SMALL_STEP + 1)) + 20 memory + * clock cycles per rank." + * + * TWLO_TWLOE for every defined speed bin is 9.5 + 2 = 11.5 ns, this needs + * to be converted to clock cycles, it is the only non-constant component of + * the equation. + */ + const int big_step = 7; + const int small_step = 0; + const int num_valid_samples = 5; + const int twlo_twloe = ps_to_nck(11500); + + return (80 + twlo_twloe) * num_valid_samples * (384 / (big_step + 1) + + (2 * (big_step + 1)) / (small_step + 1)) + 20; +} + +/* Undo the pre-workaround, basically */ +static void wr_level_post(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int d = rp / 2; + int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); + int mirrored = mca->dimm[d].spd[136] & 1; + mrs_cmd_t mrs; + enum rank_selection rank = 1 << rp; + int i; + + /* + * JEDEC specification requires disabling RTT_WR during WR_LEVEL, and + * enabling equivalent terminations. 
+ */ + if (ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx] != 0) { + #define F(x) ((((x) >> 4) & 0xc) | (((x) >> 2) & 0x3)) + /* Originally done in seq_reset() in 13.8 */ + /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG1_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_RD_VALUES0 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][0]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][2]) + [56-59] ODT_RD_VALUES1 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][1]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][3]) + */ + /* 2 DIMMs -> odd vpd_idx */ + uint64_t val = 0; + if (vpd_idx % 2) + val = PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES0) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES1); + + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); + + + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_WR_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][0]) + [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) + */ + mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_RD_VALUES0) | + PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_RD_VALUES1)); + #undef F + + /* MR2 = // redo the rest of the bits + [A11-A9] ATTR_MSS_VPD_MT_DRAM_RTT_WR + */ + mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, + vpd_to_rtt_wr(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, + mem_data.cwl); + ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + + /* MR1 = // redo the rest of the bits + // Write properly encoded RTT_NOM value + [A8-A10] 240/ATTR_MSS_VPD_MT_DRAM_RTT_NOM + */ + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Next command for this rank should be REF before Initial Pattern Write, + * done by PHY hardware, so use tMOD. + */ + ccs_add_mrs(id, mrs, rank, mirrored, tMOD); + + // mss::workarounds::seq::odt_config(); // Not needed on DD2 + } + + /* Different workaround, executed even if RTT_WR == 0 */ + /* workarounds::wr_lvl::configure_non_calibrating_ranks() + for each rank on MCA except current primary rank: + MR1 = // redo the rest of the bits + [A7] = 1 // Write Leveling Enable + [A12] = 1 // Outputs disabled (DQ, DQS) + */ + for (i = 0; i < 4; i++) { + rank = 1 << i; + if (i == rp || !(ranks_present & rank)) + continue; + + /* + * VPD index stays the same (DIMM mixing rules), but I'm not sure about + * mirroring. Better safe than sorry, assume mirrored and non-mirrored + * DIMMs can be mixed. + */ + mirrored = mca->dimm[i/2].spd[136] & 1; + + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Delays apply to commands sent to the same rank, but we are changing + * ranks. Can we get away with 0 delay? Is it worth it? Remember that + * the same delay is currently used between sides of RCD. 
+ */ + ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + } + + /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ + //ccs_execute(id, mca_i); +} + +static uint64_t initial_pat_wr_time(mca_data_t *mca) +{ + /* + * "Not sure how long this should take, so we're gonna use 1 to make sure we + * get at least one polling loop" + * + * Hostboot polls every 10 us, but in coreboot this value results in minimal + * delay of 2 us (one microsecond for delay_nck() and another for wait_us() + * in ccs_execute()). Tests show that it is not enough. + * + * What has to be done to write pattern to MPR in general: + * - write to MR3 to enable MPR access (tMOD) + * - write to MPRs (tWR_MPR for back-to-back writes, there are 4 MPRs; + * tWR_MPR is tMOD + AL + PL, but AL and PL is 0 here) + * - write to MR3 to disable MPR access (tMOD or tMRD, depending on what is + * the next command). + * + * This gives 6 * tMOD, but because there is RCD with sides A and B this is + * 12 * tMOD = 288 nCK. However, we have to add to calculations refresh + * commands, as set in dqs_align_turn_on_refresh() - 15 commands, each takes + * 512 nCK. This is kind of consistent for 2666 MT/s DIMM with 5 us I've + * seen in tests. + * + * There is no limit about how many refresh commands can be issued (as long + * as tRFC isn't violated), but only 8 of them are "pulling in" further + * refreshes, meaning that DRAM will survive 9*tREFI without a refresh + * (8 pulled in and 1 regular interval) - this is useful for longer + * calibration steps. Another 9*tREFI can be postponed - REF commands are + * sent after a longer pause, but this (probably) isn't relevant here. + * + * There may be more refreshes sent in the middle of the most of steps due + * to REFRESH_CONTROL setting. + * + * These additional cycles should be added to all calibration steps. I don't + * think they are included in Hostboot, then again I don't know what exactly + * is added in "equations taken from the PHY workbook". This may be the + * reason why Hostboot multiplies every timeout by 4 AND assumes worst case + * wherever possible AND polls so rarely. + * + * From the lack of better ideas, return 10 us. + */ + return ns_to_nck(10 * 1000); +} + +static uint64_t dqs_align_time(mca_data_t *mca) +{ + /* + * "This step runs for approximately 6 x 600 x 4 DRAM clocks per rank pair." + * + * In tests this is a bit less than that, but not enough to impact total + * times because we start busy polling earlier. + */ + return 6 * 600 * 4; +} + +static void rdclk_align_pre(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + + /* + * TODO: we just set it before starting calibration steps. As we don't have + * any precious data in RAM yet, maybe we can use 0 there and just change it + * to 3 in the post-workaround? + */ + + /* Turn off refresh, we don't want it to interfere here + IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 + [52-53] REFRESH_CONTROL = 0 // refresh commands are only sent at start of initial calibration + */ + mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); +} + +static uint64_t rdclk_align_time(mca_data_t *mca) +{ + /* + * "This step runs for approximately 24 x ((1024/COARSE_CAL_STEP_SIZE + + * 4 x COARSE_CAL_STEP_SIZE) x 4 + 32) DRAM clocks per rank pair" + * + * COARSE_CAL_STEP_SIZE = 4 + * + * In tests this finishes in about a third of this time (7 us instead of + * calculated 20.16 us). 
+ */ + const int coarse_cal_step_size = 4; + return 24 * ((1024/coarse_cal_step_size + 4*coarse_cal_step_size) * 4 + 32); +} + +static void rdclk_align_post(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + uint64_t val; + const uint64_t mul = 0x0000010000000000; + + /* + * "In DD2.*, We adjust the red waterfall to account for low VDN settings. + * We move the waterfall forward by one" + IOM0.DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR{0-3}_P0_{0-3} + [48-49] DQSCLK_SELECT0 = (++DQSCLK_SELECT0 % 4) + [52-53] DQSCLK_SELECT1 = (++DQSCLK_SELECT1 % 4) + [56-57] DQSCLK_SELECT2 = (++DQSCLK_SELECT2 % 4) + [60-61] DQSCLK_SELECT3 = (++DQSCLK_SELECT3 % 4) + IOM0.DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR{0-3}_P0_4 + [48-49] DQSCLK_SELECT0 = (++DQSCLK_SELECT0 % 4) + [52-53] DQSCLK_SELECT1 = (++DQSCLK_SELECT1 % 4) + // Can't change non-existing quads + */ + for (dp = 0; dp < 4; dp++) { + val = dp_mca_read(id, dp, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul); + val += PPC_BIT(49) | PPC_BIT(53) | PPC_BIT(57) | PPC_BIT(61); + val &= PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53) | PPC_BITMASK(56, 57) | + PPC_BITMASK(60, 61); + /* TODO: this can be done with just one read */ + dp_mca_and_or(id, dp, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53) | + PPC_BITMASK(56, 57) | PPC_BITMASK(60, 61)), + val); + } + + val = dp_mca_read(id, 4, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul); + val += PPC_BIT(49) | PPC_BIT(53); + val &= PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53); + dp_mca_and_or(id, dp, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53)), + val); + + /* Turn on refresh */ + dqs_align_turn_on_refresh(mcs_i, mca_i); +} + +static void read_ctr_pre(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* Turn off refresh + IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 + [52-53] REFRESH_CONTROL = 0 // refresh commands are only sent at start of initial calibration + */ + mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); + + for (dp = 0; dp < 5; dp++) { + /* + IOM0.DDRPHY_DP16_CONFIG0_P0_{0-4} + [62] 1 // part of ATESTSEL_0_4 field + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~0, PPC_BIT(62)); + + /* + * This was a part of main calibration in Hostboot, not pre-workaround, + * but this is easier this way. + IOM0.DDRPHY_DP16_RD_VREF_CAL_EN_P0_{0-4} + [all] 0 + [48-63] VREF_CAL_EN = 0xffff // We already did this in reset_rd_vref() in 13.8 + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, 0, + PPC_SHIFT(0xFFFF, 63)); + } + + /* This also was part of main + IOM0.DDRPHY_RC_RDVREF_CONFIG1_P0 + [60] CALIBRATION_ENABLE = 1 + [61] SKIP_RDCENTERING = 0 + */ + mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, + ~PPC_BIT(SKIP_RDCENTERING), + PPC_BIT(CALIBRATION_ENABLE)); +} + +static uint64_t read_ctr_time(mca_data_t *mca) +{ + /* + * "This step runs for approximately 6 x (512/COARSE_CAL_STEP_SIZE + 4 x + * (COARSE_CAL_STEP_SIZE + 4 x CONSEQ_PASS)) x 24 DRAM clocks per rank pair." + * + * COARSE_CAL_STEP_SIZE = 4 + * CONSEQ_PASS = 8 + * + * In tests this step takes more than that (38/30us), probably because of + * REF commands that are pulled in before the calibration. It is still much + * less than timeout (107us). 
+ */ + const int coarse_cal_step_size = 4; + const int conseq_pass = 8; + return 6 * (512/coarse_cal_step_size + 4 * (coarse_cal_step_size + 4 * conseq_pass)) + * 24; +} + +static void read_ctr_post(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* Does not apply to DD2 */ + // workarounds::dp16::rd_dq::fix_delay_values(); + + /* Turn on refresh */ + dqs_align_turn_on_refresh(mcs_i, mca_i); + + for (dp = 0; dp < 5; dp++) { + /* + IOM0.DDRPHY_DP16_CONFIG0_P0_{0-4} + [62] 0 // part of ATESTSEL_0_4 field + */ + dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~PPC_BIT(62), 0); + } +} + +/* Assume 18 DRAMs per DIMM ((8 data + 1 ECC) * 2), even for x8 */ +static uint16_t write_delays[18]; + +static void write_ctr_pre(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int mirrored = mca->dimm[rp/2].spd[136] & 1; + mrs_cmd_t mrs; + enum rank_selection rank = 1 << rp; + int vpd_idx = (mca->dimm[rp/2].mranks - 1) * 2 + (!!mca->dimm[(rp/2) ^ 1].present); + int dram; + + /* + * Write VREF Latching + * + * This may be considered a separate step, but with current dispatch logic + * we cannot add a step that isn't accelerated by PHY hardware so do this as + * a part of pre-workaround of next step. + * + * "JEDEC has a 3 step latching process for WR VREF + * 1) enter into VREFDQ training mode, with the desired range value is XXXXXX + * 2) set the VREFDQ value while in training mode - this actually latches the value + * 3) exit VREFDQ training mode and go into normal operation mode" + * + * Each step is followed by a 150ns (tVREFDQE or tVREFDQX) stream of DES + * commands before next one. + */ + uint64_t tVREFDQ_E_X = ns_to_nck(150); + + /* Fill MRS command once, then flip VREFDQ training mode bit as needed */ + mrs = ddr4_get_mr6(mca->nccd_l, + DDR4_MR6_VREFDQ_TRAINING_ENABLE, + (ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] & 0x40) >> 6, + ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] & 0x3F); + + /* Step 1 - enter VREFDQ training mode */ + ccs_add_mrs(id, mrs, rank, mirrored, tVREFDQ_E_X); + + /* Step 2 - latch VREFDQ value, command exactly the same as step 1 */ + ccs_add_mrs(id, mrs, rank, mirrored, tVREFDQ_E_X); + + /* Step 3 - exit VREFDQ training mode */ + mrs ^= 1 << 7; // A7 - VREFDQ Training Enable + ccs_add_mrs(id, mrs, rank, mirrored, tVREFDQ_E_X); + + /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ + //ccs_execute(id, mca_i); + + /* End of VREF Latching, beginning of Write Centering pre-workaround */ + + /* + * DRAM is one IC on the DIMM module, there are 9 DRAMs for x8 and 18 for + * x4 devices (DQ bits/width) per rank. Before centering the delays are the + * same for each DQ of a given DRAM, meaning it is enough to save just one + * value per DRAM. For simplicity, save every 4th DQ even on x8 devices. 
+ */ + for (dram = 0; dram < ARRAY_SIZE(write_delays); dram++) { + int dp = (dram * 4) / 16; + int val_idx = (dram * 4) % 16; + const uint64_t rp_mul = 0x0000010000000000; + const uint64_t val_mul = 0x0000000100000000; + /* IOM0.DDRPHY_DP16_WR_DELAY_VALUE__RP_REG_P0_ */ + uint64_t val = dp_mca_read(id, dp, mca_i, + DDRPHY_DP16_WR_DELAY_VALUE_0_RP0_REG_P0_0 + + rp * rp_mul + val_idx * val_mul); + write_delays[dram] = (uint16_t) val; + } +} + +static uint64_t write_ctr_time(mca_data_t *mca) +{ + /* + * "1000 + (NUM_VALID_SAMPLES * (FW_WR_RD + FW_RD_WR + 16) * + * (1024/(SMALL_STEP +1) + 128/(BIG_STEP +1)) + 2 * (BIG_STEP+1)/(SMALL_STEP+1)) * 24 + * DRAM clocks per rank pair." + * + * Yes, write leveling values are used for write centering, this is not an + * error (or is it? CONFIG0 says BIG_STEP = 1) + * WR_LVL_BIG_STEP = 7 + * WR_LVL_SMALL_STEP = 0 + * WR_LVL_NUM_VALID_SAMPLES = 5 + * + * "Per PHY spec, defaults to 0. Would need an attribute to drive differently" + * FW_WR_RD = 0 + * + * "From the PHY spec. Also confirmed with S. Wyatt as this is different + * than the calculation used in Centaur. This field must be set to the + * larger of the two values in number of memory clock cycles. + * FW_RD_WR = max(tWTR + 11, AL + tRTP + 3) + * Note from J. Bialas: The difference between tWTR_S and tWTR_L is that _S + * is write to read time to different bank groups, while _L is to the same. + * The algorithm should be switching bank groups so tWTR_S can be used" + * + * tRTP = 7.5ns (this comes from DDR4 spec) + * AL = 0 + * + * For tWTR_S = 2.5ns this should give ~2.9-4.5ms, + 2 * 3 * 150ns from MRS + * commands in pre-workaround (insignificantly small compared to total time). + * In tests this is ~7.5ms, with 10.5ms timeout, mostly because the equation + * below probably doesn't account for REF commands. This leaves rather small + * margin for error. + */ + const int big_step = 7; + const int small_step = 0; + const int num_valid_samples = 5; + int fw_rd_wr = MAX(mca->nwtr_s + 11, ps_to_nck(7500) + 3); + return 1000 + (num_valid_samples * (fw_rd_wr + 16) * + (1024/(small_step + 1) + 128/(big_step + 1)) + + 2 * (big_step + 1)/(small_step + 1)) * 24; +} + +static void write_ctr_post(int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + uint64_t bad_bits = 0; + + /* + * TODO: this just tests if workaround is needed, real workaround is not + * yet implemented. 
+ */ + for (dp = 0; dp < 5; dp++) { + bad_bits |= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0); + } + + if (!bad_bits) + return; + + /* + * Full workaround consists of: + * - enabling PDA mode (per DRAM addressing) on MC + * - reverting initial WR Vref values in MC + * - reverting WR delays saved in pre-workaround + * - clearing bad DQ bits (because this calibration step will be re-run) + * - entering PDA mode on DRAMs + * - reverting initial VREFDQ values in bad DRAM(s) + * - exiting PDA mode on DRAMs (this point has its own workaround) + * - exiting PDA mode on MC + * - finding a median of RD Vref DAC values and disabling all DQ bits except + * one known to be good (close to median) + * - rerunning main calibration, exit on success + * - if it still fails, re-enable all DQ bits (bad and good), set 1D only + * write centering and rerun again + */ + die("Write Centering post-workaround required, but not yet implemented\n"); +} + +static uint64_t coarse_wr_rd_time(mca_data_t *mca) +{ + /* + * "40 cycles for WR, 32 for RD" + * + * With number of cycles set to just the above this step times out, add time + * for 15 REF commands as set in dqs_align_turn_on_refresh(). + */ + return 40 + 32 + 15 * 512; +} + +typedef void (phy_workaround_t) (int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present); + +struct phy_step { + const char *name; + enum cal_config cfg; + phy_workaround_t *pre; + uint64_t (*time)(mca_data_t *mca); + phy_workaround_t *post; +}; + +static struct phy_step steps[] = { + { + "Write Leveling", + CAL_WR_LEVEL, + wr_level_pre, + wr_level_time, + wr_level_post, + }, + { + "Initial Pattern Write", + CAL_INITIAL_PAT_WR, + NULL, + initial_pat_wr_time, + NULL, + }, + { + "DQS alignment", + CAL_DQS_ALIGN, + NULL, + dqs_align_time, + NULL, + }, + { + "Read Clock Alignment", + CAL_RDCLK_ALIGN, + rdclk_align_pre, + rdclk_align_time, + rdclk_align_post, + }, + { + "Read Centering", + CAL_READ_CTR, + read_ctr_pre, + read_ctr_time, + read_ctr_post, + }, + { + "Write Centering", + CAL_WRITE_CTR, + write_ctr_pre, + write_ctr_time, + write_ctr_post, + }, + { + "Coarse write/read", + CAL_INITIAL_COARSE_WR | CAL_COARSE_RD, + NULL, + coarse_wr_rd_time, + NULL, + }, + +/* + // Following are performed in istep 13.12 + CAL_CUSTOM_RD + CAL_CUSTOM_WR +*/ +}; + +static void dispatch_step(struct phy_step *step, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + printk(BIOS_DEBUG, "%s starting\n", step->name); + + if (step->pre) + step->pre(mcs_i, mca_i, rp, ranks_present); + + ccs_phy_hw_step(mcs_ids[mcs_i], mca_i, rp, step->cfg, step->time(mca)); + + if (step->post) + step->post(mcs_i, mca_i, rp, ranks_present); + + dump_cal_errors(mcs_i, mca_i); + + if (mca_read(mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0) != 0) + die("%s failed, aborting\n", step->name); + + printk(BIOS_DEBUG, "%s done\n", step->name); +} + +/* Can we modify dump_cal_errors() for this? 
*/ +static int process_initial_cal_errors(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + uint64_t err = 0; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_n */ + err |= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0); + + /* Both ERROR_MASK registers were set to 0xFFFF in 13.8 */ + /* IOM0.DDRPHY_DP16_WR_VREF_ERROR0_P0_n & + * ~IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_n */ + err |= (dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0) & + ~dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0)); + + /* IOM0.DDRPHY_DP16_WR_VREF_ERROR1_P0_n & + * ~IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_n */ + err |= (dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0) & + ~dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0)); + } + + /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 */ + err |= mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0); + + if (err) + return 1; + + /* + * err == 0 at this point can be either a true success or an error of the + * calibration engine itself. Check for latter. + */ + /* IOM0.IOM_PHY0_DDRPHY_FIR_REG */ + if (read_scom_for_chiplet(id, IOM_PHY0_DDRPHY_FIR_REG) & + PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2)) { + /* + * "Clear the PHY FIR ERROR 2 bit so we don't keep failing training and + * training advance on this port" + */ + scom_and_or_for_chiplet(id, IOM_PHY0_DDRPHY_FIR_REG, + ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), + 0); + + return 1; + } + + return 0; +} + +static int can_recover(int mcs_i, int mca_i, int rp) +{ + /* + * We can recover from 1 nibble + 1 bit (or less) bad lines. Anything more + * and DIMM is beyond repair. A bad nibble is a nibble with any number of + * bad bits. If a DQS is bad (either true or complementary signal, or both), + * a whole nibble (for x4 DRAMs) or byte (x8) is considered bad. + * + * Check both DQS and DQ registers in one loop, iterating over DP16s - that + * way it is easier to sum bad bits/nibbles. + * + * See reset_clock_enable() in 13.8 or an array in process_bad_bits() in + * phy/dp16.C for mapping of DQS bits in x8 and mask bits from this register + * accordingly. + */ + int bad_nibbles = 0; + int bad_bits = 0; + int dp; + chiplet_id_t id = mcs_ids[mcs_i]; + uint8_t width = mem_data.mcs[mcs_i].mca[mca_i].dimm[rp/2].width; + + for (dp = 0; dp < 5; dp++) { + uint64_t reg; + uint64_t nibbles_mask = 0xFFFF; + /* + IOM0.DDRPHY_DP16_DQS_BIT_DISABLE_RP_P0_{0-4}: + // This calculates how many (DQS_t | DQS_c) failed - if _t and _c failed + // for the same DQS, we count it as one. + bad_dqs = bit_count((reg & 0x5500) | ((reg & 0xaa00) >> 1)) + if x8 && bad_dqs > 0: DIMM is FUBAR, return error + total_bad_nibbles += bad_dqs + // If we are already past max possible number, we might as well return now + if total_bad_nibbles > 1: DIMM is FUBAR, return error + */ + const uint64_t rp_mul = 0x0000010000000000; + reg = dp_mca_read(id, dp, mca_i, + DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0 + rp * rp_mul); + + /* One bad DQS on x8 is already bad 2 nibbles, can't recover from that. 
*/ + if (reg != 0 && width == WIDTH_x8) + return 0; + + if (reg & (PPC_BIT(48) | PPC_BIT(49))) // quad 0 + nibbles_mask &= 0x0FFF; + if (reg & (PPC_BIT(50) | PPC_BIT(51))) // quad 1 + nibbles_mask &= 0xF0FF; + if (reg & (PPC_BIT(52) | PPC_BIT(53))) // quad 2 + nibbles_mask &= 0xFF0F; + if (reg & (PPC_BIT(54) | PPC_BIT(55))) // quad 3 + nibbles_mask &= 0xFFF0; + + bad_nibbles += __builtin_popcount((reg & 0x5500) | ((reg & 0xAA00) >> 1)); + + /* + IOM0.DDRPHY_DP16_DQ_BIT_DISABLE_RP_P0_{0-4}: + nibble = {[48-51], [52-55], [56-59], [60-63]} + for each nibble: + if bit_count(nibble) > 1: total_bad_nibbles += 1 + if bit_count(nibble) == 1: total_bad_bits += 1 + // We can't have two bad bits, one of them must be treated as bad nibble + if total_bad_bits > 1: total_bad_nibbles += 1, total_bad_bits -= 1 + if total_bad_nibbles > 1: DIMM is FUBAR, return error? + */ + reg = dp_mca_read(id, dp, mca_i, + DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0 + rp * rp_mul); + + /* Exclude nibbles corresponding to a bad DQS, it won't get worse. */ + reg &= nibbles_mask; + + /* Add bits in nibbles */ + reg = ((reg & 0x1111) >> 0) + ((reg & 0x2222) >> 1) + + ((reg & 0x4444) >> 2) + ((reg & 0x8888) >> 3); + + /* + * We only care if there is 0, 1 or more bad bits. Collapse bits [0-2] + * of each nibble into [2], leave [3] unmodified (PPC bit numbering). + */ + reg = ((reg & 0x1111) >> 0) | ((reg & 0x2222) >> 0) | + ((reg & 0x4444) >> 1) | ((reg & 0x8888) >> 2); + + /* Clear bit [3] if [2] is also set. */ + reg = (reg & 0x2222) | ((reg & 0x1111) & ~((reg & 0x2222) >> 1)); + + /* Now [2] is bad nibble, [3] is exactly one bad bit */ + bad_bits += __builtin_popcount(reg & 0x1111); + if (bad_bits > 1) { + bad_nibbles += bad_bits - 1; + bad_bits = 1; + } + bad_nibbles += __builtin_popcount(reg & 0x2222); + + /* No need to test for bad single bits, condition above handles it */ + if (bad_nibbles > 1) + return 0; + } + + /* + * Now, if total_bad_nibbles is less than 2 we know that total_bad_bits is + * also less than 2, and DIMM is good enough for recovery. 
+ */ + printk(BIOS_WARNING, "MCS%d MCA%d DIMM%d has %d bad nibble(s) and %d bad " + "bit(s), but can be recovered\n", mcs_i, mca_i, rp/2, bad_nibbles, + bad_bits); + return 1; +} + +static void fir_unmask(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + + /* + * "All mcbist attentions are already special attentions" + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT0 + [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 1 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK + [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 0 //recoverable_error (0,1,0) + */ + scom_and_or_for_chiplet(id, MCBISTFIRACT0, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); + scom_and_or_for_chiplet(id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT)); + scom_and_or_for_chiplet(id, MCBISTFIRMASK, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* + MC01.PORT0.SRQ.MBACALFIR_ACTION0 + [2] MBACALFIR_MASK_REFRESH_OVERRUN = 0 + [5] MBACALFIR_MASK_DDR_CAL_TIMEOUT_ERR = 0 + [7] MBACALFIR_MASK_DDR_CAL_RESET_TIMEOUT = 0 + [9] MBACALFIR_MASK_WRQ_RRQ_HANG_ERR = 0 + [11] MBACALFIR_MASK_ASYNC_IF_ERROR = 0 + [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 + [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 0 + MC01.PORT0.SRQ.MBACALFIR_ACTION1 + [2] MBACALFIR_MASK_REFRESH_OVERRUN = 1 + [5] MBACALFIR_MASK_DDR_CAL_TIMEOUT_ERR = 1 + [7] MBACALFIR_MASK_DDR_CAL_RESET_TIMEOUT = 1 + [9] MBACALFIR_MASK_WRQ_RRQ_HANG_ERR = 1 + [11] MBACALFIR_MASK_ASYNC_IF_ERROR = 0 + [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 + [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 1 + MC01.PORT0.SRQ.MBACALFIR_MASK + [2] MBACALFIR_MASK_REFRESH_OVERRUN = 0 // recoverable_error (0,1,0) + [5] MBACALFIR_MASK_DDR_CAL_TIMEOUT_ERR = 0 // recoverable_error (0,1,0) + [7] MBACALFIR_MASK_DDR_CAL_RESET_TIMEOUT = 0 // recoverable_error (0,1,0) + [9] MBACALFIR_MASK_WRQ_RRQ_HANG_ERR = 0 // recoverable_error (0,1,0) + [11] MBACALFIR_MASK_ASYNC_IF_ERROR = 0 // checkstop (0,0,0) + [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 // checkstop (0,0,0) + [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 0 // recoverable_error (0,1,0) + */ + mca_and_or(id, mca_i, MBACALFIR_ACTION0, + ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_ASYNC_IF_ERROR) | + PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), + 0); + mca_and_or(id, mca_i, MBACALFIR_ACTION1, + ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_ASYNC_IF_ERROR) | + PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), + PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)); + mca_and_or(id, mca_i, MBACALFIR_MASK, + ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_ASYNC_IF_ERROR) | + PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), + 0); + } +} + +/* + * 13.11 
mss_draminit_training: Dram training + * + * a) p9_mss_draminit_training.C (mcbist) -- Nimbus + * b) p9c_mss_draminit_training.C (mba) -- Cumulus + * - Prior to running this procedure will apply known DQ bad bits to prevent + * them from participating in training. This information is extracted from + * the bad DQ attribute and applied to Hardware + * - Marks the calibration fail array + * - External ZQ Calibration + * - Execute initial dram calibration (7 step - handled by HW) + * - This procedure will update the bad DQ attribute for each dimm based on + * its findings + */ +void istep_13_11(void) +{ + printk(BIOS_EMERG, "starting istep 13.11\n"); + int mcs_i, mca_i, dimm, rp; + enum rank_selection ranks_present; + + report_istep(13, 11); + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + ranks_present = NO_RANKS; + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + if (mca->dimm[dimm].mranks == 2) + ranks_present |= DIMM0_ALL_RANKS << (2 * dimm); + else + ranks_present |= DIMM0_RANK0 << (2 * dimm); + + setup_and_execute_zqcal(mcs_i, mca_i, dimm); + } + + /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG0_P0 = 0 */ + mca_and_or(mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_CONFIG0_P0, 0, 0); + + /* + * > Disable port fails as it doesn't appear the MC handles initial + * > cal timeouts correctly (cal_length.) BRS, see conversation with + * > Brad Michael + MC01.PORT0.SRQ.MBA_FARB0Q = + [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 1 + */ + mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB0Q, ~0, + PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE)); + + /* + * > The following registers must be configured to the correct + * > operating environment: + * > These are reset in phy_scominit + * > Section 5.2.5.10 SEQ ODT Write Configuration {0-3} on page 422 + * > Section 5.2.6.1 WC Configuration 0 Register on page 434 + * > Section 5.2.6.2 WC Configuration 1 Register on page 436 + * > Section 5.2.6.3 WC Configuration 2 Register on page 438 + * + * It would be nice to have the documentation mentioned above or at + * least know what it is about... + */ + + clear_initial_cal_errors(mcs_i, mca_i); + dp16_reset_delay_values(mcs_i, mca_i, ranks_present); + dqs_align_turn_on_refresh(mcs_i, mca_i); + + /* + * List of calibration steps for RDIMM, in execution order: + * - ZQ calibration - calibrates DRAM output driver and on-die termination + * values (already done) + * - Write leveling - compensates for skew caused by a fly-by topology + * - Initial pattern write - not exactly a calibration, but prepares patterns + * for next steps + * - DQS align + * - RDCLK align + * - Read centering + * - Write Vref latching - not exactly a calibration, but required for next + * steps; there is no help from PHY for that but it is simple to do + * manually + * - Write centering + * - Coarse write/read + * - Custom read and/or write centering - performed in istep 13.12 + * Some of these steps have pre- or post-workarounds, or both. + * + * All of those steps (except ZQ calibration) are executed for each rank pair + * before going to the next pair. Some of them require that there is no other + * activity on the controller so parallelization may not be possible. 
+ * + * Quick reminder from set_rank_pairs() in 13.8 (RDIMM only): + * - RP0 primary - DIMM 0 rank 0 + * - RP1 primary - DIMM 0 rank 1 + * - RP2 primary - DIMM 1 rank 0 + * - RP3 primary - DIMM 1 rank 1 + */ + for (rp = 0; rp < 4; rp++) { + if (!(ranks_present & (1 << rp))) + continue; + + dump_cal_errors(mcs_i, mca_i); + + for (int i = 0; i < ARRAY_SIZE(steps); i++) + dispatch_step(&steps[i], mcs_i, mca_i, rp, ranks_present); + + if (process_initial_cal_errors(mcs_i, mca_i) && + !can_recover(mcs_i, mca_i, rp)) { + die("Calibration failed for MCS%d MCA%d DIMM%d\n", mcs_i, mca_i, rp/2); + } + } + + /* Does not apply to DD2.* */ + //workarounds::dp16::modify_calibration_results(); + } + + /* + * Hostboot just logs the errors reported earlier (i.e. more than + * 1 nibble + 1 bit of bad DQ lines) "and lets PRD deconfigure based off + * of ATTR_BAD_DQ_BITMAP". + * TODO: what is PRD? How does it "deconfigure" and what? Quick glance + * at the code: it may have something to do with undocumented 0x0501082X + * SCOM registers, there are usr/diag/prdf/*//*.rule files with + * yacc/flex files to compile them. It also may be using 'attn' + * instruction. + */ + + fir_unmask(mcs_i); + } + + printk(BIOS_EMERG, "ending istep 13.11\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 72e7103eb52..76d4411b169 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -343,6 +343,7 @@ void main(void) istep_13_8(); istep_13_9(); istep_13_10(); + istep_13_11(); /* Test if SCOM still works. Maybe should check also indirect access? */ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 580dcd7bf7617ff307914613e0291bc1f9511269 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 16 Apr 2021 15:30:19 +0200 Subject: [PATCH 037/213] soc/power9/istep_13_13.c: implementation 13.13 mss_draminit_mc: Hand off control to MC a) p9_mss_draminit_mc.C (mcbist) - Nimbus b) p9c_mss_draminit_mc.C (membuf) -Cumulus - P9 Cumulus -- Set IML complete bit in centaur - Start main refresh engine - Refresh, periodic calibration, power controls - Turn on ECC checking on memory accesses - Note at this point memory FIRs can be monitored by PRD Signed-off-by: Krystian Hebel Change-Id: Ieb485bcae3c6d00161790becdf74090659d2a838 --- src/include/cpu/power/istep_13.h | 5 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_13_13.c | 719 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 2 + 4 files changed, 727 insertions(+) create mode 100644 src/soc/ibm/power9/istep_13_13.c diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 205e51da87e..4953a6bbfcb 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -13,6 +13,10 @@ #define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) /* These should be in one of the SPD headers. */ +/* + * Note: code in 13.3 depends on width/density having values as encoded in SPD + * and below. Please do not change them. 
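+ *
+ * The address translation code in istep 13.13 (dimms_rank_config()), for
+ * instance, computes the number of DRAM row bits directly from these raw
+ * SPD values, so redefining them would silently break that arithmetic.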
+ */ #define WIDTH_x4 0 #define WIDTH_x8 1 @@ -304,3 +308,4 @@ void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from void istep_13_9(void); void istep_13_10(void); void istep_13_11(void); +void istep_13_13(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index a6b068fe88d..6c1fb213511 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -15,6 +15,7 @@ romstage-y += istep_13_8.c romstage-y += istep_13_9.c romstage-y += istep_13_10.c romstage-y += istep_13_11.c +romstage-y += istep_13_13.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c new file mode 100644 index 00000000000..266e59870ad --- /dev/null +++ b/src/soc/ibm/power9/istep_13_13.c @@ -0,0 +1,719 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include + +#include "istep_13_scom.h" + +/* + * Set up the MC port <-> DIMM address translation registers. + * + * These are not documented in specs, everything described here comes from the + * code (and comments). Depending on how you count them, there are 2 or 3 base + * configurations, and the rest is a modification of one of the bases or its + * derived forms. Each level usually adds one row bit, but sometimes it removes + * it or modifies rank bits. In most cases when it happens, the rest of bits + * must be shifted. + * + * There are two pairs of identical settings for each master/slave rank + * configurations: 4Gb x4 is always the same as 8Gb x8, and 8Gb x4 is the same + * as 16Gb x8. + * + * Base configurations are: + * - 1 rank, non-3DS 4Gb x4 and second DIMM is also 1R (not necessarily 4Gb x4) + * - special case when the other DIMM is not an 1R device (because of allowed + * DIMM mixing this can only mean tha the other slot is not populated) + * - 2 rank, non-3DS 4Gb x4 + * + * The special case uses different column, bank and bank group addressing, the + * other two cases use identical mapping. This is due to the fact that for one + * 1R DIMM there is no port address bit with index 7, which is used as C9 in + * other cases. Hostboot divides those cases as listed above, but it might make + * more sense to separate special case and use uniform logic for the rest. + * However, for two 1R DIMMs port address 29 is always assigned to D-bit (more + * about it later), because bit map fields for rows use only 3 bit for encoding, + * meaning that only port bits 0-7 can be mapped to row bits 15-17. + * + * According to code, port addresses 0-7 and 22-32 can be configured in the + * register - 19 possibilities total, encoded, so bit field in register is 5 + * bits long, except for row bitmaps, which are only 3 bits long (addresses 0-7 + * only). Column, bank and bank group addressing is always the same (DDR4 always + * has 10 column, 2 bank, 2 bank group bits), difference is in row bit mapping + * (we may or may not have bits 15, 16 and 17, those are indexed from 0 so we + * can have 15-18 row bits in total) and ranks mapping (up to 2 bits for master + * ranks, up to 3 bits for slave ranks in 3DS devices). + * + * TODO: what about 16Gb bank groups? Those should use just 1 bit, but the code + * doesn't change it. + * + * Apart from already mentioned bits there is also D-bit (D is short for DIMM). + * It is used to tell the controller which DIMM has to be accessed. To avoid + * holes in the memory map, larger DIMM must be mapped to lower addresses. 
For + * example, when we have 4GB and 8GB DIMMs: + * + * 0 4 8 12 16 ... memory space, in GB + * | DIMM X |DIMM Y | hole | ... <- this is good + * |DIMM Y | hole | DIMM X | ... <- this is bad + * + * Whether DIMM X is in DIMM0 or DIMM1 slot doesn't matter. The example is + * simplified - the addresses do not translate directly to CPU address space. + * There are multiple MCAs in the system, they are grouped together later in + * 14.5, based on the mappings calculated in 7.4. + * + * There are two pieces to configure for D-bit: + * - D_BIT_MAP - which address bit is used to decide which DIMM is used (this + * corresponds to 8GB in the example above), this is common to both DIMMs, + * - SLOTn_D_VALUE - what value should the D-bit have to access DIMMn, each DIMM + * has its own SLOTn_D_VALUE, when one DIMM has this bit set, the other one + * must have it cleared; in the (good) example above DIMM Y should have this + * bit set. When both DIMMs have the same size then only one D_VALUE must be + * set, but it doesn't matter which one. + * + * TODO: what if only one DIMM is present? Do we have to set these to something + * sane (0 and 0 should work) or is it enough that VALID bit is clear for the + * other DIMM? + * + * If bits are assigned in a proper order, we can use a constant table with + * mappings and assign values from that table to registers describing address + * bits in a sparse manner, depending on a number of rank and row bits used by + * a given DIMM. The order is based on the cost of changing individual bits on + * the DIMM side (considering data locality): + * 1. Bank group, bank and column bits are sent with every read/write command. + * It takes RL = AL + CL + PL after the read command until first DQ bits + * appear on the bus. In practice we usually don't care about write delays, + * when data is sent to the controller the CPU can already execute further + * code, it doesn't have to wait until it is actually written to DRAM. This + * is a cheap change. + * TODO: this is for DRAM, any additional delay caused by RCD, PHY or MC? + * 2. Ranks (both master and slave) are selected by CS (and Chip ID for slave + * ranks) bits, they are also sent with each command. Depending on MR4 + * settings, we may need to wait for additional tCAL (CS to Command Address + * Latency), DRAM needs some time to "wake up" before it can parse commands. + * If tCAL is not used (default in Hostboot), the cost is the same as for BG, + * BA, column bits. It doesn't matter whether master or slave ranks are + * assigned first, but Hostboot starts with slave ranks - it has 5 bits per + * bit map, so it can encode higher numbers. + * 3. Row bits - these are expensive. Row must be activated before its column + * are accessed. Each bank can have one activated row at a time. If there was + * an open row (different than the one we want to access), it must be + * precharged (it takes tRP before next activation command can be issued), + * and then the new row can be activated (after which we have to wait for + * tRCD before sending the read/write command). A row cannot be opened + * indefinitely, there is both a minimal and maximal period between ACT and + * PRE commands (tRAS), and minimums for read to precharge (tRTP), ACT to ACT + * for different banks (tRRD, with differentiation between the same and + * different bank groups) and Four Activate Window (tFAW). When row changes + * don't happen too often, we usually have to wait for tRCD and sometimes + * also tRP, on top of the previous delays. + * 4. D bit. 
Two DIMMs on a channel share all of its lines except CLK, CS, ODT + * and CKE bits. Because we don't have to change CS for a given DIMM, the + * cost is the same as 1 (assuming hardware holds CS between commands). + * However, this bit has to be assigned lastly (i.e. it has to be the most + * significant port address bit) to not introduce holes in the memory space + * for two differently sized DIMMs. + * * TODO: can we safely map it closer to LSB (at least before row bits) + * when we have two DIMMs with the same size? + * + * TODO: what about bad DQ bits? Do they impact this in any way? Probably not, + * unless a whole DIMM is disabled. + * + * Below are registers layouts reconstructed from + * import/chips/p9/common/include/p9n2_mc_scom_addresses_fld.H: + * 0x05010820 // P9N2_MCS_PORT02_MCP0XLT0, also PORT13 on +0x10 SCOM addresses + * [0] SLOT0_VALID // set if DIMM present + * [1] SLOT0_D_VALUE // set if both DIMMs present and size of DIMM1 > DIMM0 + * [2] 12GB_ENABLE // unused (maybe for 12Gb/24Gb DRAM?) + * [5] SLOT0_M0_VALID + * [6] SLOT0_M1_VALID + * [9] SLOT0_S0_VALID + * [10] SLOT0_S1_VALID + * [11] SLOT0_S2_VALID + * [12] SLOT0_B2_VALID // Hmmm... + * [13] SLOT0_ROW15_VALID + * [14] SLOT0_ROW16_VALID + * [15] SLOT0_ROW17_VALID + * [16] SLOT1_VALID // set if DIMM present + * [17] SLOT1_D_VALUE // set if both DIMMs present and size of DIMM1 <= DIMM0 + * [21] SLOT1_M0_VALID + * [22] SLOT1_M1_VALID + * [25] SLOT1_S0_VALID + * [26] SLOT1_S1_VALID + * [27] SLOT1_S2_VALID + * [28] SLOT1_B2_VALID // Hmmm... + * [29] SLOT1_ROW15_VALID + * [30] SLOT1_ROW16_VALID + * [31] SLOT1_ROW17_VALID + * [35-39] D_BIT_MAP + * [41-43] M0_BIT_MAP // 3b for M0 but 5b for M1 + * [47-51] M1_BIT_MAP + * [53-55] R17_BIT_MAP + * [57-59] R16_BIT_MAP + * [61-63] R15_BIT_MAP + * + * 0x05010821 // P9N2_MCS_PORT02_MCP0XLT1 + * [3-7] S0_BIT_MAP + * [11-15] S1_BIT_MAP + * [19-23] S2_BIT_MAP + * [35-39] COL4_BIT_MAP + * [43-47] COL5_BIT_MAP + * [51-55] COL6_BIT_MAP + * [59-63] COL7_BIT_MAP + * + * 0x05010822 // P9N2_MCS_PORT02_MCP0XLT2 + * [3-7] COL8_BIT_MAP + * [11-15] COL9_BIT_MAP + * [19-23] BANK0_BIT_MAP + * [27-31] BANK1_BIT_MAP + * [35-39] BANK2_BIT_MAP // Hmmm... + * [43-47] BANK_GROUP0_BIT_MAP + * [51-55] BANK_GROUP1_BIT_MAP + * + * All *_BIT_MAP fields above are encoded. Note that some of them are 3b long, + * those can map only PA 0 through 7. + */ + +static uint64_t dimms_rank_config(mca_data_t *mca, uint64_t xlt0, int update_d_bit) +{ + uint64_t val = 0; + int me; + int max_row_bits = 0; + + for (me = 0; me < DIMMS_PER_MCA; me++) { + if (mca->dimm[me].present) { + int other = me ^ 1; + int height = mca->dimm[me].log_ranks / mca->dimm[me].mranks; + /* + * Note: this depends on width/density having values as encoded in + * SPD and istep_13.h. Please do not change them. 
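+ *
+ * Worked example, assuming raw SPD codes (x4 = 0, x8 = 1, 4Gb = 4,
+ * 8Gb = 5): a 4Gb x4 part gives 12 + 4 - 0 = 16 row bits and an
+ * 8Gb x8 part gives 12 + 5 - 1 = 16 as well, which is why those two
+ * configurations share a translation (see the comment at the top of
+ * this file).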
+ */ + int row_bits = 12 + mca->dimm[me].density - mca->dimm[me].width; + if (row_bits > max_row_bits) + max_row_bits = row_bits; + + val |= PPC_BIT(0 + 16*me); + + /* When mixing rules are followed, bigger density = bigger size */ + if (mca->dimm[other].present && + mca->dimm[other].density > mca->dimm[me].density) + val |= PPC_BIT(1 + 16*me); + + /* M1 is used first, then M0 */ + if (mca->dimm[me].mranks > 1) + val |= PPC_BIT(6 + 16*me); + + if (mca->dimm[me].mranks > 2) + val |= PPC_BIT(5 + 16*me); + + /* Same with S2, S1, S0 */ + if (height > 1) + val |= PPC_BIT(11 + 16*me); + + if (height > 2) + val |= PPC_BIT(10 + 16*me); + + if (height > 4) + val |= PPC_BIT(9 + 16*me); + + /* Row bits */ + if (row_bits > 15) + val |= PPC_BIT(13 + 16*me); + + if (row_bits > 16) + val |= PPC_BIT(14 + 16*me); + + if (row_bits > 17) + val |= PPC_BIT(15 + 16*me); + } + } + + /* When both DIMMs are present and have the same sizes, D_VALUE was not set. */ + if (mca->dimm[0].density == mca->dimm[1].density) + val |= PPC_BIT(1); + + val |= xlt0; + + if (update_d_bit) { + /* + * In order for this to work: + * - old D-bit must have the value it would have for 18 row bits + * - changes happen only in PA0-PA7 range + * - D-bit is always numerically the lowest assigned PA index + * + * These assumptions are always true except for non-3DS 1R DIMMs, but + * those do not set update_d_bit. + */ + uint64_t dbit = xlt0 & PPC_BITMASK(35, 39); + dbit += ((uint64_t)(18 - max_row_bits)) << 39; + val = (val & ~PPC_BITMASK(35, 39)) | dbit; + } + + return val; +} + +enum pa_encoding { + NA = 0, + PA0 = 0, + PA1, + PA2, + PA3, + PA4, + PA5, + PA6, + PA7, + PA22 = 8, // Defined but not used by Hostboot + PA23, + PA24, + PA25, + PA26, + PA27, + PA28, + PA29, + PA30, + PA31, + PA32 // 0b10010 +}; + +/* + * M - master aka package ranks + * H - height (1 for non-3DS devices) + */ +enum mc_rank_config { + /* SDP, DDP, QDP DIMMs */ + M1H1_ONE_DIMM, + M1H1_TWO_DIMMS, + M2H1, + M4H1, + /* TODO: add 3DS DIMMs when needed */ +}; + +#define MCP0XLT0(D, M0, M1, R17, R16, R15) \ +(PPC_SHIFT((D), 39) | PPC_SHIFT((M0), 43) | PPC_SHIFT((M1), 51) | \ + PPC_SHIFT((R17), 55) | PPC_SHIFT((R16), 59) | PPC_SHIFT((R15), 63)) + +#define MCP0XLT1(S0, S1, S2, COL4, COL5, COL6, COL7) \ +(PPC_SHIFT((S0), 7) | PPC_SHIFT((S1), 15) | PPC_SHIFT((S2), 23) | \ + PPC_SHIFT((COL4), 39) | PPC_SHIFT((COL5), 47) | PPC_SHIFT((COL6), 55) | \ + PPC_SHIFT((COL7), 63)) + +#define MCP0XLT2(COL8, COL9, BA0, BA1, BG0, BG1) \ +(PPC_SHIFT((COL8), 7) | PPC_SHIFT((COL9), 15) | PPC_SHIFT((BA0), 23) | \ + PPC_SHIFT((BA1), 31) | PPC_SHIFT((BG0), 47) | PPC_SHIFT((BG1), 55)) + +/* + * xlt_tables[rank_configuration][reg_index] + * + * rank_configuration: see enum above + * reg_index: MCP0XLT0, MCP0XLT1, MCP0XLT2 + * + * Width and density do not matter directly, only through number of row bits. + * Different widths cannot be mixed on the same port, but densities can, and + * consequently row bits can, too. Assume that all bitmaps can be configured, + * as long as 'valid' bits are set properly. + * + * For anything else than 1R non-3DS devices D-bit is patched by code. Initial + * value in tables below is PA that would be assigned for DRAM with 18 row bits. + * When two DIMMs with different densities are installed in one port, use number + * of row bits of a bigger DIMM. 
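+ *
+ * For example: a 2R port whose bigger DIMM has 16 row bits starts from the
+ * PA3 D-bit entry in the 2R row below, and dimms_rank_config() moves it
+ * down by 18 - 16 = 2 encodings, i.e. to PA5.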
+ */ +static const uint64_t xlt_tables[][3] = { + /* 1R, one DIMM under port */ + { + MCP0XLT0(NA, NA, NA, PA5, PA6, PA7), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA29, PA30, PA31, PA32), + }, + /* 1R, both DIMMs under port */ + { + MCP0XLT0(PA29, NA, NA, PA4, PA5, PA6), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA7, PA30, PA31, PA32), + }, + /* 2R */ + { + MCP0XLT0(PA3, NA, PA29, PA4, PA5, PA6), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA7, PA30, PA31, PA32), + }, + /* 4R */ + { + MCP0XLT0(PA2, PA6, PA29, PA3, PA4, PA5), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA7, PA30, PA31, PA32), + }, + /* TODO: 3DS */ +}; + +static void setup_xlate_map(int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + const int mca_mul = 0x10; + /* + * Mixing rules: + * - rank configurations are the same for both DIMMs + * - fields for unpopulated DIMMs are initialized to all 0 + * + * With those two assumptions values can be logically ORed to produce a + * common value without conditionals. + * + * Note: mixing rules do not specify explicitly if two 3DS of different + * heights can be mixed. In that case log_ranks/mranks could have bad value, + * but it would still be different than 1. + */ + int n_dimms = (mca->dimm[0].present && mca->dimm[1].present) ? 2 : 1; + int mranks = mca->dimm[0].mranks | mca->dimm[1].mranks; + int log_ranks = mca->dimm[0].log_ranks | mca->dimm[1].log_ranks; + int is_3DS = (log_ranks / mranks) != 1; + int update_d = log_ranks != 1; // Logically the same as '(mranks != 1) | is_3DS' + chiplet_id_t nest = mcs_to_nest[id]; + enum mc_rank_config cfg = M1H1_ONE_DIMM; + + if (is_3DS) { + die("3DS DIMMs not yet supported\n"); + } else { + switch (mranks) { + case 1: + /* One DIMM is default */ + if (n_dimms == 2) + cfg = M1H1_TWO_DIMMS; + break; + case 2: + cfg = M2H1; + break; + case 4: + cfg = M4H1; + break; + default: + /* Should be impossible to reach */ + die("Bad number of package ranks: %d\n", mranks); + break; + } + } + + /* MCS_PORT02_MCP0XLT0 (?) */ + write_scom_for_chiplet(nest, 0x05010820 + mca_i * mca_mul, + dimms_rank_config(mca, xlt_tables[cfg][0], update_d)); + + /* MCS_PORT02_MCP0XLT1 (?) */ + write_scom_for_chiplet(nest, 0x05010821 + mca_i * mca_mul, + xlt_tables[cfg][1]); + + /* MCS_PORT02_MCP0XLT2 (?) */ + write_scom_for_chiplet(nest, 0x05010822 + mca_i * mca_mul, + xlt_tables[cfg][2]); + +} + +static void enable_pm(int mcs_i, int mca_i) +{ + const int ATTR_MSS_MRW_POWER_CONTROL_REQUESTED = 0; + /* + * Enable Power management based off of mrw_power_control_requested + * "Before enabling power controls, run the parity disable workaround" + * This is a loop over MCAs inside a loop over MCAs. Is this necessary? 
+ * for each functional MCA + * // > The workaround is needed iff + * // > 1) greater than or equal to DD2 + * // > 2) self time refresh is enabled + * // > 3) the DIMM's are not TSV // TSV = 3DS + * // > 4) a 4R DIMM is present + * // TODO: skip for now, we do not have any 4R, non-3DS sticks to test it + * str_non_tsv_parity() + */ + + /* MC01.PORT0.SRQ.PC.MBARPC0Q + if ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == // default 0 == off + ENUM_ATTR_MSS_MRW_IDLE_POWER_CONTROL_REQUESTED_POWER_DOWN || // 1 + ENUM_ATTR_MSS_MRW_IDLE_POWER_CONTROL_REQUESTED_PD_AND_STR_CLK || // 2 + ENUM_ATTR_MSS_MRW_IDLE_POWER_CONTROL_REQUESTED_PD_AND_STR_CLK_STOP: // 3 + [2] MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE = 1 + */ + if (ATTR_MSS_MRW_POWER_CONTROL_REQUESTED) + mca_and_or(mcs_ids[mcs_i], mca_i, MBARPC0Q, ~0, + PPC_BIT(MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE)); +} + +static void apply_mark_store(int mcs_i, int mca_i) +{ + /* + * FIXME: where do the values written to MVPD come from? They are all 0s in + * SCOM dump, which makes this function no-op. + */ + const uint64_t ATTR_MSS_MVPD_FWMS[8] = {0}; + int i; + + for (i = 0; i < ARRAY_SIZE(ATTR_MSS_MVPD_FWMS); i++) { + if (ATTR_MSS_MVPD_FWMS[i] == 0) + continue; + + /* MC01.PORT0.ECC64.SCOM.FWMS{0-7} + [all] 0 + [0-22] from ATTR_MSS_MVPD_FWMS + */ + mca_and_or(mcs_ids[mcs_i], mca_i, FWMS0 + i, + 0, ATTR_MSS_MVPD_FWMS[i]); + } +} + +static void fir_unmask(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + + /* + * "All mcbist attentions are already special attentions" + * + * These include broadcast_out_of_sync() workaround. + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT0 + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 + [10] MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE = 1 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 1 + [10] MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 // recoverable_error (0,1,0) + [10] MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE = 0 // attention (1,0,0) + */ + /* + * TODO: check if this works with bootblock in SEEPROM too. We don't have + * interrupt handlers set up in that case. 
+ */ + scom_and_or_for_chiplet(id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)); + scom_and_or_for_chiplet(id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC)); + scom_and_or_for_chiplet(id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* From broadcast_out_of_sync() workaround: + MC01.PORT0.ECC64.SCOM.RECR + [26] MBSECCQ_ENABLE_UE_NOISE_WINDOW = 0 + */ + mca_and_or(id, mca_i, RECR, ~PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW), 0); + + /* + MC01.PORT0.ECC64.SCOM.ACTION0 + [33] FIR_MAINTENANCE_AUE = 0 + [36] FIR_MAINTENANCE_IAUE = 0 + [41] FIR_SCOM_PARITY_CLASS_STATUS = 0 + [42] FIR_SCOM_PARITY_CLASS_RECOVERABLE = 0 + [43] FIR_SCOM_PARITY_CLASS_UNRECOVERABLE = 0 + [44] FIR_ECC_CORRECTOR_INTERNAL_PARITY_ERROR = 0 + [45] FIR_WRITE_RMW_CE = 0 + [46] FIR_WRITE_RMW_UE = 0 + [48] FIR_WDF_OVERRUN_ERROR_0 = 0 + [49] FIR_WDF_OVERRUN_ERROR_1 = 0 + [50] FIR_WDF_SCOM_SEQUENCE_ERROR = 0 + [51] FIR_WDF_STATE_MACHINE_ERROR = 0 + [52] FIR_WDF_MISC_REGISTER_PARITY_ERROR = 0 + [53] FIR_WRT_SCOM_SEQUENCE_ERROR = 0 + [54] FIR_WRT_MISC_REGISTER_PARITY_ERROR = 0 + [55] FIR_ECC_GENERATOR_INTERNAL_PARITY_ERROR = 0 + [56] FIR_READ_BUFFER_OVERFLOW_ERROR = 0 + [57] FIR_WDF_ASYNC_INTERFACE_ERROR = 0 + [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 + [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 + MC01.PORT0.ECC64.SCOM.ACTION1 + [33] FIR_MAINTENANCE_AUE = 1 + [36] FIR_MAINTENANCE_IAUE = 1 + [41] FIR_SCOM_PARITY_CLASS_STATUS = 1 + [42] FIR_SCOM_PARITY_CLASS_RECOVERABLE = 1 + [43] FIR_SCOM_PARITY_CLASS_UNRECOVERABLE = 0 + [44] FIR_ECC_CORRECTOR_INTERNAL_PARITY_ERROR = 0 + [45] FIR_WRITE_RMW_CE = 1 + [46] FIR_WRITE_RMW_UE = 0 + [48] FIR_WDF_OVERRUN_ERROR_0 = 0 + [49] FIR_WDF_OVERRUN_ERROR_1 = 0 + [50] FIR_WDF_SCOM_SEQUENCE_ERROR = 0 + [51] FIR_WDF_STATE_MACHINE_ERROR = 0 + [52] FIR_WDF_MISC_REGISTER_PARITY_ERROR = 0 + [53] FIR_WRT_SCOM_SEQUENCE_ERROR = 0 + [54] FIR_WRT_MISC_REGISTER_PARITY_ERROR = 0 + [55] FIR_ECC_GENERATOR_INTERNAL_PARITY_ERROR = 0 + [56] FIR_READ_BUFFER_OVERFLOW_ERROR = 0 + [57] FIR_WDF_ASYNC_INTERFACE_ERROR = 0 + [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 + [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 + MC01.PORT0.ECC64.SCOM.MASK + [33] FIR_MAINTENANCE_AUE = 0 // recoverable_error (0,1,0) + [36] FIR_MAINTENANCE_IAUE = 0 // recoverable_error (0,1,0) + [41] FIR_SCOM_PARITY_CLASS_STATUS = 0 // recoverable_error (0,1,0) + [42] FIR_SCOM_PARITY_CLASS_RECOVERABLE = 0 // recoverable_error (0,1,0) + [43] FIR_SCOM_PARITY_CLASS_UNRECOVERABLE = 0 // checkstop (0,0,0) + [44] FIR_ECC_CORRECTOR_INTERNAL_PARITY_ERROR = 0 // checkstop (0,0,0) + [45] FIR_WRITE_RMW_CE = 0 // recoverable_error (0,1,0) + [46] FIR_WRITE_RMW_UE = 0 // checkstop (0,0,0) + [48] FIR_WDF_OVERRUN_ERROR_0 = 0 // checkstop (0,0,0) + [49] FIR_WDF_OVERRUN_ERROR_1 = 0 // checkstop (0,0,0) + [50] FIR_WDF_SCOM_SEQUENCE_ERROR = 0 // checkstop (0,0,0) + [51] FIR_WDF_STATE_MACHINE_ERROR = 0 // checkstop (0,0,0) + [52] FIR_WDF_MISC_REGISTER_PARITY_ERROR = 0 // checkstop (0,0,0) + [53] FIR_WRT_SCOM_SEQUENCE_ERROR = 0 // checkstop (0,0,0) + [54] FIR_WRT_MISC_REGISTER_PARITY_ERROR = 0 // checkstop (0,0,0) + [55] 
FIR_ECC_GENERATOR_INTERNAL_PARITY_ERROR = 0 // checkstop (0,0,0) + [56] FIR_READ_BUFFER_OVERFLOW_ERROR = 0 // checkstop (0,0,0) + [57] FIR_WDF_ASYNC_INTERFACE_ERROR = 0 // checkstop (0,0,0) + [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 // checkstop (0,0,0) + [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 // checkstop (0,0,0) + */ + mca_and_or(id, mca_i, ECC_FIR_ACTION0, + ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), + 0); + + mca_and_or(id, mca_i, ECC_FIR_ACTION1, + ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), + PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BIT(ECC_FIR_SCOM_PARITY_CLASS_STATUS) | + PPC_BIT(ECC_FIR_SCOM_PARITY_CLASS_RECOVERABLE) | + PPC_BIT(ECC_FIR_WRITE_RMW_CE)); + + mca_and_or(id, mca_i, ECC_FIR_MASK, + ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), + 0); + } +} + +/* + * 13.13 mss_draminit_mc: Hand off control to MC + * + * a) p9_mss_draminit_mc.C (mcbist) - Nimbus + * b) p9c_mss_draminit_mc.C (membuf) -Cumulus + * - P9 Cumulus -- Set IML complete bit in centaur + * - Start main refresh engine + * - Refresh, periodic calibration, power controls + * - Turn on ECC checking on memory accesses + * - Note at this point memory FIRs can be monitored by PRD + */ +void istep_13_13(void) +{ + printk(BIOS_EMERG, "starting istep 13.13\n"); + int mcs_i, mca_i; + + report_istep(13, 13); + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + /* No need to initialize a non-functional MCS */ + if (!mem_data.mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + setup_xlate_map(mcs_i, mca_i); + + /* Set up read pointer delay */ + /* MC01.PORT0.ECC64.SCOM.RECR + [6-8] MBSECCQ_READ_POINTER_DELAY = 1 // code sets this to "ON", but this field is numerical value + // Not sure where this attr comes from or what is its default value. Assume !0 = 1 -> TCE correction enabled + [27] MBSECCQ_ENABLE_TCE_CORRECTION = !ATTR_MNFG_FLAGS.MNFG_REPAIRS_DISABLED_ATTR + */ + mca_and_or(id, mca_i, RECR, + ~(PPC_BITMASK(6, 8) | PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)), + PPC_SHIFT(1, MBSECCQ_READ_POINTER_DELAY) | + PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)); + + enable_pm(mcs_i, mca_i); + + /* + * This was already done after draminit_cke_helper, search for "Per + * conversation with Shelton and Steve..." in 13.10, "however that + * might be a work-around so we set it low here kind of like + * belt-and-suspenders. BRS" + * + * MC01.PORT0.SRQ.MBA_FARB5Q + * [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 0 + */ + mca_and_or(id, mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL), 0); + + /* MC01.PORT0.SRQ.MBA_FARB0Q + [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 0 + */ + mca_and_or(id, mca_i, MBA_FARB0Q, + ~PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE), 0); + + /* + * "MC work around for OE bug (seen in periodics + PHY) + * Turn on output-enable always on. Shelton tells me they'll fix + * for DD2" + * + * This is also surrounded by '#ifndef REMOVE_FOR_DD2', but this + * name is nowhere else to be found. If this still have to be used, + * we may as well merge it with the previous write. 
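+ * A merged call could look like this (sketch, not taken from Hostboot):
+ *
+ *   mca_and_or(id, mca_i, MBA_FARB0Q,
+ *              ~PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE),
+ *              PPC_BIT(MBA_FARB0Q_CFG_OE_ALWAYS_ON));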
+ * + * MC01.PORT0.SRQ.MBA_FARB0Q + * [55] MBA_FARB0Q_CFG_OE_ALWAYS_ON = 1 + */ + mca_and_or(id, mca_i, MBA_FARB0Q, ~0, + PPC_BIT(MBA_FARB0Q_CFG_OE_ALWAYS_ON)); + + /* MC01.PORT0.SRQ.PC.MBAREF0Q + [0] MBAREF0Q_CFG_REFRESH_ENABLE = 1 + */ + mca_and_or(id, mca_i, MBAREF0Q, ~0, PPC_BIT(MBAREF0Q_CFG_REFRESH_ENABLE)); + + /* Enable periodic calibration */ + /* + * A large chunk of function enable_periodic_cal() in Hostboot is + * disabled, protected by #ifdef TODO_166433_PERIODICS, which also + * isn't mentioned anywhere else. This is what is left: + MC01.PORT0.SRQ.MBA_CAL3Q + [all] 0 + [0-1] MBA_CAL3Q_CFG_INTERNAL_ZQ_TB = 0x3 + [2-9] MBA_CAL3Q_CFG_INTERNAL_ZQ_LENGTH = 0xff + [10-11] MBA_CAL3Q_CFG_EXTERNAL_ZQ_TB = 0x3 + [12-19] MBA_CAL3Q_CFG_EXTERNAL_ZQ_LENGTH = 0xff + [20-21] MBA_CAL3Q_CFG_RDCLK_SYSCLK_TB = 0x3 + [22-29] MBA_CAL3Q_CFG_RDCLK_SYSCLK_LENGTH = 0xff + [30-31] MBA_CAL3Q_CFG_DQS_ALIGNMENT_TB = 0x3 + [32-39] MBA_CAL3Q_CFG_DQS_ALIGNMENT_LENGTH = 0xff + [40-41] MBA_CAL3Q_CFG_MPR_READEYE_TB = 0x3 + [42-49] MBA_CAL3Q_CFG_MPR_READEYE_LENGTH = 0xff + [50-51] MBA_CAL3Q_CFG_ALL_PERIODIC_TB = 0x3 + [52-59] MBA_CAL3Q_CFG_ALL_PERIODIC_LENGTH = 0xff + // Or simpler: 0xfffffffffffffff0 + */ + mca_and_or(id, mca_i, MBA_CAL3Q, 0, PPC_BITMASK(0, 59)); + + /* Enable read ECC + MC01.PORT0.ECC64.SCOM.RECR // 0x07010A0A + [0] MBSECCQ_DISABLE_MEMORY_ECC_CHECK_CORRECT = 0 + [1] MBSECCQ_DISABLE_MEMORY_ECC_CORRECT = 0 + [29] MBSECCQ_USE_ADDRESS_HASH = 1 + // Docs don't describe the encoding, code suggests this inverts data, toggles checks + [30-31] MBSECCQ_DATA_INVERSION = 3 + */ + mca_and_or(id, mca_i, RECR, + ~(PPC_BITMASK(0, 1) | PPC_BITMASK(29, 31)), + PPC_BIT(MBSECCQ_USE_ADDRESS_HASH) | + PPC_SHIFT(3, MBSECCQ_DATA_INVERSION)); + + apply_mark_store(mcs_i, mca_i); + } + + fir_unmask(mcs_i); + } + + printk(BIOS_EMERG, "ending istep 13.13\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 76d4411b169..2229c198017 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -344,6 +344,8 @@ void main(void) istep_13_9(); istep_13_10(); istep_13_11(); + report_istep(13, 12); // optional, not yet implemented + istep_13_13(); /* Test if SCOM still works. Maybe should check also indirect access? */ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From d9abcc431a9ed455aefd919f4525f998cdf94629 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 26 Apr 2021 14:47:57 +0200 Subject: [PATCH 038/213] soc/power9/istep_14_1.c: implementation 14.1 mss_memdiag: Mainstore Pattern Testing - The following step documents the generalities of this step - In FW PRD will control mem diags via interrupts. It doesn't use mss_memdiags.C directly but the HWP subroutines - In cronus it will execute mss_memdiags.C directly b) p9_mss_memdiags.C (mcbist)--Nimbus c) p9_mss_memdiags.C (mba) -- Cumulus - Prior to running this procedure will apply known DQ bad bits to prevent them from participating in training. 
This information is extracted from the bad DQ attribute and applied to Hardware - Nimbus uses the mcbist engine - Still supports superfast read/init/scrub - Cumulus/Centaur uses the scrub engine - Modes: - Minimal: Write-only with 0's - Standard: Write of 0's followed by a Read - Medium: Write-followed by Read, 4 patterns, last of 0's - Max: Write-followed by Read, 9 patterns, last of 0's - Run on the host - This procedure will update the bad DQ attribute for each dimm based on its findings - At the end of this procedure sets FIR masks correctly for runtime analysis - All subsequent repairs are considered runtime issues Signed-off-by: Krystian Hebel Change-Id: Iad3b8bf3384da001d008f2daa1f71958afce8329 --- src/include/cpu/power/istep_14.h | 8 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_14_1.c | 755 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 3 + 4 files changed, 767 insertions(+) create mode 100644 src/include/cpu/power/istep_14.h create mode 100644 src/soc/ibm/power9/istep_14_1.c diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h new file mode 100644 index 00000000000..709592b3c00 --- /dev/null +++ b/src/include/cpu/power/istep_14.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP_14_H +#define CPU_PPC64_ISTEP_14_H + +void istep_14_1(void); + +#endif /* CPU_PPC64_ISTEP_14_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 6c1fb213511..922ae5372e4 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -16,6 +16,7 @@ romstage-y += istep_13_9.c romstage-y += istep_13_10.c romstage-y += istep_13_11.c romstage-y += istep_13_13.c +romstage-y += istep_14_1.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c new file mode 100644 index 00000000000..575ddb0a024 --- /dev/null +++ b/src/soc/ibm/power9/istep_14_1.c @@ -0,0 +1,755 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "istep_13_scom.h" + +static void fir_unmask(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + const int is_dd20 = pvr_revision() == SPR_PVR_REV(2, 0); + /* Bits in other registers (act0, mask) are already set properly. + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 // checkstop (0,0,0) + */ + scom_and_or_for_chiplet(id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC), 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + uint64_t val; + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* From broadcast_out_of_sync() workaround: + MC01.PORT0.ECC64.SCOM.RECR + [26] MBSECCQ_ENABLE_UE_NOISE_WINDOW = 1 + */ + mca_and_or(id, mca_i, RECR, ~0, PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW)); + + /* + * Read out the wr_done and rd_tag delays and find min and set the RCD + * Protect Time to this value. + * + * MC01.PORT0.SRQ.MBA_DSM0Q + * [24-29] MBA_DSM0Q_CFG_WRDONE_DLY + * [36-41] MBA_DSM0Q_CFG_RDTAG_DLY + * + * MC01.PORT0.SRQ.MBA_FARB0Q + * [48-53] MBA_FARB0Q_CFG_RCD_PROTECTION_TIME + */ + val = mca_read(id, mca_i, MBA_DSM0Q); + val = MIN((val & PPC_BITMASK(24, 29)) >> 29, + (val & PPC_BITMASK(36, 41)) >> 41); + mca_and_or(id, mca_i, MBA_FARB0Q, + ~PPC_BITMASK(48, 53), + PPC_SHIFT(val, MBA_FARB0Q_CFG_RCD_PROTECTION_TIME)); + + /* + * Due to hardware defect with DD2.0 certain errors are not handled + * properly. 
As a result, these firs are marked as checkstop for DD2 to + * avoid any mishandling. + * + * MCA_FIR_MAINLINE_RCD stays masked on newer platforms. ACT0 and ACT1 + * for RCD are not touched by Hostboot, but for simplicity set those to + * 0 always - they are "don't care" if masked, and 0 is their reset + * value. Affected bits are annotated with asterisk below - whatever is + * mentioned below is changed to checkstop for those bits. + * + * This also affects Cumulus DD1.0, but the rest of the code is for + * Nimbus only so don't bother checking for it. + * + * MC01.PORT0.ECC64.SCOM.ACTION0 + * [13] FIR_MAINLINE_AUE = 0 + * [14] FIR_MAINLINE_UE = 0 + * [15] FIR_MAINLINE_RCD = 0 + * [16] FIR_MAINLINE_IAUE = 0 + * [17] FIR_MAINLINE_IUE = 0 + * [37] MCA_FIR_MAINTENANCE_IUE = 0 + * MC01.PORT0.ECC64.SCOM.ACTION1 + * [13] FIR_MAINLINE_AUE = 0 + * [14] FIR_MAINLINE_UE = 1* + * [15] FIR_MAINLINE_RCD = 0 + * [16] FIR_MAINLINE_IAUE = 0 + * [17] FIR_MAINLINE_IUE = 1 + * [33] MCA_FIR_MAINTENANCE_AUE = 0 // Hostboot clears AUE and IAUE without + * [36] MCA_FIR_MAINTENANCE_IAUE = 0 // unmasking, with no explanation why + * [37] MCA_FIR_MAINTENANCE_IUE = 1 + * MC01.PORT0.ECC64.SCOM.MASK + * [13] FIR_MAINLINE_AUE = 0 // checkstop (0,0,0) + * [14] FIR_MAINLINE_UE = 0 // *recoverable_error (0,1,0) + * [15] FIR_MAINLINE_RCD = 1* // *masked (X,X,1) + * [16] FIR_MAINLINE_IAUE = 0 // checkstop (0,0,0) + * [17] FIR_MAINLINE_IUE = 0 // recoverable_error (0,1,0) + * [37] MCA_FIR_MAINTENANCE_IUE = 0 // recoverable_error (0,1,0) + */ + mca_and_or(id, mca_i, ECC_FIR_ACTION0, + ~(PPC_BITMASK(13, 17) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)), + 0); + mca_and_or(id, mca_i, ECC_FIR_ACTION1, + ~(PPC_BITMASK(13, 17) | PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BITMASK(36, 37)), + (is_dd20 ? 0 : PPC_BIT(FIR_MAINLINE_UE)) | + PPC_BIT(FIR_MAINLINE_IUE) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)); + mca_and_or(id, mca_i, ECC_FIR_MASK, + ~(PPC_BITMASK(13, 17) | + PPC_BIT(MCA_FIR_MAINTENANCE_IUE)), + (is_dd20 ? 0 : PPC_BIT(FIR_MAINLINE_RCD))); + + /* + * WARNING: checkstop is encoded differently (1,0,0). **Do not** try to + * make a function/macro that pretends to be universal. + * + * MC01.PORT0.SRQ.MBACALFIR_ACTION0 + * [13] MBACALFIR_PORT_FAIL = 0* + * MC01.PORT0.SRQ.MBACALFIR_ACTION1 + * [13] MBACALFIR_PORT_FAIL = 1* + * MC01.PORT0.SRQ.MBACALFIR_MASK + * [13] MBACALFIR_PORT_FAIL = 0 // *recoverable_error (0,1,0) + */ + mca_and_or(id, mca_i, MBACALFIR_ACTION0, + ~PPC_BIT(13), + (is_dd20 ? PPC_BIT(MBACALFIR_PORT_FAIL) : 0)); + mca_and_or(id, mca_i, MBACALFIR_ACTION1, + ~PPC_BIT(MBACALFIR_PORT_FAIL), + (is_dd20 ? 0 : PPC_BIT(MBACALFIR_PORT_FAIL))); + mca_and_or(id, mca_i, MBACALFIR_MASK, ~PPC_BIT(MBACALFIR_PORT_FAIL), 0); + + /* + * Enable port fail and RCD recovery + * TODO: check if we can set this together with RCD protection time. + * + * MC01.PORT0.SRQ.MBA_FARB0Q + * [54] MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY = 0 + * [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 0 + */ + mca_and_or(id, mca_i, MBA_FARB0Q, + ~(PPC_BIT(MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY) | + PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE)), 0); + } +} + +static void set_fifo_mode(int mcs_i, int fifo) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + /* Make sure fifo is either 0 or 1, nothing else. 
*/ + fifo = !!fifo; + + /* MC01.PORT0.SRQ.MBA_RRQ0Q + * [6] MBA_RRQ0Q_CFG_RRQ_FIFO_MODE = fifo + * MC01.PORT0.SRQ.MBA_WRQ0Q + * [5] MBA_WRQ0Q_CFG_WRQ_FIFO_MODE = fifo + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + mca_and_or(id, mca_i, MBA_RRQ0Q, ~PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE), + PPC_SHIFT(fifo, MBA_RRQ0Q_CFG_RRQ_FIFO_MODE)); + mca_and_or(id, mca_i, MBA_WRQ0Q, ~PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE), + PPC_SHIFT(fifo, MBA_WRQ0Q_CFG_WRQ_FIFO_MODE)); + } +} + +static void load_maint_pattern(int mcs_i, const uint64_t pat[16]) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* + * Different than in Hostboot: + * - Hostboot writes data for second 64B line but doesn't use 128B mode so + * first 64B are repeated + * - Hostboot also manually sets the address for the second half even + * though it would be autoincremented to proper value + * - Hostboot writes 4 pairs of 64b chunks of data, we write 8 uint64_t's + */ + int mca_i; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + int i; + if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + continue; + + /* MC01.PORT0.ECC64.SCOM.AACR + * [1-9] AACR_ADDRESS = 0b111110000 = 0x1F0 + * [10] AACR_AUTOINC = 1 + * [11] AACR_ECCGEN = 1 + */ + mca_write(id, mca_i, AACR, + PPC_SHIFT(0x1F0, AACR_ADDRESS) | PPC_BIT(AACR_AUTOINC) | + PPC_BIT(AACR_ECCGEN)); + + for (i = 0; i < 16; i++) { + /* MC01.PORT0.ECC64.SCOM.AADR - data */ + mca_write(id, mca_i, AADR, pat[i]); + /* + * Although ECC is generated by hardware, we still have to write to + * this register to have address incremented. Comments say that + * the data also wouldn't be written to RMW buffer without it. + */ + /* MC01.PORT0.ECC64.SCOM.AAER - ECC */ + mca_write(id, mca_i, AAER, 0); + } + } +} + +static const uint64_t patterns[][16] = { + {0}, + {0x596f75207265616c, 0x6c792073686f756c, 0x646e27742072656c, 0x79206f6e206d656d, + 0x6f7279206265696e, 0x67207a65726f6564, 0x206279206669726d, 0x776172652e2e2e00}, + {0x4e6576657220756e, 0x646572657374696d, 0x6174652074686520, 0x62616e6477696474, + 0x68206f6620612073, 0x746174696f6e2077, 0x61676f6e2066756c, 0x6c206f6620746170, + 0x657320687572746c, 0x696e6720646f776e, 0x2074686520686967, 0x687761792e202d20, + 0x416e647265772053, 0x2e2054616e656e62, 0x61756d0a00000000}, +}; + +/* + * Layout of start/end address registers: + * [0-2] unused by HW, in Hostboot: + * [0-1] port select + * [2] dimm select + * [3-4] mrank (0 to 1) + * [5-7] srank (0 to 2) + * [8-25] row (0 to 17) + * [26-32] col (3 to 9) + * [33-35] bank (0 to 2) + * [36-37] bank_group (0 to 1) + * + * In maintenance mode MCBIST automatically skips unused bits, they can safely + * be set to 0 for start and 1 for end addresses. + * + * Hostboot sets 3 ranges: + * - 0 to end of first DIMM (aka first DIMM) + * - 0 to end of address space (aka everything) + * - first address on first DIMM on last port to end of address space (aka last + * port) + * + * Assuming that the documentation is correct, when spare bits are not taken + * into account, all ranges result in [start of DIMM, end of DIMM] range. Maybe + * they are set only for debug purposes? + * + * Trying to use just one range instead. + */ +/* + * NOTE: Except for setting address ranges, Hostboot repeats all of this for + * every subtest, even though most of the registers don't change in between. 
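+ *
+ * (Here everything is programmed once by init_mcbist() below; the single
+ * address range it sets up is start = 0 and end = PPC_BITMASK(3, 37), i.e.
+ * every address bit set, with the bits a given configuration does not use
+ * skipped by maintenance address mode.)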
+ */ +static void init_mcbist(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + uint64_t val; + int i; + + /* Fill address ranges */ + /* MC01.MCBIST.MBA_SCOMFIR.MCBSA0Q + * [0-37] MCBSA0Q_CFG_START_ADDR_0 + */ + write_scom_for_chiplet(id, MCBSA0Q, 0); + /* MC01.MCBIST.MBA_SCOMFIR.MCBEA0Q + * [0-37] MCBSA0Q_CFG_END_ADDR_0 + */ + write_scom_for_chiplet(id, MCBEA0Q, PPC_BITMASK(3, 37)); + + /* Hostboot stops MCBIST engine, die() if it is already started instead */ + /* TODO: check all bits (MCBIST was ever started) or just "in progress"? */ + /* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLSTATQ + * [0] MCB_CNTLSTATQ_MCB_IP + * [1] MCB_CNTLSTATQ_MCB_DONE + * [2] MCB_CNTLSTATQ_MCB_FAIL + */ + if ((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) != 0) + die("MCBIST started already (%#16.16llx), this shouldn't happen\n", val); + + /* + * Clear MCBIST errors: + * - MCBIST Error Status Register - MC01.MCBIST.MBA_SCOMFIR.MCBSTATQ + * - MBS Memory Scrub/Read Error Count Register 0 - MC01.MCBIST.MBA_SCOMFIR.MBSEC0Q + * - MBS Memory Scrub/Read Error Count Register 1 - MC01.MCBIST.MBA_SCOMFIR.MBSEC1Q + * - MCBIST Fault Isolation Register - MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRQ + */ + write_scom_for_chiplet(id, MCBSTATQ, 0); + write_scom_for_chiplet(id, MBSEC0Q, 0); + write_scom_for_chiplet(id, MBSEC1Q, 0); + write_scom_for_chiplet(id, MCBISTFIR, 0); + + /* Enable FIFO mode */ + set_fifo_mode(mcs_i, 1); + + /* + * Hostboot clears address maps, but they are not used in maintenance + * address mode. Also, it sets MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES + * for scrub commands, but not for patterns. I have no idea what possible + * implications this has, but without 3DS DIMMs I have no way of testing it. + * For now I'll set this bit even for patterns so MCBAGRAQ register can be + * written only once instead of each subtest. + */ + /* MC01.MCBIST.MBA_SCOMFIR.MCBAGRAQ + * [all] 0 + * [10] MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN = 1 + * [12] MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES = 1 + */ + write_scom_for_chiplet(id, MCBAGRAQ, + PPC_BIT(MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN) | + PPC_BIT(MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES)); + + /* + * Configure MCBIST + * + * Enabling MCBCFGQ_CFG_MCB_LEN64 speeds up operations on x4 devices (~70ms + * per pass on 16GB DIMM), but slows down x8 (~90ms per pass on 8GB DIMM). + * As the difference for x8 is bigger than x4, keep it disabled. + * + * MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE = 0b10 sets MCBIST to pause on error + * after current rank finishes. This is set for scrub only, but as we don't + * expect to see any errors, it should be OK to set it for pattern writing + * as well. + * + * MCBCFGQ_CFG_ENABLE_HOST_ATTN is set in Hostboot, but we don't have + * interrupt handlers so keep it disabled. + */ + /* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ + * [all] 0 + * [56] MCBCFGQ_CFG_MCB_LEN64 = see above + * [57-58] MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE = 0 for patterns, 0b10 for scrub + * [63] MCBCFGQ_CFG_ENABLE_HOST_ATTN = see above + */ + write_scom_for_chiplet(id, MCBCFGQ, + PPC_SHIFT(0b10, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE)); + + /* + * This sets up memory parameters, mostly gaps between commands. For as fast + * as possible, gaps of 0 are configured here. + */ + /* MC01.MCBIST.MBA_SCOMFIR.MCBPARMQ */ + write_scom_for_chiplet(id, MCBPARMQ, 0); + + /* + * Steps done from this point should be moved out of this function, they + * should be done with different patterns before each subtest. Right now + * only a pattern of all zeroes is used. 
+ */ + + /* Data pattern: 8 data registers + 1 ECC register */ + /* TODO: different patterns can be used */ + for (i = 0; i < 9; i++) { + write_scom_for_chiplet(id, MCBFD0Q + i, patterns[0][i]); + } + + /* TODO: random seeds */ + + /* + * Maintenance data pattern + * + * Difference between this and data pattern above is that this is used for + * ALTER and the one above for WRITE. ALTER can write 128 different bytes, + * while WRITE repeats a sequence of 64B twice. ALTER is ~3-4 times slower. + */ + load_maint_pattern(mcs_i, patterns[0]); + + /* + * Load the data rotate config and seeds + * + * Patterns (fixed) used by Hostboot are self-repeating and either all ones, + * all zeroes or alternating bits (0x55/0xAA). Only in the last case + * rotating data seeds can make a difference, but it is the same as + * inverting. + */ + /* MC01.MCBIST.MBA_SCOMFIR.MCBDRCRQ */ + write_scom_for_chiplet(id, MCBDRCRQ, 0); + /* MC01.MCBIST.MBA_SCOMFIR.MCBDRSRQ */ + write_scom_for_chiplet(id, MCBDRSRQ, 0); + + /* + * The following step may be done just once, as long as the same set of + * options work for both pattern writing and scrubbing, which so far seems + * to be the case. + */ + + /* + * Load MCBIST threshold register + * + * This one has slightly different settings for patterns than for scrub, but + * some of those that are explicitly set for scrubbing are always implicitly + * enabled for nonscrub. The only meaningful difference is that some + * uncorrectable errors pause MCBIST on scrub, but not on pattern writes. + * Lets set them to pause even for pattern writes here and hope for the + * best. + */ + /* MC01.MCBIST.MBA_SCOMFIR.MBSTRQ + * [0-31] those are thresholds for different errors, all of them are set to + * all 1's, meaning that pausing on threshold is disabled + * [34] MBSTRQ_CFG_PAUSE_ON_MPE = 1 for scrub, else 0 (Mark Placed Error) + * [35] MBSTRQ_CFG_PAUSE_ON_UE = 1 for scrub, else 0 (Uncorrectable Error) + * [37] MBSTRQ_CFG_PAUSE_ON_AUE = 1 for scrub, else 0 (Array UE) + * [55] MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE \ 1 for scrub, nonscrub + * [56] MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE } counts all NCE + * [57] MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE / + */ + write_scom_for_chiplet(id, MBSTRQ, PPC_BITMASK(0, 31) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_MPE) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_UE) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_AUE) | + PPC_BIT(MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE) | + PPC_BIT(MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE) | + PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); +} + + +/***************************MCBIST***********************************/ +#define MCBIST_TESTS_PER_REG 4 +/* 32 total, but last register is under non-consecutive SCOM address */ +#define MAX_MCBIST_TESTS 28 +#define MAX_MCBIST_TEST_REGS (MAX_MCBIST_TESTS / MCBIST_TESTS_PER_REG) + +/* + * TODO: if we were to run both MCBISTs in parallel, we would need separate + * instances of those... 
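+ *
+ * A minimal sketch of what that could look like (hypothetical, not what
+ * this code does today):
+ *
+ *   struct mcbist_prog {
+ *       uint64_t memreg_cache;
+ *       unsigned tests;
+ *   };
+ *   static struct mcbist_prog mcbist_progs[MCS_PER_PROC];
+ *
+ * with the helpers below taking a struct mcbist_prog * instead of the
+ * globals.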
+ */ +static uint64_t mcbist_memreg_cache; +static unsigned tests; + +#define ECC_MODE 0x0008 +#define DONE 0x0004 + +enum data_mode +{ + // MCBIST test data modes + FIXED_DATA_MODE = 0x0000, + RAND_FWD_MODE = 0x0010, + RAND_REV_MODE = 0x0020, + RAND_FWD_MAINT = 0x0030, + RAND_REV_MAINT = 0x0040, + DATA_EQ_ADDR = 0x0050, + ROTATE_LEFT_MODE = 0x0060, + ROTATE_RIGHT_MODE = 0x0070, +}; + +enum op_type +{ + WRITE = 0x0000, // fast, with no concurrent traffic + READ = 0x1000, // fast, with no concurrent traffic + READ_WRITE = 0x2000, + WRITE_READ = 0x3000, + READ_WRITE_READ = 0x4000, + READ_WRITE_WRITE = 0x5000, + RAND_SEQ = 0x6000, + READ_READ_WRITE = 0x8000, + SCRUB_RRWR = 0x9000, + STEER_RW = 0xA000, + ALTER = 0xB000, // (W) + DISPLAY = 0xC000, // (R, slow) + CCS_EXECUTE = 0xF000, + + // if bits 9:11 (Data Mode bits) = 000 (bits 4:8 used to specify which subtest to go to) + // Refresh only cmd if bits 9:11 (Data Mode bits) /= 000 + GOTO_SUBTEST_N = 0x7000, +}; + +static void commit_mcbist_memreg_cache(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int reg = (tests - 1) / MCBIST_TESTS_PER_REG; + + if (reg < 0) + die("commit_mcbist_memreg_cache() called without adding tests first!\n"); + + if (reg >= MAX_MCBIST_TEST_REGS) + die("Too many MCBIST instructions added\n"); + + /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ + write_scom_for_chiplet(id, MCBMR0Q + reg, mcbist_memreg_cache); + mcbist_memreg_cache = 0; +} + +static void add_mcbist_test(int mcs_i, uint16_t test) +{ + int test_i = tests % MCBIST_TESTS_PER_REG; + if (test_i == 0 && tests != 0) + commit_mcbist_memreg_cache(mcs_i); + + /* This assumes cache is properly cleared. */ + mcbist_memreg_cache |= PPC_SHIFT(test, test_i*16 + 15); + tests++; +} + +/* + * ECC Scrubbing - theory + * + * RAM cannot hold the data indefinitely. It uses capacitors to hold the bits, + * which are constantly being drawn by leaks. To counteract this, memory has to + * be periodically refreshed, which recharges the capacitors. However, sometimes + * this happens too late, when state of capacitor has already changed (either + * electric charge was depleted, or capacitor gained additional potential from + * outside - rowhammer, radiation) up to the point where it passes the threshold + * and 0 becomes 1 or vice versa. Refresh command in that case would only make + * "borderline 1" into "strong 1", so it won't be able to fix the problem. This + * is where ECC comes in. + * + * ECC is limited in number of changed bits it can fix and detect. Because of + * that it is important that ECC is checked and possible errors are corrected + * before too many bits have flipped, and corrected values are written back to + * RAM. This is done by hardware, without software's interaction, but it can be + * informed that ECC error has happened (machine check exception). + * + * ECC is checked every time data in RAM is accessed. To check every part of RAM + * even when CPU doesn't need to read it, memory controller does the accesses in + * the background. This is called ECC scrubbing. + * + * Note that it is enough for MC to just send read commands. When everything is + * correct, data is still written back to DRAM because reading operation is + * destructive - capacitors are discharged when read and have to be charged + * again. This happens internally in DRAM, there is no need to send that data + * through the memory bus when DRAM already has it. If there was an error, MC + * automatically sends corrected data to be written. + * + * ECC scrubbing happens between RAM and MC. 
CPU doesn't participate in this + * process, but it may be slowed down on memory intensive operations because + * some of the bandwidth is used for scrubbing. + * + * TL;DR: ECC scrub is read operation with discarded results. + */ +static void add_scrub(int mcs_i, int port_dimm) +{ + uint16_t test = READ | ECC_MODE | (port_dimm << 9); + add_mcbist_test(mcs_i, test); +} + +static void add_fixed_pattern_write(int mcs_i, int port_dimm) +{ + /* Use ALTER instead of WRITE to use maintenance pattern. ALTER is slow. */ + uint16_t test = WRITE | FIXED_DATA_MODE | ECC_MODE | (port_dimm << 9); + add_mcbist_test(mcs_i, test); +} + +/* +static void add_random_pattern_write(int port_dimm) +{ + uint16_t test = WRITE | RAND_FWD_MAINT | ECC_MODE | (port_dimm << 9); + add_mcbist_test(test); +} +*/ + +/* TODO: calculate initial delays and timeouts */ +static void mcbist_execute(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* This is index of last instruction, not the new one. */ + int test_i = (tests - 1) % MCBIST_TESTS_PER_REG; + uint64_t val; + + /* + * Nothing to do. Note that status register won't report "done", or will + * report state of previous program instead. According to docs this bits + * are writable, do we want to set them to simplify things? + * + * Another possibility would be to start MCBIST with single no-op test (goto + * with DONE bit set), but this may unnecessarily make things slower. + */ + if (tests == 0) + return; + + /* Check if in progress */ + /* TODO: we could force it to stop, but dying will help with debugging */ + if ((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) + die("MCBIST in progress already (%#16.16llx), this shouldn't happen\n", val); + + /* + * Contrary to CCS, we don't add no-op instruction here. DONE bit has to be + * set for instruction that is already present. Perhaps DONE is poor name, + * is tells that MCBIST should stop after this test, but this is how it is + * named in the documentation. + */ + mcbist_memreg_cache |= PPC_BIT(13 + test_i*16); + commit_mcbist_memreg_cache(mcs_i); + + /* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ + * [0] MCB_CNTLQ_MCB_START + */ + scom_and_or_for_chiplet(id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START)); + + /* Wait for MCBIST to start. Test for IP and DONE, it may finish early. */ + if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & + (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) { + /* + * TODO: how long do we want to wait? Hostboot uses 10*100us polling, + * but so far it seems to always be already started on the first read. + */ + udelay(1); + if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & + (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) + die("MCBIST failed (%#16.16llx) to start twice\n", val); + + /* Check if this is needed. Do not move before test, it impacts delay! */ + printk(BIOS_INFO, "MCBIST started after delay\n"); + } + + tests = 0; +} + +/* + * FIXME: 0x07012300[10] MCBIST_PROGRAM_COMPLETE should be checked instead. It + * gets set when MCBIST is paused, while 0x070123DC[0] IP stays on in that case. + * This may become a problem for 3DS DIMMs. 
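+ *
+ * One way to check that bit (untested sketch, to be combined with the
+ * logic below):
+ *
+ *   if (read_scom_for_chiplet(id, MCBISTFIR) &
+ *       PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE))
+ *       return 1;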
+ */ +static int mcbist_is_done(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + uint64_t val = val = read_scom_for_chiplet(id, MCB_CNTLSTATQ); + + /* Still in progress */ + if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) + return 0; + + /* Not sure if DONE and FAIL can be set at the same time, check FAIL first */ + if ((val & PPC_BIT(MCB_CNTLSTATQ_MCB_FAIL)) || val == 0) + die("MCBIST error (%#16.16llx)\n"); + + /* Finished */ + if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_DONE)) + return 1; + + /* Is it even possible to get here? */ + return 0; +} + +/***************************MCBIST end*******************************/ + +/* + * 14.1 mss_memdiag: Mainstore Pattern Testing + * + * - The following step documents the generalities of this step + * - In FW PRD will control mem diags via interrupts. It doesn't use + * mss_memdiags.C directly but the HWP subroutines + * - In cronus it will execute mss_memdiags.C directly + * b) p9_mss_memdiags.C (mcbist)--Nimbus + * c) p9_mss_memdiags.C (mba) -- Cumulus + * - Prior to running this procedure will apply known DQ bad bits to prevent + * them from participating in training. This information is extracted from + * the bad DQ attribute and applied to Hardware + * - Nimbus uses the mcbist engine + * - Still supports superfast read/init/scrub + * - Cumulus/Centaur uses the scrub engine + * - Modes: + * - Minimal: Write-only with 0's + * - Standard: Write of 0's followed by a Read + * - Medium: Write-followed by Read, 4 patterns, last of 0's + * - Max: Write-followed by Read, 9 patterns, last of 0's + * - Run on the host + * - This procedure will update the bad DQ attribute for each dimm based on + * its findings + * - At the end of this procedure sets FIR masks correctly for runtime + * analysis + * - All subsequent repairs are considered runtime issues + */ +void istep_14_1(void) +{ + int mcs_i, mca_i; + printk(BIOS_EMERG, "starting istep 14.1\n"); + report_istep(14, 1); + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + /* + * FIXME: add testing for chipkill + * + * Testing touches bad DQ registers. This step also configures MC to + * deal with bad nibbles/DQs - see can_recover() in 13.11. It repeats, + * to some extent, training done in 13.12 which is TODO. Following the + * assumptions made in previous isteps, skip this for now. + */ + init_mcbist(mcs_i); + + /* + * Add subtests. + * + * At the very minimum one pattern write is required, otherwise RAM will + * have random data, which most likely will throw unrecoverable errors + * because ECC is also random. + * + * Scrubbing may throw errors when address mapping is wrong even when + * maintenance pattern write can succeed for the same configuration. + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + int dimm; + if (!mca->functional) + continue; + + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + add_fixed_pattern_write(mcs_i, mca_i*2 + dimm); + /* + * Hostboot uses separate program for scrub due to different + * pausing conditions. Having it in the same program seems to + * be working. + */ + add_scrub(mcs_i, mca_i*2 + dimm); + } + } + + /* + * TODO: it writes whole RAM, this will take loooooong time. We can + * easily start second MCBIST while this is running. This would get more + * complicated for more patterns, but it still should be doable without + * interrupts reporting completion. 
+ * + * Also, under right circumstances*, it should be possible to use + * broadcast mode for writing to all DIMMs simultaneously. + * + * *) Proper circumstances are: + * - every port has the same number of DIMMs (or no DIMMs at all) + * - every DIMM has the same: + * - rank configuration + * - number of row and column bits + * - width (and density, but this is implied by previous + * requirements) + * - module family (but we don't support anything but RDIMM anyway) + */ + mcbist_execute(mcs_i); + } + + long total_time = 0; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + /* + * When there is no other activity on the bus, this should take roughly + * (total RAM size under MCS / transfer rate) * number of subtests. + * + * TODO: for the second MCS we should account for the time the first MCS + * took to finish it's tasks. If the second MCS has less RAM (counted in + * address bits used) it is possible that it finishes before the first + * one does. In that case the amount of time required for second MCS + * would be lost. Maybe we could get fancy and in wait_us() check for + * (mcbist_is_done(0) || mcbist_is_done(1)) instead? Maybe even unmask + * FIRs and set FIFO mode off inside mcbist_is_done()? + */ + long time = wait_us(1000*1000*60, (udelay(1), mcbist_is_done(mcs_i))); + + /* TODO: dump error/status registers on failure */ + if (!time) + die("MCBIST%d times out (%#16.16llx)\n", mcs_i, + read_scom_for_chiplet(mcs_ids[mcs_i], MCB_CNTLSTATQ)); + + total_time += time; + printk(BIOS_ERR, "MCBIST%d took %ld us\n", mcs_i, total_time); + + /* Unmask mainline FIRs. */ + fir_unmask(mcs_i); + + /* Turn off FIFO mode to improve performance. */ + set_fifo_mode(mcs_i, 0); + } + + printk(BIOS_EMERG, "ending istep 14.1\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 2229c198017..715f93a015e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -347,6 +348,8 @@ void main(void) report_istep(13, 12); // optional, not yet implemented istep_13_13(); + istep_14_1(); + /* Test if SCOM still works. Maybe should check also indirect access? */ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From c34b08c96df94201ef6e11513160a2c69fb431d4 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 11 May 2021 14:19:17 +0200 Subject: [PATCH 039/213] soc/power9/istep_14_1.c: add option to skip initial ECC scrubbing Enabling this option will skip initial ECC scrubbing that is normally done right after memory is written with an initial pattern. This is not the same as clearing memory after init, ECC memory always has to be filled with any value in order to make ECC correct. Selecting this option almost halves the time needed for memory initialization (istep 14.1). Signed-off-by: Krystian Hebel Change-Id: I1919c8f2219df6eba836d67338d623149c4acc58 --- src/soc/ibm/power9/Kconfig | 13 ++++++++++++- src/soc/ibm/power9/istep_14_1.c | 3 ++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/Kconfig b/src/soc/ibm/power9/Kconfig index a94c41d606c..bb12528bd70 100644 --- a/src/soc/ibm/power9/Kconfig +++ b/src/soc/ibm/power9/Kconfig @@ -5,5 +5,16 @@ config CPU_IBM_POWER9 Talos II platform. 
if CPU_IBM_POWER9 - # nothing here yet + +config SKIP_INITIAL_ECC_SCRUB + bool "Skip initial ECC scrubbing" + default n + help + Enabling this option will skip initial ECC scrubbing that is normally + done right after memory is written with an initial pattern. This is + not the same as clearing memory after init, ECC memory always has to + be filled with any value in order to make ECC correct. + Selecting this option almost halves the time needed for memory + initialization (istep 14.1). + endif diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index 575ddb0a024..94735731900 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -691,7 +691,8 @@ void istep_14_1(void) * pausing conditions. Having it in the same program seems to * be working. */ - add_scrub(mcs_i, mca_i*2 + dimm); + if (!CONFIG(SKIP_INITIAL_ECC_SCRUB)) + add_scrub(mcs_i, mca_i*2 + dimm); } } From 7f447f5cbb1d1fe36fad9fc65c83327deac0ab8d Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 5 May 2021 09:05:41 +0200 Subject: [PATCH 040/213] soc/power9/istep_14_5.c: implementation 14.5 proc_setup_bars: Setup Memory BARs a) p9_mss_setup_bars.C (proc chip) -- Nimbus b) p9c_mss_setup_bars.C (proc chip) -- Cumulus - Same HWP interface for both Nimbus and Cumulus, input target is TARGET_TYPE_PROC_CHIP; HWP is to figure out if target is a Nimbus (MCS) or Cumulus (MI) internally. - Prior to setting the memory bars on each processor chip, this procedure needs to set the centaur security protection bit - TCM_CHIP_PROTECTION_EN_DC is SCOM Addr 0x03030000 - TCN_CHIP_PROTECTION_EN_DC is SCOM Addr 0x02030000 - Both must be set to protect Nest and Mem domains - Based on system memory map - Each MCS has its mirroring and non mirrored BARs - Set the correct checkerboard configs. 
Note that chip flushes to checkerboard - need to disable memory bar on slave otherwise base flush values will ack all memory accesses c) p9_setup_bars.C - Sets up Powerbus/MCD, L3 BARs on running core - Other cores are setup via winkle images - Setup dSMP and PCIe Bars - Setup PCIe outbound BARS (doing stores/loads from host core) - Addresses that PCIE responds to on powerbus (PCI init 1-7) - Informing PCIe of the memory map (inbound) - PCI Init 8-15 - Set up Powerbus Epsilon settings - Code is still running out of L3 cache - Use this procedure to setup runtime epsilon values - Must be done before memory is viable Signed-off-by: Krystian Hebel Change-Id: I6ebf16b171d0be0daf0776b2bbf6e8fff4ed77ce --- src/include/cpu/power/istep_14.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_14_5.c | 338 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 341 insertions(+) create mode 100644 src/soc/ibm/power9/istep_14_5.c diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index 709592b3c00..5b79655e4a7 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -4,5 +4,6 @@ #define CPU_PPC64_ISTEP_14_H void istep_14_1(void); +void istep_14_5(void); #endif /* CPU_PPC64_ISTEP_14_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 922ae5372e4..1ccea084d93 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -17,6 +17,7 @@ romstage-y += istep_13_10.c romstage-y += istep_13_11.c romstage-y += istep_13_13.c romstage-y += istep_14_1.c +romstage-y += istep_14_5.c romstage-y += i2c.c romstage-y += ccs.c ramstage-y += chip.c diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c new file mode 100644 index 00000000000..8909988dda6 --- /dev/null +++ b/src/soc/ibm/power9/istep_14_5.c @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * Reset memory controller configuration written by SBE. + * Close the MCS acker before enabling the real memory bars. + * + * Some undocumented registers, again. The registers use a stride I haven't seen + * before (0x80), not sure if those are MCSs (including those not present on P9), + * magic MCAs or something totally different. Hostboot writes to all possible + * registers, regardless of how many ports/slots are populated. + * + * All register and field names come from code and comments only, except for the + * first one. + */ +static void revert_mc_hb_dcbz_config(void) +{ + int mcs_i, i; + uint64_t val; + const uint64_t mul = 0x80; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + /* + * Bit for MCS2/3 is documented, but for MCS0/1 it is "unused". Use what + * Hostboot uses - bit 10 for MCS0/1 and bit 9 for MCS2/3. + */ + /* TP.TCNx.Nx.CPLT_CTRL1, x = {1,3} */ + val = read_scom_for_chiplet(nest, NEST_CPLT_CTRL1); + if ((mcs_i == 0 && val & PPC_BIT(10)) || + (mcs_i == 1 && val & PPC_BIT(9))) + continue; + + for (i = 0; i < 2; i++) { + /* MCFGP -- mark BAR invalid & reset grouping configuration fields + MCS_n_MCFGP // undocumented, 0x0501080A, 0x0501088A, 0x0301080A, 0x0301088A for MCS{0-3} + [0] VALID = 0 + [1-4] MC_CHANNELS_PER_GROUP = 0 + [5-7] CHANNEL_0_GROUP_MEMBER_IDENTIFICATION = 0 // CHANNEL_1_GROUP_MEMBER_IDENTIFICATION not cleared? 
+ [13-23] GROUP_SIZE = 0 + */ + scom_and_or_for_chiplet(nest, 0x0501080A + i * mul, + ~(PPC_BITMASK(0, 7) | PPC_BITMASK(13, 23)), + 0); + + /* MCMODE1 -- enable speculation, cmd bypass, fp command bypass + MCS_n_MCMODE1 // undocumented, 0x05010812, 0x05010892, 0x03010812, 0x03010892 + [32] DISABLE_ALL_SPEC_OPS = 0 + [33-51] DISABLE_SPEC_OP = 0x40 // bit 45 (called DCBF_BIT in code) set because of HW414958 + [54-60] DISABLE_COMMAND_BYPASS = 0 + [61] DISABLE_FP_COMMAND_BYPASS = 0 + */ + scom_and_or_for_chiplet(nest, 0x05010812 + i * mul, + ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), + PPC_SHIFT(0x40, 51)); + + /* MCS_MCPERF1 -- enable fast path + MCS_n_MCPERF1 // undocumented, 0x05010810, 0x05010890, 0x03010810, 0x03010890 + [0] DISABLE_FASTPATH = 0 + */ + scom_and_or_for_chiplet(nest, 0x05010810 + i * mul, + ~PPC_BIT(0), + 0); + + /* Re-mask MCFIR. We want to ensure all MCSs are masked until the + * BARs are opened later during IPL. + MCS_n_MCFIRMASK_OR // undocumented, 0x05010805, 0x05010885, 0x03010805, 0x03010885 + [all] 1 + */ + write_scom_for_chiplet(nest, 0x05010805 + i * mul, ~0); + } + } +} + +/* + * TODO: right now, every port is a separate group. This is easier to code, but + * will impact performance due to no interleaving. + * + * Even though documentation (POWER9 Processor User's Manual) says that only the + * total amount of memory behind an MCU has to be the same, Hostboot doesn't + * group 1Rx4 with 2Rx8 (both have 16GB), at least if they are on the different + * sides of CPU. Case when they are on the same side was not tested yet. + * + * If that means MCAs from different sides cannot be grouped, groups bigger than + * 2 ports are not possible, at least for Talos. + * + * TODO2: note that this groups _ports_, not _DIMMs_. One implication is that + * total amount of memory doesn't have to be a power of 2 (different densities). + * Group sizes written to the register however are based on log2 of size. This + * means that either there will be a hole or some RAM won't be mapped. We do not + * have a way of testing it right now, all our DIMMs have 8Gb density. + */ +struct mc_group { + /* Multiple MCAs can be in one group, but not the other way around. */ + uint8_t port_mask; + /* Encoded, 4GB = 0, 8GB = 1, 16GB = 3, 32GB = 7 ... */ + uint8_t group_size; +}; + +/* Without proper documentation it's hard to tell if this is correct. */ +/* The following array is MCS_MCFGP for MCA0 and MCS_MCFGPM for MCA1: + * MCS_MCFGP // undocumented, 0x0501080A + * [all] 0 + * [0] VALID + * [1-4] MC_CHANNELS_PER_GROUP (*) + * [5-7] CHANNEL_0_GROUP_MEMBER_IDENTIFICATION (*) + * [8-10] CHANNEL_1_GROUP_MEMBER_IDENTIFICATION (*) + * [13-23] GROUP_SIZE + * [24-47] GROUP_BASE_ADDRESS + * + * MCS_MCFGPM // undocumented, 0x0501080C + * [all] 0 + * [0] VALID + * [13-23] GROUP_SIZE + * [24-47] GROUP_BASE_ADDRESS + * + * Fields marked with (*) are used only when there is more than 1 MCA in a group. + */ +static uint64_t mcfgp_regs[MCS_PER_PROC][MCA_PER_MCS]; + +/* Encodes size and keeps groups[] sorted. */ +static void add_group(struct mc_group groups[MCA_PER_PROC], int size, uint8_t mask) +{ + int i; + /* + * Size calculations are correct for size that is a power of 2. I have no + * idea what is the proper thing to do if it isn't. 
+ */ + struct mc_group in = {mask, (size - 1) >> 2}; + + if (size & (size - 1)) + die("Size of group %#2.2x (%d GB) is not a power of 2\n", mask, size); + + for (i = 0; i < MCA_PER_PROC; i++) { + struct mc_group tmp = groups[i]; + + if (tmp.group_size < in.group_size) { + groups[i] = in; + /* Shift the rest of elements */ + in = tmp; + } + + /* Current element was empty */ + if (tmp.port_mask == 0) + break; + } + + if (in.port_mask != 0) + die("Tried to add more groups than possible\n"); +} + +/* TODO: make groups with > 1 MCA possible */ +static void fill_groups(void) +{ + int mcs_i, mca_i, i; + struct mc_group groups[MCA_PER_PROC] = {0}; + /* This is in 4GB units, as expected by registers. */ + uint32_t cur_ba = 0; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data.mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* + * Use the same format as in Hostboot, in case there can be more + * than 2 MCAs per MCS. + * mask = (MCS0/MCA0, MCS0/MCA1, 0, 0, MCS1/MCA0, MCS1/MCA1, 0, 0) + */ + uint8_t mask = PPC_BIT(mcs_i * 4 + mca_i) >> 56; + /* Non-present DIMM will have a size of 0. */ + add_group(groups, mca->dimm[0].size_gb + mca->dimm[1].size_gb, mask); + } + } + + /* Now that all the groups are sorted by size, we can set base addresses. */ + for (i = 0; i < MCA_PER_PROC; i++) { + uint8_t mask = groups[i].port_mask; + if (mask == 0) + break; + + /* A reminder for whoever implements this in add_group() but not here. */ + if (mask & (mask - 1)) + die("Multiple MCs in a group are not supported yet\n"); + + /* + * Get MCS and MCA from mask, we expect bigger groups in the future. No + * else-ifs, bigger groups must set multiple registers (though that is + * not enough, there are also IDs to be set in MCS_MCFGP). + */ + if (mask & 0x80) { + /* MCS = 0, MCA = 0 */ + mcfgp_regs[0][0] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | + PPC_SHIFT(cur_ba, 47); + } + if (mask & 0x40) { + /* MCS = 0, MCA = 1 */ + mcfgp_regs[0][1] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | + PPC_SHIFT(cur_ba, 47); + } + if (mask & 0x08) { + /* MCS = 1, MCA = 0 */ + mcfgp_regs[1][0] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | + PPC_SHIFT(cur_ba, 47); + } + if (mask & 0x04) { + /* MCS = 1, MCA = 1 */ + mcfgp_regs[1][1] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | + PPC_SHIFT(cur_ba, 47); + } + + cur_ba += groups[i].group_size + 1; + } + /* + * This would be a good place to check if we passed the start of PCIe MMIO + * range (2TB). In that case we probably should configure this memory hole + * somehow (MCFGPA?). + */ +} + +/* + * This function is different than all previous FIR unmasking. It doesn't touch + * Action0 register. It also doesn't modify Action1, it just writes the value + * discarding the old one. As these registers are not documented, I can't even + * tell whether it sets checkstop, recoverable error or something else. + */ +static void fir_unmask(int mcs_i) +{ + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + /* Stride discovered by trial and error due to lack of documentation. 
*/ + uint64_t mul = 0x80; + + /* MCS_MCFIRACT1 // undocumented, 0x05010807 + [all] 0 + [0] MC_INTERNAL_RECOVERABLE_ERROR = 1 + [8] COMMAND_LIST_TIMEOUT = 1 + */ + write_scom_for_chiplet(nest, 0x05010807 + mcs_i * mul, + PPC_BIT(0) | PPC_BIT(8)); + + /* MCS_MCFIRMASK (AND) // undocumented, 0x05010804 + [all] 1 + [0] MC_INTERNAL_RECOVERABLE_ERROR = 0 + [1] MC_INTERNAL_NONRECOVERABLE_ERROR = 0 + [2] POWERBUS_PROTOCOL_ERROR = 0 + [4] MULTIPLE_BAR = 0 + [5] INVALID_ADDRESS = 0 + [8] COMMAND_LIST_TIMEOUT = 0 + */ + write_scom_for_chiplet(nest, 0x05010804 + mcs_i * mul, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | + PPC_BIT(4) | PPC_BIT(5) | PPC_BIT(8))); +} + +static void mcd_fir_mask(void) +{ + /* These are set always for N1 chiplet only. */ + write_scom_for_chiplet(N1_CHIPLET_ID, MCD1_FIR_MASK_REG, ~0); + write_scom_for_chiplet(N1_CHIPLET_ID, MCD0_FIR_MASK_REG, ~0); +} + +/* + * 14.5 proc_setup_bars: Setup Memory BARs + * + * a) p9_mss_setup_bars.C (proc chip) -- Nimbus + * b) p9c_mss_setup_bars.C (proc chip) -- Cumulus + * - Same HWP interface for both Nimbus and Cumulus, input target is + * TARGET_TYPE_PROC_CHIP; HWP is to figure out if target is a Nimbus (MCS) + * or Cumulus (MI) internally. + * - Prior to setting the memory bars on each processor chip, this procedure + * needs to set the centaur security protection bit + * - TCM_CHIP_PROTECTION_EN_DC is SCOM Addr 0x03030000 + * - TCN_CHIP_PROTECTION_EN_DC is SCOM Addr 0x02030000 + * - Both must be set to protect Nest and Mem domains + * - Based on system memory map + * - Each MCS has its mirroring and non mirrored BARs + * - Set the correct checkerboard configs. Note that chip flushes to + * checkerboard + * - need to disable memory bar on slave otherwise base flush values will + * ack all memory accesses + * c) p9_setup_bars.C + * - Sets up Powerbus/MCD, L3 BARs on running core + * - Other cores are setup via winkle images + * - Setup dSMP and PCIe Bars + * - Setup PCIe outbound BARS (doing stores/loads from host core) + * - Addresses that PCIE responds to on powerbus (PCI init 1-7) + * - Informing PCIe of the memory map (inbound) + * - PCI Init 8-15 + * - Set up Powerbus Epsilon settings + * - Code is still running out of L3 cache + * - Use this procedure to setup runtime epsilon values + * - Must be done before memory is viable + */ +void istep_14_5(void) +{ + int mcs_i; + printk(BIOS_EMERG, "starting istep 14.5\n"); + report_istep(14, 5); + + /* Start MCS reset */ + revert_mc_hb_dcbz_config(); + + fill_groups(); + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + if (!mem_data.mcs[mcs_i].functional) + continue; + + fir_unmask(mcs_i); + + /* + * More undocumented registers. First two are described before + * 'mcfgp_regs', last two are for setting up memory hole and SMF, they + * are unused now. + */ + write_scom_for_chiplet(nest, 0x0501080A, mcfgp_regs[mcs_i][0]); + write_scom_for_chiplet(nest, 0x0501080C, mcfgp_regs[mcs_i][1]); + write_scom_for_chiplet(nest, 0x0501080B, 0); + write_scom_for_chiplet(nest, 0x0501080D, 0); + } + + mcd_fir_mask(); + + printk(BIOS_EMERG, "ending istep 14.5\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 715f93a015e..43693ebe0ba 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -349,6 +349,7 @@ void main(void) istep_13_13(); istep_14_1(); + istep_14_5(); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); From 4de777b7def12cd05418776ab19ee72636cf0d14 Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Mon, 26 Apr 2021 14:00:17 +0200 Subject: [PATCH 041/213] soc/power9/istep_14_2.c: add istep code implementation 14.2 mss_thermal_init: Initialize the thermal sensor a) mss_thermal_init.C - Cumulus/Centaur only - Called on Centaur target, - NOTE: On Nimbus OCC has to directly read the thermals via the I2C Masters (shared with Host code) - Use lock HW and FW algorithm between OCC, Hostboot/OPAL/PHYP - Setup and configure I2C thermal sensor on dimms - Configure and start centaur thermal cache - Configure and start the OCC cache - Disable safe mode throttles - Will cause memory to go to runtime emergency throttles - When OCC starts polling OCC cache will revert to runtime settings b) p9_throttle_sync.C - Must be issued on all P9s, can only be issued after ALL centaurs on given p9 have thermal init complete (can also loop at the end of all centaurs) - Same HWP interface for both Nimbus and Cumulus, input target is TARGET_TYPE_PROC_CHIP; HWP is to figure out if target is a Nimbus (MCS) or Cumulus (MI) internally. - Triggers sync command from MCS to actually load the throttle values into the MBA/MCA Signed-off-by: Igor Bagnucki Change-Id: Ib9d95b8058c93e8fb987c9114bbe8d59a0d46f61 --- src/include/cpu/power/istep_14.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_14_2.c | 62 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 65 insertions(+) create mode 100644 src/soc/ibm/power9/istep_14_2.c diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index 5b79655e4a7..5b0b778799b 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -4,6 +4,7 @@ #define CPU_PPC64_ISTEP_14_H void istep_14_1(void); +void istep_14_2(void); void istep_14_5(void); #endif /* CPU_PPC64_ISTEP_14_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 1ccea084d93..fb5998a1196 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -17,6 +17,7 @@ romstage-y += istep_13_10.c romstage-y += istep_13_11.c romstage-y += istep_13_13.c romstage-y += istep_14_1.c +romstage-y += istep_14_2.c romstage-y += istep_14_5.c romstage-y += i2c.c romstage-y += ccs.c diff --git a/src/soc/ibm/power9/istep_14_2.c b/src/soc/ibm/power9/istep_14_2.c new file mode 100644 index 00000000000..6a973b7c548 --- /dev/null +++ b/src/soc/ibm/power9/istep_14_2.c @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#define MCS_MCMODE0 0x5010811 +#define MCS_MCSYNC 0x5010815 +#define MCA_MBA_FARB3Q 0x7010916 + +#define MCS_MCSYNC_SYNC_GO_CH0 16 +#define SUPER_SYNC_BIT 14 +#define MBA_REFRESH_SYNC_BIT 8 +#define MCS_MCMODE0_DISABLE_MC_SYNC 27 +#define MCS_MCMODE0_DISABLE_MC_PAIR_SYNC 28 + +static void thermal_init(void) +{ + for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) { + for (size_t mca_i = 0; mca_i < MCA_PER_MCS; ++mca_i) { + mca_and_or(mcs_ids[mcs_i], mca_i, MCA_MBA_FARB3Q, + ~PPC_BITMASK(0, 45), + PPC_BIT(10) | PPC_BIT(25) | PPC_BIT(37)); + } + scom_and_for_chiplet(mcs_to_nest[mcs_ids[mcs_i]], + MCS_MCMODE0 + 0x80 * mcs_i, + PPC_BIT(21)); + } +} + +static void prog_mc_mode0(chiplet_id_t nest_target, size_t index) +{ + uint64_t mask = PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) + | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); + uint64_t data = 
PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) + | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); + scom_and_or_for_chiplet(nest_target, MCS_MCMODE0 + 0x80 * index, ~mask, + data & mask); +} + +static void throttle_sync(void) +{ + for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) + prog_mc_mode0(mcs_to_nest[mcs_ids[mcs_i]], mcs_i); + scom_and_for_chiplet(N3_CHIPLET_ID, MCS_MCSYNC, + ~PPC_BIT(MCS_MCSYNC_SYNC_GO_CH0)); + scom_and_or_for_chiplet(N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(SUPER_SYNC_BIT), + PPC_BITMASK(0, 16)); + scom_and_for_chiplet(N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(MBA_REFRESH_SYNC_BIT)); +} + +void istep_14_2(void) +{ + report_istep(14, 2); + printk(BIOS_EMERG, "starting istep 14.2\n"); + + thermal_init(); + throttle_sync(); + + printk(BIOS_EMERG, "ending istep 14.2\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 43693ebe0ba..f42b9b1d88a 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -349,6 +349,7 @@ void main(void) istep_13_13(); istep_14_1(); + istep_14_2(); istep_14_5(); /* Test if SCOM still works. Maybe should check also indirect access? */ From 8da0ad5590eb717555c7b775e2c5311363bbbe2d Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 26 Apr 2021 09:01:04 +0200 Subject: [PATCH 042/213] mb/raptor-cs/talos-2: add timestamps and CBMEM initialization Signed-off-by: Krystian Hebel Change-Id: I0a36912aaaa9b3fd75f5f62ccd6d4ea689f7d066 --- src/arch/ppc64/Makefile.inc | 4 ---- src/arch/ppc64/stages.c | 3 +++ src/mainboard/raptor-cs/talos-2/mainboard.c | 13 ++++++++++--- src/soc/ibm/power9/romstage.c | 10 +++++++++- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/arch/ppc64/Makefile.inc b/src/arch/ppc64/Makefile.inc index 8300ad693dc..9a89e00d73f 100644 --- a/src/arch/ppc64/Makefile.inc +++ b/src/arch/ppc64/Makefile.inc @@ -44,8 +44,6 @@ romstage-y += \ $(top)/src/lib/memmove.c \ $(top)/src/lib/memset.c -romstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c - # Build the romstage $(objcbfs)/romstage.debug: $$(romstage-objs) @@ -76,8 +74,6 @@ ramstage-y += \ $(eval $(call create_class_compiler,rmodules,power8)) -ramstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c - ramstage-srcs += src/mainboard/$(MAINBOARDDIR)/mainboard.c # Build the ramstage diff --git a/src/arch/ppc64/stages.c b/src/arch/ppc64/stages.c index 01b9efaba8d..6d5ae20a3c4 100644 --- a/src/arch/ppc64/stages.c +++ b/src/arch/ppc64/stages.c @@ -14,6 +14,7 @@ #include #include #include +#include void stage_entry(uintptr_t stage_arg) { @@ -23,6 +24,8 @@ void stage_entry(uintptr_t stage_arg) if (!ENV_ROMSTAGE_OR_BEFORE) _cbmem_top_ptr = stage_arg; + else + timestamp_init(read_spr(SPR_TB)); #if ENV_RAMSTAGE hrmor = read_spr(SPR_HRMOR); diff --git a/src/mainboard/raptor-cs/talos-2/mainboard.c b/src/mainboard/raptor-cs/talos-2/mainboard.c index b4d11efe4b4..ad779d9c5dd 100644 --- a/src/mainboard/raptor-cs/talos-2/mainboard.c +++ b/src/mainboard/raptor-cs/talos-2/mainboard.c @@ -9,9 +9,16 @@ static void mainboard_enable(struct device *dev) if (!dev) die("No dev0; die\n"); - /* Where does RAM live? */ - ram_resource_kb(dev, 0, 2048, 32768); - cbmem_recovery(0); + /* + * Smallest reported to be working (but not officially supported) DIMM is + * 4GB. This means that we always have at least as much available. Last + * 128MB of first 4GB are reserved for hostboot/coreboot - this is mostly + * dictated by HRMOR value. 
+	 *
+	 * TODO: implement this properly for all RAM
+	 */
+	ram_resource_kb(dev, 0, 0, 4 * 1024 * 1024 - 128 * 1024);
+	reserved_ram_resource_kb(dev, 1, 4 * 1024 * 1024 - 128 * 1024, 128 * 1024);
 }
 
 struct chip_operations mainboard_ops = {
diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c
index f42b9b1d88a..e1b157ea1f8 100644
--- a/src/soc/ibm/power9/romstage.c
+++ b/src/soc/ibm/power9/romstage.c
@@ -7,6 +7,8 @@
 #include
 #include
 #include
+#include
+#include
 
 /* DIMM SPD addresses */
 #define DIMM0 0x50
@@ -328,10 +330,13 @@ static void prepare_dimm_data(void)
 
 void main(void)
 {
+	timestamp_add_now(TS_ROMSTAGE_START);
+
 	console_init();
 
-	vpd_pnor_main();
+	timestamp_add_now(TS_INITRAM_START);
 
+	vpd_pnor_main();
 	prepare_dimm_data();
 
 	report_istep(13, 1);	// no-op
@@ -352,6 +357,8 @@ void main(void)
 	istep_14_2();
 	istep_14_5();
 
+	timestamp_add_now(TS_INITRAM_END);
+
 	/* Test if SCOM still works. Maybe should check also indirect access? */
 	printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F));
 
@@ -362,5 +369,6 @@ void main(void)
 	if (read_scom(0xF000F) == 0xFFFFFFFFFFFFFFFF)
 		die("SCOM stopped working, check FIRs, halting now\n");
 
+	cbmem_initialize_empty();
 	run_ramstage();
 }

From 0c77f1bd56250d48983fc4afa5902184a3b99ff4 Mon Sep 17 00:00:00 2001
From: Krystian Hebel
Date: Tue, 25 May 2021 18:32:32 +0200
Subject: [PATCH 043/213] soc/power9/timer.c: implement udelay() using
 Decrementer Exception

An implementation that busy-reads the Time Base register would be too
costly: reading the register so often reduces the performance of
initialization performed by hardware other than the main CPU.

Another approach is to use the Hypervisor Decrementer Interrupt, but
because of hardwired interrupt handler vector offsets this may not be
possible in every stage.

Thanks to the 'wait' instruction and the way exceptions are defined on
POWER, it is possible to use the Decrementer Exception without actually
implementing a Decrementer Interrupt handler. This results in a rather
concise implementation that doesn't cause performance issues.
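
A rough illustration of the timing (using the 512 MHz time base assumed
by the implementation below): a 100 us delay corresponds to
100 * 512 = 51200 Time Base ticks. The Decrementer is armed for 99 us
(50688 ticks), the core sleeps on 'wait' until the Decrementer exception
condition comes into existence, and only the remaining ~1 us is
busy-polled on the Time Base.
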
Signed-off-by: Krystian Hebel Change-Id: I74cc95a503d4f3642609eff3b781f0b1c3168a3d --- src/include/cpu/power/spr.h | 15 +++++++ src/soc/ibm/power9/Kconfig | 5 +++ src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/romstage.c | 2 + src/soc/ibm/power9/timer.c | 72 ++++++++++++++++++++++++++++++++- 5 files changed, 94 insertions(+), 1 deletion(-) diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index 300147d7dda..ef3c734a517 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -5,6 +5,10 @@ #include // PPC_BIT() +#define SPR_DEC 0x16 +#define SPR_DEC_IMPLEMENTED_BITS 56 +#define SPR_DEC_LONGEST_TIME ((1ull << (SPR_DEC_IMPLEMENTED_BITS - 1)) - 1) + #define SPR_TB 0x10C #define SPR_PVR 0x11F @@ -14,8 +18,14 @@ #define SPR_HSPRG0 0x130 #define SPR_HSPRG1 0x131 +#define SPR_HDEC 0x136 #define SPR_HRMOR 0x139 +#define SPR_LPCR 0x13E +#define SPR_LPCR_LD PPC_BIT(46) +#define SPR_LPCR_HEIC PPC_BIT(59) +#define SPR_LPCR_HDICE PPC_BIT(63) + #define SPR_HMER 0x150 /* Bits in HMER/HMEER */ #define SPR_HMER_MALFUNCTION_ALERT PPC_BIT(0) @@ -66,6 +76,11 @@ static inline uint64_t read_msr(void) return val; } +static inline void write_msr(uint64_t val) +{ + asm volatile("mtmsrd %0" :: "r"(val) : "memory"); +} + static inline uint64_t pvr_revision(void) { return read_spr(SPR_PVR) & SPR_PVR_REV_MASK; diff --git a/src/soc/ibm/power9/Kconfig b/src/soc/ibm/power9/Kconfig index bb12528bd70..6ab6aae4cc0 100644 --- a/src/soc/ibm/power9/Kconfig +++ b/src/soc/ibm/power9/Kconfig @@ -6,6 +6,11 @@ config CPU_IBM_POWER9 if CPU_IBM_POWER9 +# Doing delays with monotonic timers is suboptimal on this platform +config GENERIC_UDELAY + bool + default n + config SKIP_INITIAL_ECC_SCRUB bool "Skip initial ECC scrubbing" default n diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index fb5998a1196..10102f6f837 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -21,6 +21,7 @@ romstage-y += istep_14_2.c romstage-y += istep_14_5.c romstage-y += i2c.c romstage-y += ccs.c +romstage-y += timer.c ramstage-y += chip.c ramstage-y += rom_media.c ramstage-y += timer.c diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index e1b157ea1f8..0a9b3c084dd 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -330,6 +330,8 @@ static void prepare_dimm_data(void) void main(void) { + init_timer(); + timestamp_add_now(TS_ROMSTAGE_START); console_init(); diff --git a/src/soc/ibm/power9/timer.c b/src/soc/ibm/power9/timer.c index 2e0289c0451..162668c028f 100644 --- a/src/soc/ibm/power9/timer.c +++ b/src/soc/ibm/power9/timer.c @@ -1,8 +1,78 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include +#include +#include + +/* Time base frequency is 512 MHz so 512 ticks per usec */ +#define TB_TICKS_PER_USEC 512 + +#if CONFIG(COLLECT_TIMESTAMPS) +uint64_t timestamp_get(void) +{ + return read_spr(SPR_TB); +} +#endif + +int timestamp_tick_freq_mhz(void) +{ + return TB_TICKS_PER_USEC; +} void init_timer(void) { - // no need to do anything here + /* + * Set both decrementers to the highest possible value. POWER9 implements + * 56 bits, they decrement with 512MHz frequency. Decrementer exception + * condition exists when the MSB implemented bit gets (HDEC) or is (DEC) + * set, meaning that maximal possible timeout for DEC is one bit less than + * that (this gives roughly 7 * 10^7 s = ~2.2 years for DEC and twice that + * for HDEC). 
By default DEC uses only 32 bits, this can be changed by + * setting bit 46 (LD) of LPCR (Logical Partitioning Control Register). + * Without it the counter overflows and generates an interrupt after ~4.2 s. + */ + + write_spr(SPR_LPCR, read_spr(SPR_LPCR) | SPR_LPCR_LD); + write_spr(SPR_DEC, SPR_DEC_LONGEST_TIME); + write_spr(SPR_HDEC, SPR_DEC_LONGEST_TIME); +} + +/* TODO: with HDEC we can get ~2ns resolution, may be useful for RAM init. */ +void udelay(unsigned int usec) +{ + uint64_t start = read_spr(SPR_TB); + uint64_t end = start + usec * TB_TICKS_PER_USEC; + + /* + * "When the contents of the DEC0 change from 0 to 1, a Decrementer + * exception will come into existence within a reasonable period of time", + * but this may not be precise enough. Set an interrupt for 1us less than + * requested and busy-loop the rest. + * + * In tests on Talos 2 this gives between 0 and 1/32 us more than requested, + * while interrupt only solution gave between 6/32 and 11/32 us more. + */ + if (usec > 1) { + write_spr(SPR_DEC, (usec - 1) * TB_TICKS_PER_USEC); + asm volatile("or 31,31,31"); // Lower priority + + do { + asm volatile("wait"); + } while(read_spr(SPR_DEC) < SPR_DEC_LONGEST_TIME); + + /* + * "When the contents of DEC0 change from 1 to 0, the existing + * Decrementer exception, if any, will cease to exist within a + * reasonable period of time, but not later than the completion of + * the next context synchronizing instruction or event" - last part + * of sentence doesn't matter, in worst case 'wait' in next udelay() + * will be executed more than once but this is still cheaper than + * synchronizing context explicitly. + */ + write_spr(SPR_DEC, SPR_DEC_LONGEST_TIME); + asm volatile("or 2,2,2"); // Back to normal priority + } + + while (end > read_spr(SPR_TB)); } From 5d19b17f3e93983d9064de94d1b6b8a8e4819224 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 8 Jun 2021 13:43:46 +0200 Subject: [PATCH 044/213] soc/power9/: fill memory table with real values Signed-off-by: Krystian Hebel Change-Id: I52019bba54cb62134d9409c47363f7604d8e130a --- src/cpu/power9/Makefile.inc | 1 + src/mainboard/raptor-cs/talos-2/mainboard.c | 9 ++--- src/soc/ibm/power9/chip.c | 42 +++++++++++++++++++++ 3 files changed, 46 insertions(+), 6 deletions(-) diff --git a/src/cpu/power9/Makefile.inc b/src/cpu/power9/Makefile.inc index 2fe9e57a96d..3585199ba89 100644 --- a/src/cpu/power9/Makefile.inc +++ b/src/cpu/power9/Makefile.inc @@ -4,3 +4,4 @@ ramstage-y += power9.c bootblock-y += scom.c romstage-y += scom.c +ramstage-y += scom.c diff --git a/src/mainboard/raptor-cs/talos-2/mainboard.c b/src/mainboard/raptor-cs/talos-2/mainboard.c index ad779d9c5dd..dbcd9b8f710 100644 --- a/src/mainboard/raptor-cs/talos-2/mainboard.c +++ b/src/mainboard/raptor-cs/talos-2/mainboard.c @@ -12,13 +12,10 @@ static void mainboard_enable(struct device *dev) /* * Smallest reported to be working (but not officially supported) DIMM is * 4GB. This means that we always have at least as much available. Last - * 128MB of first 4GB are reserved for hostboot/coreboot - this is mostly - * dictated by HRMOR value. - * - * TODO: implement this properly for all RAM + * 256MB of first 4GB are reserved for hostboot/coreboot, which is also + * included in device tree we are currently passing. 
*/ - ram_resource_kb(dev, 0, 0, 4 * 1024 * 1024 - 128 * 1024); - reserved_ram_resource_kb(dev, 1, 4 * 1024 * 1024 - 128 * 1024, 128 * 1024); + reserved_ram_resource_kb(dev, 0, 4 * 1024 * 1024 - 256 * 1024, 256 * 1024); } struct chip_operations mainboard_ops = { diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index dd320b441b2..87a4c1f4e68 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -1,7 +1,49 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include +#include "istep_13_scom.h" + +#define SIZE_MASK PPC_BITMASK(13,23) +#define SIZE_SHIFT (63 - 23) +#define BASE_MASK PPC_BITMASK(24,47) +#define BASE_SHIFT (63 - 47) + +/* Values in registers are in 4GB units, ram_resource_kb() expects kilobytes. */ +#define CONVERT_4GB_TO_KB(x) ((x) << 22) + +static inline unsigned long base_k(uint64_t reg) +{ + return CONVERT_4GB_TO_KB((reg & BASE_MASK) >> BASE_SHIFT); +} + +static inline unsigned long size_k(uint64_t reg) +{ + return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); +} + +static void enable_soc_dev(struct device *dev) +{ + int mcs_i, idx = 0; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + uint64_t reg; + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + /* These registers are undocumented, see istep 14.5. */ + /* MCS_MCFGP */ + reg = read_scom_for_chiplet(nest, 0x0501080A); + if (reg & PPC_BIT(0)) + ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); + + /* MCS_MCFGPM */ + reg = read_scom_for_chiplet(nest, 0x0501080C); + if (reg & PPC_BIT(0)) + ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); + } +} struct chip_operations soc_ibm_power9_ops = { CHIP_NAME("POWER9") + .enable_dev = enable_soc_dev, }; From 1d451e99d5929715f047aa3b1c28aadcc989357d Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Thu, 24 Jun 2021 14:58:39 +0200 Subject: [PATCH 045/213] ibm/power9/: implement istep 18.11 Change-Id: I49030208099337176a2b4c961bd5ff7c8409f5c5 Signed-off-by: Igor Bagnucki Signed-off-by: Krystian Hebel --- src/include/cpu/power/istep_18.h | 8 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/chip.c | 3 + src/soc/ibm/power9/istep_18_11.c | 303 +++++++++++++++++++++++++++++++ 4 files changed, 315 insertions(+) create mode 100644 src/include/cpu/power/istep_18.h create mode 100644 src/soc/ibm/power9/istep_18_11.c diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h new file mode 100644 index 00000000000..16d7ef1d103 --- /dev/null +++ b/src/include/cpu/power/istep_18.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP18_H +#define CPU_PPC64_ISTEP18_H + +void istep_18_11(void); + +#endif /* CPU_PPC64_ISTEP18_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 10102f6f837..8a0b04243f5 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -25,5 +25,6 @@ romstage-y += timer.c ramstage-y += chip.c ramstage-y += rom_media.c ramstage-y += timer.c +ramstage-y += istep_18_11.c endif diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 87a4c1f4e68..fbab4808dfd 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -2,6 +2,7 @@ #include #include +#include #include "istep_13_scom.h" #define SIZE_MASK PPC_BITMASK(13,23) @@ -41,6 +42,8 @@ static void enable_soc_dev(struct device *dev) if (reg & PPC_BIT(0)) ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); } + + istep_18_11(); } struct chip_operations soc_ibm_power9_ops = { diff --git 
a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c new file mode 100644 index 00000000000..95947ffe7d4 --- /dev/null +++ b/src/soc/ibm/power9/istep_18_11.c @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#define MDMT_TOD_GRID_CYCLE_STAGING_DELAY 6 +#define FREQ_X_MHZ 2000 +#define TOD_GRID_PS 400 + +#define PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC 21 +#define PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE 4 + +#define M_PATH_0_OSC_NOT_VALID 0 +#define M_PATH_1_OSC_NOT_VALID 1 +#define M_PATH_0_STEP_ALIGN_DISABLE 2 + +#define M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET 25 +#define M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET 11 +#define M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET 7 +#define M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET 15 + +#define BUS_DELAY_63 PPC_BITMASK(52, 63) +#define BUS_DELAY_47 PPC_BITMASK(36, 47) + +// Power Bus Electrical Round Trip Delay Control Register +// Trip Delay Control Register +// [0:5] WO_1P PB_ELINK_RT_DELAY_CTL_SET: +// Setting a bit to 1 (auto reset to 0) causes the matching link +// to attempt to do a round-trip delay calculation. +// Results end up in the PB_ELINK_DLY_*_REG regs. +#define PU_PB_ELINK_RT_DELAY_CTL_REG 0x05013419 +// Processor bus Electrical Link Delay 0123 Register +// [36:47] ROX Reserved. +// [52:63] ROX Reserved. +// Note: Documentations describes these bits as reserved however they are used +// to get bus_delay value. +#define PU_PB_ELINK_DLY_0123_REG 0x0501340E +// Root Control 8 Register +// [21] RW ROOT_CTRL8_21_SPARE_PLL_CONTROL: +#define PERV_ROOT_CTRL8_SCOM 0x00050018 +// Master/slave select: master path select: slave path select: step check setup +// [1] RWX PRI_M_S_TOD_SELECT: Primary configuration: master-slave TOD select. +// 0 = TOD is slave. +// 1 = TOD is master. +// [2] RWX PRI_M_S_DRAWER_SELECT: Primary configuration: master-slave drawer select. +// 0 = Drawer is slave. +// 1 = Drawer is master It is just used for TOD internal power gating. +// [9] RWX SEC_M_S_TOD_SELECT: Secondary configuration: master-slave TOD select. +// 0 = TOD is slave. +// 1 = TOD is master. +// [10] RWX SEC_M_S_DRAWER_SELECT: Secondary configuration: master-slave drawer select. +// 0 = Drawer is slave. +// 1 = Drawer is master. It is used for TOD internal power gating. +#define PERV_TOD_PSS_MSS_CTRL_REG 0x00040007 +// Control register 1 for the secondary configuration distribution port. +#define PERV_TOD_SEC_PORT_1_CTRL_REG 0x00040004 +// Control register 1 for the primary configuration distribution port. +#define PERV_TOD_PRI_PORT_1_CTRL_REG 0x00040002 +// TOD: Setup for Master Paths Control Register +// Used for oscillator validity, step alignment, sync pulse frequency, and step check. +// [0] RW M_PATH_0_OSC_NOT_VALID: Indicates whether the oscillator attached to master path-0 +// is not valid. +// 0 = Valid oscillator is attached to master path-0. +// 1 = No valid oscillator is attached to master path-0. +// [1] RW M_PATH_1_OSC_NOT_VALID: Indicates whether the oscillator attached to master path-1 +// is not valid. +// 0 = Valid oscillator is attached to master path-1. +// 1 = No valid oscillator is attached to master path-1. +// [2] RW M_PATH_0_STEP_ALIGN_DISABLE: Master path-0. Indicates alignment of master path-0 +// step to master path-1 step is active +// 0 = Alignment of master path-0 step to master path-1 step is active. +// 1 = Alignment of master path-0 step to master path-1 Step is not active. 
+// [5:7] RW M_PATH_SYNC_CREATE_SPS_SELECT: Master path: sync create: steps per sync (SPS) +// select: number of STEP pulses per SYNC pulse. +// [8:11] RW M_PATH_0_STEP_CHECK_CPS_DEVIATION: Master path-0: step check: CPS deviation. +// [13:15] RW M_PATH_0_STEP_CHECK_VALIDITY_COUNT: Master path-0 step check. Specifies the number +// of received steps before the step is declared as valid. +// [24:25] RW M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR: Master path-01: step check: CPS deviation +// factor. +#define PERV_TOD_M_PATH_CTRL_REG 0x00040000 +// TOD: Internal Path Control Register +// [8:11] RW I_PATH_STEP_CHECK_CPS_DEVIATION: Internal path: step check: CPS deviation. +// [13:15] RW I_PATH_STEP_CHECK_VALIDITY_COUNT: Internal path: step check: validity count. +// Defines the number of received steps before the step is declared as valid. +// [32:39] RWX I_PATH_CPS: Internal path: CPS +// In write mode, the value is used to load the CPS for the constant CPS for the step +// checker. In read mode the value shows the actual CPS in the internal path. +#define PERV_TOD_I_PATH_CTRL_REG 0x00040006 +// TOD: Secondary Configuration Distribution Port Control Register 0 +// [32:39] RW SEC_I_PATH_DELAY_VALUE: Internal path: secondary configuration: delay value. +#define PERV_TOD_SEC_PORT_0_CTRL_REG 0x00040003 +// TOD: Primary Configuration Distribution Port Control Register 0 +// Same bit purpouse as in PERV_TOD_SEC_PORT_0_CTRL_REG +#define PERV_TOD_PRI_PORT_0_CTRL_REG 0x00040001 +// TOD: Chip Control Register +// [10:15] LOW_ORDER_STEP_VALUE: Low-order step value needed +// for USE_TB_STEP_SYNC as the programmable +// cycle counter for creating a step. +#define PERV_TOD_CHIP_CTRL_REG 0x00040010 +// TOD: Slave Path Control Register +// [26:27] RW S_PATH_REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR: +// Slave path-01: remote sync: sync-step check: CPS deviation factor. +// [28:31] RW S_PATH_REMOTE_SYNC_CHECK_CPS_DEVIATION: +// Slave path-01: remote sync: sync-step check: CPS deviation. +// [32:39] RW S_PATH_REMOTE_SYNC_MISS_COUNT_MAX: +// Slave path-01: remote sync: maximum of SYNC miss counts: 0 - 255 syncs. +#define PERV_TOD_S_PATH_CTRL_REG 0x00040005 + +/* TODO: this one will change if we add more than one processor */ +#define MDMT (1) + +/* + * 2 CPU topology + * + * Primary (CHIP0 is MDMT): + * + * CHIP0 --TX--> XBUS1 --> XBUS1 --RX--> CHIP1 + * + * Secondary (CHIP1 is MDMT): + * + * CHIP0 <--RX-- XBUS1 <-- XBUS1 <--TX-- CHIP1 + */ + +static uint32_t calculate_topology_delay(void) +{ + /* + * In simple topology with one proc it is enough to assign 0. + * With multiple processors this will get more complicated, + * see calculate_node_link_delay() in Hostboot + */ + + if(MDMT) + return MDMT_TOD_GRID_CYCLE_STAGING_DELAY; + + /* TODO: check again if this really is write, not RMW */ + write_scom(PU_PB_ELINK_RT_DELAY_CTL_REG, PPC_BITMASK(2, 3)); + uint64_t l_bus_mode_reg = read_scom(PU_PB_ELINK_DLY_0123_REG); + + uint32_t bus_delay = ((l_bus_mode_reg & BUS_DELAY_47) >> 16) + + (l_bus_mode_reg & BUS_DELAY_63); + + /* + * FIXME: floating point wasn't fully configured, see if we can skip it. + * Testing requires bigger topology, i.e. more CPUs. + */ + return (uint32_t)(1 + ((double)(bus_delay * 8 * 1000000) + / (double)(4 * FREQ_X_MHZ * TOD_GRID_PS))); +} + +static void calculate_m_path(void) +{ + uint64_t dual_edge_disable = + (read_scom(PERV_ROOT_CTRL8_SCOM) & PPC_BIT(PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC)) + ? 
PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) + : 0; + + if(MDMT) { + scom_and_or(PERV_TOD_M_PATH_CTRL_REG, + ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | + PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | + PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | + PPC_SHIFT(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET) | + PPC_SHIFT(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET) | + PPC_SHIFT(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET) | + PPC_SHIFT(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET)), + PPC_BIT(M_PATH_1_OSC_NOT_VALID) | + PPC_SHIFT(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET) | + PPC_SHIFT(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET) | + dual_edge_disable); + } else { + scom_and_or(PERV_TOD_M_PATH_CTRL_REG, + ~PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE), + dual_edge_disable); + } +} + +void istep_18_11(void) +{ + printk(BIOS_EMERG, "starting istep 18.11\n"); + report_istep(18, 11); + + /* Clear previous primary topology */ + write_scom(PERV_TOD_PRI_PORT_0_CTRL_REG, 0); + write_scom(PERV_TOD_SEC_PORT_0_CTRL_REG, 0); + + /* Workaround for HW480181: Init remote sync checker tolerance to maximum + * [26-27] REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0x3 (factor 8) + * [28-31] REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xF (93.75%) + */ + scom_or(PERV_TOD_S_PATH_CTRL_REG, PPC_SHIFT(0x3, 27) | PPC_SHIFT(0xF, 31)); + + /* + * Set PSS_MSS_CTRL_REG for primary configuration, assumptions: + * - MDMT = 1 + * - valid oscillator is attached to master path-0, but not path-1 + * [0] PRI_M_PATH_SELECT = 0 (path-0 selected) + * [1] PRI_M_S_TOD_SELECT = 1 (TOD is master) + * [2] PRI_M_S_DRAWER_SELECT = 1 (drawer is master) + */ + scom_and_or(PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(0), + PPC_BIT(1) | PPC_BIT(2)); + + /* Configure PORT_CTRL_REGs (primary) */ + /* + * TODO: this touches XBUS/OBUS, but Hostboot doesn't modify registers so + * skip it for now and fix if needed. + */ + //scom_and_or(PERV_TOD_PRI_PORT_0_CTRL_REG, ???, ???); + //scom_and_or(PERV_TOD_SEC_PORT_0_CTRL_REG, ???, ???); + + /* Configure M_PATH_CTRL_REG */ + /* + * TODO: check this again. Value is correct, not sure whether fields are + * correctly cleared. Also comment the values written. 
+ */ + calculate_m_path(); + + /* Configure I_PATH_CTRL_REG (primary) */ + /* PERV_TOD_PRI_PORT_0_CTRL_REG: + * [32-39] PRI_I_PATH_DELAY_VALUE = calculate + * PERV_TOD_I_PATH_CTRL_REG: + * [0] I_PATH_DELAY_DISABLE = 0 + * [1] I_PATH_DELAY_ADJUST_DISABLE = 0 + * [6-7] I_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) + * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) + */ + scom_and_or(PERV_TOD_PRI_PORT_0_CTRL_REG, ~PPC_BITMASK(32, 39), + PPC_SHIFT(calculate_topology_delay(), 39)); + scom_and_or(PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_SHIFT(0xF, 11) | PPC_SHIFT(0x3, 15)); + + /* Configure INIT_CHIP_CTRL_REG (primary) */ + /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) + * [4] I_PATH_SYNC_CHECK_DISABLE = 0 (enable internal path sync check) + * [7] MOVE_TOD_TO_TB_ON_2X_SYNC_ENABLE = 0 (1x sync boundaries) + * [8] USE_TB_SYNC_MECHANISM = 0 + * [9] USE_TB_STEP_SYNC = 0 (use TB step sync from internal path) + * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) + * [30] XSTOP_GATE = 0 (stop TOD on checkstop) + */ + scom_and_or(PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_SHIFT(0x3F, 15)); + + /* TODO: test if we can skip repeated writes (M_PATH, I_PATH, CHIP) */ + + /* Clear previous secondary topology */ + /* NOTE: order is swapped wrt primary, does it matter? */ + write_scom(PERV_TOD_SEC_PORT_1_CTRL_REG, 0); + write_scom(PERV_TOD_PRI_PORT_1_CTRL_REG, 0); + + /* + * Set PSS_MSS_CTRL_REG for secondary configuration, assumptions as before + * [8] SEC_M_PATH_SELECT = 0 (path-0 selected) + * [9] SEC_M_S_TOD_SELECT = 1 (TOD is master) + * [10] SEC_M_S_DRAWER_SELECT = 1 (drawer is master) + */ + scom_and_or(PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(8), + PPC_BIT(9) | PPC_BIT(10)); + + /* Configure PORT_CTRL_REGs (secondary) */ + //scom_and_or(PERV_TOD_SEC_PORT_1_CTRL_REG, ???, ???); + //scom_and_or(PERV_TOD_PRI_PORT_1_CTRL_REG, ???, ???); + + /* Configure M_PATH_CTRL_REG */ + calculate_m_path(); + + /* Configure I_PATH_CTRL_REG (secondary) */ + /* PERV_TOD_SEC_PORT_0_CTRL_REG: + * [32-39] SEC_I_PATH_DELAY_VALUE = calculate + * PERV_TOD_I_PATH_CTRL_REG: + * [0] I_PATH_DELAY_DISABLE = 0 + * [1] I_PATH_DELAY_ADJUST_DISABLE = 0 + * [6-7] I_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) + * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) + */ + scom_and_or(PERV_TOD_SEC_PORT_0_CTRL_REG, ~PPC_BITMASK(32, 39), + PPC_SHIFT(calculate_topology_delay(), 39)); + scom_and_or(PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_SHIFT(0xF, 11) | PPC_SHIFT(0x3, 15)); + + /* Configure INIT_CHIP_CTRL_REG (secondary) */ + /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) + * [4] I_PATH_SYNC_CHECK_DISABLE = 0 (enable internal path sync check) + * [7] MOVE_TOD_TO_TB_ON_2X_SYNC_ENABLE = 0 (1x sync boundaries) + * [8] USE_TB_SYNC_MECHANISM = 0 + * [9] USE_TB_STEP_SYNC = 0 (use TB step sync from internal path) + * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) + * [30] XSTOP_GATE = 0 (stop TOD on checkstop) + */ + scom_and_or(PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_SHIFT(0x3F, 15)); + + printk(BIOS_EMERG, "ending istep 18.11\n"); +} 
From 91b81e981826f6d10ca0db9daef6fa562cc649be Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Thu, 24 Jun 2021 14:58:39 +0200 Subject: [PATCH 046/213] soc/power9/: implement istep 18.12 Change-Id: Ied85cde0e1525415022e39569cd3912d7be29146 Signed-off-by: Igor Bagnucki Signed-off-by: Krystian Hebel --- src/include/cpu/power/istep_18.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/chip.c | 1 + src/soc/ibm/power9/istep_18_12.c | 141 +++++++++++++++++++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 src/soc/ibm/power9/istep_18_12.c diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h index 16d7ef1d103..3e9c154f75b 100644 --- a/src/include/cpu/power/istep_18.h +++ b/src/include/cpu/power/istep_18.h @@ -4,5 +4,6 @@ #define CPU_PPC64_ISTEP18_H void istep_18_11(void); +void istep_18_12(void); #endif /* CPU_PPC64_ISTEP18_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 8a0b04243f5..0c60078a738 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -26,5 +26,6 @@ ramstage-y += chip.c ramstage-y += rom_media.c ramstage-y += timer.c ramstage-y += istep_18_11.c +ramstage-y += istep_18_12.c endif diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index fbab4808dfd..3b01ed2e71c 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -44,6 +44,7 @@ static void enable_soc_dev(struct device *dev) } istep_18_11(); + istep_18_12(); } struct chip_operations soc_ibm_power9_ops = { diff --git a/src/soc/ibm/power9/istep_18_12.c b/src/soc/ibm/power9/istep_18_12.c new file mode 100644 index 00000000000..dd638f2066b --- /dev/null +++ b/src/soc/ibm/power9/istep_18_12.c @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#define PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 38 + +#define PERV_TOD_ERROR_REG_RX_TTYPE_0 38 + +#define PERV_TOD_FSM_REG_IS_RUNNING 4 +#define PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER 0 +#define PERV_TOD_START_TOD_REG_FSM_TRIGGER 0 +#define PERV_TOD_TX_TTYPE_REG_TRIGGER 0 + +// TOD: TX TTYPE +// TX TType triggering register. +// [0] TX_TTYPE_2_TRIGGER: TX TTYPE trigger. +#define PERV_TOD_TX_TTYPE_2_REG 0x00040013 +#define PERV_TOD_TX_TTYPE_4_REG 0x00040015 +#define PERV_TOD_TX_TTYPE_5_REG 0x00040016 + +// TOD: Load +// TOD-mod triggering register. This register sets the FSM in NOT_SET state. +// [0] FSM_LOAD_TOD_MOD_TRIGGER: FSM: LOAD_TOD_MOD trigger. +#define PERV_TOD_LOAD_TOD_MOD_REG 0x00040018 +// TOD: Load Register TOD Incrementer: 60 +// Bit TOD and 4-bit WOF on read: Returns all 0s when the TOD is not running. +// On write: go to wait for sync state when data bit 6) = '0' (load TOD). +// Otherwise, go to stopped state (load TOD data63). +// [0-59] LOAD_TOD_VALUE: Internal path: load TOD value. +// [60-63] WOF: who's-on-first (WOF) incrementer. +#define PERV_TOD_LOAD_TOD_REG 0x00040021 +// TOD: Start TOD Triggering Register +// Goes to running state when data bit [02] = '0'. +// Otherwise, go to wait for sync state. +// [0] FSM_START_TOD_TRIGGER: FSM: Start TOD trigger. +#define PERV_TOD_START_TOD_REG 0x00040022 +// TOD: FSM Register +// [0:3] RWX I_PATH_FSM_STATE: Internal path. +// TOD FSM state (TOD is running in the following states: +// x'02', x'0A', x'0E'). 0000 = Error. +// [4] ROX TOD_IS_RUNNING: TOD running indicator. +#define PERV_TOD_FSM_REG 0x00040024 +// TOD: Error and Interrupt Register +// [38] RWX RX_TTYPE_0: Status: received TType-0. 
+// [39] RWX RX_TTYPE_1: Status: received TType-1. +// [40] RWX RX_TTYPE_2: Status: received TType-2. +// [41] RWX RX_TTYPE_3: Status: received TType-3. +// [42] RWX RX_TTYPE_4: Status: received TType-4. +// [43] RWX RX_TTYPE_5: Status: received TType-5 when FSM is in running state. +#define PERV_TOD_ERROR_REG 0x00040030 +// TOD: Error Mask Register Mask: Error Reporting Component (C_ERR_RPT) +// TOD: Error mask register mask of the error reporting component (c_err_rpt) +// This register holds masks for the same bits +// as in the previous (PERV_TOD_ERROR_REG) register +#define PERV_TOD_ERROR_MASK_REG 0x00040032 + +/* TODO: this one will change if we add more than one processor */ +#define MDMT (1) + +/* See istep 18.11 for 2 CPU topology diagram */ + +/* TODO: this will be much more complicated for different topology */ +static void init_tod_node(void) +{ + uint64_t error_reg; + + /* Clear the TOD error register by writing all bits to 1 */ + /* + * Probably documentation issue, all bits in this register are described as + * RW, but code treats them as if they were write-1-to-clear. + */ + write_scom(PERV_TOD_ERROR_REG, ~0); + + /* Assumption: node is MDMT */ + if (MDMT) { + /* Chip TOD step checkers enable */ + write_scom(PERV_TOD_TX_TTYPE_2_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* Switch local Chip TOD to 'Not Set' state */ + write_scom(PERV_TOD_LOAD_TOD_MOD_REG, + PPC_BIT(PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER)); + + /* Switch all Chip TOD in the system to 'Not Set' state */ + write_scom(PERV_TOD_TX_TTYPE_5_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* Chip TOD load value (move TB to TOD) */ + write_scom(PERV_TOD_LOAD_TOD_REG, + PPC_SHIFT(0x3FF, 59) | PPC_SHIFT(0xC, 63)); + + /* Chip TOD start_tod (switch local Chip TOD to 'Running' state) */ + write_scom(PERV_TOD_START_TOD_REG, + PPC_BIT(PERV_TOD_START_TOD_REG_FSM_TRIGGER)); + + /* Send local Chip TOD value to all Chip TODs */ + write_scom(PERV_TOD_TX_TTYPE_4_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + } + + /* Wait until TOD is running */ + if (!wait_us(1000, + read_scom(PERV_TOD_FSM_REG) & PPC_BIT(PERV_TOD_FSM_REG_IS_RUNNING))) { + printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", + read_scom(PERV_TOD_ERROR_REG)); + die("Error: TOD is not running!\n"); + } + + /* Clear TTYPE#2, TTYPE#4, and TTYPE#5 status */ + write_scom(PERV_TOD_ERROR_REG, + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 2) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 4) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 5)); + + /* Check for real errors */ + error_reg = read_scom(PERV_TOD_ERROR_REG); + if (error_reg != 0) { + printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", + read_scom(PERV_TOD_ERROR_REG)); + die("Error: TOD initialization failed!\n"); + } + + /* Set error mask to runtime configuration (mask TTYPE informational bits) */ + write_scom(PERV_TOD_ERROR_MASK_REG, + PPC_BITMASK(PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0, + PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 + 5)); + + /* In case of multinode system, configure child nodes recursively here */ +} + +void istep_18_12(void) +{ + printk(BIOS_EMERG, "starting istep 18.12\n"); + report_istep(18, 12); + init_tod_node(); + printk(BIOS_EMERG, "ending istep 18.12\n"); +} From 29c1c19ca0c5291b7b86a54951406c51f04fd288 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 8 Sep 2021 17:04:10 +0200 Subject: [PATCH 047/213] soc/power9/mcbist.c: move MCBIST from 14.1 to separate file This is done in preparation for SCOM debug config option. 
Signed-off-by: Krystian Hebel Change-Id: Iee8fa04b5d6046d447dd30ea1b6bd47dd0d63bfa --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_14_1.c | 221 +------------------------------ src/soc/ibm/power9/mcbist.c | 224 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/mcbist.h | 12 ++ 4 files changed, 238 insertions(+), 220 deletions(-) create mode 100644 src/soc/ibm/power9/mcbist.c create mode 100644 src/soc/ibm/power9/mcbist.h diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 0c60078a738..d27ea7c43a8 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -21,6 +21,7 @@ romstage-y += istep_14_2.c romstage-y += istep_14_5.c romstage-y += i2c.c romstage-y += ccs.c +romstage-y += mcbist.c romstage-y += timer.c ramstage-y += chip.c ramstage-y += rom_media.c diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index 94735731900..f85ecec2801 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -7,6 +7,7 @@ #include #include "istep_13_scom.h" +#include "mcbist.h" static void fir_unmask(int mcs_i) { @@ -398,226 +399,6 @@ static void init_mcbist(int mcs_i) PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); } - -/***************************MCBIST***********************************/ -#define MCBIST_TESTS_PER_REG 4 -/* 32 total, but last register is under non-consecutive SCOM address */ -#define MAX_MCBIST_TESTS 28 -#define MAX_MCBIST_TEST_REGS (MAX_MCBIST_TESTS / MCBIST_TESTS_PER_REG) - -/* - * TODO: if we were to run both MCBISTs in parallel, we would need separate - * instances of those... - */ -static uint64_t mcbist_memreg_cache; -static unsigned tests; - -#define ECC_MODE 0x0008 -#define DONE 0x0004 - -enum data_mode -{ - // MCBIST test data modes - FIXED_DATA_MODE = 0x0000, - RAND_FWD_MODE = 0x0010, - RAND_REV_MODE = 0x0020, - RAND_FWD_MAINT = 0x0030, - RAND_REV_MAINT = 0x0040, - DATA_EQ_ADDR = 0x0050, - ROTATE_LEFT_MODE = 0x0060, - ROTATE_RIGHT_MODE = 0x0070, -}; - -enum op_type -{ - WRITE = 0x0000, // fast, with no concurrent traffic - READ = 0x1000, // fast, with no concurrent traffic - READ_WRITE = 0x2000, - WRITE_READ = 0x3000, - READ_WRITE_READ = 0x4000, - READ_WRITE_WRITE = 0x5000, - RAND_SEQ = 0x6000, - READ_READ_WRITE = 0x8000, - SCRUB_RRWR = 0x9000, - STEER_RW = 0xA000, - ALTER = 0xB000, // (W) - DISPLAY = 0xC000, // (R, slow) - CCS_EXECUTE = 0xF000, - - // if bits 9:11 (Data Mode bits) = 000 (bits 4:8 used to specify which subtest to go to) - // Refresh only cmd if bits 9:11 (Data Mode bits) /= 000 - GOTO_SUBTEST_N = 0x7000, -}; - -static void commit_mcbist_memreg_cache(int mcs_i) -{ - chiplet_id_t id = mcs_ids[mcs_i]; - int reg = (tests - 1) / MCBIST_TESTS_PER_REG; - - if (reg < 0) - die("commit_mcbist_memreg_cache() called without adding tests first!\n"); - - if (reg >= MAX_MCBIST_TEST_REGS) - die("Too many MCBIST instructions added\n"); - - /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ - write_scom_for_chiplet(id, MCBMR0Q + reg, mcbist_memreg_cache); - mcbist_memreg_cache = 0; -} - -static void add_mcbist_test(int mcs_i, uint16_t test) -{ - int test_i = tests % MCBIST_TESTS_PER_REG; - if (test_i == 0 && tests != 0) - commit_mcbist_memreg_cache(mcs_i); - - /* This assumes cache is properly cleared. */ - mcbist_memreg_cache |= PPC_SHIFT(test, test_i*16 + 15); - tests++; -} - -/* - * ECC Scrubbing - theory - * - * RAM cannot hold the data indefinitely. It uses capacitors to hold the bits, - * which are constantly being drawn by leaks. 
To counteract this, memory has to - * be periodically refreshed, which recharges the capacitors. However, sometimes - * this happens too late, when state of capacitor has already changed (either - * electric charge was depleted, or capacitor gained additional potential from - * outside - rowhammer, radiation) up to the point where it passes the threshold - * and 0 becomes 1 or vice versa. Refresh command in that case would only make - * "borderline 1" into "strong 1", so it won't be able to fix the problem. This - * is where ECC comes in. - * - * ECC is limited in number of changed bits it can fix and detect. Because of - * that it is important that ECC is checked and possible errors are corrected - * before too many bits have flipped, and corrected values are written back to - * RAM. This is done by hardware, without software's interaction, but it can be - * informed that ECC error has happened (machine check exception). - * - * ECC is checked every time data in RAM is accessed. To check every part of RAM - * even when CPU doesn't need to read it, memory controller does the accesses in - * the background. This is called ECC scrubbing. - * - * Note that it is enough for MC to just send read commands. When everything is - * correct, data is still written back to DRAM because reading operation is - * destructive - capacitors are discharged when read and have to be charged - * again. This happens internally in DRAM, there is no need to send that data - * through the memory bus when DRAM already has it. If there was an error, MC - * automatically sends corrected data to be written. - * - * ECC scrubbing happens between RAM and MC. CPU doesn't participate in this - * process, but it may be slowed down on memory intensive operations because - * some of the bandwidth is used for scrubbing. - * - * TL;DR: ECC scrub is read operation with discarded results. - */ -static void add_scrub(int mcs_i, int port_dimm) -{ - uint16_t test = READ | ECC_MODE | (port_dimm << 9); - add_mcbist_test(mcs_i, test); -} - -static void add_fixed_pattern_write(int mcs_i, int port_dimm) -{ - /* Use ALTER instead of WRITE to use maintenance pattern. ALTER is slow. */ - uint16_t test = WRITE | FIXED_DATA_MODE | ECC_MODE | (port_dimm << 9); - add_mcbist_test(mcs_i, test); -} - -/* -static void add_random_pattern_write(int port_dimm) -{ - uint16_t test = WRITE | RAND_FWD_MAINT | ECC_MODE | (port_dimm << 9); - add_mcbist_test(test); -} -*/ - -/* TODO: calculate initial delays and timeouts */ -static void mcbist_execute(int mcs_i) -{ - chiplet_id_t id = mcs_ids[mcs_i]; - /* This is index of last instruction, not the new one. */ - int test_i = (tests - 1) % MCBIST_TESTS_PER_REG; - uint64_t val; - - /* - * Nothing to do. Note that status register won't report "done", or will - * report state of previous program instead. According to docs this bits - * are writable, do we want to set them to simplify things? - * - * Another possibility would be to start MCBIST with single no-op test (goto - * with DONE bit set), but this may unnecessarily make things slower. - */ - if (tests == 0) - return; - - /* Check if in progress */ - /* TODO: we could force it to stop, but dying will help with debugging */ - if ((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) - die("MCBIST in progress already (%#16.16llx), this shouldn't happen\n", val); - - /* - * Contrary to CCS, we don't add no-op instruction here. DONE bit has to be - * set for instruction that is already present. 
Perhaps DONE is poor name, - * is tells that MCBIST should stop after this test, but this is how it is - * named in the documentation. - */ - mcbist_memreg_cache |= PPC_BIT(13 + test_i*16); - commit_mcbist_memreg_cache(mcs_i); - - /* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ - * [0] MCB_CNTLQ_MCB_START - */ - scom_and_or_for_chiplet(id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START)); - - /* Wait for MCBIST to start. Test for IP and DONE, it may finish early. */ - if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & - (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) { - /* - * TODO: how long do we want to wait? Hostboot uses 10*100us polling, - * but so far it seems to always be already started on the first read. - */ - udelay(1); - if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & - (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) - die("MCBIST failed (%#16.16llx) to start twice\n", val); - - /* Check if this is needed. Do not move before test, it impacts delay! */ - printk(BIOS_INFO, "MCBIST started after delay\n"); - } - - tests = 0; -} - -/* - * FIXME: 0x07012300[10] MCBIST_PROGRAM_COMPLETE should be checked instead. It - * gets set when MCBIST is paused, while 0x070123DC[0] IP stays on in that case. - * This may become a problem for 3DS DIMMs. - */ -static int mcbist_is_done(int mcs_i) -{ - chiplet_id_t id = mcs_ids[mcs_i]; - uint64_t val = val = read_scom_for_chiplet(id, MCB_CNTLSTATQ); - - /* Still in progress */ - if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) - return 0; - - /* Not sure if DONE and FAIL can be set at the same time, check FAIL first */ - if ((val & PPC_BIT(MCB_CNTLSTATQ_MCB_FAIL)) || val == 0) - die("MCBIST error (%#16.16llx)\n"); - - /* Finished */ - if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_DONE)) - return 1; - - /* Is it even possible to get here? */ - return 0; -} - -/***************************MCBIST end*******************************/ - /* * 14.1 mss_memdiag: Mainstore Pattern Testing * diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c new file mode 100644 index 00000000000..36e1e9b0f22 --- /dev/null +++ b/src/soc/ibm/power9/mcbist.c @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" +#include "mcbist.h" + +#define MCBIST_TESTS_PER_REG 4 +/* 32 total, but last register is under non-consecutive SCOM address */ +#define MAX_MCBIST_TESTS 28 +#define MAX_MCBIST_TEST_REGS (MAX_MCBIST_TESTS / MCBIST_TESTS_PER_REG) + +/* + * TODO: if we were to run both MCBISTs in parallel, we would need separate + * instances of those... 
+ */ +static uint64_t mcbist_memreg_cache; +static unsigned tests; + +#define ECC_MODE 0x0008 +#define DONE 0x0004 + +enum data_mode +{ + // MCBIST test data modes + FIXED_DATA_MODE = 0x0000, + RAND_FWD_MODE = 0x0010, + RAND_REV_MODE = 0x0020, + RAND_FWD_MAINT = 0x0030, + RAND_REV_MAINT = 0x0040, + DATA_EQ_ADDR = 0x0050, + ROTATE_LEFT_MODE = 0x0060, + ROTATE_RIGHT_MODE = 0x0070, +}; + +enum op_type +{ + WRITE = 0x0000, // fast, with no concurrent traffic + READ = 0x1000, // fast, with no concurrent traffic + READ_WRITE = 0x2000, + WRITE_READ = 0x3000, + READ_WRITE_READ = 0x4000, + READ_WRITE_WRITE = 0x5000, + RAND_SEQ = 0x6000, + READ_READ_WRITE = 0x8000, + SCRUB_RRWR = 0x9000, + STEER_RW = 0xA000, + ALTER = 0xB000, // (W) + DISPLAY = 0xC000, // (R, slow) + CCS_EXECUTE = 0xF000, + + // if bits 9:11 (Data Mode bits) = 000 (bits 4:8 used to specify which subtest to go to) + // Refresh only cmd if bits 9:11 (Data Mode bits) /= 000 + GOTO_SUBTEST_N = 0x7000, +}; + +static void commit_mcbist_memreg_cache(int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int reg = (tests - 1) / MCBIST_TESTS_PER_REG; + + if (reg < 0) + die("commit_mcbist_memreg_cache() called without adding tests first!\n"); + + if (reg >= MAX_MCBIST_TEST_REGS) + die("Too many MCBIST instructions added\n"); + + /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ + write_scom_for_chiplet(id, MCBMR0Q + reg, mcbist_memreg_cache); + mcbist_memreg_cache = 0; +} + +static void add_mcbist_test(int mcs_i, uint16_t test) +{ + int test_i = tests % MCBIST_TESTS_PER_REG; + if (test_i == 0 && tests != 0) + commit_mcbist_memreg_cache(mcs_i); + + /* This assumes cache is properly cleared. */ + mcbist_memreg_cache |= PPC_SHIFT(test, test_i*16 + 15); + tests++; +} + +/* + * ECC Scrubbing - theory + * + * RAM cannot hold the data indefinitely. It uses capacitors to hold the bits, + * which are constantly being drawn by leaks. To counteract this, memory has to + * be periodically refreshed, which recharges the capacitors. However, sometimes + * this happens too late, when state of capacitor has already changed (either + * electric charge was depleted, or capacitor gained additional potential from + * outside - rowhammer, radiation) up to the point where it passes the threshold + * and 0 becomes 1 or vice versa. Refresh command in that case would only make + * "borderline 1" into "strong 1", so it won't be able to fix the problem. This + * is where ECC comes in. + * + * ECC is limited in number of changed bits it can fix and detect. Because of + * that it is important that ECC is checked and possible errors are corrected + * before too many bits have flipped, and corrected values are written back to + * RAM. This is done by hardware, without software's interaction, but it can be + * informed that ECC error has happened (machine check exception). + * + * ECC is checked every time data in RAM is accessed. To check every part of RAM + * even when CPU doesn't need to read it, memory controller does the accesses in + * the background. This is called ECC scrubbing. + * + * Note that it is enough for MC to just send read commands. When everything is + * correct, data is still written back to DRAM because reading operation is + * destructive - capacitors are discharged when read and have to be charged + * again. This happens internally in DRAM, there is no need to send that data + * through the memory bus when DRAM already has it. If there was an error, MC + * automatically sends corrected data to be written. + * + * ECC scrubbing happens between RAM and MC. 
CPU doesn't participate in this
+ * process, but it may be slowed down on memory intensive operations because
+ * some of the bandwidth is used for scrubbing.
+ *
+ * TL;DR: ECC scrub is a read operation with discarded results.
+ */
+void add_scrub(int mcs_i, int port_dimm)
+{
+	uint16_t test = READ | ECC_MODE | (port_dimm << 9);
+	add_mcbist_test(mcs_i, test);
+}
+
+void add_fixed_pattern_write(int mcs_i, int port_dimm)
+{
+	/* Use ALTER instead of WRITE to use maintenance pattern. ALTER is slow. */
+	uint16_t test = WRITE | FIXED_DATA_MODE | ECC_MODE | (port_dimm << 9);
+	add_mcbist_test(mcs_i, test);
+}
+
+/*
+static void add_random_pattern_write(int port_dimm)
+{
+	uint16_t test = WRITE | RAND_FWD_MAINT | ECC_MODE | (port_dimm << 9);
+	add_mcbist_test(test);
+}
+*/
+
+/* TODO: calculate initial delays and timeouts */
+void mcbist_execute(int mcs_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	/* This is index of last instruction, not the new one. */
+	int test_i = (tests - 1) % MCBIST_TESTS_PER_REG;
+	uint64_t val;
+
+	/*
+	 * Nothing to do. Note that status register won't report "done", or will
+	 * report state of previous program instead. According to docs these bits
+	 * are writable, do we want to set them to simplify things?
+	 *
+	 * Another possibility would be to start MCBIST with single no-op test (goto
+	 * with DONE bit set), but this may unnecessarily make things slower.
+	 */
+	if (tests == 0)
+		return;
+
+	/* Check if in progress */
+	/* TODO: we could force it to stop, but dying will help with debugging */
+	if ((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & PPC_BIT(MCB_CNTLSTATQ_MCB_IP))
+		die("MCBIST in progress already (%#16.16llx), this shouldn't happen\n", val);
+
+	/*
+	 * Contrary to CCS, we don't add no-op instruction here. DONE bit has to be
+	 * set for instruction that is already present. Perhaps DONE is a poor name,
+	 * it tells that MCBIST should stop after this test, but this is how it is
+	 * named in the documentation.
+	 */
+	mcbist_memreg_cache |= PPC_BIT(13 + test_i*16);
+	commit_mcbist_memreg_cache(mcs_i);
+
+	/* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ
+	 * [0] MCB_CNTLQ_MCB_START
+	 */
+	scom_and_or_for_chiplet(id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START));
+
+	/* Wait for MCBIST to start. Test for IP and DONE, it may finish early. */
+	if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) &
+	     (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) {
+		/*
+		 * TODO: how long do we want to wait? Hostboot uses 10*100us polling,
+		 * but so far it seems to always be already started on the first read.
+		 */
+		udelay(1);
+		if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) &
+		     (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0)
+			die("MCBIST failed (%#16.16llx) to start twice\n", val);
+
+		/* Check if this is needed. Do not move before test, it impacts delay! */
+		printk(BIOS_INFO, "MCBIST started after delay\n");
+	}
+
+	tests = 0;
+}
+
+/*
+ * FIXME: 0x07012300[10] MCBIST_PROGRAM_COMPLETE should be checked instead. It
+ * gets set when MCBIST is paused, while 0x070123DC[0] IP stays on in that case.
+ * This may become a problem for 3DS DIMMs.
+ */
+int mcbist_is_done(int mcs_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	uint64_t val = read_scom_for_chiplet(id, MCB_CNTLSTATQ);
+
+	/* Still in progress */
+	if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_IP))
+		return 0;
+
+	/* Not sure if DONE and FAIL can be set at the same time, check FAIL first */
+	if ((val & PPC_BIT(MCB_CNTLSTATQ_MCB_FAIL)) || val == 0)
+		die("MCBIST error (%#16.16llx)\n", val);
+
+	/* Finished */
+	if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))
+		return 1;
+
+	/* Is it even possible to get here? */
+	return 0;
+}
diff --git a/src/soc/ibm/power9/mcbist.h b/src/soc/ibm/power9/mcbist.h
new file mode 100644
index 00000000000..a7d686feb1c
--- /dev/null
+++ b/src/soc/ibm/power9/mcbist.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SOC_IBM_POWER9_MCBIST_H
+#define __SOC_IBM_POWER9_MCBIST_H
+
+void add_scrub(int mcs_i, int port_dimm);
+void add_fixed_pattern_write(int mcs_i, int port_dimm);
+
+void mcbist_execute(int mcs_i);
+int mcbist_is_done(int mcs_i);
+
+#endif /* __SOC_IBM_POWER9_MCBIST_H */
From 74ba232342d90eb8a9edc39faa2ee1fff43158c9 Mon Sep 17 00:00:00 2001
From: Krystian Hebel
Date: Wed, 8 Sep 2021 17:17:12 +0200
Subject: [PATCH 048/213] include/cpu/power/scom.h: introduce DEBUG_SCOM config option

Signed-off-by: Krystian Hebel
Change-Id: Ib16a313db9697d6fe1f447ce3dc53de85abfdff1
---
 src/cpu/power9/Kconfig.debug_cpu | 10 ++++++++++
 src/include/cpu/power/scom.h     | 21 +++++++++++++++++++++
 src/soc/ibm/power9/ccs.c         |  3 +++
 src/soc/ibm/power9/i2c.c         |  3 +++
 src/soc/ibm/power9/mcbist.c      |  3 +++
 5 files changed, 40 insertions(+)
 create mode 100644 src/cpu/power9/Kconfig.debug_cpu

diff --git a/src/cpu/power9/Kconfig.debug_cpu b/src/cpu/power9/Kconfig.debug_cpu
new file mode 100644
index 00000000000..b3679f05026
--- /dev/null
+++ b/src/cpu/power9/Kconfig.debug_cpu
@@ -0,0 +1,10 @@
+config DEBUG_SCOM
+	bool "Print SCOM accesses" if DEFAULT_CONSOLE_LOGLEVEL_8 || CONSOLE_OVERRIDE_LOGLEVEL
+	default n
+	help
+	  This option enables SCOM debug messages.
+
+	  Some accesses are time-critical or happen too often to be logged.
+	  In such cases, '#define SKIP_SCOM_DEBUG' before first inclusion of
+	  scom.h can be used to disable logging for a given file. Messages
+	  are currently suppressed for CCS, MCBIST and I2C.
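The opt-out pattern described in the help text looks like this in a translation unit (a minimal sketch; the include path is inferred from the header's location under src/include, and the ccs.c, i2c.c and mcbist.c hunks later in this patch apply exactly this):

    /* Debug is too slow here, hits timeouts */
    #define SKIP_SCOM_DEBUG		/* must come before the first include of scom.h */
    #include <cpu/power/scom.h>
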
diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index 00f62b28b83..483c820ea5a 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -113,6 +113,27 @@ static inline uint64_t read_scom(uint64_t addr) return read_scom_direct(addr); } +#if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) +#include + +#define write_scom(x, y) \ +({ \ + uint64_t __xw = x; \ + uint64_t __yw = y; \ + printk(BIOS_SPEW, "SCOM W %16.16llX %16.16llX\n", __xw, __yw); \ + write_scom((__xw), (__yw)); \ +}) + +#define read_scom(x) \ +({ \ + uint64_t __xr = x; \ + uint64_t __yr = read_scom(__xr); \ + printk(BIOS_SPEW, "SCOM R %16.16llX %16.16llX\n", __xr, __yr); \ + __yr; \ +}) + +#endif + static inline void scom_and_or(uint64_t addr, uint64_t and, uint64_t or) { uint64_t data = read_scom(addr); diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index 81d81a4802a..247a11af7ec 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* Debug is too slow here, hits timeouts */ +#define SKIP_SCOM_DEBUG + #include #include #include diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c index db190a1b522..86477fb2b32 100644 --- a/src/soc/ibm/power9/i2c.c +++ b/src/soc/ibm/power9/i2c.c @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* Debugging every access takes too much time */ +#define SKIP_SCOM_DEBUG + #include #include #include diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c index 36e1e9b0f22..f635119cd99 100644 --- a/src/soc/ibm/power9/mcbist.c +++ b/src/soc/ibm/power9/mcbist.c @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* Debug is too slow here, hits timeouts */ +#define SKIP_SCOM_DEBUG + #include #include #include From cb3f530df89586fc0e06837c4d10e09ebcab83a0 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 16 Jun 2021 20:10:45 +0200 Subject: [PATCH 049/213] include/cpu/power: get DD version from SCOM instead of PVR Value in PVR does not specify DD level. 
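The DD level is read from SCOM register 0xF000F instead and decoded by the new
get_dd() helper: as the helper decodes it, the major nibble sits in bits 0-3 and
the minor nibble in bits 8-11 of the raw value (big-endian bit numbering). A
worked example with a hypothetical raw value:

    raw = 0x2000000000000000
    (raw >> 52) & 0x0F == 0x00    /* minor */
    (raw >> 56) & 0xF0 == 0x20    /* major */
    get_dd() == 0x00 | 0x20 == 0x20, i.e. DD2.0 (the value istep_14_1.c compares against)
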
Signed-off-by: Krystian Hebel Change-Id: I90744f810574a34b35c867168a04d218b93d522b --- src/include/cpu/power/scom.h | 7 +++++++ src/include/cpu/power/spr.h | 5 ----- src/soc/ibm/power9/istep_14_1.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index 483c820ea5a..b55a03c2368 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -181,5 +181,12 @@ static inline void scom_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint scom_and_or_for_chiplet(chiplet, addr, ~0, or); } +static inline uint8_t get_dd(void) +{ + uint64_t val = read_scom(0xF000F); + val = ((val >> 52) & 0x0F) | ((val >> 56) & 0xF0); + return (uint8_t) val; +} + #endif /* __ASSEMBLER__ */ #endif /* CPU_PPC64_SCOM_H */ diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index ef3c734a517..ef832abbf67 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -81,10 +81,5 @@ static inline void write_msr(uint64_t val) asm volatile("mtmsrd %0" :: "r"(val) : "memory"); } -static inline uint64_t pvr_revision(void) -{ - return read_spr(SPR_PVR) & SPR_PVR_REV_MASK; -} - #endif /* __ASSEMBLER__ */ #endif /* CPU_PPC64_SPR_H */ diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index f85ecec2801..6ccc797655c 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -13,7 +13,7 @@ static void fir_unmask(int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; int mca_i; - const int is_dd20 = pvr_revision() == SPR_PVR_REV(2, 0); + const int is_dd20 = get_dd() == 0x20; /* Bits in other registers (act0, mask) are already set properly. MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 // checkstop (0,0,0) From 57bc231fa723cc7dec4618aa94acee1ffa8f48e2 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 17 Jun 2021 14:10:38 +0200 Subject: [PATCH 050/213] soc/power9/homer.c: begin filling HOMER - QPMR Signed-off-by: Krystian Hebel Signed-off-by: Sergii Dmytruk Change-Id: Ic8c713124dad2543e3e1d483e0f6cdecaabf3048 --- src/include/cpu/power/rom_media.h | 11 ++ src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/chip.c | 23 +++- src/soc/ibm/power9/chip.h | 11 ++ src/soc/ibm/power9/homer.c | 149 +++++++++++++++++++++ src/soc/ibm/power9/homer.h | 216 ++++++++++++++++++++++++++++++ src/soc/ibm/power9/rom_media.c | 6 +- src/soc/ibm/power9/xip.h | 85 ++++++++++++ 8 files changed, 498 insertions(+), 4 deletions(-) create mode 100644 src/include/cpu/power/rom_media.h create mode 100644 src/soc/ibm/power9/chip.h create mode 100644 src/soc/ibm/power9/homer.c create mode 100644 src/soc/ibm/power9/homer.h create mode 100644 src/soc/ibm/power9/xip.h diff --git a/src/include/cpu/power/rom_media.h b/src/include/cpu/power/rom_media.h new file mode 100644 index 00000000000..630d7129100 --- /dev/null +++ b/src/include/cpu/power/rom_media.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ROM_MEDIA_H +#define CPU_PPC64_ROM_MEDIA_H + +struct mmap_helper_region_device; + +void mount_part_from_pnor(const char *part_name, + struct mmap_helper_region_device *mdev); + +#endif // CPU_PPC64_ROM_MEDIA_H diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index d27ea7c43a8..5723e949c07 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -24,6 +24,7 @@ romstage-y += ccs.c romstage-y += mcbist.c romstage-y += timer.c ramstage-y += chip.c +ramstage-y 
+= homer.c ramstage-y += rom_media.c ramstage-y += timer.c ramstage-y += istep_18_11.c diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 3b01ed2e71c..a3546a08533 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -3,7 +3,9 @@ #include #include #include + #include "istep_13_scom.h" +#include "chip.h" #define SIZE_MASK PPC_BITMASK(13,23) #define SIZE_SHIFT (63 - 23) @@ -26,6 +28,7 @@ static inline unsigned long size_k(uint64_t reg) static void enable_soc_dev(struct device *dev) { int mcs_i, idx = 0; + unsigned long reserved_size, top = 0; for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { uint64_t reg; @@ -34,15 +37,31 @@ static void enable_soc_dev(struct device *dev) /* These registers are undocumented, see istep 14.5. */ /* MCS_MCFGP */ reg = read_scom_for_chiplet(nest, 0x0501080A); - if (reg & PPC_BIT(0)) + if (reg & PPC_BIT(0)) { ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); + if (base_k(reg) + size_k(reg) > top) + top = base_k(reg) + size_k(reg); + } /* MCS_MCFGPM */ reg = read_scom_for_chiplet(nest, 0x0501080C); - if (reg & PPC_BIT(0)) + if (reg & PPC_BIT(0)) { ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); + if (base_k(reg) + size_k(reg) > top) + top = base_k(reg) + size_k(reg); + } } + /* + * Reserve top 8M (OCC common area) + 4M (HOMER). + * + * TODO: 8M + (4M per CPU), hostboot reserves always 8M + 8 * 4M. + */ + reserved_size = 8*1024 + 4*1024 /* * num_of_cpus */; + top -= reserved_size; + reserved_ram_resource_kb(dev, idx++, top, reserved_size); + build_homer_image((void *)(top * 1024)); + istep_18_11(); istep_18_12(); } diff --git a/src/soc/ibm/power9/chip.h b/src/soc/ibm/power9/chip.h new file mode 100644 index 00000000000..d6ce11b3609 --- /dev/null +++ b/src/soc/ibm/power9/chip.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_CHIP_H +#define __SOC_IBM_POWER9_CHIP_H + +struct soc_ibm_power9_config { +}; + +void build_homer_image(void *homer_bar); + +#endif /* __SOC_CAVIUM_CN81XX_CHIP_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c new file mode 100644 index 00000000000..1f7b2b40604 --- /dev/null +++ b/src/soc/ibm/power9/homer.c @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include // memset, memcpy + +#include "chip.h" +#include "homer.h" +#include "xip.h" + +static size_t copy_section(void *dst, struct xip_section *section, void *base, + uint8_t dd) +{ + if (!section->dd_support) { + memcpy(dst, base + section->offset, section->size); + return section->size; + } + + struct dd_container *cont = base + section->offset; + int i; + + assert(cont->magic == DD_CONTAINER_MAGIC); + for (i = 0; i < cont->num; i++) { + if (cont->blocks[i].dd == dd) { + memcpy(dst, (void *)cont + cont->blocks[i].offset, + cont->blocks[i].size); + return cont->blocks[i].size; + } + } + + die("XIP: Can't find container for DD=%x\n", dd); +} + +static void build_spge(struct homer_st *homer, struct xip_sgpe_header *sgpe, + uint8_t dd) +{ + struct sgpe_img_header *hdr; + size_t size; + + assert(sgpe->magic == XIP_MAGIC_SGPE); + + /* SGPE header */ + size = copy_section(&homer->qpmr.sgpe.header, &sgpe->qpmr, sgpe, dd); + assert(size <= sizeof(struct qpmr_header)); + + /* + * 0xFFF00000 (SRAM base) + 4k (IPC) + 60k (GPE0) + 64k (GPE1) + 50k (PGPE) + * + 2k (aux) + 2k (shared) = 0xFFF2D800 + * + * WARNING: I have no idea if this is constant or depends on SGPE version. 
+ */ + assert(homer->qpmr.sgpe.header.sram_region_start == 0xFFF2D800); + assert(homer->qpmr.sgpe.header.sram_region_size == SGPE_SRAM_IMG_SIZE); + /* + * Apart from these the only filled fields (same values for all DDs) are: + * - magic ("XIP SGPE") + * - build_date + * - build_ver + * - img_offset (0x0a00, overwritten with the same value later by code) + * - img_len (0xbf64, ~49kB, overwritten with the same value later by code) + */ + + /* SGPE L1 bootloader */ + size = copy_section(&homer->qpmr.sgpe.l1_bootloader, &sgpe->l1_bootloader, + sgpe, dd); + homer->qpmr.sgpe.header.l1_offset = offsetof(struct qpmr_st, + sgpe.l1_bootloader); + assert(size <= GPE_BOOTLOADER_SIZE); + + /* SGPE L2 bootloader */ + size = copy_section(&homer->qpmr.sgpe.l2_bootloader, &sgpe->l2_bootloader, + sgpe, dd); + homer->qpmr.sgpe.header.l2_offset = offsetof(struct qpmr_st, + sgpe.l2_bootloader); + homer->qpmr.sgpe.header.l2_len = size; + assert(size <= GPE_BOOTLOADER_SIZE); + + /* SGPE HCODE */ + size = copy_section(&homer->qpmr.sgpe.sram_image, &sgpe->hcode, sgpe, dd); + homer->qpmr.sgpe.header.img_offset = offsetof(struct qpmr_st, + sgpe.sram_image); + homer->qpmr.sgpe.header.img_len = size; + assert(size <= SGPE_SRAM_IMG_SIZE); + + /* Cache SCOM region */ + homer->qpmr.sgpe.header.scom_offset = + offsetof(struct qpmr_st, cache_scom_region); + homer->qpmr.sgpe.header.scom_len = CACHE_SCOM_REGION_SIZE; + + /* Update SRAM image header */ + hdr = (struct sgpe_img_header *) + &homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + hdr->ivpr_addr = homer->qpmr.sgpe.header.sram_region_start; + hdr->cmn_ring_occ_offset = homer->qpmr.sgpe.header.img_len; + hdr->cmn_ring_ovrd_occ_offset = 0; + hdr->spec_ring_occ_offset = 0; + hdr->scom_offset = 0; + /* Nest frequency divided by 64. */ + hdr->timebase_hz = (1866 * MHz) / 64; + + /* SGPE auxiliary functions */ + /* + * TODO: check if it is really enabled. This comes from hostboot attributes, + * but I don't know if/where those are set. + */ + hdr->aux_control = 1 << 24; + /* + * 0x80000000 (HOMER in OCI PBA memory space) + 1M (QPMR offset) + * + 512k (offset to aux) + * + * This probably is full address and not offset. + */ + hdr->aux_offset = 0x80000000 + offsetof(struct homer_st, qpmr.aux); + hdr->aux_len = CACHE_SCOM_AUX_SIZE; + homer->qpmr.sgpe.header.enable_24x7_ima = 1; + /* Here offset is relative to QPMR */ + homer->qpmr.sgpe.header.aux_offset = offsetof(struct qpmr_st, aux); + homer->qpmr.sgpe.header.aux_len = CACHE_SCOM_AUX_SIZE; +} + +/* + * This logic is for SMF disabled only! + */ +void build_homer_image(void *homer_bar) +{ + struct mmap_helper_region_device mdev = {0}; + struct homer_st *homer = homer_bar; + struct xip_hw_header *hw = homer_bar; + uint8_t dd = get_dd(); + + printk(BIOS_ERR, "DD%2.2x\n", dd); + + memset(homer_bar, 0, 4 * MiB); + + /* + * This will work as long as we don't call mmap(). mmap() calls + * mem_poll_alloc() which doesn't check if mdev->pool is valid or at least + * not NULL. + */ + mount_part_from_pnor("HCODE", &mdev); + /* First MB of HOMER is unused, we can write OCC image from PNOR there. 
*/ + rdev_readat(&mdev.rdev, homer_bar, 0, 1 * MiB); + + build_spge(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), + dd); +} diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h new file mode 100644 index 00000000000..94cb642a45c --- /dev/null +++ b/src/soc/ibm/power9/homer.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_HOMER_H +#define __SOC_IBM_POWER9_HOMER_H + +#include + +/* All fields are big-endian */ + +#define HOMER_ONE_REGION_SIZE (1 * MiB) + +/* + * OCC complex shares 768 kB SRAM, according to Figure 23-3 + * https://wiki.raptorcs.com/w/images/c/ce/POWER9_um_OpenPOWER_v21_10OCT2019_pub.pdf + */ +#define SGPE_SRAM_IMG_SIZE (74 * KiB) +#define PGPE_SRAM_IMG_SIZE (50 * KiB) + +/* According to above figure, CMEs have 32 kB SRAM each, how does it fit? */ +#define CME_SRAM_IMG_SIZE (64 * KiB) +#define GPE_BOOTLOADER_SIZE (1 * KiB) + +/* + * This is how CACHE_SCOM_REGION_SIZE is defined in hostboot. On the other hand, + * hostboot defines that quad has 256 entries, 16 bytes each. This gives 4kB per + * quad, and there are 6 quads (maximum) on POWER9 CPU, which gives 24kB total. + * One of the values is obviously wrong, but because this region is immediately + * followed by a padding it should not overwrite anything important. This is one + * of the reasons to clear whole HOMER, not just the used parts. + */ +#define CACHE_SCOM_REGION_SIZE (6 * KiB) +#define CACHE_SCOM_REGION_OFFSET (128 * KiB) +#define CACHE_SCOM_AUX_SIZE (64 * KiB) +#define CACHE_SCOM_AUX_OFFSET (512 * KiB) +#define SELF_RESTORE_REGION_SIZE (9 * KiB) +#define CORE_SCOM_RESTORE_SIZE (6 * KiB) +#define CORE_SCOM_RESTORE_OFFSET (256 * KiB) +#define PGPE_AUX_TASK_SIZE (2 * KiB) +#define PGPE_OCC_SHARED_SRAM_SIZE (2 * KiB) +#define PGPE_DOPTRACE_SIZE (64 * KiB) +#define PGPE_DOPTRACE_OFFSET (64 * KiB) +#define OCC_PARAM_BLOCK_REGION_SIZE (16 * KiB) +#define OCC_PARAM_BLOCK_REGION_OFFSET (128 * KiB) +#define PSTATE_OUTPUT_TABLES_SIZE (16 * KiB) +#define OCC_WOF_TABLES_SIZE (256 * KiB) +#define OCC_WOF_TABLES_OFFSET (768 * KiB) +#define PPMR_HEADER_SIZE (1 * KiB) + +/* =================== QPMR =================== */ + +struct qpmr_header { + uint64_t magic; /* "SGPE_1.0" */ + uint32_t l1_offset; + uint32_t reserved; + uint32_t l2_offset; + uint32_t l2_len; + uint32_t build_date; + uint32_t build_ver; + uint64_t reserved_flags; + uint32_t img_offset; + uint32_t img_len; + uint32_t common_ring_offset; + uint32_t common_ring_len; + uint32_t common_ovrd_offset; + uint32_t common_ovrd_len; + uint32_t spec_ring_offset; + uint32_t spec_ring_len; + uint32_t scom_offset; + uint32_t scom_len; + uint32_t aux_offset; + uint32_t aux_len; + uint32_t stop_ffdc_offset; + uint32_t stop_ffdc_len; + uint32_t boot_prog_code; + uint32_t sram_img_size; + uint32_t max_quad_restore_entry; + uint32_t enable_24x7_ima; + uint32_t sram_region_start; + uint32_t sram_region_size; +} __attribute__((packed, aligned(512))); + +/* This header is part of SRAM image, it starts after interrupt vectors. 
*/ +#define INT_VECTOR_SIZE 384 +struct sgpe_img_header { + uint64_t magic; + uint32_t reset_addr; + uint32_t reserve1; + uint32_t ivpr_addr; + uint32_t timebase_hz; + uint32_t build_date; + uint32_t build_ver; + uint32_t reserve_flags; + uint16_t location_id; + uint16_t addr_extension; + uint32_t cmn_ring_occ_offset; + uint32_t cmn_ring_ovrd_occ_offset; + uint32_t spec_ring_occ_offset; + uint32_t scom_offset; + uint32_t scom_mem_offset; + uint32_t scom_mem_len; + uint32_t aux_offset; + uint32_t aux_len; + uint32_t aux_control; + uint32_t reserve4; + uint64_t chtm_mem_cfg; +}; + +struct sgpe_st { + struct qpmr_header header; + uint8_t l1_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t l2_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t sram_image[SGPE_SRAM_IMG_SIZE]; +}; + +check_member(sgpe_st, l1_bootloader, 512); + +struct qpmr_st { + struct sgpe_st sgpe; + uint8_t pad1[CACHE_SCOM_REGION_OFFSET - sizeof(struct sgpe_st)]; + uint8_t cache_scom_region[CACHE_SCOM_REGION_SIZE]; + uint8_t pad2[CACHE_SCOM_AUX_OFFSET + -(CACHE_SCOM_REGION_OFFSET + CACHE_SCOM_REGION_SIZE)]; + uint8_t aux[CACHE_SCOM_AUX_SIZE]; +}; + +check_member(qpmr_st, cache_scom_region, 128 * KiB); +check_member(qpmr_st, aux, 512 * KiB); + +/* =================== CPMR =================== */ + +struct cpmr_header { + uint32_t attn_opcodes[2]; + uint64_t magic; /* "CPMR_2.0" */ + uint32_t build_date; + uint32_t version; + uint8_t reserved_flags[4]; + uint8_t self_restore_ver; + uint8_t stop_api_ver; + uint8_t urmor_fix; + uint8_t fused_mode_status; + uint32_t img_offset; + uint32_t img_len; + uint32_t cme_common_ring_offset; + uint32_t cme_common_ring_len; + uint32_t cme_pstate_offset; + uint32_t cme_pstate_len; + uint32_t core_spec_ring_offset; // = real offset / 32 + uint32_t core_spec_ring_len; // = real length / 32 + uint32_t core_scom_offset; + uint32_t core_scom_len; + uint32_t core_self_restore_offset; + uint32_t core_self_restore_len; + uint32_t core_max_scom_entry; + uint32_t quad0_pstate_offset; + uint32_t quad1_pstate_offset; + uint32_t quad2_pstate_offset; + uint32_t quad3_pstate_offset; + uint32_t quad4_pstate_offset; + uint32_t quad5_pstate_offset; +} __attribute__((packed, aligned(256))); + +struct cpmr_st { + struct cpmr_header header; + uint8_t exe[SELF_RESTORE_REGION_SIZE - sizeof(struct cpmr_header)]; + /* This is 96kB followed by a padding in hostboot, KISS. */ + uint8_t core_self_restore[CORE_SCOM_RESTORE_OFFSET + - SELF_RESTORE_REGION_SIZE]; + uint8_t core_scom[CORE_SCOM_RESTORE_SIZE]; + uint8_t cme_sram_region[CME_SRAM_IMG_SIZE]; +}; + +check_member(cpmr_st, core_self_restore, 9 * KiB); +check_member(cpmr_st, core_scom, 256 * KiB); + +/* =================== PPMR =================== */ + +struct ppmr_st { + uint8_t header[PPMR_HEADER_SIZE]; + uint8_t l1_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t l2_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t pgpe_sram_img[PGPE_SRAM_IMG_SIZE]; + uint8_t aux_task[PGPE_AUX_TASK_SIZE]; + uint8_t pad0[OCC_PARAM_BLOCK_REGION_OFFSET - (PPMR_HEADER_SIZE + + GPE_BOOTLOADER_SIZE + GPE_BOOTLOADER_SIZE + + PGPE_SRAM_IMG_SIZE + PGPE_AUX_TASK_SIZE)]; + /* + * Two following fields in hostboot have different sizes, but are padded + * to 16kB each anyway. There are two consecutive paddings between + * pstate_table and wof_tables in hostboot. 
+ */ + uint8_t occ_parm_block[OCC_PARAM_BLOCK_REGION_SIZE]; + uint8_t pstate_table[PSTATE_OUTPUT_TABLES_SIZE]; + uint8_t pad1[OCC_WOF_TABLES_OFFSET - (OCC_PARAM_BLOCK_REGION_OFFSET + + OCC_PARAM_BLOCK_REGION_SIZE + PSTATE_OUTPUT_TABLES_SIZE)]; + uint8_t wof_tables[OCC_WOF_TABLES_SIZE]; +}; + +check_member(ppmr_st, occ_parm_block, 128 * KiB); +check_member(ppmr_st, pstate_table, 144 * KiB); +check_member(ppmr_st, wof_tables, 768 * KiB); + +struct homer_st { + uint8_t occ_host_area[HOMER_ONE_REGION_SIZE]; + struct qpmr_st qpmr; + uint8_t pad_qpmr[HOMER_ONE_REGION_SIZE - sizeof(struct qpmr_st)]; + struct cpmr_st cpmr; + uint8_t pad_cpmr[HOMER_ONE_REGION_SIZE - sizeof(struct cpmr_st)]; + struct ppmr_st ppmr; + uint8_t pad_ppmr[HOMER_ONE_REGION_SIZE - sizeof(struct ppmr_st)]; +}; + +check_member(homer_st, qpmr, 1 * MiB); +check_member(homer_st, cpmr, 2 * MiB); +check_member(homer_st, ppmr, 3 * MiB); + +#endif /* __SOC_IBM_POWER9_HOMER_H */ diff --git a/src/soc/ibm/power9/rom_media.c b/src/soc/ibm/power9/rom_media.c index 41c110f5deb..3f9014a1fc6 100644 --- a/src/soc/ibm/power9/rom_media.c +++ b/src/soc/ibm/power9/rom_media.c @@ -1,5 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include + #include #include #include @@ -330,8 +332,8 @@ struct region_device_ops ecc_rdev_ops = { .readat = ecc_readat, }; -static void mount_part_from_pnor(const char *part_name, - struct mmap_helper_region_device *mdev) +void mount_part_from_pnor(const char *part_name, + struct mmap_helper_region_device *mdev) { size_t base, size; unsigned int i, block_size, entry_count = 0; diff --git a/src/soc/ibm/power9/xip.h b/src/soc/ibm/power9/xip.h new file mode 100644 index 00000000000..c02e4e60027 --- /dev/null +++ b/src/soc/ibm/power9/xip.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_XIP_H +#define __SOC_IBM_POWER9_XIP_H + +#define XIP_MAGIC_HW (0x5849502020204857) // "XIP HW" +#define XIP_MAGIC_SGPE (0x5849502053475045) // "XIP SGPE" + +/* All fields are big-endian */ + +struct xip_section { + uint32_t offset; + uint32_t size; + uint8_t alignment; + uint8_t dd_support; + uint8_t reserved8[2]; +}; + +/* Each XIP header holds 15 XIP sections, some of them are sometimes unused. */ +#define XIP_HEADER_COMMON_FIELDS_TOP \ + uint64_t magic; \ + uint64_t l1_addr; \ + uint64_t l2_addr; \ + uint64_t kernel_addr; \ + uint64_t link_address; \ + uint64_t reserved64[3]; \ + struct xip_section sections[5]; + +#define XIP_HEADER_COMMON_FIELDS_BOTTOM \ + uint32_t image_size; \ + /* In yyyymmdd format, e.g. 20110630, when read as decimal, not hex */ \ + uint32_t build_date; \ + /* In hhmm format, e.g. 0756 */ \ + uint32_t build_time; \ + char build_tag[20]; \ + uint8_t header_version; \ + uint8_t normalized; \ + uint8_t toc_sorted; \ + uint8_t reserved8[5]; \ + char build_user[16]; \ + char build_host[40]; \ + char reserved_char[8]; \ + +struct xip_hw_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section sgpe; + struct xip_section restore; + struct xip_section cme; + struct xip_section pgpe; + struct xip_section ioppe; + struct xip_section fppe; + struct xip_section rings; + struct xip_section overlays; + struct xip_section unused[2]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +struct xip_sgpe_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section qpmr; + struct xip_section l1_bootloader; + struct xip_section l2_bootloader; + struct xip_section hcode; + struct xip_section unused[6]; /* Pad to 15 sections. 
*/ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +#define DD_CONTAINER_MAGIC 0x4444434F // "DDCO" + +struct dd_block { + uint32_t offset; + uint32_t size; + uint8_t dd; + uint8_t reserved[3]; +}; + +struct dd_container { + uint32_t magic; + uint8_t num; + uint8_t reserved[3]; + struct dd_block blocks[0]; +}; + + +#endif /* __SOC_IBM_POWER9_XIP_H */ From c0023d64e49299a039cdadc237c42634dd2724eb Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 18 Jun 2021 19:14:50 +0200 Subject: [PATCH 051/213] soc/power9/homer.c: build self restore image Signed-off-by: Krystian Hebel Change-Id: Iac91b7aea7a13ed7c54c5ebe5d38d3ddb67fad20 --- src/include/cpu/power/spr.h | 8 ++ src/soc/ibm/power9/homer.c | 233 ++++++++++++++++++++++++++++++++++++ src/soc/ibm/power9/homer.h | 16 ++- src/soc/ibm/power9/xip.h | 9 ++ 4 files changed, 263 insertions(+), 3 deletions(-) diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index ef832abbf67..20b07471e8d 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -9,6 +9,9 @@ #define SPR_DEC_IMPLEMENTED_BITS 56 #define SPR_DEC_LONGEST_TIME ((1ull << (SPR_DEC_IMPLEMENTED_BITS - 1)) - 1) +#define SPR_DAWR 0xB4 +#define SPR_CIABR 0xBB +#define SPR_DAWRX 0xBC #define SPR_TB 0x10C #define SPR_PVR 0x11F @@ -27,6 +30,7 @@ #define SPR_LPCR_HDICE PPC_BIT(63) #define SPR_HMER 0x150 +#define SPR_HMEER 0x151 /* Bits in HMER/HMEER */ #define SPR_HMER_MALFUNCTION_ALERT PPC_BIT(0) #define SPR_HMER_PROC_RECV_DONE PPC_BIT(2) @@ -44,6 +48,10 @@ #define SPR_HMER_XSCOM_STATUS PPC_BITMASK(21, 23) #define SPR_HMER_XSCOM_OCCUPIED PPC_BIT(23) +#define SPR_PTCR 0x1D0 +#define SPR_PSSCR 0x357 +#define SPR_PMCR 0x374 + #ifndef __ASSEMBLER__ #include diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 1f7b2b40604..4e443d2ad91 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -121,6 +121,231 @@ static void build_spge(struct homer_st *homer, struct xip_sgpe_header *sgpe, homer->qpmr.sgpe.header.aux_len = CACHE_SCOM_AUX_SIZE; } +static const uint32_t _SMF = 0x5F534D46; // "_SMF" + +static const uint32_t ATTN_OP = 0x00000200; +static const uint32_t BLR_OP = 0x4E800020; +static const uint32_t SKIP_SPR_REST_INST = 0x4800001C; +static const uint32_t MR_R0_TO_R10_OP = 0x7C0A0378; +static const uint32_t MR_R0_TO_R21_OP = 0x7C150378; +static const uint32_t MR_R0_TO_R9_OP = 0x7C090378; +static const uint32_t MTLR_R30_OP = 0x7FC803A6; +static const uint32_t MFLR_R30_OP = 0x7FC802A6; + +static const uint32_t init_cpureg_template[] = { + 0x63000000, /* ori %r24, %r0, 0 */ /* |= spr, key for lookup */ + 0x7C000278, /* xor %r0, %r0, %r0 */ + 0x64000000, /* oris %r0, %r0, 0 */ /* |= val >> 48 */ + 0x60000000, /* ori %r0, %r0, 0 */ /* |= (val >> 32) & 0x0000FFFF */ + 0x780007C6, /* rldicr %r0, %r0, 32, 31 */ + 0x64000000, /* oris %r0, %r0, 0 */ /* |= (val >> 16) & 0x0000FFFF */ + 0x60000000, /* ori %r0, %r0, 0 */ /* |= val & 0x0000FFFF */ + 0x7C0003A6, /* mtspr 0, %r0 */ /* |= spr, encoded */ +}; + +/* + * These SPRs are not described in PowerISA 3.0B. + * MSR is not SPR, but self restore code treats it this way. 
+ */ +#define SPR_USPRG0 0x1F0 +#define SPR_USPRG1 0x1F1 +#define SPR_URMOR 0x1F9 +#define SPR_SMFCTRL 0x1FF +#define SPR_LDBAR 0x352 +#define SPR_HID 0x3F0 +#define SPR_MSR 0x7D0 + +static void add_init_cpureg_entry(uint32_t *base, uint16_t spr, uint64_t val, + int init) +{ + while ((*base != (init_cpureg_template[0] | spr)) && *base != BLR_OP) + base++; + + /* Must change next instruction from attn to blr when adding new entry. */ + if (*base == BLR_OP) + *(base + ARRAY_SIZE(init_cpureg_template)) = BLR_OP; + + memcpy(base, init_cpureg_template, sizeof(init_cpureg_template)); + base[0] |= spr; + + if (init) { + base[1] = SKIP_SPR_REST_INST; + } else { + base[2] |= (val >> 48) & 0xFFFF; + base[3] |= (val >> 32) & 0xFFFF; + base[5] |= (val >> 16) & 0xFFFF; + base[6] |= val & 0xFFFF; + } + + /* Few exceptions are handled differently. */ + if (spr == SPR_MSR) { + base[7] = MR_R0_TO_R21_OP; + } else if (spr == SPR_HRMOR) { + base[7] = MR_R0_TO_R10_OP; + } else if (spr == SPR_URMOR) { + base[7] = MR_R0_TO_R9_OP; + } else { + base[7] |= ((spr & 0x1F) << 16) | ((spr & 0x3E0) << 6); + } +} + +static const uint32_t init_save_self_template[] = { + 0x60000000, /* ori %r0, %r0, 0 */ /* |= i */ + 0x3BFF0020, /* addi %r31, %r31, 0x20 */ + 0x60000000, /* nop (ori %r0, %r0, 0) */ +}; + +/* Honestly, I have no idea why saving uses different key than restoring... */ +static void add_init_save_self_entry(uint32_t **ptr, int i) +{ + memcpy(*ptr, init_save_self_template, sizeof(init_save_self_template)); + **ptr |= i; + *ptr += ARRAY_SIZE(init_save_self_template); +} + +static const uint16_t thread_sprs[] = { + SPR_CIABR, + SPR_DAWR, + SPR_DAWRX, + SPR_HSPRG0, + SPR_LDBAR, + SPR_LPCR, + SPR_PSSCR, + SPR_MSR, + SPR_SMFCTRL, + SPR_USPRG0, + SPR_USPRG1, +}; + +static const uint16_t core_sprs[] = { + SPR_HRMOR, + SPR_HID, + SPR_HMEER, + SPR_PMCR, + SPR_PTCR, + SPR_URMOR, +}; + +static void build_self_restore(struct homer_st *homer, + struct xip_restore_header *rest, uint8_t dd) +{ + /* Assumptions: SMT4 only, SMF available but disabled. */ + size_t size; + uint32_t *ptr; + + /* + * Data in XIP has its first 256 bytes zeroed, reserved for header, so even + * though this is exe part of self restore region, we should copy it to + * header's address. + */ + size = copy_section(&homer->cpmr.header, &rest->self, rest, dd); + + /* Now, overwrite header. */ + size = copy_section(&homer->cpmr.header, &rest->cpmr, rest, dd); + assert(size <= sizeof(struct cpmr_header)); + + /* + * According to comment in p9_hcode_image_build.C it is for Nimbus >= DD22. + * Earlier versions do things differently. For now die(), implement if + * needed. + * + * If _SMF doesn't exist: + * - fill memory from (CPMR + 8k + 256) for 192k with ATTN + * - starting from the beginning of that region change instruction at every + * 2k bytes into BLR + * + * If _SMF exists: + * - fill CPMR.core_self_restore with ATTN instructions + * - for every core: + * - change every thread's restore first instruction (at 0, 512, 1024, + * 1536 bytes) to BLR + * - change core's restore first instruction (at 3k) to BLR + */ + if (*(uint32_t *)&homer->cpmr.exe[0x1300 - sizeof(struct cpmr_header)] != + _SMF) + die("No _SMF magic number in self restore region\n"); + + ptr = (uint32_t *)homer->cpmr.core_self_restore; + for (size = 0; size < (96 * KiB) / sizeof(uint32_t); size++) { + ptr[size] = ATTN_OP; + } + + /* + * This loop combines two functions from hostboot: + * initSelfRestoreRegion() and initSelfSaveRestoreEntries(). 
The second one + * writes only sections for functional cores, code below does it for all. + * This will take more time, but makes the code easier to understand. + * + * TODO: check if we can skip both cpureg and save_self for nonfunctional + * cores + */ + for (int core = 0; core < MAX_CORES; core++) { + struct smf_core_self_restore *csr = &homer->cpmr.core_self_restore[core]; + uint32_t *csa = csr->core_save_area; + + for (int thread = 0; thread < 4; thread++) { + csr->thread_restore_area[thread][0] = BLR_OP; + uint32_t *tsa = csr->thread_save_area[thread]; + *tsa++ = MFLR_R30_OP; + + for (int i = 0; i < ARRAY_SIZE(thread_sprs); i++) { + /* + * Hostboot uses strange calculation for *_save_area keys. + * I don't know if this is only used by hostboot and save/restore + * code generated by it, or if something else (CME?) requires such + * format. For now leave it as hostboot does it, we can simplify + * this later. + * + * CIABR through MSR: key = 0..7 + * SMFCTRL through USPRG1: key = 1C..1E + */ + int tsa_key = i; + if (i > 7) + tsa_key += 0x14; + + add_init_cpureg_entry(csr->thread_restore_area[thread], + thread_sprs[i], 0, 1); + add_init_save_self_entry(&tsa, tsa_key); + } + + *tsa++ = MTLR_R30_OP; + *tsa++ = BLR_OP; + } + + csr->core_restore_area[0] = BLR_OP; + *csa++ = MFLR_R30_OP; + for (int i = 0; i < ARRAY_SIZE(core_sprs); i++) { + add_init_cpureg_entry(csr->core_restore_area, core_sprs[i], 0, 1); + /* + * HID through PTCR: key = 0x15..0x18 + * HRMOR and URMOR are skipped. + */ + if (core_sprs[i] != SPR_HRMOR && core_sprs[i] != SPR_URMOR) + add_init_save_self_entry(&csa, i + 0x14); + } + + *csa++ = MTLR_R30_OP; + *csa++ = BLR_OP; + } + + /* Populate CPMR header */ + homer->cpmr.header.fused_mode_status = 0xAA; // non-fused + + /* For SMF enabled */ +#if 0 + homer->cpmr.header.urmor_fix = 1; +#endif + + homer->cpmr.header.self_restore_ver = 2; + homer->cpmr.header.stop_api_ver = 2; + + /* + * WARNING: Hostboot filled CME header field with information whether cores + * are fused or not here. However, at this point CME image is not yet + * loaded, so that field will get overwritten. + */ +} + /* * This logic is for SMF disabled only! */ @@ -144,6 +369,14 @@ void build_homer_image(void *homer_bar) /* First MB of HOMER is unused, we can write OCC image from PNOR there. */ rdev_readat(&mdev.rdev, homer_bar, 0, 1 * MiB); + assert(hw->magic == XIP_MAGIC_HW); + build_spge(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), dd); + + build_self_restore(homer, + (struct xip_restore_header *)(homer_bar + hw->restore.offset), + dd); + + //build_cme(...); } diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 94cb642a45c..031f480adb8 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -159,12 +159,22 @@ struct cpmr_header { uint32_t quad5_pstate_offset; } __attribute__((packed, aligned(256))); +struct smf_core_self_restore { + uint32_t thread_restore_area[4][512 / sizeof(uint32_t)]; + uint32_t thread_save_area[4][256 / sizeof(uint32_t)]; + uint32_t core_restore_area[512 / sizeof(uint32_t)]; + uint32_t core_save_area[512 / sizeof(uint32_t)]; +}; + +#define MAX_CORES 24 + struct cpmr_st { struct cpmr_header header; uint8_t exe[SELF_RESTORE_REGION_SIZE - sizeof(struct cpmr_header)]; - /* This is 96kB followed by a padding in hostboot, KISS. 
*/ - uint8_t core_self_restore[CORE_SCOM_RESTORE_OFFSET - - SELF_RESTORE_REGION_SIZE]; + struct smf_core_self_restore core_self_restore[MAX_CORES]; + uint8_t pad[CORE_SCOM_RESTORE_OFFSET - + (SELF_RESTORE_REGION_SIZE + + MAX_CORES * sizeof(struct smf_core_self_restore))]; uint8_t core_scom[CORE_SCOM_RESTORE_SIZE]; uint8_t cme_sram_region[CME_SRAM_IMG_SIZE]; }; diff --git a/src/soc/ibm/power9/xip.h b/src/soc/ibm/power9/xip.h index c02e4e60027..cde9f36c600 100644 --- a/src/soc/ibm/power9/xip.h +++ b/src/soc/ibm/power9/xip.h @@ -5,6 +5,7 @@ #define XIP_MAGIC_HW (0x5849502020204857) // "XIP HW" #define XIP_MAGIC_SGPE (0x5849502053475045) // "XIP SGPE" +#define XIP_MAGIC_RESTORE (0x5849502052455354) // "XIP REST" /* All fields are big-endian */ @@ -65,6 +66,14 @@ struct xip_sgpe_header { XIP_HEADER_COMMON_FIELDS_BOTTOM }; +struct xip_restore_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section cpmr; + struct xip_section self; + struct xip_section unused[8]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + #define DD_CONTAINER_MAGIC 0x4444434F // "DDCO" struct dd_block { From b7c7d2ea300587e8c388d47774e7115702dcf41a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 21 Jun 2021 16:06:39 +0200 Subject: [PATCH 052/213] soc/power9/homer.c: build CME image and PPMR region Signed-off-by: Krystian Hebel Change-Id: Ic04d6c980738ae44ba4b8b6dcb8dfab3d82bd23d --- src/soc/ibm/power9/homer.c | 109 +++++++++++++++++++++++++++++++++++-- src/soc/ibm/power9/homer.h | 99 ++++++++++++++++++++++++++++++++- src/soc/ibm/power9/xip.h | 18 ++++++ 3 files changed, 219 insertions(+), 7 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 4e443d2ad91..0032bdce8d6 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -34,7 +34,7 @@ static size_t copy_section(void *dst, struct xip_section *section, void *base, die("XIP: Can't find container for DD=%x\n", dd); } -static void build_spge(struct homer_st *homer, struct xip_sgpe_header *sgpe, +static void build_sgpe(struct homer_st *homer, struct xip_sgpe_header *sgpe, uint8_t dd) { struct sgpe_img_header *hdr; @@ -84,6 +84,7 @@ static void build_spge(struct homer_st *homer, struct xip_sgpe_header *sgpe, sgpe.sram_image); homer->qpmr.sgpe.header.img_len = size; assert(size <= SGPE_SRAM_IMG_SIZE); + assert(size > (INT_VECTOR_SIZE + sizeof(struct sgpe_img_header))); /* Cache SCOM region */ homer->qpmr.sgpe.header.scom_offset = @@ -239,6 +240,7 @@ static void build_self_restore(struct homer_st *homer, * header's address. */ size = copy_section(&homer->cpmr.header, &rest->self, rest, dd); + assert(size > sizeof(struct cpmr_header)); /* Now, overwrite header. 
*/ size = copy_section(&homer->cpmr.header, &rest->cpmr, rest, dd); @@ -346,6 +348,97 @@ static void build_self_restore(struct homer_st *homer, */ } +static void build_cme(struct homer_st *homer, struct xip_cme_header *cme, + uint8_t dd) +{ + size_t size; + struct cme_img_header *hdr; + + size = copy_section(&homer->cpmr.cme_sram_region, &cme->hcode, cme, dd); + assert(size <= CME_SRAM_IMG_SIZE); + assert(size > (INT_VECTOR_SIZE + sizeof(struct cme_img_header))); + + hdr = (struct cme_img_header *) + &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + + hdr->hcode_offset = 0; + hdr->hcode_len = size; + + hdr->pstate_region_offset = 0; + hdr->pstate_region_len = 0; + + hdr->cpmr_phy_addr = (uint64_t) homer | 2 * MiB; + /* With SMF disabled unsecure HOMER is the same as regular one */ + hdr->unsec_cpmr_phy_addr = hdr->cpmr_phy_addr; + + hdr->common_ring_offset = hdr->hcode_offset + hdr->hcode_len; + hdr->common_ring_len = 0; + + hdr->scom_offset = 0; + hdr->scom_len = CORE_SCOM_RESTORE_SIZE / MAX_CORES / 2; + + hdr->core_spec_ring_offset = 0; + hdr->max_spec_ring_len = 0; +} + +static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe, + uint8_t dd) +{ + size_t size; + struct pgpe_img_header *hdr; + + /* PPGE header */ + size = copy_section(&homer->ppmr.header, &pgpe->ppmr, pgpe, dd); + assert(size <= PPMR_HEADER_SIZE); + /* + * 0xFFF00000 (SRAM base) + 4k (IPC) + 60k (GPE0) + 64k (GPE1) = 0xFFF20000 + * + * WARNING: I have no idea if this is constant or depends on PPGE version. + */ + assert(homer->ppmr.header.sram_region_start == 0xFFF20000); + assert(homer->ppmr.header.sram_region_size == PGPE_SRAM_IMG_SIZE + + PGPE_AUX_TASK_SIZE + + PGPE_OCC_SHARED_SRAM_SIZE); + + /* PPGE L1 bootloader */ + size = copy_section(homer->ppmr.l1_bootloader, &pgpe->l1_bootloader, pgpe, + dd); + assert(size <= GPE_BOOTLOADER_SIZE); + homer->ppmr.header.l1_offset = offsetof(struct ppmr_st, l1_bootloader); + + /* PPGE L2 bootloader */ + size = copy_section(homer->ppmr.l2_bootloader, &pgpe->l2_bootloader, pgpe, + dd); + assert(size <= GPE_BOOTLOADER_SIZE); + homer->ppmr.header.l2_offset = offsetof(struct ppmr_st, l2_bootloader); + homer->ppmr.header.l2_len = size; + + /* PPGE HCODE */ + size = copy_section(homer->ppmr.pgpe_sram_img, &pgpe->hcode, pgpe, dd); + assert(size <= PGPE_SRAM_IMG_SIZE); + assert(size > (INT_VECTOR_SIZE + sizeof(struct pgpe_img_header))); + homer->ppmr.header.hcode_offset = offsetof(struct ppmr_st, pgpe_sram_img); + homer->ppmr.header.hcode_len = size; + + /* PPGE auxiliary task */ + size = copy_section(homer->ppmr.aux_task, &pgpe->aux_task, pgpe, dd); + assert(size <= PGPE_AUX_TASK_SIZE); + homer->ppmr.header.aux_task_offset = offsetof(struct ppmr_st, aux_task); + homer->ppmr.header.aux_task_len = size; + + /* 0x80000000 = HOMER in OCI PBA memory space */ + homer->ppmr.header.doptrace_offset = + 0x80000000 + offsetof(struct homer_st, ppmr.doptrace); + homer->ppmr.header.doptrace_len = PGPE_DOPTRACE_SIZE; + + /* Update SRAM image header */ + hdr = (struct pgpe_img_header *) + &homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE]; + + /* SPGE auxiliary functions */ + hdr->aux_controls = 1 << 24; +} + /* * This logic is for SMF disabled only! 
*/ @@ -358,6 +451,10 @@ void build_homer_image(void *homer_bar) printk(BIOS_ERR, "DD%2.2x\n", dd); + /* HOMER must be aligned to 4M because CME HRMOR has bit for 2M set */ + if (!IS_ALIGNED((uint64_t) homer_bar, 4 * MiB)) + die("HOMER (%p) is not aligned to 4MB\n", homer_bar); + memset(homer_bar, 0, 4 * MiB); /* @@ -367,16 +464,20 @@ void build_homer_image(void *homer_bar) */ mount_part_from_pnor("HCODE", &mdev); /* First MB of HOMER is unused, we can write OCC image from PNOR there. */ - rdev_readat(&mdev.rdev, homer_bar, 0, 1 * MiB); + rdev_readat(&mdev.rdev, hw, 0, 1 * MiB); assert(hw->magic == XIP_MAGIC_HW); + assert(hw->image_size <= 1 * MiB); - build_spge(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), + build_sgpe(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), dd); build_self_restore(homer, (struct xip_restore_header *)(homer_bar + hw->restore.offset), dd); - //build_cme(...); + build_cme(homer, (struct xip_cme_header *)(homer_bar + hw->cme.offset), dd); + + build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), + dd); } diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 031f480adb8..6e80cd08a7a 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -166,6 +166,30 @@ struct smf_core_self_restore { uint32_t core_save_area[512 / sizeof(uint32_t)]; }; +/* This header is part of SRAM image, it starts after interrupt vectors. */ +struct cme_img_header { + uint64_t magic; + uint32_t hcode_offset; + uint32_t hcode_len; + uint32_t common_ring_offset; + uint32_t cmn_ring_ovrd_offset; + uint32_t common_ring_len; + uint32_t pstate_region_offset; + uint32_t pstate_region_len; + uint32_t core_spec_ring_offset; // = real offset / 32 + uint32_t max_spec_ring_len; // = real length / 32 + uint32_t scom_offset; // = real offset / 32 + uint32_t scom_len; + uint32_t mode_flags; + uint16_t location_id; + uint16_t qm_mode_flags; + uint32_t timebase_hz; + uint64_t cpmr_phy_addr; + uint64_t unsec_cpmr_phy_addr; + uint32_t pstate_offset; // = real offset / 32 + uint32_t custom_length; // = real length / 32 +}; + #define MAX_CORES 24 struct cpmr_st { @@ -184,15 +208,84 @@ check_member(cpmr_st, core_scom, 256 * KiB); /* =================== PPMR =================== */ +struct ppmr_header { + uint64_t magic; + uint32_t l1_offset; + uint32_t reserved; + uint32_t l2_offset; + uint32_t l2_len; + uint32_t build_date; + uint32_t build_ver; + uint64_t reserved_flags; + uint32_t hcode_offset; + uint32_t hcode_len; + uint32_t gppb_offset; + uint32_t gppb_len; + uint32_t lppb_offset; + uint32_t lppb_len; + uint32_t oppb_offset; + uint32_t oppb_len; + uint32_t pstables_offset; + uint32_t pstables_len; + uint32_t sram_img_size; + uint32_t boot_prog_code; + uint32_t wof_table_offset; + uint32_t wof_table_len; + uint32_t aux_task_offset; + uint32_t aux_task_len; + uint32_t doptrace_offset; + uint32_t doptrace_len; + uint32_t sram_region_start; + uint32_t sram_region_size; +} __attribute__((packed, aligned(512))); + +/* This header is part of SRAM image, it starts after interrupt vectors. 
*/ +struct pgpe_img_header { + uint64_t magic; + uint32_t sys_reset_addr; + uint32_t shared_sram_addr; + uint32_t ivpr_addr; + uint32_t shared_sram_len; + uint32_t build_date; + uint32_t build_ver; + uint16_t flags; + uint16_t reserve1; + uint32_t timebase_hz; + uint32_t gppb_sram_addr; + uint32_t hcode_len; + uint32_t gppb_mem_offset; + uint32_t gppb_len; + uint32_t gen_pstables_mem_offset; + uint32_t gen_pstables_len; + uint32_t occ_pstables_sram_addr; + uint32_t occ_pstables_len; + uint32_t beacon_addr; + uint32_t quad_status_addr; + uint32_t wof_state_address; + uint32_t req_active_quad_address; + uint32_t wof_table_addr; + uint32_t wof_table_len; + uint32_t core_throttle_assert_cnt; + uint32_t core_throttle_deassert_cnt; + uint32_t aux_controls; + uint32_t optrace_pointer; + uint32_t doptrace_offset; + uint32_t doptrace_len; + uint32_t wof_values_address; +}; + struct ppmr_st { - uint8_t header[PPMR_HEADER_SIZE]; + struct ppmr_header header; + uint8_t pad0[PPMR_HEADER_SIZE - sizeof(struct ppmr_header)]; uint8_t l1_bootloader[GPE_BOOTLOADER_SIZE]; uint8_t l2_bootloader[GPE_BOOTLOADER_SIZE]; uint8_t pgpe_sram_img[PGPE_SRAM_IMG_SIZE]; uint8_t aux_task[PGPE_AUX_TASK_SIZE]; - uint8_t pad0[OCC_PARAM_BLOCK_REGION_OFFSET - (PPMR_HEADER_SIZE + + uint8_t pad1[PGPE_DOPTRACE_OFFSET - (PPMR_HEADER_SIZE + GPE_BOOTLOADER_SIZE + GPE_BOOTLOADER_SIZE + PGPE_SRAM_IMG_SIZE + PGPE_AUX_TASK_SIZE)]; + /* Deep Operational Trace */ + uint8_t doptrace[PGPE_DOPTRACE_SIZE]; /* * Two following fields in hostboot have different sizes, but are padded * to 16kB each anyway. There are two consecutive paddings between @@ -200,7 +293,7 @@ struct ppmr_st { */ uint8_t occ_parm_block[OCC_PARAM_BLOCK_REGION_SIZE]; uint8_t pstate_table[PSTATE_OUTPUT_TABLES_SIZE]; - uint8_t pad1[OCC_WOF_TABLES_OFFSET - (OCC_PARAM_BLOCK_REGION_OFFSET + + uint8_t pad2[OCC_WOF_TABLES_OFFSET - (OCC_PARAM_BLOCK_REGION_OFFSET + OCC_PARAM_BLOCK_REGION_SIZE + PSTATE_OUTPUT_TABLES_SIZE)]; uint8_t wof_tables[OCC_WOF_TABLES_SIZE]; }; diff --git a/src/soc/ibm/power9/xip.h b/src/soc/ibm/power9/xip.h index cde9f36c600..fb386c17daa 100644 --- a/src/soc/ibm/power9/xip.h +++ b/src/soc/ibm/power9/xip.h @@ -74,6 +74,24 @@ struct xip_restore_header { XIP_HEADER_COMMON_FIELDS_BOTTOM }; +struct xip_cme_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section hcode; + struct xip_section unused[9]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +struct xip_pgpe_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section ppmr; + struct xip_section l1_bootloader; + struct xip_section l2_bootloader; + struct xip_section hcode; + struct xip_section aux_task; + struct xip_section unused[5]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + #define DD_CONTAINER_MAGIC 0x4444434F // "DDCO" struct dd_block { From 8131a2cdcf217f5d799f906c3d01a48f37a24c16 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 1 Jul 2021 18:00:59 +0200 Subject: [PATCH 053/213] soc/power9/homer.c: implement isteps 15.2-15.4 Boot halts because of unfinished 15.1. 
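15.2 sets the HOMER BAR, 15.3 establishes the EX chiplet (multicast groups plus
the OCC CCSR/QCSR registers), and 15.4 initializes the PFET controllers,
conditions the PBA and boots the STOP GPE.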
Signed-off-by: Krystian Hebel Change-Id: Ia83ff18e11533cb26b3dd0f703d89c7a48ce9a91 --- src/soc/ibm/power9/homer.c | 340 +++++++++++++++++++++++++++++++++++++ 1 file changed, 340 insertions(+) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 0032bdce8d6..e06d005d84b 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -6,6 +6,7 @@ #include #include #include // memset, memcpy +#include #include "chip.h" #include "homer.h" @@ -439,6 +440,218 @@ static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe, hdr->aux_controls = 1 << 24; } +static void pba_reset(void) +{ + long time; + /* Stopping Block Copy Download Engine + *0x00068010 // undocumented, PU_BCDE_CTL_SCOM + [all] 0 + [0] 1 + */ + write_scom(0x00068010, PPC_BIT(0)); + + /* Stopping Block Copy Upload Engine + *0x00068015 // undocumented, PU_BCUE_CTL_SCOM + [all] 0 + [0] 1 + */ + write_scom(0x00068015, PPC_BIT(0)); + + /* Polling on, to verify that BCDE & BCUE are indeed stopped + timeout(256*256us): + *0x00068012 // undocumented, PU_BCDE_STAT_SCOM + [0] PBA_BC_STAT_RUNNING? + *0x00068017 // undocumented, PU_BCUE_STAT_SCOM + [0] PBA_BC_STAT_RUNNING? + if both bits are clear: break + */ + time = wait_us(256*256, + (((read_scom(0x00068012) & PPC_BIT(0)) == 0) && + ((read_scom(0x00068017) & PPC_BIT(0)) == 0))); + + if (!time) + die("Timed out waiting for stopping of BCDE/BCUE\n"); + + /* Clear the BCDE and BCUE stop bits */ + write_scom(0x00068010, 0); + write_scom(0x00068015, 0); + + /* Reset each slave and wait for completion + timeout(16*1us): + // This write is inside the timeout loop. I don't know if this will cause slaves to reset + // on each iteration or not, but this is how it is done in hostboot. + *0x00068001 // undocumented, PU_PBASLVRST_SCOM + [all] 0 + [0] 1 // reset? + [1-2] sl + if *0x00068001[4 + sl] == 0: break // 4 + sl: reset in progress? + if *0x00068001[8 + sl]: die() // 8 + sl: busy? 
+ */ + for (int sl = 0; sl < 3; sl++) { // Fourth is owned by SBE, do not reset + time = wait_us(16, + (write_scom(0x00068001, PPC_BIT(0) | PPC_SHIFT(sl, 2)), + (read_scom(0x00068001) & PPC_BIT(4 + sl)) == 0)); + + if (!time || read_scom(0x00068001) & PPC_BIT(8 + sl)) + die("Timed out waiting for slave %d reset\n", sl); + } + + /* Reset PBA regs + *0x00068013 // undocumented, PU_BCDE_PBADR_SCOM + *0x00068014 // undocumented, PU_BCDE_OCIBAR_SCOM + *0x00068015 // undocumented, PU_BCUE_CTL_SCOM + *0x00068016 // undocumented, PU_BCUE_SET_SCOM + *0x00068018 // undocumented, PU_BCUE_PBADR_SCOM + *0x00068019 // undocumented, PU_BCUE_OCIBAR_SCOM + *0x00068026 // undocumented, PU_PBAXSHBR0_SCOM + *0x0006802A // undocumented, PU_PBAXSHBR1_SCOM + *0x00068027 // undocumented, PU_PBAXSHCS0_SCOM + *0x0006802B // undocumented, PU_PBAXSHCS1_SCOM + *0x00068004 // undocumented, PU_PBASLVCTL0_SCOM + *0x00068005 // undocumented, PU_PBASLVCTL1_SCOM + *0x00068006 // undocumented, PU_PBASLVCTL2_SCOM + BRIDGE.PBA.PBAFIR // 0x05012840 + BRIDGE.PBA.PBAERRRPT0 // 0x0501284C + [all] 0 + */ + write_scom(0x00068013, 0); + write_scom(0x00068014, 0); + write_scom(0x00068015, 0); + write_scom(0x00068016, 0); + write_scom(0x00068018, 0); + write_scom(0x00068019, 0); + write_scom(0x00068026, 0); + write_scom(0x0006802A, 0); + write_scom(0x00068027, 0); + write_scom(0x0006802B, 0); + write_scom(0x00068004, 0); + write_scom(0x00068005, 0); + write_scom(0x00068006, 0); + write_scom(0x05012840, 0); + write_scom(0x0501284C, 0); + + /* Perform non-zero reset operations + BRIDGE.PBA.PBACFG // 0x0501284B + [all] 0 + [38] PBACFG_CHSW_DIS_GROUP_SCOPE = 1 + */ + write_scom(0x0501284B, PPC_BIT(38)); + + /* + *0x00068021 // undocumented, PU_PBAXCFG_SCOM + [all] 0 + [2] 1 // PBAXCFG_SND_RESET? + [3] 1 // PBAXCFG_RCV_RESET? + */ + write_scom(0x00068021, PPC_BIT(2) | PPC_BIT(3)); + + /* + * The following registers are undocumented. Their fields can be decoded + * from hostboot, but the values are always the same, so why bother... 
+ */ + /* Set the PBA_MODECTL register */ + write_scom(0x00068000, 0x00A0BA9000000000); + + /* Slave 0 (SGPE and OCC boot) */ + write_scom(0x00068004, 0xB7005E0000000000); + + /* Slave 1 (405 ICU/DCU) */ + write_scom(0x00068005, 0xD5005E4000000000); + + /* Slave 2 (PGPE Boot) */ + write_scom(0x00068006, 0xA7005E4000000000); +} + +static void stop_gpe_init(struct homer_st *homer) +{ + /* First check if SGPE_ACTIVE is not set in OCCFLAG register + if (TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1): // 0x0006C08A + TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG (CLEAR) // 0x0006C08B + [all] 0 + [8] 1 // SGPE_ACTIVE, bits in this register are defined by OCC firmware + */ + if (read_scom(0x0006C08A) & PPC_BIT(8)) { + printk(BIOS_WARNING, "SGPE_ACTIVE is set in OCCFLAG register, clearing it\n"); + write_scom(0x0006C08B, PPC_BIT(8)); + } + + /* + * Program SGPE IVPR + * ATTR_STOPGPE_BOOT_COPIER_IVPR_OFFSET is set in updateGpeAttributes() in 15.1 + TP.TPCHIP.OCC.OCI.GPE3.GPEIVPR // 0x00066001 + [all] 0 + [0-31] GPEIVPR_IVPR = ATTR_STOPGPE_BOOT_COPIER_IVPR_OFFSET + // Only bits [0-22] are actually defined, meaning IVPR must be aligned to 512B + */ + uint32_t ivpr = 0x80000000 + homer->qpmr.sgpe.header.l1_offset + + offsetof(struct homer_st, qpmr); + write_scom(0x00066001, PPC_SHIFT(ivpr, 31)); + + /* Program XCR to ACTIVATE SGPE + TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 + [all] 0 + [1-3] PPE_XIXCR_XCR = 6 // hard reset + TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 + [all] 0 + [1-3] PPE_XIXCR_XCR = 4 // toggle XSR[TRH] + TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 + [all] 0 + [1-3] PPE_XIXCR_XCR = 2 // resume + */ + write_scom(0x00066010, PPC_SHIFT(6, 3)); + write_scom(0x00066010, PPC_SHIFT(4, 3)); + write_scom(0x00066010, PPC_SHIFT(2, 3)); + + + /* + * Now wait for SGPE to not be halted and for the HCode to indicate to be + * active. + * Warning: consts names in hostboot say timeouts are in ms, but code treats + * it as us. With debug output it takes much more than 20us between reads + * (~150us) and passes on 5th pass, which gives ~600us, +/- 150us on 4-core + * CPU (4 active CMEs). + timeout(125*20us): + if ((TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1) && // 0x0006C08A + (TP.TPCHIP.OCC.OCI.GPE3.GPEXIXSR[0] == 0)): break // 0x00066021 + */ + long time = wait_us(125*20, ((read_scom(0x0006C08A) & PPC_BIT(8)) && + !(read_scom(0x00066021) & PPC_BIT(0)))); + + if (!time) + die("Timeout while waiting for SGPE activation\n"); +} + +static uint64_t get_available_cores(void) +{ + uint64_t ret = 0; + for (int i = 0; i < MAX_CORES; i++) { + uint64_t val = read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0040); + if (val & PPC_BIT(0)) { + printk(BIOS_SPEW, "Core %d is functional%s\n", i, + (val & PPC_BIT(1)) ? 
"" : " and running"); + ret |= PPC_BIT(i); + + /* Might as well set multicast groups for cores */ + if ((read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0001) & PPC_BITMASK(3,5)) + == PPC_BITMASK(3,5)) + scom_and_or_for_chiplet(EC00_CHIPLET_ID + i, 0xF0001, + ~(PPC_BITMASK(3,5) | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); + + if ((read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0002) & PPC_BITMASK(3,5)) + == PPC_BITMASK(3,5)) + scom_and_or_for_chiplet(EC00_CHIPLET_ID + i, 0xF0002, + ~(PPC_BITMASK(3,5) | PPC_BITMASK(16,23)), + PPC_BIT(5) | PPC_BITMASK(19,21)); + } + } + return ret; +} + +#define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) +#define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2*(ex), 2*(ex) + 1))) +#define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) + /* * This logic is for SMF disabled only! */ @@ -448,6 +661,7 @@ void build_homer_image(void *homer_bar) struct homer_st *homer = homer_bar; struct xip_hw_header *hw = homer_bar; uint8_t dd = get_dd(); + uint64_t cores = get_available_cores(); printk(BIOS_ERR, "DD%2.2x\n", dd); @@ -480,4 +694,130 @@ void build_homer_image(void *homer_bar) build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), dd); + + /* 15.2 set HOMER BAR */ + report_istep(15, 2); + write_scom(0x05012B00, (uint64_t)homer); + write_scom(0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); + + /* 15.3 establish EX chiplet */ + report_istep(15, 3); + /* Multicast groups for cores were assigned in get_available_cores() */ + for (int i = 0; i < MAX_CORES/4; i++) { + if (IS_EQ_FUNCTIONAL(i, cores) && + (read_scom_for_chiplet(EP00_CHIPLET_ID + i, 0xF0001) & PPC_BITMASK(3,5)) + == PPC_BITMASK(3,5)) + scom_and_or_for_chiplet(EP00_CHIPLET_ID + i, 0xF0001, + ~(PPC_BITMASK(3,5) | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); + } + + /* Writing OCC CCSR */ + write_scom(0x0006C090, cores); + + /* Writing OCC QCSR */ + uint64_t qcsr = 0; + for (int i = 0; i < MAX_CORES/2; i++) { + if (IS_EX_FUNCTIONAL(i, cores)) + qcsr |= PPC_BIT(i); + } + write_scom(0x0006C094, qcsr); + + /* 15.4 start STOP engine */ + report_istep(15, 4); + + /* Initialize the PFET controllers */ + for (int i = 0; i < MAX_CORES; i++) { + if (IS_EC_FUNCTIONAL(i, cores)) { + // Periodic core quiesce workaround + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_CORE_REGS.CPPM_CPMMR (WOR) // 0x200F0108 + [all] 0 + [2] CPPM_CPMMR_RESERVED_2 = 1 + */ + write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F0108, PPC_BIT(2)); + + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFDLY // 0x200F011B + [all] 0 + [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded + [4-7] PPM_PFDLY_POWUP_DLY = 0x9 + */ + write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011B, + PPC_SHIFT(0x9, 3) | PPC_SHIFT(0x9, 7)); + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFOF // 0x200F011D + [all] 0 + [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 + [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 + */ + write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011D, + PPC_SHIFT(0x8, 3) | PPC_SHIFT(0x8, 7)); + } + + if ((i % 4) == 0 && IS_EQ_FUNCTIONAL(i/4, cores)) { + /* + TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFDLY // 0x100F011B + [all] 0 + [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded + [4-7] PPM_PFDLY_POWUP_DLY = 0x9 + */ + write_scom_for_chiplet(EP00_CHIPLET_ID + i/4, 0x100F011B, + PPC_SHIFT(0x9, 3) | PPC_SHIFT(0x9, 7)); + /* + TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFOF // 0x100F011D + [all] 0 + [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 + [4-7] 
PPM_PFOFF_VCS_VOFF_SEL = 0x8 + */ + write_scom_for_chiplet(EP00_CHIPLET_ID + i/4, 0x100F011D, + PPC_SHIFT(0x8, 3) | PPC_SHIFT(0x8, 7)); + } + } + + /* Condition the PBA back to the base boot configuration */ + pba_reset(); + + /* + * TODO: this is tested only if (ATTR_VDM_ENABLED || ATTR_IVRM_ENABLED), + * both are set (or not) in 15.1 - p9_pstate_parameter_block(). For now + * assume they are enabled. + */ + /* TP.TPCHIP.TPC.ITR.FMU.KVREF_AND_VMEAS_MODE_STATUS_REG // 0x01020007 + if ([16] == 0): die() + */ + if (!(read_scom(0x01020007) & PPC_BIT(16))) + die("VDMs/IVRM are enabled but necessary VREF calibration failed\n"); + + /* First mask bit 7 in OIMR and then clear bit 7 in OISR + TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OIMR0 (OR) // 0x0006C006 + [all] 0 + [7] OCB_OCI_OISR0_GPE2_ERROR = 1 + TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OISR0 (CLEAR) // 0x0006C001 + [all] 0 + [7] OCB_OCI_OISR0_GPE2_ERROR = 1 + */ + write_scom(0x0006C006, PPC_BIT(7)); + write_scom(0x0006C001, PPC_BIT(7)); + + /* + * Setup the SGPE Timer Selects + * These hardcoded values are assumed by the SGPE Hcode for setting up + * the FIT and Watchdog values. + TP.TPCHIP.OCC.OCI.GPE3.GPETSEL // 0x00066000 + [all] 0 + [0-3] GPETSEL_FIT_SEL = 0x1 // FIT - fixed interval timer + [4-7] GPETSEL_WATCHDOG_SEL = 0xA + */ + write_scom(0x00066000, PPC_SHIFT(0x1, 3) | PPC_SHIFT(0xA, 7)); + + /* Clear error injection bits + *0x0006C18B // undocumented, PU_OCB_OCI_OCCFLG2_CLEAR + [all] 0 + [30] 1 // OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ + */ + write_scom(0x0006C18B, PPC_BIT(30)); + + // Boot the STOP GPE + stop_gpe_init(homer); } From 6796783d4f73dbad5c25f9c6735def11778b5faa Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 6 Jul 2021 21:04:42 +0200 Subject: [PATCH 054/213] soc/power9/homer.c: add most of step 16.1 Signed-off-by: Krystian Hebel Change-Id: Id9aba154326579423b9699488aec17d260ae4d12 --- src/include/cpu/power/spr.h | 9 ++ src/soc/ibm/power9/chip.c | 2 +- src/soc/ibm/power9/homer.c | 263 +++++++++++++++++++++++++++++++- src/soc/ibm/power9/istep_13_3.c | 4 + 4 files changed, 272 insertions(+), 6 deletions(-) diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index 20b07471e8d..ffc6c2cfdac 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -9,6 +9,9 @@ #define SPR_DEC_IMPLEMENTED_BITS 56 #define SPR_DEC_LONGEST_TIME ((1ull << (SPR_DEC_IMPLEMENTED_BITS - 1)) - 1) +#define SPR_SRR0 0x2A +#define SPR_SRR1 0x2B + #define SPR_DAWR 0xB4 #define SPR_CIABR 0xBB #define SPR_DAWRX 0xBC @@ -25,8 +28,13 @@ #define SPR_HRMOR 0x139 #define SPR_LPCR 0x13E +#define SPR_LPCR_HVEE PPC_BIT(17) #define SPR_LPCR_LD PPC_BIT(46) +#define SPR_LPCR_EEE PPC_BIT(49) +#define SPR_LPCR_DEE PPC_BIT(50) +#define SPR_LPCR_OEE PPC_BIT(51) #define SPR_LPCR_HEIC PPC_BIT(59) +#define SPR_LPCR_HVICE PPC_BIT(62) #define SPR_LPCR_HDICE PPC_BIT(63) #define SPR_HMER 0x150 @@ -51,6 +59,7 @@ #define SPR_PTCR 0x1D0 #define SPR_PSSCR 0x357 #define SPR_PMCR 0x374 +#define SPR_PIR 0x3FF #ifndef __ASSEMBLER__ #include diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index a3546a08533..d7e7b152f0f 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -57,7 +57,7 @@ static void enable_soc_dev(struct device *dev) * * TODO: 8M + (4M per CPU), hostboot reserves always 8M + 8 * 4M. 
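+	 * With the 8 CPUs assumed below that is 8 MiB + 8 * 4 MiB = 40 MiB
+	 * (40960 KiB; reserved_size is expressed in KiB).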
*/ - reserved_size = 8*1024 + 4*1024 /* * num_of_cpus */; + reserved_size = 8*1024 + 4*1024 *8 /* * num_of_cpus */; top -= reserved_size; reserved_ram_resource_kb(dev, idx++, top, reserved_size); build_homer_image((void *)(top * 1024)); diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index e06d005d84b..27bca297a2d 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -5,6 +5,7 @@ #include #include #include +#include #include // memset, memcpy #include @@ -602,7 +603,6 @@ static void stop_gpe_init(struct homer_st *homer) write_scom(0x00066010, PPC_SHIFT(4, 3)); write_scom(0x00066010, PPC_SHIFT(2, 3)); - /* * Now wait for SGPE to not be halted and for the HCode to indicate to be * active. @@ -621,7 +621,7 @@ static void stop_gpe_init(struct homer_st *homer) die("Timeout while waiting for SGPE activation\n"); } -static uint64_t get_available_cores(void) +static uint64_t get_available_cores(int *me) { uint64_t ret = 0; for (int i = 0; i < MAX_CORES; i++) { @@ -630,6 +630,8 @@ static uint64_t get_available_cores(void) printk(BIOS_SPEW, "Core %d is functional%s\n", i, (val & PPC_BIT(1)) ? "" : " and running"); ret |= PPC_BIT(i); + if ((val & PPC_BIT(1)) == 0 && me != NULL) + *me = i; /* Might as well set multicast groups for cores */ if ((read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0001) & PPC_BITMASK(3,5)) @@ -652,6 +654,196 @@ static uint64_t get_available_cores(void) #define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2*(ex), 2*(ex) + 1))) #define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) +/* TODO: similar is used in 13.3. Add missing parameters and make it public? */ +static void psu_command(uint8_t flags, long time) +{ + /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG */ + if (read_scom(0x000D0060) & PPC_BIT(0)) + die("MBOX to SBE busy, this should not happen\n"); + + if (read_scom(0x000D0063) & PPC_BIT(0)) { + printk(BIOS_ERR, "SBE to Host doorbell already active, clearing it\n"); + write_scom(0x000D0064, ~PPC_BIT(0)); + } + + /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ + /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ + /* REQUIRE_RESPONSE, CLASS_CORE_STATE, CMD_CONTROL_DEADMAN_LOOP, flags */ + write_scom(0x000D0050, 0x000001000000D101 | PPC_SHIFT(flags, 31)); + + /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ + write_scom(0x000D0051, time); + + /* Ring the host->SBE doorbell */ + /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR */ + write_scom(0x000D0062, PPC_BIT(0)); + + /* Wait for response */ + /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG */ + time = wait_ms(time, read_scom(0x000D0063) & PPC_BIT(0)); + + if (!time) + die("Timed out while waiting for SBE response\n"); + + /* Clear SBE->host doorbell */ + /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND */ + write_scom(0x000D0064, ~PPC_BIT(0)); +} + +#define DEADMAN_LOOP_START 0x0001 +#define DEADMAN_LOOP_STOP 0x0002 + +static void block_wakeup_int(int core, int state) +{ + // TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_GPMMR // 0x200F0100 + /* Depending on requested state we write to SCOM1 (CLEAR) or SCOM2 (OR). */ + uint64_t scom = state ? 0x200F0102 : 0x200F0101; + + write_scom_for_chiplet(EC00_CHIPLET_ID + core, 0x200F0108, PPC_BIT(1)); + /* Register is documented, but its bits are reserved... 
*/ + write_scom_for_chiplet(EC00_CHIPLET_ID + core, scom, PPC_BIT(6)); + + write_scom_for_chiplet(EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); +} + +/* + * Some time will be lost between entering and exiting STOP 15, but we don't + * have a way of calculating it. In theory we could read tick count from one of + * the auxiliary chips (SBE, SGPE), but accessing those and converting to the + * frequency of TB may take longer than sleep took. + */ +struct save_state { + uint64_t r1; /* stack */ + uint64_t r2; /* TOC */ + uint64_t msr; + uint64_t nia; + uint64_t tb; + uint64_t lr; +} sstate; + +static void cpu_winkle(void) +{ + uint64_t lpcr = read_spr(SPR_LPCR); + /* + * Clear {External, Decrementer, Other} Exit Enable and Hypervisor + * Decrementer Interrupt Conditionally Enable + */ + lpcr &= ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE); + /* + * Set Hypervisor Virtualization Interrupt Conditionally Enable + * and Hypervisor Virtualization Exit Enable + */ + lpcr |= SPR_LPCR_HVICE | SPR_LPCR_HVEE; + write_spr(SPR_LPCR, lpcr); + write_spr(SPR_PSSCR, 0x00000000003F00FF); + sstate.msr = read_msr(); + + /* + * Cannot clobber: + * - r1 (stack) - reloaded from sstate + * - r2 (TOC aka PIC register) - reloaded from sstate + * - r3 (address of sstate) - storage duration limited to block below + */ + { + register void *r3 asm ("r3") = &sstate; + asm volatile("std 1, 0(%0)\n" + "std 2, 8(%0)\n" + "mflr 1\n" + "std 1, 40(%0)\n" + "lnia 1\n" + "__tmp_nia:" + "addi 1, 1, wakey - __tmp_nia\n" + "std 1, 24(%0)\n" + "mftb 1\n" + "std 1, 32(%0)\n" /* TB - save as late as possible */ + "sync\n" + "stop\n" + "wakey:\n" + : "+r"(r3) :: + "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", + "r28", "r29", "r30", "r31", "memory", "cc"); + } + + /* + * Hostboot restored two additional registers at this point: LPCR and PSSCR. + * + * LPCR was restored from core self-restore region, coreboot won't need to. + * + * PSSCR won't be used before next 'stop' instruction, which won't happen + * before new settings are written by the payload. + */ + + /* + * Timing facilities were lost, this includes DEC register. Because during + * self-restore Large Decrementer was disabled for few instructions, value + * of DEC is trimmed to 32 bits. Restore it to something bigger, otherwise + * interrupt would arrive in ~4 seconds. + */ + write_spr(SPR_DEC, SPR_DEC_LONGEST_TIME); +} + +static void istep_16_1(int this_core) +{ + report_istep(16, 1); + /* + * Wait time 10.5 sec, anything larger than 10737 ms can cause overflow on + * SBE side of the timeout calculations. + */ + long time = 10500; + + /* + * Debugging aid - 0xE40 is Hypervisor Emulation Assistance vector. It is + * taken when processor tries to execute unimplemented instruction. All 0s + * is (and will always be) such an instruction, meaning we will get here + * when processor jumps into uninitialized memory. If this instruction were + * also uninitialized, processor would hit another exception and again jump + * here. This time, however, it would overwrite original HSRR0 value with + * 0xE40. Instruction below is 'b .'. This way HSRR0 will retain its value + * - address of instruction which generated this exception. It can be then + * read with pdbg. 
+ */ + *(volatile uint32_t *)0xE40 = 0x48000000; + + /* TODO: configure_xive(this_core); */ + + printk(BIOS_ERR, "XIVE configured, enabling External Interrupt\n"); + write_msr(read_msr() | PPC_BIT(48)); + + /* + * This will request SBE to wake us up after we enter STOP 15. Hopefully + * we will come back to the place where we were before. + */ + printk(BIOS_ERR, "Entering dead man loop\n"); + psu_command(DEADMAN_LOOP_START, time); + + block_wakeup_int(this_core, 1); + + cpu_winkle(); + + /* + * SBE sets this doorbell bit when it finishes its part of STOP 15 wakeup. + * No need to handle the timeout, if it happens, SBE will checkstop the + * system anyway. + */ + wait_us(time, read_scom(0x000D0063) & PPC_BIT(2)); + + write_scom(0x000D0064, ~PPC_BIT(2)); + + /* + * This tells SBE that we were properly awoken. Hostboot uses default + * timeout of 90 seconds, but if SBE doesn't answer in 10 there is no reason + * to believe it will answer at all. + */ + psu_command(DEADMAN_LOOP_STOP, time); + + // core_checkstop_helper_hwp(..., true) + // p9_core_checkstop_handler(___, true) + // core_checkstop_helper_homer() + // p9_stop_save_scom() and others +} + /* * This logic is for SMF disabled only! */ @@ -661,9 +853,13 @@ void build_homer_image(void *homer_bar) struct homer_st *homer = homer_bar; struct xip_hw_header *hw = homer_bar; uint8_t dd = get_dd(); - uint64_t cores = get_available_cores(); + int this_core = -1; + uint64_t cores = get_available_cores(&this_core); - printk(BIOS_ERR, "DD%2.2x\n", dd); + if (this_core == -1) + die("Couldn't found active core\n"); + + printk(BIOS_ERR, "DD%2.2x, boot core: %d\n", dd, this_core); /* HOMER must be aligned to 4M because CME HRMOR has bit for 2M set */ if (!IS_ALIGNED((uint64_t) homer_bar, 4 * MiB)) @@ -695,10 +891,65 @@ void build_homer_image(void *homer_bar) build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), dd); + // TBD + // getPpeScanRings() for CME + // layoutRingsForCME() + // getPpeScanRings for SGPE + // layoutRingsForSGPE() + + // buildParameterBlock(); + // updateCpmrCmeRegion(); + + // Update QPMR Header area in HOMER + // updateQpmrHeader(); + + // Update PPMR Header area in HOMER + // updatePpmrHeader(); + + // Update L2 Epsilon SCOM Registers + // populateEpsilonL2ScomReg( pChipHomer ); + + // Update L3 Epsilon SCOM Registers + // populateEpsilonL3ScomReg( pChipHomer ); + + // Update L3 Refresh Timer Control SCOM Registers + // populateL3RefreshScomReg( pChipHomer, i_procTgt); + + // Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register + // populateNcuRngBarScomReg( pChipHomer, i_procTgt ); + + // Update CME/SGPE Flags in respective image header. + // updateImageFlags( pChipHomer, i_procTgt ); + + // Set the Fabric IDs + // setFabricIds( pChipHomer, i_procTgt ); + // - doesn't modify anything? 
+ + // Customize magic word based on endianness + // customizeMagicWord( pChipHomer ); + + /* Set up wakeup mode */ + for (int i = 0; i < MAX_CORES; i++) { + if (!IS_EC_FUNCTIONAL(i, cores)) + continue; + + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_CORE_REGS.CPPM_CPMMR // 0x200F0106 + // These bits, when set, make core wake up in HV (not UV) + [3] CPPM_CPMMR_RESERVED_2_9 = 1 + [4] CPPM_CPMMR_RESERVED_2_9 = 1 + */ + /* SCOM2 - OR, 0x200F0108 */ + write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(3) | PPC_BIT(4)); + } + /* 15.2 set HOMER BAR */ report_istep(15, 2); write_scom(0x05012B00, (uint64_t)homer); write_scom(0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); + write_scom(0x05012B02, (uint64_t)homer + 8 * 4 * MiB); // FIXME + write_scom(0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); /* 15.3 establish EX chiplet */ report_istep(15, 3); @@ -752,7 +1003,7 @@ void build_homer_image(void *homer_bar) [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011D, - PPC_SHIFT(0x8, 3) | PPC_SHIFT(0x8, 7)); + PPC_SHIFT(0x8, 3) | PPC_SHIFT(0x8, 7)); } if ((i % 4) == 0 && IS_EQ_FUNCTIONAL(i/4, cores)) { @@ -820,4 +1071,6 @@ void build_homer_image(void *homer_bar) // Boot the STOP GPE stop_gpe_init(homer); + + istep_16_1(this_core); } diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index cae3035b7d8..f8ca15a0fb8 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -60,6 +60,10 @@ void istep_13_3(void) * out of order command/response pair. Just fill a buffer, send it and make * sure the receiver (SBE) gets it. If you still want to know the details, * start digging here: https://github.com/open-power/hostboot/blob/master/src/usr/scan/scandd.C#L169 + * + * TODO: this is the only place where `putRing()` is called, but it isn't + * the only place where PSU commands are used (see 16.1-16.2). Consider + * making a function from this. 
*/ // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG if (read_scom(PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) From 9aa06e04263174c1f31d13c8e2f404cac3105551 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 22 Jul 2021 19:46:20 +0200 Subject: [PATCH 055/213] soc/power9/homer.c: add operation type to copy_section() Signed-off-by: Krystian Hebel Change-Id: I0de449687c35e331f7fdf54e48381c7d1ab6d330 --- src/soc/ibm/power9/homer.c | 48 +++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 27bca297a2d..bc1447162d8 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -13,11 +13,20 @@ #include "homer.h" #include "xip.h" +enum operation_type { + COPY, + FIND +}; + static size_t copy_section(void *dst, struct xip_section *section, void *base, - uint8_t dd) + uint8_t dd, enum operation_type op) { if (!section->dd_support) { - memcpy(dst, base + section->offset, section->size); + if (op == COPY) { + memcpy(dst, base + section->offset, section->size); + } else { + *(void **)dst = base + section->offset; + } return section->size; } @@ -27,8 +36,12 @@ static size_t copy_section(void *dst, struct xip_section *section, void *base, assert(cont->magic == DD_CONTAINER_MAGIC); for (i = 0; i < cont->num; i++) { if (cont->blocks[i].dd == dd) { - memcpy(dst, (void *)cont + cont->blocks[i].offset, - cont->blocks[i].size); + if (op == COPY) { + memcpy(dst, (void *)cont + cont->blocks[i].offset, + cont->blocks[i].size); + } else { + *(void **)dst = (void *)cont + cont->blocks[i].offset; + } return cont->blocks[i].size; } } @@ -45,7 +58,7 @@ static void build_sgpe(struct homer_st *homer, struct xip_sgpe_header *sgpe, assert(sgpe->magic == XIP_MAGIC_SGPE); /* SGPE header */ - size = copy_section(&homer->qpmr.sgpe.header, &sgpe->qpmr, sgpe, dd); + size = copy_section(&homer->qpmr.sgpe.header, &sgpe->qpmr, sgpe, dd, COPY); assert(size <= sizeof(struct qpmr_header)); /* @@ -67,21 +80,22 @@ static void build_sgpe(struct homer_st *homer, struct xip_sgpe_header *sgpe, /* SGPE L1 bootloader */ size = copy_section(&homer->qpmr.sgpe.l1_bootloader, &sgpe->l1_bootloader, - sgpe, dd); + sgpe, dd, COPY); homer->qpmr.sgpe.header.l1_offset = offsetof(struct qpmr_st, sgpe.l1_bootloader); assert(size <= GPE_BOOTLOADER_SIZE); /* SGPE L2 bootloader */ size = copy_section(&homer->qpmr.sgpe.l2_bootloader, &sgpe->l2_bootloader, - sgpe, dd); + sgpe, dd, COPY); homer->qpmr.sgpe.header.l2_offset = offsetof(struct qpmr_st, sgpe.l2_bootloader); homer->qpmr.sgpe.header.l2_len = size; assert(size <= GPE_BOOTLOADER_SIZE); /* SGPE HCODE */ - size = copy_section(&homer->qpmr.sgpe.sram_image, &sgpe->hcode, sgpe, dd); + size = copy_section(&homer->qpmr.sgpe.sram_image, &sgpe->hcode, sgpe, dd, + COPY); homer->qpmr.sgpe.header.img_offset = offsetof(struct qpmr_st, sgpe.sram_image); homer->qpmr.sgpe.header.img_len = size; @@ -241,11 +255,11 @@ static void build_self_restore(struct homer_st *homer, * though this is exe part of self restore region, we should copy it to * header's address. */ - size = copy_section(&homer->cpmr.header, &rest->self, rest, dd); + size = copy_section(&homer->cpmr.header, &rest->self, rest, dd, COPY); assert(size > sizeof(struct cpmr_header)); /* Now, overwrite header. 
*/ - size = copy_section(&homer->cpmr.header, &rest->cpmr, rest, dd); + size = copy_section(&homer->cpmr.header, &rest->cpmr, rest, dd, COPY); assert(size <= sizeof(struct cpmr_header)); /* @@ -356,7 +370,8 @@ static void build_cme(struct homer_st *homer, struct xip_cme_header *cme, size_t size; struct cme_img_header *hdr; - size = copy_section(&homer->cpmr.cme_sram_region, &cme->hcode, cme, dd); + size = copy_section(&homer->cpmr.cme_sram_region, &cme->hcode, cme, dd, + COPY); assert(size <= CME_SRAM_IMG_SIZE); assert(size > (INT_VECTOR_SIZE + sizeof(struct cme_img_header))); @@ -390,7 +405,7 @@ static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe, struct pgpe_img_header *hdr; /* PPGE header */ - size = copy_section(&homer->ppmr.header, &pgpe->ppmr, pgpe, dd); + size = copy_section(&homer->ppmr.header, &pgpe->ppmr, pgpe, dd, COPY); assert(size <= PPMR_HEADER_SIZE); /* * 0xFFF00000 (SRAM base) + 4k (IPC) + 60k (GPE0) + 64k (GPE1) = 0xFFF20000 @@ -404,26 +419,27 @@ static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe, /* PPGE L1 bootloader */ size = copy_section(homer->ppmr.l1_bootloader, &pgpe->l1_bootloader, pgpe, - dd); + dd, COPY); assert(size <= GPE_BOOTLOADER_SIZE); homer->ppmr.header.l1_offset = offsetof(struct ppmr_st, l1_bootloader); /* PPGE L2 bootloader */ size = copy_section(homer->ppmr.l2_bootloader, &pgpe->l2_bootloader, pgpe, - dd); + dd, COPY); assert(size <= GPE_BOOTLOADER_SIZE); homer->ppmr.header.l2_offset = offsetof(struct ppmr_st, l2_bootloader); homer->ppmr.header.l2_len = size; /* PPGE HCODE */ - size = copy_section(homer->ppmr.pgpe_sram_img, &pgpe->hcode, pgpe, dd); + size = copy_section(homer->ppmr.pgpe_sram_img, &pgpe->hcode, pgpe, dd, + COPY); assert(size <= PGPE_SRAM_IMG_SIZE); assert(size > (INT_VECTOR_SIZE + sizeof(struct pgpe_img_header))); homer->ppmr.header.hcode_offset = offsetof(struct ppmr_st, pgpe_sram_img); homer->ppmr.header.hcode_len = size; /* PPGE auxiliary task */ - size = copy_section(homer->ppmr.aux_task, &pgpe->aux_task, pgpe, dd); + size = copy_section(homer->ppmr.aux_task, &pgpe->aux_task, pgpe, dd, COPY); assert(size <= PGPE_AUX_TASK_SIZE); homer->ppmr.header.aux_task_offset = offsetof(struct ppmr_st, aux_task); homer->ppmr.header.aux_task_len = size; From 24eb2773d3f70557e2d08abef1f06280007b323b Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 Aug 2021 18:25:04 +0300 Subject: [PATCH 056/213] soc/power9/vpd.c: export vpd_find_kwd() It works with any VPD records like those in MVPD, where it's going to be needed. Change-Id: Ifc26e96d899c3837b4c9035d3d67a3eca8788189 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/vpd.h | 8 ++++++++ src/soc/ibm/power9/vpd.c | 34 +++++++++++++++------------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/include/cpu/power/vpd.h b/src/include/cpu/power/vpd.h index d25e2c62dd1..461e142325f 100644 --- a/src/include/cpu/power/vpd.h +++ b/src/include/cpu/power/vpd.h @@ -3,10 +3,18 @@ #ifndef CPU_PPC64_VPD_H #define CPU_PPC64_VPD_H +#include +#include + #define VPD_RECORD_NAME_LEN 4 #define VPD_RECORD_SIZE_LEN 2 #define VPD_KWD_NAME_LEN 2 void vpd_pnor_main(void); +/* Finds a keyword by its name. Retrieves its size too. Returns NULL on + * failure. 
*/ +const uint8_t *vpd_find_kwd(const uint8_t *record, const char *record_name, + const char *kwd_name, size_t *size); + #endif /* CPU_PPC64_VPD_H */ diff --git a/src/soc/ibm/power9/vpd.c b/src/soc/ibm/power9/vpd.c index ecd687cbe59..6581c843e23 100644 --- a/src/soc/ibm/power9/vpd.c +++ b/src/soc/ibm/power9/vpd.c @@ -257,55 +257,51 @@ static char mapping_lookup(const struct vpd_info *vpd, const uint8_t *mapping, s return '\0'; } -/* Finds a keyword by its name. Retrieves its size too. Returns NULL on - * failure. */ -static const uint8_t *find_vpd_kwd(const struct vpd_info *vpd, const char *name, - size_t *size) +const uint8_t *vpd_find_kwd(const uint8_t *record, const char *record_name, + const char *kwd_name, size_t *size) { - const uint8_t *data = vpd->data; - size_t offset = 0; uint16_t record_size = 0; - if (strlen(name) != VPD_KWD_NAME_LEN) - die("Keyword name has wrong length!\n"); + if (strlen(kwd_name) != VPD_KWD_NAME_LEN) + die("Keyword name has wrong length: %s!\n", kwd_name); - memcpy(&record_size, &data[offset], sizeof(record_size)); + memcpy(&record_size, &record[offset], sizeof(record_size)); offset += VPD_RECORD_SIZE_LEN; record_size = le16toh(record_size); /* Skip mandatory "RT" and one byte of record size (always 4) */ offset += VPD_KWD_NAME_LEN + 1; - if (memcmp(&data[offset], "MEMD", VPD_RECORD_NAME_LEN)) - die("Failed to find MEMD record!\n"); + if (memcmp(&record[offset], record_name, VPD_RECORD_NAME_LEN)) + die("Expected to be working with %s record!\n", record_name); offset += VPD_RECORD_NAME_LEN; while (offset < record_size) { uint16_t kwd_size = 0; bool match = false; - const int two_byte_size = (data[offset] == '#'); + const int two_byte_size = (record[offset] == '#'); /* This is always the last keyword */ - if (!memcmp(&data[offset], "PF", VPD_KWD_NAME_LEN)) + if (!memcmp(&record[offset], "PF", VPD_KWD_NAME_LEN)) break; - match = !memcmp(&data[offset], name, VPD_KWD_NAME_LEN); + match = !memcmp(&record[offset], kwd_name, VPD_KWD_NAME_LEN); offset += VPD_KWD_NAME_LEN; if (two_byte_size) { - memcpy(&kwd_size, &data[offset], sizeof(kwd_size)); + memcpy(&kwd_size, &record[offset], sizeof(kwd_size)); kwd_size = le16toh(kwd_size); offset += 2; } else { - kwd_size = data[offset]; + kwd_size = record[offset]; offset += 1; } if (match) { *size = kwd_size; - return &data[offset]; + return &record[offset]; } offset += kwd_size; @@ -331,7 +327,7 @@ static const uint8_t *find_vpd_conf(const struct vpd_info *vpd, const char *mapp else die("Unsupported %s mapping type\n", mapping_name); - mapping = find_vpd_kwd(vpd, mapping_name, &kwd_size); + mapping = vpd_find_kwd(vpd->data, "MEMD", mapping_name, &kwd_size); if (!mapping) die("VPD is missing %s keyword!\n", mapping_name); @@ -339,7 +335,7 @@ static const uint8_t *find_vpd_conf(const struct vpd_info *vpd, const char *mapp if (!conf_name[1]) die("Failed to find matching %s configuration!\n", mapping_name); - conf = find_vpd_kwd(vpd, conf_name, &kwd_size); + conf = vpd_find_kwd(vpd->data, "MEMD", conf_name, &kwd_size); if (!conf) die("Failed to read %s configuration!\n", mapping_name); From 01ad0bec815357f7ce374519903c7e8ffdc8cd80 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 Aug 2021 18:25:04 +0300 Subject: [PATCH 057/213] soc/power9/rom_media.c: add MVPD partition Change-Id: I15333f47b38c03eee9238b43c0ef30ad494c1a65 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 14 ++++++++++++++ src/soc/ibm/power9/rom_media.c | 27 +++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 
src/include/cpu/power/mvpd.h diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h new file mode 100644 index 00000000000..4292c8f2f01 --- /dev/null +++ b/src/include/cpu/power/mvpd.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_MVPD_H +#define CPU_PPC64_MVPD_H + +struct region_device; + +void mvpd_device_init(void); + +void mvpd_device_unmount(void); + +const struct region_device *mvpd_device_ro(void); + +#endif /* CPU_PPC64_MVPD_H */ diff --git a/src/soc/ibm/power9/rom_media.c b/src/soc/ibm/power9/rom_media.c index 3f9014a1fc6..ccc877da1cf 100644 --- a/src/soc/ibm/power9/rom_media.c +++ b/src/soc/ibm/power9/rom_media.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,8 @@ #define MEMD_PARTITION_NAME "MEMD" +#define MVPD_PARTITION_NAME "MVPD" + /* ffs_entry is not complete in included ffs.h, it lacks user data layout. * See https://github.com/open-power/skiboot/blob/master/libflash/ffs.h */ @@ -462,6 +465,30 @@ const struct region_device *memd_device_ro(void) return &memd_mdev.rdev; } +static struct mmap_helper_region_device mvpd_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void mvpd_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(MVPD_PARTITION_NAME, &mvpd_mdev); + + init_done = 1; +} + +void mvpd_device_unmount(void) +{ + mvpd_mdev.rdev.ops = &no_rdev_ops; +} + +const struct region_device *mvpd_device_ro(void) +{ + return &mvpd_mdev.rdev; +} + static struct mmap_helper_region_device boot_mdev = MMAP_HELPER_DEV_INIT( &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); From fa567002b1a18e2fd068c5820858ce7f3e56ca20 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 22 May 2022 17:52:50 +0300 Subject: [PATCH 058/213] vendorcode/ibm/: add RS4 data format and (de)compression code This data format is used to store information that describes hardware. It uses compression, which might be hard to reimplement without bugs. Moreover reimplementation probably won't work any different than the original. So the code is just included as the license allows it. 
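As a quick orientation (this sketch is not part of the imported code; the
helper name, buffer sizes and the spelling of the ring header type follow
rs4.c and are otherwise illustrative assumptions), a caller is expected to use
the exported entry points roughly like this:

    /* Hypothetical caller: expand one RS4-encoded ring into raw data and
       care-mask bit strings. */
    static void decompress_ring(const CompressedScanData *ring)
    {
            uint8_t data[2048], care[2048]; /* illustrative buffer size */
            uint32_t bits;

            if (rs4_decompress(data, care, sizeof(data), &bits, ring) !=
                SCAN_COMPRESSION_OK)
                    die("RS4 decompression failed\n");

            printk(BIOS_SPEW, "ring expands to %u bits\n", bits);
    }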
Change-Id: I5edcda954504ee218064f28439117aff37b659f2 Signed-off-by: Sergii Dmytruk --- .checkpatch.conf | 1 + src/vendorcode/Makefile.inc | 1 + src/vendorcode/ibm/Makefile.inc | 3 + src/vendorcode/ibm/power9/Makefile.inc | 3 + src/vendorcode/ibm/power9/README.md | 10 + src/vendorcode/ibm/power9/rs4.c | 826 +++++++++++++++++++++++++ src/vendorcode/ibm/power9/rs4.h | 93 +++ 7 files changed, 937 insertions(+) create mode 100644 src/vendorcode/ibm/Makefile.inc create mode 100644 src/vendorcode/ibm/power9/Makefile.inc create mode 100644 src/vendorcode/ibm/power9/README.md create mode 100644 src/vendorcode/ibm/power9/rs4.c create mode 100644 src/vendorcode/ibm/power9/rs4.h diff --git a/.checkpatch.conf b/.checkpatch.conf index dbb1aaa7444..809d27a3f3b 100644 --- a/.checkpatch.conf +++ b/.checkpatch.conf @@ -38,3 +38,4 @@ --exclude src/vendorcode/cavium --exclude src/vendorcode/intel --exclude src/vendorcode/mediatek +--exclude src/vendorcode/ibm diff --git a/src/vendorcode/Makefile.inc b/src/vendorcode/Makefile.inc index 36a13bb6f67..9cc4f2903a0 100644 --- a/src/vendorcode/Makefile.inc +++ b/src/vendorcode/Makefile.inc @@ -5,3 +5,4 @@ subdirs-y += siemens subdirs-y += cavium subdirs-y += eltan subdirs-y += mediatek +subdirs-y += ibm diff --git a/src/vendorcode/ibm/Makefile.inc b/src/vendorcode/ibm/Makefile.inc new file mode 100644 index 00000000000..ecf3cedab77 --- /dev/null +++ b/src/vendorcode/ibm/Makefile.inc @@ -0,0 +1,3 @@ +## SPDX-License-Identifier: GPL-2.0-only + +subdirs-$(CONFIG_CPU_IBM_POWER9) += power9 diff --git a/src/vendorcode/ibm/power9/Makefile.inc b/src/vendorcode/ibm/power9/Makefile.inc new file mode 100644 index 00000000000..50ff5a4fa53 --- /dev/null +++ b/src/vendorcode/ibm/power9/Makefile.inc @@ -0,0 +1,3 @@ +## SPDX-License-Identifier: GPL-2.0-only + +ramstage-y += rs4.c diff --git a/src/vendorcode/ibm/power9/README.md b/src/vendorcode/ibm/power9/README.md new file mode 100644 index 00000000000..6dda50360d9 --- /dev/null +++ b/src/vendorcode/ibm/power9/README.md @@ -0,0 +1,10 @@ +RS4 implementation is copied from HostBoot mostly as is: + * some renaming + * removal of unused functions + * updates to make things work outside of HostBoot + +URLs for files that served as prototypes: + * `rs4.c`: + https://git.raptorcs.com/git/talos-hostboot/tree/src/import/chips/p9/utils/imageProcs/p9_scan_compression.C + * `rs4.h`: + https://git.raptorcs.com/git/talos-hostboot/tree/src/import/chips/p9/utils/imageProcs/p9_scan_compression.H diff --git a/src/vendorcode/ibm/power9/rs4.c b/src/vendorcode/ibm/power9/rs4.c new file mode 100644 index 00000000000..cef97d48e0e --- /dev/null +++ b/src/vendorcode/ibm/power9/rs4.c @@ -0,0 +1,826 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/utils/imageProcs/p9_scan_compression.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. 
See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +// Note: This file was originally named p8_scan_compression.c; See CVS archive +// for revision history of p8_scan_compression.c. + +#include "rs4.h" + +/// RS4 Compression Format (version 2) +/// ================================== +/// +/// Scan strings are compressed using a simple run-length encoding called +/// RS4. The string to be decompressed and scanned is the difference between +/// the current state of the ring and the desired final state of the ring. +/// +/// Both the data to be compressed and the final compressed data are treated +/// as strings of 4-bit nibbles. In the scan data structure the compressed +/// strings are padded with 0x0 nibbles to the next even multiple of 4. +/// The compressed string consists of control nibbles and data +/// nibbles. The string format includes a special control/data sequence that +/// marks the end of the string and the final bits of scan data. +/// +/// Special control/data sequences have been been added for RS4v2 to +/// store pairs of care mask nibble and data nibble. This enhancement +/// is needed to allow the scanning of significant zeros. +/// The RS4v1 format assumed that all zeros have no meaning other than +/// the positioning of 1 bits. +/// +/// Runs of 0x0 nibbles as determined by the care mask (rotates) are encoded +/// using a simple variable-length integer encoding known as a "stop code". +/// This code treats each nibble in a variable-length integer encoding as an +/// octal digit (the low-order 3 bits) plus a stop bit (the high-order bit). +/// The examples below illustrate the encoding. +/// +/// 1xxx - Rotate 0bxxx nibbles (0 - 7) +/// 0xxx 1yyy - Rotate 0bxxxyyy nibbles (8 - 63) +/// 0xxx 0yyy 1zzz - Rotate 0bxxxyyyzzz nibbles (64 - 511) +/// etc. +/// +/// A 0-length rotate (code 0b1000) is needed to resynchronize the state +/// machine in the event of long scans (see below), or a string that begins +/// with a non-0x0 nibble. +/// +/// Runs of non-0x0 nibbles (scans) are inserted verbatim into the compressed +/// string after a control nibble indicating the number of nibbles of +/// uncompressed data. If a run is longer than 14 nibbles, the compression +/// algorithm must insert a 0-length rotate and a new scan-length control +/// before continuing with the non-0 data nibbles. +/// +/// xxxx - Scan 0bxxxx nibbles which follow, 0bxxxx != 0 and 0bxxxx != 15 +/// +/// The special case of a 0b0000 code where a scan count is expected marks the +/// end of the string. The end of string marker is always followed by a +/// nibble that contains the terminal bit count in the range 0-3. If the +/// length of the original binary string was not an even multiple of 4, then a +/// final nibble contains the final scan data left justified. +/// +/// 0000 00nn [ttt0] - Terminate 0bnn bits, data 0bttt0 if 0bnn != 0 +/// +/// The special case of a 0b1111 code where a scan count is expected announces +/// a pair of care mask nibble and data nibble containing significant zeros. +/// Only a single pair can be stored this way, and longer sequences of such +/// pairs require resynchronization using zero rotates and special scan count +/// 0b1111 to be inserted. 
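+///
+/// As a concrete illustration (example values chosen here, not taken from
+/// the HostBoot documentation): a data nibble 0b0100 whose care mask is
+/// 0b1100 carries a significant zero, so after the preceding rotate it is
+/// emitted as the three nibbles 0xf 0xc 0x4 (special scan count, care mask,
+/// data).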
+/// +/// Termination with care mask and data is accomplished by a special +/// terminal data count: +/// +/// 0000 10nn [ccc0] [ttt0] - Terminate +/// 0bnn bits care mask and 0bnn bits data, +/// care mask 0bccc0 and data 0bttt0 if 0bnn != 0 +/// +/// BNF Grammar +/// =========== +/// +/// Following is a BNF grammar for the strings accepted by the RS4 +/// decompression and scan algorithm. At a high level, the state machine +/// recognizes a series of 1 or more sequences of a rotate (R) followed by a +/// scan (S) or end-of-string marker (E), followed by the terminal count (T) +/// and optional terminal data (D). +/// +/// (R S)* (R E) T D? +/// +/// \code +/// +/// ::= | +/// +/// +/// ::= | +/// +/// +/// ::= '0x0' | ... | '0x7' +/// +/// ::= '0x8' | ... | '0xf' +/// +/// ::= | +/// +/// +/// ::= * 0bnnnn, for N = 0bnnnn, N != 0 & N != 15 * +/// +/// ::= '0xf' +/// +/// ::= * N nibbles of uncompressed data, 0 < N < 15 * +/// +/// ::= +/// '0x0' | +/// '0x0' 0)> | +/// '0x0' 0)> +/// +/// ::= * 0b00nn, for T = 0bnn * +/// +/// ::= * 0b10nn, for T = 0bnn & T != 0 * +/// +/// ::= '0x0' | '0x8' +/// +/// ::= '0x0' | '0x4' | '0x8' | '0xc' +/// +/// ::= '0x0' | '0x2' | '0x4' | ... | '0xe' +/// +/// ::= * 0b1000 0b0000 * +/// +/// ::= * 0bij00 0bwx00, for +/// i >= w & j >= x & +/// ij > wx * +/// +/// ::= * 0bijk0 0bwxy0, for +/// i >= w & j >= x & k >= y & +/// ijk > wxy * +/// +/// ::= * 0bijkl 0bwxyz, for +/// i >= w & j >= x & k >= y & l >= z & +/// ijkl > wxyz * +/// +/// \endcode + +#include +#include +#include +#include + +#define MY_DBG(...) while(false) + +// Diagnostic aids for debugging +#ifdef DEBUG_P9_SCAN_COMPRESSION + +#include + + +#define BUG(rc) \ + ({ \ + fprintf(stderr,"%s:%d : Trapped rc = %d\n", \ + __FILE__, __LINE__, (rc)); \ + (rc); \ + }) + +#define BUGX(rc, ...) \ + ({ \ + BUG(rc); \ + fprintf(stderr, ##__VA_ARGS__); \ + (rc); \ + }) + +#else // DEBUG_P9_SCAN_COMPRESSION + +#define BUG(rc) (rc) +#define BUGX(rc, ...) (rc) + +#endif // DEBUG_P9_SCAN_COMPRESSION + +#define RS4_MAGIC (uint16_t)0x5253 // "RS" + +/// Scan data types +#define RS4_SCAN_DATA_TYPE_CMSK 1 +#define RS4_SCAN_DATA_TYPE_NON_CMSK 0 + +#define MAX_RING_BUF_SIZE_TOOL 200000 + +#define RS4_VERSION 3 + +typedef uint16_t RingId_t; + +typedef struct ring_hdr CompressedScanData; + +// Return a big-endian-indexed nibble from a byte string + +static int +rs4_get_nibble(const uint8_t* i_string, const uint32_t i_i) +{ + uint8_t byte; + int nibble; + + byte = i_string[i_i / 2]; + + if (i_i % 2) + { + nibble = byte & 0xf; + } + else + { + nibble = byte >> 4; + } + + return nibble; +} + + +// Set a big-endian-indexed nibble in a byte string + +static int +rs4_set_nibble(uint8_t* io_string, const uint32_t i_i, const int i_nibble) +{ + uint8_t* byte; + + byte = &(io_string[i_i / 2]); + + if (i_i % 2) + { + *byte = (*byte & 0xf0) | i_nibble; + } + else + { + *byte = (*byte & 0x0f) | (i_nibble << 4); + } + + return i_nibble; +} + + +// Encode an unsigned integer into a 4-bit octal stop code directly into a +// nibble stream at io_string, returning the number of nibbles in the +// resulting code. + +static int +rs4_stop_encode(const uint32_t i_count, uint8_t* io_string, const uint32_t i_i) +{ + uint32_t count; + int digits, offset; + + // Determine the number of octal digits. There is always at least 1. 
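+    // For example (values picked for illustration), i_count = 100 (octal 144)
+    // takes three octal digits and is emitted as the nibbles 0x1 0x4 0xc,
+    // with the stop bit set only on the last digit.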
+ + count = i_count >> 3; + digits = 1; + + while (count) + { + count >>= 3; + digits++; + } + + // First insert the stop (low-order) digit + + offset = digits - 1; + rs4_set_nibble(io_string, i_i + offset, (i_count & 0x7) | 0x8); + + // Now insert the high-order digits + + count = i_count >> 3; + offset--; + + while (count) + { + rs4_set_nibble(io_string, i_i + offset, count & 0x7); + offset--; + count >>= 3; + } + + return digits; +} + + +// Decode an unsigned integer from a 4-bit octal stop code appearing in a byte +// string at i_string, returning the number of nibbles decoded. + +static int +stop_decode(uint32_t* o_count, const uint8_t* i_string, const uint32_t i_i) +{ + int digits, nibble; + uint32_t i, count; + + digits = 0; + count = 0; + i = i_i; + + do + { + nibble = rs4_get_nibble(i_string, i); + count = (count * 8) + (nibble & 0x7); + i++; + digits++; + } + while ((nibble & 0x8) == 0); + + *o_count = count; + return digits; +} + + +// RS4 compression algorithm notes: +// +// RS4 compression processes i_data_str/i_care_str as a strings of nibbles. +// Final special-case code handles the 0-3 remaining terminal bits. +// +// There is a special case for 0x0 nibbles embedded in a string of non-0x0 +// nibbles. It is more efficient to encode a single 0x0 nibble as part of a +// longer string of non 0x0 nibbles. However it is break-even (actually a +// slight statistical advantage) to break a scan sequence for 2 0x0 nibbles. +// +// If a run of 14 scan nibbles is found the scan is terminated and we return +// to the rotate state. Runs of more than 14 scans will always include a +// 0-length rotate between the scan sequences. +// +// The ability to store a 15th consecutive scan nibble was given up for an +// enhancement of the compression algorithm: +// The scan count 15 has a special meaning and is reserved for handling +// single nibbles that come with a care mask, that is, an extra nibble that +// determines the significance of scan bits, including both 1 and 0 bits. +// +// Returns a scan compression return code. + +static int +__rs4_compress(uint8_t* o_rs4_str, + uint32_t* o_nibbles, + const uint8_t* i_data_str, + const uint8_t* i_care_str, + const uint32_t i_length) +{ + int state; /* 0 : Rotate, 1 : Scan */ + uint32_t n; /* Number of whole nibbles in i_data */ + uint32_t r; /* Number of reminaing bits in i_data */ + uint32_t i; /* Nibble index in i_data_str/i_care_str */ + uint32_t j; /* Nibble index in o_rs4_str */ + uint32_t k; /* Location to place */ + uint32_t count; /* Counts rotate/scan nibbles */ + int care_nibble; + int data_nibble; + + n = i_length / 4; + r = i_length % 4; + i = 0; + j = 0; + k = 0; /* Makes GCC happy */ + care_nibble = 0; + data_nibble = 0; + count = 0; + state = 0; + + // Process the bulk of the string. Note that state changes do not + // increment 'i' - the nibble at i_data is always scanned again. + + while (i < n) + { + care_nibble = rs4_get_nibble(i_care_str, i); + data_nibble = rs4_get_nibble(i_data_str, i); + + if (~care_nibble & data_nibble) + { + return BUGX(SCAN_COMPRESSION_INPUT_ERROR, + "Conflicting data and mask bits in nibble %d\n", i); + } + + if (state == 0) + //----------------// + // Rotate section // + //----------------// + { + if (care_nibble == 0) + { + count++; + i++; + } + else + { + j += rs4_stop_encode(count, o_rs4_str, j); + count = 0; + k = j; + j++; + + if ((care_nibble ^ data_nibble) == 0) + { + // Only one-data in nibble. + state = 1; + } + else + { + // There is zero-data in nibble. 
+ state = 2;
+ }
+ }
+ }
+ else if (state == 1)
+ //------------------//
+ // One-data section //
+ //------------------//
+ {
+ if (care_nibble == 0)
+ {
+ if (((i + 1) < n) && (rs4_get_nibble(i_care_str, i + 1) == 0))
+ {
+ // Set the count in nibble k since no more data in
+ // current AND next nibble (or next nibble might be last).
+ rs4_set_nibble(o_rs4_str, k, count);
+ count = 0;
+ state = 0;
+ }
+ else
+ {
+ // Whether next nibble is last nibble or contains data, let's include the
+ // current empty nibble in the scan_data(N) count because it's
+ // more efficient than inserting rotate go+stop nibbles.
+ rs4_set_nibble(o_rs4_str, j, 0);
+ count++;
+ i++;
+ j++;
+ }
+ }
+ else if ((care_nibble ^ data_nibble) == 0)
+ {
+ // Only one-data in nibble. Continue piling on one-data nibbles.
+ rs4_set_nibble(o_rs4_str, j, data_nibble);
+ count++;
+ i++;
+ j++;
+ }
+ else
+ {
+ // There is zero-data in nibble.
+ // First set the count in nibble k to end the current
+ // sequence of one-data nibbles.
+ rs4_set_nibble(o_rs4_str, k, count);
+ count = 0;
+ state = 0;
+ }
+
+ if ((state == 1) && (count == 14))
+ {
+ rs4_set_nibble(o_rs4_str, k, 14);
+ count = 0;
+ state = 0;
+ }
+ }
+ else // state==2
+ //-------------------//
+ // Zero-data section //
+ //-------------------//
+ {
+ rs4_set_nibble(o_rs4_str, k, 15);
+ rs4_set_nibble(o_rs4_str, j, care_nibble);
+ j++;
+ rs4_set_nibble(o_rs4_str, j, data_nibble);
+ i++;
+ j++;
+ count = 0;
+ state = 0;
+ }
+ } // End of while (i < n)
+
+ if (state == 0)
+ {
+ j += rs4_stop_encode(count, o_rs4_str, j);
+ }
+ else
+ {
+ rs4_set_nibble(o_rs4_str, k, count);
+ j += rs4_stop_encode(0, o_rs4_str, j);
+ }
+
+ // Insert the terminate nibble, the terminal count and, if r > 0, the remainder data
+ // nibble. Note that here we indicate the number of bits (0<=r<4).
+ rs4_set_nibble(o_rs4_str, j, 0);
+ j++;
+
+ if (r == 0)
+ {
+ rs4_set_nibble(o_rs4_str, j, r);
+ j++;
+ }
+ else
+ {
+ care_nibble = rs4_get_nibble(i_care_str, n) & ((0xf >> (4 - r)) << (4 - r)); // Make excess bits zero
+ data_nibble = rs4_get_nibble(i_data_str, n) & ((0xf >> (4 - r)) << (4 - r)); // Make excess bits zero
+
+ if (~care_nibble & data_nibble)
+ {
+ return BUGX(SCAN_COMPRESSION_INPUT_ERROR,
+ "Conflicting data and mask bits in nibble %d\n", i);
+ }
+
+ if ((care_nibble ^ data_nibble) == 0)
+ {
+ // Only one-data in rem nibble.
+ rs4_set_nibble(o_rs4_str, j, r);
+ j++;
+ rs4_set_nibble(o_rs4_str, j, data_nibble);
+ j++;
+ }
+ else
+ {
+ // Zero-data in rem nibble.
+ rs4_set_nibble(o_rs4_str, j, r + 8);
+ j++;
+ rs4_set_nibble(o_rs4_str, j, care_nibble);
+ j++;
+ rs4_set_nibble(o_rs4_str, j, data_nibble);
+ j++;
+ }
+ }
+
+ *o_nibbles = j;
+
+ return SCAN_COMPRESSION_OK;
+}
+
+
+// The worst-case compression for RS4 v2 occurs if all data nibbles
+// contain significant zeros as specified by corresponding care nibbles,
+// and if the raw ring length is a whole multiple of four.
+//
+// In general, each data and care nibble pair, which are one nibble
+// in terms of input string length, are compressed into 4 nibbles:
+//
+// 1. a special data count nibble that indicates special case with care mask
+// 2. a care mask nibble
+// 3. a data nibble
+// 4. a rotate nibble
+//
+// Then, if the raw ring length is a whole multiple of four (worst case),
+// the last raw nibble also requires those RS4 four nibbles, and it is
+// followed by 2 additional nibbles that terminate the compressed data.
+// So a total of six nibbles to account for the last input nibble:
+//
+// 5. a '0x0' terminate nibble
+// 6. a terminal count(0) nibble
+//
+// If on the other hand the last input nibble is partial, then that requires
+// only four output nibbles because the terminate tag and data are combined
+// in the encoding of:
+//
+// 1. a '0x0' terminate nibble
+// 2. a terminal count nibble for masked data
+// 3. a care mask nibble
+// 4. a data nibble
+//
+// Besides there is always a rotate nibble at the beginning of the compressed
+// data:
+//
+// 0. rotate
+
+static inline uint32_t
+rs4_max_compressed_nibbles(const uint32_t i_length)
+{
+ uint32_t nibbles_raw, nibbles_rs4;
+
+ nibbles_raw = (i_length + 3) / 4; // bits rounded up to full nibbles
+ nibbles_rs4 = 1 // initial rotate nibble
+ + nibbles_raw * 4 // worst case whole nibble encoding
+ + 1 // terminate nibble
+ + 1; // zero terminal count nibble
+
+ return nibbles_rs4;
+}
+
+static inline uint32_t
+rs4_max_compressed_bytes(uint32_t nibbles)
+{
+ uint32_t bytes;
+
+ bytes = ((nibbles + 1) / 2); // nibbles rounded up to full bytes
+ bytes += sizeof(CompressedScanData); // plus rs4 header
+ bytes = ((bytes + 3) / 4) * 4; // rounded up to multiple of 4 bytes
+
+ return bytes;
+}
+
+
+// We always require the worst-case amount of memory including the header and
+// any rounding required to guarantee that the data size is a multiple of 4
+// bytes. The final image size is also rounded up to a multiple of 4 bytes.
+//
+// Returns a scan compression return code.
+
+int
+rs4_compress(CompressedScanData* io_rs4,
+ const uint32_t i_size,
+ const uint8_t* i_data_str,
+ const uint8_t* i_care_str,
+ const uint32_t i_length,
+ const uint32_t i_scanAddr,
+ const RingId_t i_ringId)
+{
+ int rc;
+ uint32_t nibbles = rs4_max_compressed_nibbles(i_length);
+ uint32_t bytes = rs4_max_compressed_bytes(nibbles);
+ uint8_t* rs4_str = (uint8_t*)io_rs4 + sizeof(CompressedScanData);
+
+ if (bytes > i_size)
+ {
+ return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW);
+ }
+
+ memset(io_rs4, 0, i_size);
+
+ rc = __rs4_compress(rs4_str, &nibbles, i_data_str, i_care_str, i_length);
+
+ if (rc == SCAN_COMPRESSION_OK)
+ {
+ bytes = rs4_max_compressed_bytes(nibbles);
+
+ io_rs4->magic = htobe16(RS4_MAGIC);
+ io_rs4->version = RS4_VERSION;
+ // For now this assumes non-CMSK scan data.
+ // For CMSK support, we would need to:
+ // - either add a CMSK function parameter and set type here,
+ // - or rely on caller to set type later.
+ io_rs4->type = RS4_SCAN_DATA_TYPE_NON_CMSK;
+ io_rs4->size = htobe16(bytes);
+ io_rs4->ring_id = htobe16(i_ringId);
+ io_rs4->scan_addr = htobe32(i_scanAddr);
+ }
+
+ return rc;
+}
+
+
+// Decompress an RS4-encoded string into output data and care strings of at
+// most i_size bytes each, storing the decoded length in bits in *o_length.
+//
+// Returns a scan compression return code.
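+//
+// Illustrative example (not real ring data): a 12-bit input whose data and
+// care nibbles are both {0x0, 0xF, 0xF} compresses to the nibble stream
+// 0x9 0x2 0xF 0xF 0x8 0x0 0x0, i.e. rotate(1), a scan of two 0xF data
+// nibbles, rotate(0), the '0x0' terminate nibble and a terminal count of 0.
+// This function walks such a stream in reverse and reports *o_length == 12.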
+ +static int +__rs4_decompress(uint8_t* o_data_str, + uint8_t* o_care_str, + uint32_t i_size, + uint32_t* o_length, + const uint8_t* i_rs4_str) +{ + int state; /* 0 : Rotate, 1 : Scan */ + uint32_t i; /* Nibble index in i_rs4_str */ + uint32_t j; /* Nibble index in o_data_str/o_care_str */ + uint32_t k; /* Loop index */ + uint32_t bits; /* Number of output bits decoded so far */ + uint32_t count; /* Count of rotate nibbles */ + uint32_t nibbles; /* Rotate encoding or scan nibbles to process */ + int r; /* Remainder bits */ + int masked; /* if a care mask is available */ + + i = 0; + j = 0; + bits = 0; + state = 0; + + // Decompress the bulk of the string + do + { + if (state == 0) + { + nibbles = stop_decode(&count, i_rs4_str, i); + i += nibbles; + + bits += 4 * count; + + if (bits > i_size * 8) + { + return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW); + } + + // keep 'count' zero care and data nibbles + // as initialised by memset in calling function + j += count; + + state = 1; + } + else + { + nibbles = rs4_get_nibble(i_rs4_str, i); + i++; + + if (nibbles == 0) + { + break; + } + + masked = (nibbles == 15 ? 1 : 0); + nibbles = (masked ? 1 : nibbles); + bits += 4 * nibbles; + + if (bits > i_size * 8) + { + return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW); + } + + for (k = 0; k < nibbles; k++) + { + rs4_set_nibble(o_care_str, j, rs4_get_nibble(i_rs4_str, i)); + i = (masked ? i + 1 : i); + rs4_set_nibble(o_data_str, j, rs4_get_nibble(i_rs4_str, i)); + i++; + j++; + } + + state = 0; + } + } + while (1); + + // Now handle string termination + + nibbles = rs4_get_nibble(i_rs4_str, i); + i++; + + masked = nibbles & 0x8; + r = nibbles & 0x3; + bits += r; + + if (bits > i_size * 8) + { + return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW); + } + + if (r != 0) + { + rs4_set_nibble(o_care_str, j, rs4_get_nibble(i_rs4_str, i)); + i = (masked ? i + 1 : i); + rs4_set_nibble(o_data_str, j, rs4_get_nibble(i_rs4_str, i)); + } + + *o_length = bits; + return SCAN_COMPRESSION_OK; +} + +int +rs4_decompress(uint8_t* o_data_str, + uint8_t* o_care_str, + uint32_t i_size, + uint32_t* o_length, + const CompressedScanData* i_rs4) +{ + uint8_t* rs4_str = (uint8_t*)i_rs4 + sizeof(CompressedScanData); + + if (be16toh(i_rs4->magic) != RS4_MAGIC) + { + return BUG(SCAN_DECOMPRESSION_MAGIC_ERROR); + } + + if (i_rs4->version != RS4_VERSION) + { + return BUG(SCAN_COMPRESSION_VERSION_ERROR); + } + + memset(o_data_str, 0, i_size); + memset(o_care_str, 0, i_size); + + return __rs4_decompress(o_data_str, o_care_str, i_size, + o_length, rs4_str); +} + +int +rs4_redundant(const CompressedScanData* i_data, int* o_redundant) +{ + uint8_t* data; + uint32_t length, pos; + + *o_redundant = 0; + + if (htobe16(i_data->magic) != RS4_MAGIC) + { + return BUG(SCAN_DECOMPRESSION_MAGIC_ERROR); + } + + data = (uint8_t*)i_data + sizeof(CompressedScanData); + + // A compressed scan string is redundant if the initial rotate is + // followed by the end-of-string marker, and any remaining mod-4 bits + // are also 0. 
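+ // For example (illustration only, not real ring data), a 12-bit ring whose
+ // care mask is all zero compresses to the nibbles 0xB 0x0 0x0, i.e. a
+ // rotate of three nibbles, the '0x0' terminate nibble and a terminal
+ // count of 0, and is therefore reported as redundant here.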
+ + pos = stop_decode(&length, data, 0); + length *= 4; + + if (rs4_get_nibble(data, pos) == 0) + { + if (rs4_get_nibble(data, pos + 1) == 0) + { + *o_redundant = 1; + } + else + { + length += rs4_get_nibble(data, pos + 1); + + if (rs4_get_nibble(data, pos + 2) == 0) + { + *o_redundant = 1; + } + } + } + + return SCAN_COMPRESSION_OK; +} diff --git a/src/vendorcode/ibm/power9/rs4.h b/src/vendorcode/ibm/power9/rs4.h new file mode 100644 index 00000000000..ba8f836c3dc --- /dev/null +++ b/src/vendorcode/ibm/power9/rs4.h @@ -0,0 +1,93 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/utils/imageProcs/p9_scan_compression.H $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ + +#ifndef __SOC_IBM_POWER9_RS4_H +#define __SOC_IBM_POWER9_RS4_H + +#include + +#define RS4_MAGIC (uint16_t)0x5253 // "RS" + +/// Normal return code +#define SCAN_COMPRESSION_OK (uint8_t)0 + +/// The (de)compression algorithm could not allocate enough memory for the +/// (de)compression. +#define SCAN_COMPRESSION_NO_MEMORY (uint8_t)1 + +/// Magic number mismatch on scan decompression +#define SCAN_DECOMPRESSION_MAGIC_ERROR (uint8_t)2 + +/// Decompression size error +/// +/// Decompression produced a string of a size different than indicated in the +/// header, indicating either a bug or data corruption. Note that the entire +/// application should be considered corrupted if this error occurs since it +/// may not be discovered until after the decompression buffer is +/// overrun. This error may also be returned by rs4_redundant() in the event +/// of inconsistencies in the compressed string. +#define SCAN_DECOMPRESSION_SIZE_ERROR (uint8_t)3 + +/// A buffer would overflow +/// +/// Either the caller-supplied memory buffer to rs4_decompress() was too +/// small to contain the decompressed string, or a caller-supplied buffer to +/// rs4_compress() was not large enough to hold the worst-case compressed +/// string. 
+#define SCAN_COMPRESSION_BUFFER_OVERFLOW (uint8_t)4 + +/// Inconsistent input data +/// +/// 1 in data is masked by 0 in care mask +#define SCAN_COMPRESSION_INPUT_ERROR 5 + +/// Invalid transition in state machine +#define SCAN_COMPRESSION_STATE_ERROR 6 + +/// wrong compression version +#define SCAN_COMPRESSION_VERSION_ERROR 7 + +/* Header of an RS4 compressed ring */ +struct ring_hdr { + uint16_t magic; // Always "RS" + uint8_t version; + uint8_t type; + uint16_t size; // Header + data size in BE + uint16_t ring_id; + uint32_t scan_addr; + uint8_t data[]; +} __attribute__((packed)); + +int rs4_compress(struct ring_hdr *io_rs4, const uint32_t i_size, + const uint8_t *i_data_str, const uint8_t *i_care_str, + const uint32_t i_length, const uint32_t i_scanAddr, + const uint16_t ring_id); + +int rs4_decompress(uint8_t *o_data_str, uint8_t *o_care_str, uint32_t i_size, + uint32_t *o_length, const struct ring_hdr *i_rs4); + +int rs4_redundant(const struct ring_hdr *i_data, int *o_redundant); + +#endif // __SOC_IBM_POWER9_RS4_H From 494609f6a219ce21758674a4b2b0ea902211a8e8 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 Aug 2021 18:25:04 +0300 Subject: [PATCH 059/213] soc/power9/mvpd.c: implement mvpd_extract_ring() Rings are basically blobs marked with some metadata that identifies them. They are stored inside keywords within some records of MVPD. Collection of rings makes a tor. Change-Id: I40856615f727f2d4358db19de647ca49ffba41e5 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 7 + src/soc/ibm/power9/Makefile.inc | 2 + src/soc/ibm/power9/mvpd.c | 181 +++++++++++++++++ src/soc/ibm/power9/tor.h | 350 ++++++++++++++++++++++++++++++++ 4 files changed, 540 insertions(+) create mode 100644 src/soc/ibm/power9/mvpd.c create mode 100644 src/soc/ibm/power9/tor.h diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index 4292c8f2f01..45561bedcaf 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -3,6 +3,9 @@ #ifndef CPU_PPC64_MVPD_H #define CPU_PPC64_MVPD_H +#include +#include + struct region_device; void mvpd_device_init(void); @@ -11,4 +14,8 @@ void mvpd_device_unmount(void); const struct region_device *mvpd_device_ro(void); +bool mvpd_extract_ring(const char *record_name, const char *kwd_name, + uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, + uint8_t *buf, uint32_t buf_size); + #endif /* CPU_PPC64_MVPD_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 5723e949c07..9f4854ce44b 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -29,5 +29,7 @@ ramstage-y += rom_media.c ramstage-y += timer.c ramstage-y += istep_18_11.c ramstage-y += istep_18_12.c +ramstage-y += mvpd.c +ramstage-y += vpd.c endif diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c new file mode 100644 index 00000000000..2ae3b63cf86 --- /dev/null +++ b/src/soc/ibm/power9/mvpd.c @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "tor.h" + +#define MVPD_TOC_ENTRIES 32 +#define MVPD_TOC_SIZE (MVPD_TOC_ENTRIES*sizeof(struct mvpd_toc_entry)) + +/* Each entry points to a VPD record */ +struct mvpd_toc_entry { + char name[4]; // Name without trailing NUL byte + uint16_t offset; // Offset from the beginning of partition in LE + uint8_t reserved[2]; // Unused +} __attribute__((packed)); + +static struct mvpd_toc_entry *find_record(struct mvpd_toc_entry *toc, + const 
char *name) +{ + int i = 0; + for (i = 0; i < MVPD_TOC_ENTRIES; ++i) { + if (memcmp(toc[i].name, name, VPD_RECORD_NAME_LEN) == 0) + return &toc[i]; + } + return NULL; +} + +/* Checks if rings ends here. End is marked by an "END" string. */ +static bool is_end_of_rings(const uint8_t *buf_left, uint32_t len_left) +{ + return (len_left < 3 || memcmp(buf_left, "END", 3) == 0); +} + +/* Finds specific ring by combination of chiplet and ring ids */ +static struct ring_hdr *find_ring_step(uint8_t chiplet_id, + uint8_t even_odd, + uint16_t ring_id, + const uint8_t **buf_left, + uint32_t *len_left) +{ + uint32_t even_odd_mask = 0; + struct ring_hdr *hdr = (struct ring_hdr *)*buf_left; + + if (*len_left < sizeof(struct ring_hdr) || hdr->magic != RS4_MAGIC) + return NULL; + + *buf_left += hdr->size; + *len_left -= hdr->size; + + switch (ring_id) { + case EX_L3_REPR: + even_odd_mask = 0x00001000; + break; + case EX_L2_REPR: + even_odd_mask = 0x00000400; + break; + case EX_L3_REFR_TIME: + case EX_L3_REFR_REPR: + even_odd_mask = 0x00000040; + break; + default: + even_odd_mask = 0; + break; + } + + even_odd_mask >>= even_odd; + + if (hdr->ring_id != ring_id) + return NULL; + if (((hdr->scan_addr >> 24) & 0xFF) != chiplet_id) + return NULL; + if (even_odd_mask != 0 && !(hdr->scan_addr & even_odd_mask)) + return NULL; + + return hdr; +} + +/* Searches for a specific ring in a keyword */ +static struct ring_hdr *find_ring(uint8_t chiplet_id, uint8_t even_odd, + uint16_t ring_id, const uint8_t *buf, + uint32_t buf_len) +{ + /* Skip version number */ + --buf_len; + ++buf; + + while (!is_end_of_rings(buf, buf_len)) { + struct ring_hdr *ring = + find_ring_step(chiplet_id, even_odd, ring_id, &buf, &buf_len); + if (ring != NULL) + return ring; + } + + return NULL; +} + +static const uint8_t *mvpd_get_keyword(const char *record_name, + const char *kwd_name, + size_t *kwd_size, void **mmaped_data) +{ + const struct region_device *mvpd_device = mvpd_device_ro(); + + uint8_t mvpd_buf[MVPD_TOC_SIZE]; + struct mvpd_toc_entry *mvpd_toc = (struct mvpd_toc_entry *)mvpd_buf; + + struct mvpd_toc_entry *toc_entry = NULL; + uint16_t record_offset = 0; + uint8_t *record_data = NULL; + uint16_t record_size = 0; + + const uint8_t *kwd = NULL; + + /* Copy all TOC at once */ + if (rdev_readat(mvpd_device, mvpd_buf, 0, sizeof(mvpd_buf)) != sizeof(mvpd_buf)) + die("Failed to read MVPD TOC!\n"); + + toc_entry = find_record(mvpd_toc, record_name); + if (toc_entry == NULL) + die("Failed to find %s MVPD record!\n", record_name); + record_offset = le16toh(toc_entry->offset); + + /* Read size of the record */ + if (rdev_readat(mvpd_device, &record_size, record_offset, + sizeof(record_size)) != sizeof(record_size)) + die("Failed to read size of %s!\n", record_name); + + record_data = rdev_mmap(mvpd_device, record_offset, record_size); + if (!record_data) + die("Failed to map %s record!\n", record_name); + + kwd = vpd_find_kwd(record_data, record_name, kwd_name, kwd_size); + if (kwd == NULL) + die("Failed to find %s keyword in %s!\n", kwd_name, record_name); + + *mmaped_data = record_data; + return kwd; +} + +bool mvpd_extract_ring(const char *record_name, const char *kwd_name, + uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, + uint8_t *buf, uint32_t buf_size) +{ + void *mmaped_data = NULL; + + const uint8_t *rings = NULL; + size_t rings_size = 0; + + struct ring_hdr *ring = NULL; + uint32_t ring_size = 0; + + mvpd_device_init(); + + rings = mvpd_get_keyword(record_name, kwd_name, &rings_size, &mmaped_data); + if (rings == NULL) + 
die("Failed to find %s keyword in %s!\n", kwd_name, record_name); + + ring = find_ring(chiplet_id, even_odd, ring_id, rings, rings_size); + if (ring == NULL) { + if (rdev_munmap(mvpd_device_ro(), mmaped_data)) + die("Failed to unmap %s record!\n", record_name); + + return false; + } + + ring_size = ring->size; + if (buf_size >= ring_size) + memcpy(buf, ring, ring_size); + + if (rdev_munmap(mvpd_device_ro(), mmaped_data)) + die("Failed to unmap %s record!\n", record_name); + + return (buf_size >= ring_size); +} diff --git a/src/soc/ibm/power9/tor.h b/src/soc/ibm/power9/tor.h new file mode 100644 index 00000000000..1b4e60d4040 --- /dev/null +++ b/src/soc/ibm/power9/tor.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_TOR_H +#define __SOC_IBM_POWER9_TOR_H + +/* List of all Ring IDs as they appear in data */ +enum ring_id { + /* Perv Chiplet Rings */ + PERV_FURE = 0, + PERV_GPTR = 1, + PERV_TIME = 2, + OCC_FURE = 3, + OCC_GPTR = 4, + OCC_TIME = 5, + PERV_ANA_FUNC = 6, + PERV_ANA_GPTR = 7, + PERV_PLL_GPTR = 8, + PERV_PLL_BNDY = 9, + PERV_PLL_BNDY_BUCKET_1 = 10, + PERV_PLL_BNDY_BUCKET_2 = 11, + PERV_PLL_BNDY_BUCKET_3 = 12, + PERV_PLL_BNDY_BUCKET_4 = 13, + PERV_PLL_BNDY_BUCKET_5 = 14, + PERV_PLL_FUNC = 15, + PERV_REPR = 16, + OCC_REPR = 17, + SBE_FURE = 18, + SBE_GPTR = 19, + SBE_REPR = 20, + + /* Nest Chiplet Rings - N0 */ + N0_FURE = 21, + N0_GPTR = 22, + N0_TIME = 23, + N0_NX_FURE = 24, + N0_NX_GPTR = 25, + N0_NX_TIME = 26, + N0_CXA0_FURE = 27, + N0_CXA0_GPTR = 28, + N0_CXA0_TIME = 29, + N0_REPR = 30, + N0_NX_REPR = 31, + N0_CXA0_REPR = 32, + + /* Nest Chiplet Rings - N1 */ + N1_FURE = 33, + N1_GPTR = 34, + N1_TIME = 35, + N1_IOO0_FURE = 36, + N1_IOO0_GPTR = 37, + N1_IOO0_TIME = 38, + N1_IOO1_FURE = 39, + N1_IOO1_GPTR = 40, + N1_IOO1_TIME = 41, + N1_MCS23_FURE = 42, + N1_MCS23_GPTR = 43, + N1_MCS23_TIME = 44, + N1_REPR = 45, + N1_IOO0_REPR = 46, + N1_IOO1_REPR = 47, + N1_MCS23_REPR = 48, + + /* Nest Chiplet Rings - N2 */ + N2_FURE = 49, + N2_GPTR = 50, + N2_TIME = 51, + N2_CXA1_FURE = 52, + N2_CXA1_GPTR = 53, + N2_CXA1_TIME = 54, + N2_PSI_FURE = 55, + N2_PSI_GPTR = 56, + N2_PSI_TIME = 57, + N2_REPR = 58, + N2_CXA1_REPR = 59, + /* Values 60-61 unused */ + + /* Nest Chiplet Rings - N3 */ + N3_FURE = 62, + N3_GPTR = 63, + N3_TIME = 64, + N3_MCS01_FURE = 65, + N3_MCS01_GPTR = 66, + N3_MCS01_TIME = 67, + N3_NP_FURE = 68, + N3_NP_GPTR = 69, + N3_NP_TIME = 70, + N3_REPR = 71, + N3_MCS01_REPR = 72, + N3_NP_REPR = 73, + N3_BR_FURE = 74, + + /* X-Bus Chiplet Rings */ + /* Common - apply to all instances of X-Bus */ + XB_FURE = 75, + XB_GPTR = 76, + XB_TIME = 77, + XB_IO0_FURE = 78, + XB_IO0_GPTR = 79, + XB_IO0_TIME = 80, + XB_IO1_FURE = 81, + XB_IO1_GPTR = 82, + XB_IO1_TIME = 83, + XB_IO2_FURE = 84, + XB_IO2_GPTR = 85, + XB_IO2_TIME = 86, + XB_PLL_GPTR = 87, + XB_PLL_BNDY = 88, + XB_PLL_FUNC = 89, + + /* X-Bus Chiplet Rings */ + /* X0, X1 and X2 instance specific Rings */ + XB_REPR = 90, + XB_IO0_REPR = 91, + XB_IO1_REPR = 92, + XB_IO2_REPR = 93, + /* Values 94-95 unused */ + + /* MC Chiplet Rings */ + /* Common - apply to all instances of MC */ + MC_FURE = 96, + MC_GPTR = 97, + MC_TIME = 98, + MC_IOM01_FURE = 99, + MC_IOM01_GPTR = 100, + MC_IOM01_TIME = 101, + MC_IOM23_FURE = 102, + MC_IOM23_GPTR = 103, + MC_IOM23_TIME = 104, + MC_PLL_GPTR = 105, + MC_PLL_BNDY = 106, + MC_PLL_BNDY_BUCKET_1 = 107, + MC_PLL_BNDY_BUCKET_2 = 108, + MC_PLL_BNDY_BUCKET_3 = 109, + MC_PLL_BNDY_BUCKET_4 = 110, + MC_PLL_BNDY_BUCKET_5 = 111, + MC_PLL_FUNC = 112, + + /* MC Chiplet Rings 
*/ + /* MC01 and MC23 instance specific Rings */ + MC_REPR = 113, + /* Value 114 unused */ + MC_IOM23_REPR = 115, + + /* OB0 Chiplet Rings */ + OB0_PLL_BNDY = 116, + OB0_PLL_BNDY_BUCKET_1 = 117, + OB0_PLL_BNDY_BUCKET_2 = 118, + OB0_GPTR = 119, + OB0_TIME = 120, + OB0_PLL_GPTR = 121, + OB0_FURE = 122, + OB0_PLL_BNDY_BUCKET_3 = 123, + + /* OB0 Chiplet instance specific Ring */ + OB0_REPR = 124, + + /* OB1 Chiplet Rings */ + OB1_PLL_BNDY = 125, + OB1_PLL_BNDY_BUCKET_1 = 126, + OB1_PLL_BNDY_BUCKET_2 = 127, + OB1_GPTR = 128, + OB1_TIME = 129, + OB1_PLL_GPTR = 130, + OB1_FURE = 131, + OB1_PLL_BNDY_BUCKET_3 = 132, + + /* OB1 Chiplet instance specific Ring */ + OB1_REPR = 133, + + /* OB2 Chiplet Rings */ + OB2_PLL_BNDY = 134, + OB2_PLL_BNDY_BUCKET_1 = 135, + OB2_PLL_BNDY_BUCKET_2 = 136, + OB2_GPTR = 137, + OB2_TIME = 138, + OB2_PLL_GPTR = 139, + OB2_FURE = 140, + OB2_PLL_BNDY_BUCKET_3 = 141, + + /* OB2 Chiplet instance specific Ring */ + OB2_REPR = 142, + + /* OB3 Chiplet Rings */ + OB3_PLL_BNDY = 143, + OB3_PLL_BNDY_BUCKET_1 = 144, + OB3_PLL_BNDY_BUCKET_2 = 145, + OB3_GPTR = 146, + OB3_TIME = 147, + OB3_PLL_GPTR = 148, + OB3_FURE = 149, + OB3_PLL_BNDY_BUCKET_3 = 150, + + /* OB3 Chiplet instance specific Rings */ + OB3_REPR = 151, + + /* Values 152-153 unused */ + + /* PCI Chiplet Rings */ + /* PCI0 Common Rings */ + PCI0_FURE = 154, + PCI0_GPTR = 155, + PCI0_TIME = 156, + PCI0_PLL_BNDY = 157, + PCI0_PLL_GPTR = 158, + /* Instance specific Rings */ + PCI0_REPR = 159, + + /* PCI1 Common Rings */ + PCI1_FURE = 160, + PCI1_GPTR = 161, + PCI1_TIME = 162, + PCI1_PLL_BNDY = 163, + PCI1_PLL_GPTR = 164, + /* Instance specific Rings */ + PCI1_REPR = 165, + + /* PCI2 Common Rings */ + PCI2_FURE = 166, + PCI2_GPTR = 167, + PCI2_TIME = 168, + PCI2_PLL_BNDY = 169, + PCI2_PLL_GPTR = 170, + /* Instance specific Rings */ + PCI2_REPR = 171, + + /* Quad Chiplet Rings */ + /* Common - apply to all Quad instances */ + EQ_FURE = 172, + EQ_GPTR = 173, + EQ_TIME = 174, + EQ_INEX = 175, + EX_L3_FURE = 176, + EX_L3_GPTR = 177, + EX_L3_TIME = 178, + EX_L2_MODE = 179, + EX_L2_FURE = 180, + EX_L2_GPTR = 181, + EX_L2_TIME = 182, + EX_L3_REFR_FURE = 183, + EX_L3_REFR_GPTR = 184, + EX_L3_REFR_TIME = 185, + EQ_ANA_FUNC = 186, + EQ_ANA_GPTR = 187, + EQ_DPLL_FUNC = 188, + EQ_DPLL_GPTR = 189, + EQ_DPLL_MODE = 190, + EQ_ANA_BNDY = 191, + EQ_ANA_BNDY_BUCKET_0 = 192, + EQ_ANA_BNDY_BUCKET_1 = 193, + EQ_ANA_BNDY_BUCKET_2 = 194, + EQ_ANA_BNDY_BUCKET_3 = 195, + EQ_ANA_BNDY_BUCKET_4 = 196, + EQ_ANA_BNDY_BUCKET_5 = 197, + EQ_ANA_BNDY_BUCKET_6 = 198, + EQ_ANA_BNDY_BUCKET_7 = 199, + EQ_ANA_BNDY_BUCKET_8 = 200, + EQ_ANA_BNDY_BUCKET_9 = 201, + EQ_ANA_BNDY_BUCKET_10 = 202, + EQ_ANA_BNDY_BUCKET_11 = 203, + EQ_ANA_BNDY_BUCKET_12 = 204, + EQ_ANA_BNDY_BUCKET_13 = 205, + EQ_ANA_BNDY_BUCKET_14 = 206, + EQ_ANA_BNDY_BUCKET_15 = 207, + EQ_ANA_BNDY_BUCKET_16 = 208, + EQ_ANA_BNDY_BUCKET_17 = 209, + EQ_ANA_BNDY_BUCKET_18 = 210, + EQ_ANA_BNDY_BUCKET_19 = 211, + EQ_ANA_BNDY_BUCKET_20 = 212, + EQ_ANA_BNDY_BUCKET_21 = 213, + EQ_ANA_BNDY_BUCKET_22 = 214, + EQ_ANA_BNDY_BUCKET_23 = 215, + EQ_ANA_BNDY_BUCKET_24 = 216, + EQ_ANA_BNDY_BUCKET_25 = 217, + EQ_ANA_BNDY_BUCKET_L3DCC = 218, + EQ_ANA_MODE = 219, + + /* Quad Chiplet Rings */ + /* EQ0 - EQ5 instance specific Rings */ + EQ_REPR = 220, + EX_L3_REPR = 221, + EX_L2_REPR = 222, + EX_L3_REFR_REPR = 223, + + /* Core Chiplet Rings */ + /* Common - apply to all Core instances */ + EC_FUNC = 224, + EC_GPTR = 225, + EC_TIME = 226, + EC_MODE = 227, + + /* Core Chiplet Rings */ + /* EC0 - EC23 instance specific Ring */ + 
EC_REPR = 228, + + /* Values 229-230 unused */ + + /* Core Chiplet Rings */ + /* ABIST engine mode */ + EC_ABST = 231, + + /* Additional rings for Nimbus DD2 */ + EQ_ANA_BNDY_BUCKET_26 = 232, + EQ_ANA_BNDY_BUCKET_27 = 233, + EQ_ANA_BNDY_BUCKET_28 = 234, + EQ_ANA_BNDY_BUCKET_29 = 235, + EQ_ANA_BNDY_BUCKET_30 = 236, + EQ_ANA_BNDY_BUCKET_31 = 237, + EQ_ANA_BNDY_BUCKET_32 = 238, + EQ_ANA_BNDY_BUCKET_33 = 239, + EQ_ANA_BNDY_BUCKET_34 = 240, + EQ_ANA_BNDY_BUCKET_35 = 241, + EQ_ANA_BNDY_BUCKET_36 = 242, + EQ_ANA_BNDY_BUCKET_37 = 243, + EQ_ANA_BNDY_BUCKET_38 = 244, + EQ_ANA_BNDY_BUCKET_39 = 245, + EQ_ANA_BNDY_BUCKET_40 = 246, + EQ_ANA_BNDY_BUCKET_41 = 247, + + /* EQ Inex ring bucket */ + EQ_INEX_BUCKET_1 = 248, + EQ_INEX_BUCKET_2 = 249, + EQ_INEX_BUCKET_3 = 250, + EQ_INEX_BUCKET_4 = 251, + + /* CMSK ring */ + EC_CMSK = 252, + + /* Perv PLL filter override rings */ + PERV_PLL_BNDY_FLT_1 = 253, + PERV_PLL_BNDY_FLT_2 = 254, + PERV_PLL_BNDY_FLT_3 = 255, + PERV_PLL_BNDY_FLT_4 = 256, + + /* MC OMI rings */ + MC_OMI0_FURE = 257, + MC_OMI0_GPTR = 258, + MC_OMI1_FURE = 259, + MC_OMI1_GPTR = 260, + MC_OMI2_FURE = 261, + MC_OMI2_GPTR = 262, + MC_OMIPPE_FURE = 263, + MC_OMIPPE_GPTR = 264, + MC_OMIPPE_TIME = 265, + /* Instance rings */ + MC_OMIPPE_REPR = 266, + + NUM_RING_IDS = 267 +}; + +#endif // __SOC_IBM_POWER9_TOR_H From 6c44e8f62db8903575d5c00ff60e1c914583feb4 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 Aug 2021 18:25:04 +0300 Subject: [PATCH 060/213] soc/power9/tor.c: implement TOR operations These are: * find ring in a TOR (tor_access_ring() with GET_RING_DATA) * find place to put ring into a TOR (tor_access_ring() with GET_RING_PUT_INFO) * build TOR of suitable rings out of CP00 record of MVPD (tor_fetch_and_insert_vpd_rings()) Change-Id: I3f7bb1a7d932749a573dfdb721b3f3df7ab3ef90 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/tor.c | 843 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/tor.h | 107 ++++ 3 files changed, 951 insertions(+) create mode 100644 src/soc/ibm/power9/tor.c diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 9f4854ce44b..80864f3865f 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -31,5 +31,6 @@ ramstage-y += istep_18_11.c ramstage-y += istep_18_12.c ramstage-y += mvpd.c ramstage-y += vpd.c +ramstage-y += tor.c endif diff --git a/src/soc/ibm/power9/tor.c b/src/soc/ibm/power9/tor.c new file mode 100644 index 00000000000..a6386d46df1 --- /dev/null +++ b/src/soc/ibm/power9/tor.c @@ -0,0 +1,843 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "tor.h" + +#include +#include +#include + +#include +#include +#include +#include + +#define UNDEFINED_PPE_TYPE ((uint8_t)0xFF) + +#define NUM_OF_CORES ((uint8_t)24) +#define NUM_OF_QUADS ((uint8_t)6) +#define CORES_PER_QUAD (NUM_OF_CORES / NUM_OF_QUADS) + +#define TOR_VERSION 7 + +#define TOR_MAGIC ((uint32_t)0x544F52) // "TOR" +#define TOR_MAGIC_HW ((uint32_t)0x544F5248) // "TORH" +#define TOR_MAGIC_SGPE ((uint32_t)0x544F5247) // "TORG" +#define TOR_MAGIC_CME ((uint32_t)0x544F524D) // "TORM" +#define TOR_MAGIC_OVLY ((uint32_t)0x544F524C) // "TORL" + +#define NUM_CHIP_TYPES 4 + +/* + * Structure of a TOR section: + * - Header (tor_hdr) + * - Payload + * - either array of PPE blocks that point to ring sections (HW TOR) + * - or ring section + * + * PPE block: + * - uint32_t -- offset (relative to payload) in BE to a ring section + * - uint32_t -- size in BE + * + * Ring section: + * - Array of chiplet 
blocks (we assume size of one for non-overlay rings) + * - Chiplet block + * - Array of TOR slots (value of 0 means "no such ring") + * - Array of rings pointed to by TOR slots + * + * Chiplet block: + * - uint32_t -- offset (relative to payload) to slots for common rings in BE + * - uint32_t -- offset (relative to payload) to slots for instance rings in BE + * + * TOR slot (`(max_instance_id - min_instance_id + 1)*ring_count` of them): + * - uint16_t -- offset (relative to chiplet block) to a ring in BE + */ + +enum chiplet_type { + PERV_TYPE, + N0_TYPE, + N1_TYPE, + N2_TYPE, + N3_TYPE, + XB_TYPE, + MC_TYPE, + OB0_TYPE, + OB1_TYPE, + OB2_TYPE, + OB3_TYPE, + PCI0_TYPE, + PCI1_TYPE, + PCI2_TYPE, + EQ_TYPE, + EC_TYPE, + SBE_NOOF_CHIPLETS +}; + +/* Description of a PPE block */ +struct tor_ppe_block { + uint32_t offset; + uint32_t size; +} __attribute__((packed)); + +/* Offsets to different kinds of rings within a section */ +struct tor_chiplet_block { + uint32_t common_offset; + uint32_t instance_offset; +} __attribute__((packed)); + +/* Static information about a ring to be searched for by the name */ +struct ring_info { + enum ring_id ring_id; + uint8_t min_instance_id; // Lower bound of instance id range + uint8_t max_instance_id; // Upper bound of instance id range +}; + +/* Static information about a chiplet */ +struct chiplet_info { + uint8_t common_rings_count; // [0..common_rings_count) + uint8_t instance_rings_count; // [0..instance_rings_count) +}; + +const struct ring_info EQ_COMMON_RING_INFO[] = { + {EQ_FURE , 0x10, 0x10}, + {EQ_GPTR , 0x10, 0x10}, + {EQ_TIME , 0x10, 0x10}, + {EQ_INEX , 0x10, 0x10}, + {EX_L3_FURE , 0x10, 0x10}, + {EX_L3_GPTR , 0x10, 0x10}, + {EX_L3_TIME , 0x10, 0x10}, + {EX_L2_MODE , 0x10, 0x10}, + {EX_L2_FURE , 0x10, 0x10}, + {EX_L2_GPTR , 0x10, 0x10}, + {EX_L2_TIME , 0x10, 0x10}, + {EX_L3_REFR_FURE , 0x10, 0x10}, + {EX_L3_REFR_GPTR , 0x10, 0x10}, + {EQ_ANA_FUNC , 0x10, 0x10}, + {EQ_ANA_GPTR , 0x10, 0x10}, + {EQ_DPLL_FUNC , 0x10, 0x10}, + {EQ_DPLL_GPTR , 0x10, 0x10}, + {EQ_DPLL_MODE , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_0 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_1 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_2 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_3 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_4 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_5 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_6 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_7 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_8 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_9 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_10 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_11 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_12 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_13 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_14 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_15 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_16 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_17 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_18 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_19 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_20 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_21 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_22 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_23 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_24 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_25 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_L3DCC , 0x10, 0x10}, + {EQ_ANA_MODE , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_26 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_27 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_28 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_29 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_30 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_31 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_32 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_33 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_34 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_35 , 
0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_36 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_37 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_38 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_39 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_40 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_41 , 0x10, 0x10}, + {EQ_INEX_BUCKET_1 , 0x10, 0x10}, + {EQ_INEX_BUCKET_2 , 0x10, 0x10}, + {EQ_INEX_BUCKET_3 , 0x10, 0x10}, + {EQ_INEX_BUCKET_4 , 0x10, 0x10}, +}; + +const struct ring_info EQ_INSTANCE_RING_INFO[] = { + {EQ_REPR , 0x10, 0x1B}, + {EX_L3_REPR , 0x10, 0x1B}, + {EX_L2_REPR , 0x10, 0x1B}, + {EX_L3_REFR_REPR , 0x10, 0x1B}, + {EX_L3_REFR_TIME , 0x10, 0x1B}, +}; + +static const struct chiplet_info EQ_CHIPLET_INFO = { + 66, // 66 common rings for Quad chiplet. + 5, // 5 instance specific rings for each EQ chiplet +}; + +const struct ring_info EC_COMMON_RING_INFO[] = { + {EC_FUNC , 0x20, 0x20}, + {EC_GPTR , 0x20, 0x20}, + {EC_TIME , 0x20, 0x20}, + {EC_MODE , 0x20, 0x20}, + {EC_ABST , 0x20, 0x20}, + {EC_CMSK , 0xFF, 0xFF}, +}; + +const struct ring_info EC_INSTANCE_RING_INFO[] = { + {EC_REPR , 0x20, 0x37}, +}; + +static const struct chiplet_info EC_CHIPLET_INFO = { + 6, // 6 common rings for Core chiplet + 1, // 1 instance specific ring for each Core chiplet +}; + +static const struct ring_query RING_QUERIES_PDG[] = { + /* ring_id ring_class kwd_name instance_id */ + /* min max */ + { PERV_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { PERV_TIME , RING_CLASS_NEST , "#G" , 0x01 , 0x01 }, + { OCC_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { OCC_TIME , RING_CLASS_NEST , "#G" , 0x01 , 0x01 }, + { SBE_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { PERV_ANA_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { PERV_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { N0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x02 , 0x02 }, + { N0_TIME , RING_CLASS_NEST , "#G" , 0x02 , 0x02 }, + { N0_NX_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x02 , 0x02 }, + { N0_NX_TIME , RING_CLASS_NEST , "#G" , 0x02 , 0x02 }, + { N0_CXA0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x02 , 0x02 }, + { N0_CXA0_TIME , RING_CLASS_NEST , "#G" , 0x02 , 0x02 }, + { N1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO0_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO1_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N1_MCS23_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_MCS23_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x04 , 0x04 }, + { N2_TIME , RING_CLASS_NEST , "#G" , 0x04 , 0x04 }, + { N2_CXA1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x04 , 0x04 }, + { N2_CXA1_TIME , RING_CLASS_NEST , "#G" , 0x04 , 0x04 }, + { N2_PSI_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x04 , 0x04 }, + { N3_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x05 , 0x05 }, + { N3_TIME , RING_CLASS_NEST , "#G" , 0x05 , 0x05 }, + { N3_MCS01_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x05 , 0x05 }, + { N3_MCS01_TIME , RING_CLASS_NEST , "#G" , 0x05 , 0x05 }, + { N3_NP_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x05 , 0x05 }, + { N3_NP_TIME , RING_CLASS_NEST , "#G" , 0x05 , 0x05 }, + { XB_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO0_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { 
XB_IO1_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO2_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { MC_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_TIME , RING_CLASS_NEST , "#G" , 0x07 , 0xFF }, + { MC_IOM01_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_IOM23_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMI0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMI1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMI2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMIPPE_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMIPPE_TIME , RING_CLASS_NEST , "#G" , 0x07 , 0xFF }, + { OB0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x09 , 0x09 }, + { OB0_TIME , RING_CLASS_NEST , "#G" , 0x09 , 0x09 }, + { OB0_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x09 , 0x09 }, + { OB1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0A , 0x0A }, + { OB1_TIME , RING_CLASS_NEST , "#G" , 0x0A , 0x0A }, + { OB1_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0A , 0x0A }, + { OB2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0B , 0x0B }, + { OB2_TIME , RING_CLASS_NEST , "#G" , 0x0B , 0x0B }, + { OB2_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0B , 0x0B }, + { OB3_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0C , 0x0C }, + { OB3_TIME , RING_CLASS_NEST , "#G" , 0x0C , 0x0C }, + { OB3_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0C , 0x0C }, + { PCI0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0D , 0x0D }, + { PCI0_TIME , RING_CLASS_NEST , "#G" , 0x0D , 0x0D }, + { PCI0_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0D , 0x0D }, + { PCI1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0E , 0x0E }, + { PCI1_TIME , RING_CLASS_NEST , "#G" , 0x0E , 0x0E }, + { PCI1_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0E , 0x0E }, + { PCI2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0F , 0x0F }, + { PCI2_TIME , RING_CLASS_NEST , "#G" , 0x0F , 0x0F }, + { PCI2_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0F , 0x0F }, + { EQ_GPTR , RING_CLASS_GPTR_EQ , "#G" , 0x10 , 0xFF }, + { EQ_TIME , RING_CLASS_EQ , "#G" , 0x10 , 0xFF }, + { EX_L3_GPTR , RING_CLASS_GPTR_EX , "#G" , 0x10 , 0xFF }, + { EX_L3_TIME , RING_CLASS_EX , "#G" , 0x10 , 0xFF }, + { EX_L2_GPTR , RING_CLASS_GPTR_EX , "#G" , 0x10 , 0xFF }, + { EX_L2_TIME , RING_CLASS_EX , "#G" , 0x10 , 0xFF }, + { EX_L3_REFR_GPTR , RING_CLASS_GPTR_EX , "#G" , 0x10 , 0xFF }, + { EQ_ANA_GPTR , RING_CLASS_GPTR_EQ , "#G" , 0x10 , 0xFF }, + { EQ_DPLL_GPTR , RING_CLASS_GPTR_EQ , "#G" , 0x10 , 0xFF }, + { EC_GPTR , RING_CLASS_GPTR_EC , "#G" , 0x20 , 0xFF }, + { EC_TIME , RING_CLASS_EC , "#G" , 0x20 , 0xFF }, +}; + +static const struct ring_query RING_QUERIES_PDR[] = { + /* ring_id ring_class kwd_name instance_id */ + /* min max */ + { PERV_REPR , RING_CLASS_NEST , "#R" , 0x01 , 0x01 }, + { OCC_REPR , RING_CLASS_NEST , "#R" , 0x01 , 0x01 }, + { SBE_REPR , RING_CLASS_NEST , "#R" , 0x01 , 0x01 }, + { N0_REPR , RING_CLASS_NEST , "#R" , 0x02 , 0x02 }, + { N0_NX_REPR , RING_CLASS_NEST , "#R" , 0x02 , 0x02 }, + { N0_CXA0_REPR , RING_CLASS_NEST , "#R" , 0x02 , 0x02 }, + { N1_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N1_IOO0_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N1_IOO1_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N1_MCS23_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N2_REPR , RING_CLASS_NEST , "#R" , 0x04 , 0x04 }, + { N2_CXA1_REPR , RING_CLASS_NEST , "#R" , 
0x04 , 0x04 }, + { N3_REPR , RING_CLASS_NEST , "#R" , 0x05 , 0x05 }, + { N3_MCS01_REPR , RING_CLASS_NEST , "#R" , 0x05 , 0x05 }, + { N3_NP_REPR , RING_CLASS_NEST , "#R" , 0x05 , 0x05 }, + { XB_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { XB_IO0_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { XB_IO1_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { XB_IO2_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { MC_REPR , RING_CLASS_NEST , "#R" , 0x07 , 0x08 }, + { MC_IOM23_REPR , RING_CLASS_NEST , "#R" , 0x07 , 0x08 }, + { MC_OMIPPE_REPR , RING_CLASS_NEST , "#R" , 0x07 , 0x08 }, + { OB0_REPR , RING_CLASS_NEST , "#R" , 0x09 , 0x09 }, + { OB1_REPR , RING_CLASS_NEST , "#R" , 0x0A , 0x0A }, + { OB2_REPR , RING_CLASS_NEST , "#R" , 0x0B , 0x0B }, + { OB3_REPR , RING_CLASS_NEST , "#R" , 0x0C , 0x0C }, + { PCI0_REPR , RING_CLASS_NEST , "#R" , 0x0D , 0x0D }, + { PCI1_REPR , RING_CLASS_NEST , "#R" , 0x0E , 0x0E }, + { PCI2_REPR , RING_CLASS_NEST , "#R" , 0x0F , 0x0F }, + { EQ_REPR , RING_CLASS_EQ_INS , "#R" , 0x10 , 0x15 }, + { EX_L3_REFR_TIME , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EX_L3_REPR , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EX_L2_REPR , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EX_L3_REFR_REPR , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EC_REPR , RING_CLASS_EC_INS , "#R" , 0x20 , 0x37 }, +}; + +/* Retrieves properties for specified kind of TOR */ +static void get_section_properties(uint32_t tor_magic, + uint8_t chiplet_type, + const struct chiplet_info **chiplet_info, + const struct ring_info **common_ring_info, + const struct ring_info **instance_ring_info) +{ + if (tor_magic == TOR_MAGIC_CME) + chiplet_type = EC_TYPE; + else if (tor_magic == TOR_MAGIC_SGPE) + chiplet_type = EQ_TYPE; + else if (tor_magic != TOR_MAGIC_OVLY) + die("Unexpected TOR type\n"); + + switch (chiplet_type) { + case EC_TYPE: + *chiplet_info = &EC_CHIPLET_INFO; + *common_ring_info = EC_COMMON_RING_INFO; + *instance_ring_info = EC_INSTANCE_RING_INFO; + break; + case EQ_TYPE: + *chiplet_info = &EQ_CHIPLET_INFO; + *common_ring_info = EQ_COMMON_RING_INFO; + *instance_ring_info = EQ_INSTANCE_RING_INFO; + break; + default: + *chiplet_info = NULL; + *common_ring_info = NULL; + *instance_ring_info = NULL; + break; + }; +} + +/* Either reads ring into the buffer (on GET_RING_DATA) or treats it as an + instance of ring_put_info (on GET_RING_PUT_INFO) */ +static bool ring_access(struct tor_hdr *ring_section, uint16_t ring_id, + uint8_t ring_variant, uint8_t instance_id, + void *data_buf, uint32_t *data_buf_size, + enum ring_operation operation) +{ + const bool overlay = (be32toh(ring_section->magic) == TOR_MAGIC_OVLY); + uint8_t i = 0; + uint8_t chiplet_count = (overlay ? SBE_NOOF_CHIPLETS : 1); + uint8_t max_variants = (overlay ? 1 : NUM_RING_VARIANTS); + + assert(ring_section->version == TOR_VERSION); + + for (i = 0; i < chiplet_count * 2; ++i) { + const uint8_t chiplet_idx = i / 2; + const bool instance_rings = (i % 2 == 1); + + uint32_t tor_slot_idx = 0; + uint8_t instance = 0; + + const struct ring_info *ring_info; + uint8_t ring_count; + struct tor_chiplet_block *blocks; + uint32_t chiplet_offset; + uint8_t variant_count; + + const struct chiplet_info *chiplet_info; + const struct ring_info *common_ring_info; + const struct ring_info *instance_ring_info; + + get_section_properties(be32toh(ring_section->magic), + chiplet_idx, &chiplet_info, + &common_ring_info, &instance_ring_info); + if (chiplet_info == NULL) + continue; + + ring_info = instance_rings ? 
instance_ring_info : common_ring_info; + + ring_count = instance_rings + ? chiplet_info->instance_rings_count + : chiplet_info->common_rings_count; + blocks = (void *)ring_section->data; + chiplet_offset = instance_rings + ? be32toh(blocks[chiplet_idx].instance_offset) + : be32toh(blocks[chiplet_idx].common_offset); + /* Instance rings have only BASE variant and both EC and EQ have + * all of them and their order matches enumeration values */ + variant_count = (instance_rings ? 1 : max_variants); + + for (instance = ring_info->min_instance_id; + instance <= ring_info->max_instance_id; + ++instance) { + uint8_t ring_idx; + for (ring_idx = 0; ring_idx < ring_count; ++ring_idx) { + if (ring_info[ring_idx].ring_id != ring_id || + (instance_rings && instance != instance_id)) { + /* Jump over all variants of the ring */ + tor_slot_idx += variant_count; + continue; + } + + if (variant_count > 1) + /* Skip to the slot with the variant */ + tor_slot_idx += ring_variant; + + uint16_t *tor_slots = + (void *)&ring_section->data[chiplet_offset]; + if (operation == GET_RING_DATA) { + uint16_t slot_value = be16toh(tor_slots[tor_slot_idx]); + uint32_t ring_slot_offset = chiplet_offset + slot_value; + struct ring_hdr *ring = + (void *)&ring_section->data[ring_slot_offset]; + uint32_t ring_size = be16toh(ring->size); + + if (slot_value == 0) + /* Didn't find the ring */ + return false; + + if (ring->magic != htobe16(RS4_MAGIC)) { + printk(BIOS_EMERG, "chiplet_offset = 0x%08x\n", chiplet_offset); + printk(BIOS_EMERG, "tor_slot_idx = 0x%08x\n", tor_slot_idx); + printk(BIOS_EMERG, "slot_value = 0x%08x\n", slot_value); + printk(BIOS_EMERG, "ring_slot_offset = 0x%08x\n", ring_slot_offset); + printk(BIOS_EMERG, "Full section:\n"); + hexdump(ring_section, ring_section->size); + printk(BIOS_EMERG, "Ring:\n"); + hexdump(ring, ring_size); + die("Got junk instead of a ring"); + } + + if (*data_buf_size != 0 && *data_buf_size >= ring_size) + memcpy(data_buf, ring, ring_size); + *data_buf_size = ring_size; + } else if (operation == GET_RING_PUT_INFO) { + struct ring_put_info *put_info = data_buf; + + if (tor_slots[tor_slot_idx] != 0) + die("Slot isn't empty!"); + + if (*data_buf_size != sizeof(struct ring_put_info)) + die("Invalid buffer for GET_RING_PUT_INFO!"); + + put_info->chiplet_offset = sizeof(*ring_section) + + chiplet_offset; + put_info->ring_slot_offset = + (uint8_t*)&tor_slots[tor_slot_idx] - + (uint8_t*)ring_section; + } + return true; + } + } + } + + return false; +} + +/* A wrapper around ring_access() that does safety checks and tor traversal if + necessary */ +bool tor_access_ring(struct tor_hdr *ring_section, uint16_t ring_id, + enum ppe_type ppe_type, uint8_t ring_variant, + uint8_t instance_id, void *data_buf, + uint32_t *data_buf_size, enum ring_operation operation) +{ + if (be32toh(ring_section->magic) >> 8 != TOR_MAGIC || + ring_section->version == 0 || + ring_section->version > TOR_VERSION || + ring_section->chip_type >= NUM_CHIP_TYPES) + die("Invalid call to tor_access_ring()!"); + + if (operation == GET_RING_DATA || operation == GET_RING_PUT_INFO) { + struct tor_hdr *section = ring_section; + if (be32toh(ring_section->magic) == TOR_MAGIC_HW) { + struct tor_ppe_block *tor_ppe_block = (void *)ring_section->data; + const uint32_t section_offset = be32toh(tor_ppe_block[ppe_type].offset); + section = (void *)&ring_section->data[section_offset]; + } + + return ring_access(section, ring_id, ring_variant, instance_id, + data_buf, data_buf_size, operation); + } + + if (operation == GET_PPE_LEVEL_RINGS) { 
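+ /*
+ * For a HW image this copies the whole per-PPE sub-section. A
+ * hypothetical caller can pass *data_buf_size == 0 first to query the
+ * required size and then call again with real storage, e.g.:
+ *
+ * uint32_t size = 0;
+ * tor_access_ring(hw_tor, UNDEFINED_RING_ID, PT_CME,
+ * UNDEFINED_RING_VARIANT, UNDEFINED_INSTANCE_ID,
+ * NULL, &size, GET_PPE_LEVEL_RINGS);
+ * // ...then allocate or reserve 'size' bytes and repeat the call
+ */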
+ uint32_t section_size = 0; + uint32_t section_offset = 0; + struct tor_ppe_block *tor_ppe_block = (void *)ring_section->data; + + assert(ring_id == UNDEFINED_RING_ID); + assert(ring_variant == UNDEFINED_RING_VARIANT); + assert(instance_id == UNDEFINED_INSTANCE_ID); + assert(be32toh(ring_section->magic) == TOR_MAGIC_HW); + + section_size = be32toh(tor_ppe_block[ppe_type].size); + section_offset = be32toh(tor_ppe_block[ppe_type].offset); + + if (*data_buf_size != 0 && *data_buf_size >= section_size) + memcpy(data_buf, &ring_section->data[section_offset], + section_size); + + *data_buf_size = section_size; + return true; + } + + die("Unhandled TOR ring access operation!"); +} + +/* Retrieves an overlay ring in both compressed and uncompressed forms */ +static bool get_overlays_ring(struct tor_hdr *overlays_section, + uint16_t ring_id, void *rs4_buf, void *raw_buf) +{ + uint32_t uncompressed_bit_size = 0; + uint32_t rs4_buf_size = 0xFFFFFFFF; + + if (!tor_access_ring(overlays_section, ring_id, UNDEFINED_PPE_TYPE, + UNDEFINED_RING_VARIANT, UNDEFINED_INSTANCE_ID, + rs4_buf, &rs4_buf_size, GET_RING_DATA)) + return false; + + rs4_decompress(raw_buf, raw_buf + MAX_RING_BUF_SIZE / 2, + MAX_RING_BUF_SIZE / 2, &uncompressed_bit_size, + (struct ring_hdr *)rs4_buf); + return true; +} + +/* Decompress ring, modify it to leave only data allowed by overlay mask and + compress back */ +static void apply_overlays_ring(struct ring_hdr *ring, uint8_t *rs4_buf, + const uint8_t *raw_buf) +{ + uint8_t *data = rs4_buf; + uint8_t *care = rs4_buf + MAX_RING_BUF_SIZE / 2; + const uint8_t *overlay = raw_buf + MAX_RING_BUF_SIZE / 2; + uint32_t uncompressed_bit_size; + + rs4_decompress(data, care, MAX_RING_BUF_SIZE / 2, &uncompressed_bit_size, ring); + + /* + * Copies bits from raw_buf into both data and care only if bit at the + * same index in overlay is set. 
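+ * For example (purely illustrative values): with overlay byte 0xF0 and
+ * overlay-ring data byte 0xA0, the four most significant bits of both
+ * data and care are rewritten to 1,0,1,0 while the low four bits keep
+ * whatever the ring being patched already contained.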
+ */ + for (uint32_t bit = 0; bit < uncompressed_bit_size; ++bit) { + const int byte_idx = bit / 8; + const uint8_t bit_mask = (0x80 >> bit % 8); + if (overlay[byte_idx] & bit_mask) { + if (raw_buf[byte_idx] & bit_mask) { + data[byte_idx] |= bit_mask; + care[byte_idx] |= bit_mask; + } else { + data[byte_idx] &= ~bit_mask; + care[byte_idx] &= ~bit_mask; + } + } + } + + rs4_compress(ring, MAX_RING_BUF_SIZE, data, care, uncompressed_bit_size, + be32toh(ring->scan_addr), be16toh(ring->ring_id)); +} + +static void apply_overlays_to_gptr(struct tor_hdr *overlays_section, + struct ring_hdr *ring, uint8_t *rs4_buf, + uint8_t *raw_buf) +{ + if (get_overlays_ring(overlays_section, be16toh(ring->ring_id), rs4_buf, + raw_buf)) { + /* raw_buf is passed from get_overlays_ring(), rs4_buf is just reused */ + apply_overlays_ring(ring, rs4_buf, raw_buf); + } +} + +static void tor_append_ring(struct tor_hdr *ring_section, + uint32_t *ring_section_size, uint16_t ring_id, + enum ppe_type ppe_type, uint8_t ring_variant, + uint8_t instance_id, struct ring_hdr *ring) +{ + uint16_t ring_offset; + uint32_t ring_size; + + struct ring_put_info put_info; + uint32_t put_info_size = sizeof(put_info); + + if (!tor_access_ring(ring_section, ring_id, ppe_type, ring_variant, + instance_id, &put_info, &put_info_size, GET_RING_PUT_INFO)) + die("Failed to find where to put a ring!"); + + if (*ring_section_size - put_info.chiplet_offset > MAX_TOR_RING_OFFSET) + die("TOR section has reached its maximum size!"); + + ring_offset = htobe16(*ring_section_size - put_info.chiplet_offset); + ring_size = be16toh(ring->size); + + memcpy((uint8_t *)ring_section + put_info.ring_slot_offset, + &ring_offset, sizeof(ring_offset)); + memcpy((uint8_t *)ring_section + *ring_section_size, ring, ring_size); + + *ring_section_size = be32toh(ring_section->size) + ring_size; + ring_section->size = htobe32(*ring_section_size); +} + +/* + * Extracts a ring from CP00 record of MVPD and appends it to the ring section + * applying overlay if necessary. All buffers must be be at least + * MAX_RING_BUF_SIZE bytes in length. Indicates result by setting *ring_status. 
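+ * As an illustration of the instance numbering below: an EX instance ring
+ * for the odd half of EQ1 (chiplet_id 0x11, even_odd 1) ends up under TOR
+ * instance id 0x11 + 1 + (0x11 - 0x10) = 0x13.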
+ */ +static void tor_fetch_and_insert_vpd_ring(struct tor_hdr *ring_section, + uint32_t *ring_section_size, + const struct ring_query *query, + uint32_t max_ring_section_size, + struct tor_hdr *overlays_section, + enum ppe_type ppe_type, + uint8_t chiplet_id, + uint8_t even_odd, + uint8_t *buf1, + uint8_t *buf2, + uint8_t *buf3, + enum ring_status *ring_status) +{ + + bool success = false; + uint8_t instance_id = 0; + struct ring_hdr *ring = NULL; + + success = mvpd_extract_ring("CP00", query->kwd_name, chiplet_id, even_odd, + query->ring_id, buf1, MAX_RING_BUF_SIZE); + if (!success) { + *ring_status = RING_NOT_FOUND; + return; + } + + ring = (struct ring_hdr *)buf1; + + if (query->ring_class == RING_CLASS_GPTR_NEST || + query->ring_class == RING_CLASS_GPTR_EQ || + query->ring_class == RING_CLASS_GPTR_EX || + query->ring_class == RING_CLASS_GPTR_EC) + apply_overlays_to_gptr(overlays_section, ring, buf2, buf3); + + if (ring->magic == htobe16(RS4_MAGIC)) { + int redundant = 0; + rs4_redundant(ring, &redundant); + if (redundant) { + *ring_status = RING_REDUNDANT; + return; + } + } + + if (*ring_section_size + be16toh(ring->size) > max_ring_section_size) + die("Not enough memory to append the ring: %d > %d", + *ring_section_size + be16toh(ring->size), + max_ring_section_size); + + instance_id = chiplet_id + even_odd; + if (query->ring_class == RING_CLASS_EX_INS) + instance_id += chiplet_id - query->min_instance_id; + + tor_append_ring(ring_section, ring_section_size, query->ring_id, + ppe_type, RV_BASE, instance_id, ring); + + *ring_status = RING_FOUND; +} + +void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, + uint32_t *ring_section_size, + uint32_t max_ring_section_size, + struct tor_hdr *overlays_section, + enum ppe_type ppe_type, + uint8_t *buf1, uint8_t *buf2, uint8_t *buf3) +{ + const size_t pdg_query_count = + sizeof(RING_QUERIES_PDG) / sizeof(RING_QUERIES_PDG[0]); + const size_t pdr_query_count = + sizeof(RING_QUERIES_PDR) / sizeof(RING_QUERIES_PDR[0]); + const size_t ring_query_count = pdg_query_count + pdr_query_count; + + size_t i = 0; + uint8_t eq = 0; + + const struct ring_query *eq_query = NULL; + const struct ring_query *ec_query = NULL; + + const struct ring_query *ex_queries[4]; + uint8_t ex_query_count = 0; + + /* Add all common rings */ + for (i = 0; i < ring_query_count; ++i) { + uint8_t instance = 0; + uint8_t max_instance_id = 0; + const struct ring_query *query = NULL; + + if (i < pdg_query_count) + query = &RING_QUERIES_PDG[i]; + else + query = &RING_QUERIES_PDR[i - pdg_query_count]; + + if (query->ring_class == RING_CLASS_EQ_INS || + query->ring_class == RING_CLASS_EX_INS || + query->ring_class == RING_CLASS_EC_INS) + continue; + + max_instance_id = query->max_instance_id; + /* 0xff meant multicast in Power8, but doesn't in Power9 */ + if (max_instance_id == 0xff) + max_instance_id = query->min_instance_id; + + if (ppe_type == PT_CME && + query->ring_class != RING_CLASS_EC && + query->ring_class != RING_CLASS_GPTR_EC) + continue; + + if (ppe_type == PT_SGPE && + query->ring_class != RING_CLASS_EX && + query->ring_class != RING_CLASS_EQ && + query->ring_class != RING_CLASS_GPTR_EQ && + query->ring_class != RING_CLASS_GPTR_EX) + continue; + + for (instance = query->min_instance_id; + instance <= max_instance_id; + ++instance) { + enum ring_status ring_status; + tor_fetch_and_insert_vpd_ring(ring_section, + ring_section_size, + query, + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/0, + buf1, buf2, buf3, + &ring_status); 
+ + if (ring_status == RING_NOT_FOUND) + die("Failed to insert a common ring."); + } + } + + /* Add all instance rings */ + + for (i = 0; i < pdr_query_count; ++i) { + const struct ring_query *query = &RING_QUERIES_PDR[i]; + const enum ring_class class = query->ring_class; + if (class == RING_CLASS_EQ_INS && eq_query == NULL) { + eq_query = query; + } else if (class == RING_CLASS_EX_INS && ex_query_count < 4) { + ex_queries[ex_query_count] = query; + ++ex_query_count; + } else if (class == RING_CLASS_EC_INS && ec_query == NULL) { + ec_query = query; + } + } + + for (eq = 0; eq < NUM_OF_QUADS; ++eq) { + /* EQ instances */ + if ((ppe_type == PT_SBE || ppe_type == PT_SGPE) && eq_query != NULL) { + const uint8_t instance = eq_query->min_instance_id + eq; + + enum ring_status ring_status; + + tor_fetch_and_insert_vpd_ring(ring_section, + ring_section_size, + eq_query, + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/0, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert an EQ ring."); + } + + /* EX instances */ + if ((ppe_type == PT_SBE || ppe_type == PT_SGPE) && ex_query_count != 0) { + uint8_t ex = 0; + for (ex = 2 * eq; ex < 2 * (eq + 1); ++ex) { + for (i = 0; i < ex_query_count; ++i) { + const uint8_t instance = ex_queries[i]->min_instance_id + eq; + + enum ring_status ring_status; + + tor_fetch_and_insert_vpd_ring(ring_section, + ring_section_size, + ex_queries[i], + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/ex % 2, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert an EC ring."); + } + } + } + + /* EC instances */ + if ((ppe_type == PT_SBE || ppe_type == PT_CME) && ec_query != NULL) { + uint8_t ec = 0; + for (ec = 4 * eq; ec < 4 * (eq + 1); ++ec) { + const uint8_t instance = ec_query->min_instance_id + ec; + + enum ring_status ring_status; + + tor_fetch_and_insert_vpd_ring(ring_section, + ring_section_size, + ec_query, + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/0, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert an EC ring."); + } + } + } +} diff --git a/src/soc/ibm/power9/tor.h b/src/soc/ibm/power9/tor.h index 1b4e60d4040..0bdc2df56a1 100644 --- a/src/soc/ibm/power9/tor.h +++ b/src/soc/ibm/power9/tor.h @@ -3,6 +3,16 @@ #ifndef __SOC_IBM_POWER9_TOR_H #define __SOC_IBM_POWER9_TOR_H +#include +#include + +#define UNDEFINED_RING_ID ((uint16_t)0xFFFF) +#define UNDEFINED_RING_VARIANT ((uint8_t)0xFF) +#define UNDEFINED_INSTANCE_ID ((uint8_t)0xFF) + +#define MAX_RING_BUF_SIZE ((uint32_t)60000) +#define MAX_TOR_RING_OFFSET ((uint16_t)0xFFFF) + /* List of all Ring IDs as they appear in data */ enum ring_id { /* Perv Chiplet Rings */ @@ -347,4 +357,101 @@ enum ring_id { NUM_RING_IDS = 267 }; +/* Supported ring variants. Values match order in ring sections. 
*/ +enum ring_variant { + RV_BASE, + RV_CC, + RV_RL, // Kernel and user protection + RV_RL2, // Kernel only protection + RV_RL3, // Rugby v4 + RV_RL4, // Java performance + RV_RL5, // Spare + NUM_RING_VARIANTS +}; + +/* List of groups of rings */ +enum ring_class { + RING_CLASS_NEST, // Common NEST rings except GPTR #G rings + RING_CLASS_GPTR_NEST, // Common GPTR #G rings-NEST + RING_CLASS_GPTR_EQ, // Common GPTR #G rings-EQ + RING_CLASS_GPTR_EX, // Common GPTR #G rings-EX + RING_CLASS_GPTR_EC, // Common GPTR #G rings-EC + RING_CLASS_EQ, // Common EQ rings + RING_CLASS_EX, // Common EX rings + RING_CLASS_EC, // Common EC rings + RING_CLASS_EQ_INS, // Instance EQ rings + RING_CLASS_EX_INS, // Instance EX rings + RING_CLASS_EC_INS, // Instance EC rings +}; + +/* PPE types, enum values match indices inside rings section */ +enum ppe_type { + PT_SBE, + PT_CME, + PT_SGPE, +}; + +/* Available ring access operations */ +enum ring_operation { + GET_RING_DATA, + GET_RING_PUT_INFO, + GET_PPE_LEVEL_RINGS, +}; + +/* Result of calling tor_fetch_and_insert_vpd_rings() */ +enum ring_status { + RING_NOT_FOUND, + RING_FOUND, + RING_REDUNDANT, +}; + +/* Information necessary to put a ring into a ring section */ +struct ring_put_info { + uint32_t chiplet_offset; // Relative to ring section + uint32_t ring_slot_offset; // Relative to ring section +}; + +/* Describes ring search characteristics for tor_fetch_and_insert_vpd_rings() */ +struct ring_query { + enum ring_id ring_id; + enum ring_class ring_class; + char kwd_name[3]; // Keyword name + uint8_t min_instance_id; + uint8_t max_instance_id; +}; + +/* Header of a ring section */ +struct tor_hdr { + uint32_t magic; // One of TOR_MAGIC_* + uint8_t version; + uint8_t chip_type; + uint8_t dd_level; + uint8_t undefined; + uint32_t size; + uint8_t data[]; +} __attribute__((packed)); + +/* + * Either reads ring into the buffer (on GET_RING_DATA) or treats the buffer as + * an instance of ring_put_info (on GET_RING_PUT_INFO) + */ +bool tor_access_ring(struct tor_hdr *ring_section, uint16_t ring_id, + enum ppe_type ppe_type, uint8_t ring_variant, + uint8_t instance_id, void *data_buf, + uint32_t *data_buf_size, enum ring_operation operation); + +/* + * Extracts rings from CP00 record of MVPD and appends them to the ring section + * applying overlay if necessary. All buffers must be be at least + * MAX_RING_BUF_SIZE bytes in length. + */ +void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, + uint32_t *ring_section_size, + uint32_t max_ring_section_size, + struct tor_hdr *overlays_section, + enum ppe_type ppe_type, + uint8_t *buf1, + uint8_t *buf2, + uint8_t *buf3); + #endif // __SOC_IBM_POWER9_TOR_H From f2011d3202798f2fb43db42c68a35d8b08180d4d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 5 Aug 2021 01:54:50 +0300 Subject: [PATCH 061/213] soc/power9/mvpd.c: add mvpd_extract_keyword() Change-Id: I1a6dfa7fbd90bb9528b70d8f469c4a66f62697a1 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 6 ++++++ src/soc/ibm/power9/mvpd.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index 45561bedcaf..0e93c400a7e 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -14,6 +14,12 @@ void mvpd_device_unmount(void); const struct region_device *mvpd_device_ro(void); +/* Finds a specific keyword in MVPD partition and extracts it. *size is updated + * to reflect needed or used space in the buffer. 
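+ * A hypothetical caller reading the #V keyword of the LRP0 record might do:
+ * uint8_t buf[512];
+ * uint32_t size = sizeof(buf);
+ * if (!mvpd_extract_keyword("LRP0", "#V", buf, &size))
+ * die("#V does not fit, need %u bytes\n", size);
+ * (illustrative only; actual record and keyword names depend on the caller)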
*/ +bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, + uint8_t *buf, uint32_t *size); + +/* Finds a specific ring in MVPD partition and extracts it */ bool mvpd_extract_ring(const char *record_name, const char *kwd_name, uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, uint8_t *buf, uint32_t buf_size); diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 2ae3b63cf86..82cd36b9a95 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -144,6 +144,36 @@ static const uint8_t *mvpd_get_keyword(const char *record_name, return kwd; } +bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, + uint8_t *buf, uint32_t *size) +{ + void *mmaped_data = NULL; + + const uint8_t *kwd = NULL; + size_t kwd_size = 0; + bool copied_data = false; + + mvpd_device_init(); + + kwd = mvpd_get_keyword(record_name, kwd_name, &kwd_size, &mmaped_data); + if (kwd == NULL) + die("Failed to find %s keyword in %s!\n", kwd_name, + record_name); + + if (*size >= kwd_size) { + memcpy(buf, kwd, kwd_size); + copied_data = true; + } + + *size = kwd_size; + + if (rdev_munmap(mvpd_device_ro(), mmaped_data)) + die("Failed to unmap %s record!\n", record_name); + + return copied_data; +} + +/* Finds a specific ring in MVPD partition and extracts it */ bool mvpd_extract_ring(const char *record_name, const char *kwd_name, uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, uint8_t *buf, uint32_t buf_size) From bb55867d1a8317900b58da1fa022a5d0af90300d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 5 Aug 2021 01:57:56 +0300 Subject: [PATCH 062/213] soc/power9/: implement istep 8.6 as powerbus.c Change-Id: I04b4a384fb0ea4bb4071ddd5f1ab3c6d1417363a Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 35 +++++ src/include/cpu/power/powerbus.h | 52 +++++++ src/soc/ibm/power9/Makefile.inc | 3 + src/soc/ibm/power9/powerbus.c | 259 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/scratch.h | 20 +++ 5 files changed, 369 insertions(+) create mode 100644 src/include/cpu/power/powerbus.h create mode 100644 src/soc/ibm/power9/powerbus.c create mode 100644 src/soc/ibm/power9/scratch.h diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index 0e93c400a7e..7fc48a71438 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -6,6 +6,41 @@ #include #include +/* Single bucket within #V keyword of version 3 */ +struct voltage_data { + uint16_t freq; // MHz + uint16_t vdd_voltage; + uint16_t idd_current; + uint16_t vcs_voltage; + uint16_t ics_current; +} __attribute__((__packed__)); + +/* Single bucket within #V keyword of version 3 */ +struct voltage_bucket_data { + uint8_t id; + + struct voltage_data nominal; + struct voltage_data powersave; + struct voltage_data turbo; + struct voltage_data ultra_turbo; + struct voltage_data powerbus; + + uint16_t sort_power_normal; + uint16_t sort_power_turbo; + + uint8_t reserved[6]; +} __attribute__((__packed__)); + +#define VOLTAGE_DATA_VERSION 3 +#define VOLTAGE_BUCKET_COUNT 6 + +/* #V of LRP[0-5] in MVPD */ +struct voltage_kwd { + uint8_t version; + uint8_t pnp[3]; + struct voltage_bucket_data buckets[VOLTAGE_BUCKET_COUNT]; +} __attribute__((__packed__)); + struct region_device; void mvpd_device_init(void); diff --git a/src/include/cpu/power/powerbus.h b/src/include/cpu/power/powerbus.h new file mode 100644 index 00000000000..d2598b9648a --- /dev/null +++ b/src/include/cpu/power/powerbus.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + 
+#ifndef CPU_PPC64_POWERBUS_H +#define CPU_PPC64_POWERBUS_H + +#include + +enum FABRIC_CORE_FLOOR_RATIO +{ + FABRIC_CORE_FLOOR_RATIO_RATIO_8_8 = 0x0, + FABRIC_CORE_FLOOR_RATIO_RATIO_7_8 = 0x1, + FABRIC_CORE_FLOOR_RATIO_RATIO_6_8 = 0x2, + FABRIC_CORE_FLOOR_RATIO_RATIO_5_8 = 0x3, + FABRIC_CORE_FLOOR_RATIO_RATIO_4_8 = 0x4, + FABRIC_CORE_FLOOR_RATIO_RATIO_2_8 = 0x5, +}; + +enum FABRIC_CORE_CEILING_RATIO +{ + FABRIC_CORE_CEILING_RATIO_RATIO_8_8 = 0x0, + FABRIC_CORE_CEILING_RATIO_RATIO_7_8 = 0x1, + FABRIC_CORE_CEILING_RATIO_RATIO_6_8 = 0x2, + FABRIC_CORE_CEILING_RATIO_RATIO_5_8 = 0x3, + FABRIC_CORE_CEILING_RATIO_RATIO_4_8 = 0x4, + FABRIC_CORE_CEILING_RATIO_RATIO_2_8 = 0x5, +}; + +#define NUM_EPSILON_READ_TIERS 3 +#define NUM_EPSILON_WRITE_TIERS 2 + +/* Description of PowerBus configuration */ +struct powerbus_cfg +{ + /* Data computed from #V of LRP0 in MVPD, is MHz */ + uint32_t freq_core_floor; + uint32_t freq_core_ceiling; + uint32_t fabric_freq; + + /* Derived from data above */ + enum FABRIC_CORE_FLOOR_RATIO core_floor_ratio; + enum FABRIC_CORE_CEILING_RATIO core_ceiling_ratio; + + /* Derived from all data above */ + /* ATTR_PROC_EPS_READ_CYCLES_T* */ + uint32_t eps_r[NUM_EPSILON_READ_TIERS]; + /* ATTR_PROC_EPS_WRITE_CYCLES_T* */ + uint32_t eps_w[NUM_EPSILON_WRITE_TIERS]; +}; + +const struct powerbus_cfg *powerbus_cfg(void); + +#endif // CPU_PPC64_POWERBUS_H diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 80864f3865f..7466971c1c3 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -6,7 +6,9 @@ bootblock-y += bootblock.c bootblock-y += rom_media.c romstage-y += rom_media.c romstage-y += romstage.c +romstage-y += mvpd.c romstage-y += vpd.c +romstage-y += powerbus.c romstage-y += istep_13_2.c romstage-y += istep_13_3.c romstage-y += istep_13_4.c @@ -32,5 +34,6 @@ ramstage-y += istep_18_12.c ramstage-y += mvpd.c ramstage-y += vpd.c ramstage-y += tor.c +ramstage-y += powerbus.c endif diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c new file mode 100644 index 00000000000..3df5655753e --- /dev/null +++ b/src/soc/ibm/power9/powerbus.c @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#include "scratch.h" + +#define EPSILON_MAX_VALUE 0xFFFFFFFF + +#define EPS_GUARDBAND 20 + +/* From src/import/chips/p9/procedures/hwp/nest/p9_fbc_eff_config.C */ +/* LE epsilon (2 chips per-group) */ +static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; +static const uint32_t EPSILON_R_T1_LE[] = { 7, 7, 8, 8, 10, 22 }; +static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; +static const uint32_t EPSILON_W_T0_LE[] = { 0, 0, 0, 0, 0, 5 }; +static const uint32_t EPSILON_W_T1_LE[] = { 15, 16, 17, 19, 21, 33 }; + +/* See get_first_valid_pdV_pbFreq() in Hostboot */ + +static bool read_voltage_data(struct powerbus_cfg *cfg) +{ + uint8_t buf[512]; + uint32_t buf_size = sizeof(buf); + struct voltage_kwd *voltage = (void *)buf; + + bool success = false; + int i = 0; + + /* + * ATTR_FREQ_PB_MHZ + * + * It's equal to the first non-zero PowerBus frequency, unless its + * value is fixed for the platform, which is the case for Talos II. 
+ */ + const uint32_t pb_freq = 1866; + /* ATTR_FREQ_CORE_CEILING_MHZ, equal to the minimum of turbo frequencies */ + uint32_t freq_ceiling = 0; + /* ATTR_FREQ_CORE_FLOOR_MHZ, equal to the maximum of powersave frequencies */ + uint32_t freq_floor = 0; + + /* Using LRP0 because frequencies are the same in all LRP records */ + success = mvpd_extract_keyword("LRP0", "#V", buf, &buf_size); + if (!success) { + printk(BIOS_ERR, "Failed to read LRP0 record from MVPD\n"); + return false; + } + + if (voltage->version != VOLTAGE_DATA_VERSION) { + printk(BIOS_ERR, "Only version %d of voltage data is supported, got: %d\n", + VOLTAGE_DATA_VERSION, voltage->version); + return false; + } + + for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { + struct voltage_bucket_data *bucket = &voltage->buckets[i]; + if (bucket->id == 0) + continue; + + if (bucket->powersave.freq != 0 && + (freq_floor == 0 || bucket->powersave.freq > freq_floor)) { + freq_floor = bucket->powersave.freq; + } + + if (bucket->turbo.freq != 0 && + (freq_ceiling == 0 || bucket->turbo.freq < freq_ceiling)) { + freq_ceiling = bucket->turbo.freq; + } + } + + cfg->fabric_freq = pb_freq; + cfg->freq_core_floor = freq_floor; + cfg->freq_core_ceiling = freq_ceiling; + + return true; +} + +static bool calculate_frequencies(struct powerbus_cfg *cfg) +{ + const uint32_t pb_freq = cfg->fabric_freq; + const uint32_t freq_floor = cfg->freq_core_floor; + const uint32_t freq_ceiling = cfg->freq_core_ceiling; + + enum FABRIC_CORE_FLOOR_RATIO floor_ratio; + enum FABRIC_CORE_CEILING_RATIO ceiling_ratio; + + /* breakpoint ratio: core floor 4.0, pb 2.0 (cache floor :: pb = 8/8) */ + if (freq_floor >= (2 * pb_freq)) { + floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_8_8; + /* breakpoint ratio: core floor 3.5, pb 2.0 (cache floor :: pb = 7/8) */ + } else if ((4 * freq_floor) >= (7 * pb_freq)) { + floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_7_8; + /* breakpoint ratio: core floor 3.0, pb 2.0 (cache floor :: pb = 6/8) */ + } else if ((2 * freq_floor) >= (3 * pb_freq)) { + floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_6_8; + /* breakpoint ratio: core floor 2.5, pb 2.0 (cache floor :: pb = 5/8) */ + } else if ((4 * freq_floor) >= (5 * pb_freq)) { + floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_5_8; + /* breakpoint ratio: core floor 2.0, pb 2.0 (cache floor :: pb = 4/8) */ + } else if (freq_floor >= pb_freq) { + floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_4_8; + /* breakpoint ratio: core floor 1.0, pb 2.0 (cache floor :: pb = 2/8) */ + } else if ((2 * freq_floor) >= pb_freq) { + floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_2_8; + } else { + printk(BIOS_ERR, "Unsupported core ceiling/PB frequency ratio = (%d/%d)\n", + freq_floor, pb_freq); + return false; + } + + /* breakpoint ratio: core ceiling 4.0, pb 2.0 (cache ceiling :: pb = 8/8) */ + if (freq_ceiling >= (2 * pb_freq)) { + ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_8_8; + /* breakpoint ratio: core ceiling 3.5, pb 2.0 (cache ceiling :: pb = 7/8) */ + } else if ((4 * freq_ceiling) >= (7 * pb_freq)) { + ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_7_8; + /* breakpoint ratio: core ceiling 3.0, pb 2.0 (cache ceiling :: pb = 6/8) */ + } else if ((2 * freq_ceiling) >= (3 * pb_freq)) { + ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_6_8; + /* breakpoint ratio: core ceiling 2.5, pb 2.0 (cache ceiling :: pb = 5/8) */ + } else if ((4 * freq_ceiling) >= (5 * pb_freq)) { + ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_5_8; + /* breakpoint ratio: core ceiling 2.0, pb 2.0 (cache ceiling :: pb = 4/8) */ + } else if (freq_ceiling 
>= pb_freq) { + ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_4_8; + /* breakpoint ratio: core ceiling 1.0, pb 2.0 (cache ceiling :: pb = 2/8) */ + } else if ((2 * freq_ceiling) >= pb_freq) { + ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_2_8; + } else { + printk(BIOS_ERR, "Unsupported core ceiling/PB frequency ratio = (%d/%d)\n", + freq_ceiling, pb_freq); + return false; + } + + cfg->core_floor_ratio = floor_ratio; + cfg->core_ceiling_ratio = ceiling_ratio; + return true; +} + +static void config_guardband_epsilon(uint8_t gb_percentage, uint32_t *target_value) +{ + uint32_t delta = (*target_value * gb_percentage) / 100; + delta += ((*target_value * gb_percentage) % 100) ? 1 : 0; + + /* Clamp to maximum value if necessary */ + if (delta > (EPSILON_MAX_VALUE - *target_value)) { + printk(BIOS_DEBUG, "Guardband application generated out-of-range target value," + " clamping to maximum value!\n"); + *target_value = EPSILON_MAX_VALUE; + } else { + *target_value += delta; + } +} + +static void dump_epsilons(struct powerbus_cfg *cfg) +{ + uint32_t i; + + for (i = 0; i < NUM_EPSILON_READ_TIERS; i++) + printk(BIOS_DEBUG, " R_T[%d] = %d\n", i, cfg->eps_r[i]); + + for (i = 0; i < NUM_EPSILON_WRITE_TIERS; i++) + printk(BIOS_DEBUG, " W_T[%d] = %d\n", i, cfg->eps_w[i]); +} + +static void calculate_epsilons(struct powerbus_cfg *cfg) +{ + const enum FABRIC_CORE_FLOOR_RATIO floor_ratio = cfg->core_floor_ratio; + const enum FABRIC_CORE_CEILING_RATIO ceiling_ratio = cfg->core_ceiling_ratio; + const uint32_t pb_freq = cfg->fabric_freq; + const uint32_t freq_ceiling = cfg->freq_core_ceiling; + + uint32_t *eps_r = cfg->eps_r; + uint32_t *eps_w = cfg->eps_w; + + uint32_t i; + + uint64_t scratch_reg6 = read_scom(MBOX_SCRATCH_REG1 + 5); + /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ + bool node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); + + /* Assuming that ATTR_PROC_EPS_TABLE_TYPE = EPS_TYPE_LE in talos.xml is always correct */ + + eps_r[0] = EPSILON_R_T0_LE[floor_ratio]; + + if (node_pump_mode) + eps_r[1] = EPSILON_R_T1_LE[floor_ratio]; + else + eps_r[1] = EPSILON_R_T0_LE[floor_ratio]; + + eps_r[2] = EPSILON_R_T2_LE[floor_ratio]; + + eps_w[0] = EPSILON_W_T0_LE[floor_ratio]; + eps_w[1] = EPSILON_W_T1_LE[floor_ratio]; + + /* Dump base epsilon values */ + printk(BIOS_DEBUG, "Base epsilon values read from table:\n"); + dump_epsilons(cfg); + + /* Scale base epsilon values if core is running 2x nest frequency */ + if (ceiling_ratio == FABRIC_CORE_CEILING_RATIO_RATIO_8_8) { + uint8_t scale_percentage = 100 * freq_ceiling / (2 * pb_freq); + if (scale_percentage < 100) + die("scale_percentage is too small!"); + scale_percentage -= 100; + + printk(BIOS_DEBUG, "Scaling based on ceiling frequency\n"); + + for (i = 0; i < NUM_EPSILON_READ_TIERS; i++) + config_guardband_epsilon(scale_percentage, &eps_r[i]); + + for (i = 0; i < NUM_EPSILON_WRITE_TIERS; i++) + config_guardband_epsilon(scale_percentage, &eps_w[i]); + } + + for (i = 0; i < NUM_EPSILON_READ_TIERS; i++) + config_guardband_epsilon(EPS_GUARDBAND, &eps_r[i]); + + for (i = 0; i < NUM_EPSILON_WRITE_TIERS; i++) + config_guardband_epsilon(EPS_GUARDBAND, &eps_w[i]); + + /* Dump final epsilon values */ + printk(BIOS_DEBUG, "Scaled epsilon values based on %s%d percent guardband:\n", + (EPS_GUARDBAND >= 0 ? 
"+" : "-"), EPS_GUARDBAND); + dump_epsilons(cfg); + + /* + * Check relationship of epsilon counters: + * read tier values are strictly increasing + * write tier values are strictly increasing + */ + if (eps_r[0] > eps_r[1] || eps_r[1] > eps_r[2] || eps_w[0] > eps_w[1]) + printk(BIOS_WARNING, "Invalid relationship between base epsilon values\n"); +} + +const struct powerbus_cfg *powerbus_cfg(void) +{ + static struct powerbus_cfg cfg; + + static bool init_done; + if (init_done) + return &cfg; + + if (!read_voltage_data(&cfg)) + die("Failed to read voltage data"); + + if (!calculate_frequencies(&cfg)) + die("Incorrect core or PowerBus frequency"); + + calculate_epsilons(&cfg); + + init_done = true; + return &cfg; +} diff --git a/src/soc/ibm/power9/scratch.h b/src/soc/ibm/power9/scratch.h new file mode 100644 index 00000000000..5b2cc29bad3 --- /dev/null +++ b/src/soc/ibm/power9/scratch.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_SCRATCH_H +#define __SOC_IBM_POWER9_SCRATCH_H + +/* + * This file is for common definitions related to + * TP.TPVSB.FSI.W.FSI_MAILBOX.FSXCOMP.FSXLOG.SCRATCH_REGISTER_1 + * and the consecutive 7 scratch registers. + */ + +/* SCOM address of the first scratch register */ +#define MBOX_SCRATCH_REG1 0x00050038 + +/* CFAM address of the first scratch register (word addressing) */ +#define MBOX_SCRATCH_REG1_FSI 0x00002838 + +#define MBOX_SCRATCH_REG6_GROUP_PUMP_MODE 23 + +#endif /* __SOC_IBM_POWER9_SCRATCH_H */ From dd2ee398ec8f47c0a63330051dcdb7d9b9e6b38f Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 5 Aug 2021 01:58:26 +0300 Subject: [PATCH 063/213] soc/power9/istep_13_8.c: use powerbus.c unit Change-Id: I0efe478537ec03afe71677e3d64d7468cd552162 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/istep_13_8.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index 836bbf11e20..23361273162 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -1,13 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include "istep_13_scom.h" #define ATTR_PG 0xE000000000000000ull -#define FREQ_PB_MHZ 1866 /* * This function was generated from initfiles. 
Some of the registers used here @@ -22,6 +22,8 @@ */ static void p9n_mca_scom(int mcs_i, int mca_i) { + const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; const int mca_mul = 0x10; @@ -108,10 +110,15 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [32-39] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4 // REMOTE_NODAL_EPSILON [40-47] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4 // VECTOR_GROUP_EPSILON */ + #define F(X) (((X) + 6) / 4) scom_and_or_for_chiplet(nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), - PPC_SHIFT(1, 7) /* FIXME: fill the rest with non-hardcoded values*/ - | PPC_SHIFT(4, 15) | PPC_SHIFT(4, 23) | PPC_SHIFT(4, 31) - | PPC_SHIFT(0x19, 39) | PPC_SHIFT(0x19, 47)); + PPC_SHIFT(1, 7) + | PPC_SHIFT(F(pb_cfg->eps_r[0]), 15) + | PPC_SHIFT(F(pb_cfg->eps_r[1]), 23) + | PPC_SHIFT(F(pb_cfg->eps_r[1]), 31) + | PPC_SHIFT(F(pb_cfg->eps_r[2]), 39) + | PPC_SHIFT(F(pb_cfg->eps_r[2]), 47)); + #undef F //~ static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; // T0, T1 //~ static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; // T2 @@ -469,9 +476,10 @@ static void p9n_mca_scom(int mcs_i, int mca_i) /* * From Hostboot: * l_def_mn_freq_ratio = 1000 * ATTR_MSS_FREQ / ATTR_FREQ_PB_MHZ; - * ATTR_MSS_FREQ is in MT/s (sigh), ATTR_FREQ_PB_MHZ is 1866 MHz (from talos.xml). + * ATTR_MSS_FREQ is in MT/s (sigh). */ - uint64_t mn_freq_ratio = 1000 * mem_data.speed / FREQ_PB_MHZ; + uint32_t pb_freq = pb_cfg->fabric_freq; + uint64_t mn_freq_ratio = 1000 * mem_data.speed / pb_freq; uint64_t val_to_data = mn_freq_ratio < 915 ? 3 : mn_freq_ratio < 1150 ? 4 : mn_freq_ratio < 1300 ? 5 : 6; From 704428ddea6f742223ff1bb3e521e5765fe60144 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 12 Aug 2021 19:29:48 +0300 Subject: [PATCH 064/213] soc/power9/mvpd.c: add mvpd_get_voltage_data() Change-Id: Ie7550e4158a7483a938c6fbe90abae77361c723b Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 3 +++ src/soc/ibm/power9/mvpd.c | 31 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/powerbus.c | 20 +++----------------- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index 7fc48a71438..d69e2d41d37 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -49,6 +49,9 @@ void mvpd_device_unmount(void); const struct region_device *mvpd_device_ro(void); +/* Reads #V of one of LRP records (mind that there is only one buffer) */ +const struct voltage_kwd *mvpd_get_voltage_data(int lrp); + /* Finds a specific keyword in MVPD partition and extracts it. *size is updated * to reflect needed or used space in the buffer. 
*/ bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 82cd36b9a95..1660e1c8186 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -2,6 +2,7 @@ #include +#include #include #include #include @@ -173,6 +174,36 @@ bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, return copied_data; } +const struct voltage_kwd *mvpd_get_voltage_data(int lrp) +{ + static int inited_lrp = -1; + static uint8_t buf[sizeof(struct voltage_kwd)]; + + char record_name[] = { 'L', 'R', 'P', '0' + lrp, '\0' }; + uint32_t buf_size = sizeof(buf); + struct voltage_kwd *voltage = (void *)buf; + + assert(lrp >= 0 && lrp < 6); + if (inited_lrp == lrp) + return voltage; + + inited_lrp = -1; + + if (!mvpd_extract_keyword(record_name, "#V", buf, &buf_size)) { + printk(BIOS_ERR, "Failed to read LRP0 record from MVPD\n"); + return NULL; + } + + if (voltage->version != VOLTAGE_DATA_VERSION) { + printk(BIOS_ERR, "Only version %d of voltage data is supported, got: %d\n", + VOLTAGE_DATA_VERSION, voltage->version); + return NULL; + } + + inited_lrp = lrp; + return voltage; +} + /* Finds a specific ring in MVPD partition and extracts it */ bool mvpd_extract_ring(const char *record_name, const char *kwd_name, uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c index 3df5655753e..2e011300be5 100644 --- a/src/soc/ibm/power9/powerbus.c +++ b/src/soc/ibm/power9/powerbus.c @@ -25,12 +25,8 @@ static const uint32_t EPSILON_W_T1_LE[] = { 15, 16, 17, 19, 21, 33 } static bool read_voltage_data(struct powerbus_cfg *cfg) { - uint8_t buf[512]; - uint32_t buf_size = sizeof(buf); - struct voltage_kwd *voltage = (void *)buf; - - bool success = false; int i = 0; + const struct voltage_kwd *voltage = NULL; /* * ATTR_FREQ_PB_MHZ @@ -45,20 +41,10 @@ static bool read_voltage_data(struct powerbus_cfg *cfg) uint32_t freq_floor = 0; /* Using LRP0 because frequencies are the same in all LRP records */ - success = mvpd_extract_keyword("LRP0", "#V", buf, &buf_size); - if (!success) { - printk(BIOS_ERR, "Failed to read LRP0 record from MVPD\n"); - return false; - } - - if (voltage->version != VOLTAGE_DATA_VERSION) { - printk(BIOS_ERR, "Only version %d of voltage data is supported, got: %d\n", - VOLTAGE_DATA_VERSION, voltage->version); - return false; - } + voltage = mvpd_get_voltage_data(0); for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { - struct voltage_bucket_data *bucket = &voltage->buckets[i]; + const struct voltage_bucket_data *bucket = &voltage->buckets[i]; if (bucket->id == 0) continue; From 294caf6afa0677cf198973d16e74f8765e7f2597 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 Aug 2021 18:25:04 +0300 Subject: [PATCH 065/213] soc/power9/homer.c: implement get_ppe_scan_rings() It extracts rings for a specific Programmable PowerPC-lite Engine (CME or SGPE). 
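The size fields of struct ring_data are in/out values: buffer capacity on
entry, bytes actually used on return. That is why build_homer_image() resets
them to the full buffer sizes before reusing the structure for the SGPE pass.
A minimal standalone sketch of that convention follows; buf_desc,
extract_rings() and the byte counts are invented for illustration and are not
the real TOR API.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for struct ring_data: one buffer plus a size field
 * that is capacity on input and bytes actually used on output. */
struct buf_desc {
	uint8_t *buf;
	uint32_t size;
};

/* Stand-in for get_ppe_scan_rings(): pretends to extract 'needed' bytes of
 * rings for the given PPE and updates desc->size in place. */
static int extract_rings(const char *ppe, uint32_t needed, struct buf_desc *desc)
{
	if (needed > desc->size) {
		printf("%s: need %u bytes, only %u available\n",
		       ppe, (unsigned)needed, (unsigned)desc->size);
		return 0;
	}

	memset(desc->buf, 0xAA, needed);
	desc->size = needed;	/* capacity in, used size out */
	printf("%s: extracted %u bytes of rings\n", ppe, (unsigned)desc->size);
	return 1;
}

int main(void)
{
	static uint8_t rings_buf[300 * 1024];
	struct buf_desc desc = { rings_buf, sizeof(rings_buf) };

	extract_rings("CME", 120 * 1024, &desc);

	/* Without this reset the second pass would see the size of the CME
	 * rings (120 KiB) as the buffer capacity instead of the full 300 KiB. */
	desc.size = sizeof(rings_buf);
	extract_rings("SGPE", 200 * 1024, &desc);

	return 0;
}

In the real code the same reset happens right before
get_ppe_scan_rings(hw, dd, PT_SGPE, &ring_data).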
Change-Id: I78c1c3a5a477e1fcb761dbb3be5069417c30c396 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 76 +++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index bc1447162d8..f311be5db3e 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -11,8 +11,20 @@ #include "chip.h" #include "homer.h" +#include "tor.h" #include "xip.h" +struct ring_data { + void *rings_buf; + void *work_buf1; + void *work_buf2; + void *work_buf3; + uint32_t rings_buf_size; + uint32_t work_buf1_size; + uint32_t work_buf2_size; + uint32_t work_buf3_size; +}; + enum operation_type { COPY, FIND @@ -860,11 +872,58 @@ static void istep_16_1(int this_core) // p9_stop_save_scom() and others } +/* Extracts rings for a specific Programmable PowerPC-lite Engine */ +static void get_ppe_scan_rings(struct xip_hw_header *hw, uint8_t dd, enum ppe_type ppe, + struct ring_data *ring_data) +{ + const uint32_t max_rings_buf_size = ring_data->rings_buf_size; + + struct tor_hdr *rings; + struct tor_hdr *overlays; + + if (dd < 0x20) + die("DD must be at least 0x20!"); + if (!hw->overlays.dd_support) + die("Overlays must support DD!"); + + copy_section(&rings, &hw->rings, hw, dd, FIND); + copy_section(&overlays, &hw->overlays, hw, dd, FIND); + + if (!tor_access_ring(rings, UNDEFINED_RING_ID, ppe, UNDEFINED_RING_VARIANT, + UNDEFINED_INSTANCE_ID, ring_data->rings_buf, + &ring_data->rings_buf_size, GET_PPE_LEVEL_RINGS)) + die("Failed to access PPE level rings!"); + + assert(ring_data->work_buf1_size == MAX_RING_BUF_SIZE); + assert(ring_data->work_buf2_size == MAX_RING_BUF_SIZE); + assert(ring_data->work_buf3_size == MAX_RING_BUF_SIZE); + + tor_fetch_and_insert_vpd_rings((struct tor_hdr *)ring_data->rings_buf, + &ring_data->rings_buf_size, max_rings_buf_size, + overlays, ppe, + ring_data->work_buf1, + ring_data->work_buf2, + ring_data->work_buf3); +} + /* * This logic is for SMF disabled only! 
*/ void build_homer_image(void *homer_bar) { + static uint8_t rings_buf[300 * KiB]; + + static uint8_t work_buf1[MAX_RING_BUF_SIZE]; + static uint8_t work_buf2[MAX_RING_BUF_SIZE]; + static uint8_t work_buf3[MAX_RING_BUF_SIZE]; + + struct ring_data ring_data = { + .rings_buf = rings_buf, .rings_buf_size = sizeof(rings_buf), + .work_buf1 = work_buf1, .work_buf1_size = sizeof(work_buf1), + .work_buf2 = work_buf2, .work_buf2_size = sizeof(work_buf2), + .work_buf3 = work_buf3, .work_buf3_size = sizeof(work_buf3), + }; + struct mmap_helper_region_device mdev = {0}; struct homer_st *homer = homer_bar; struct xip_hw_header *hw = homer_bar; @@ -907,11 +966,18 @@ void build_homer_image(void *homer_bar) build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), dd); - // TBD - // getPpeScanRings() for CME - // layoutRingsForCME() - // getPpeScanRings for SGPE - // layoutRingsForSGPE() + get_ppe_scan_rings(hw, dd, PT_CME, &ring_data); + + // TODO: layoutRingsForCME() + + /* Reset buffer sizes to maximum values before reusing the structure */ + ring_data.rings_buf_size = sizeof(rings_buf); + ring_data.work_buf1_size = sizeof(work_buf1); + ring_data.work_buf2_size = sizeof(work_buf2); + ring_data.work_buf3_size = sizeof(work_buf3); + get_ppe_scan_rings(hw, dd, PT_SGPE, &ring_data); + + // TODO: layoutRingsForSGPE() // buildParameterBlock(); // updateCpmrCmeRegion(); From 60a580eeb5901763c1fd10ee4ae5af34886f5860 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 19 Aug 2021 01:27:36 +0300 Subject: [PATCH 066/213] soc/power9/homer.c: implement layout_cmn_rings_for_cme() There are also instance rings. Change-Id: I16f751eb7b060d8de6ccd620ffd28366bba6d969 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 65 ++++++++++++++++++++++++++++++++++++-- src/soc/ibm/power9/homer.h | 2 ++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index f311be5db3e..12d0f7a3028 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -25,6 +25,11 @@ struct ring_data { uint32_t work_buf3_size; }; +struct cme_cmn_ring_list { + uint16_t ring[8]; // In order: EC_FUNC, EC_GPTR, EC_TIME, EC_MODE, EC_ABST, 3 reserved + uint8_t payload[]; +}; + enum operation_type { COPY, FIND @@ -906,6 +911,60 @@ static void get_ppe_scan_rings(struct xip_hw_header *hw, uint8_t dd, enum ppe_ty ring_data->work_buf3); } +static void layout_cmn_rings_for_cme(struct homer_st *homer, + struct ring_data *ring_data, + enum ring_variant ring_variant, + uint32_t *ring_len) +{ + struct cme_cmn_ring_list *tmp = + (void *)&homer->cpmr.cme_sram_region[*ring_len]; + uint8_t *start = (void *)tmp; + uint8_t *payload = tmp->payload; + + uint32_t i = 0; + const enum ring_id ring_ids[] = { EC_FUNC, EC_GPTR, EC_TIME, EC_MODE }; + + for (i = 0; i < ARRAY_SIZE(ring_ids); ++i) { + const enum ring_id id = ring_ids[i]; + + uint32_t ring_size = MAX_RING_BUF_SIZE; + uint8_t *ring_dst = start + ALIGN_UP(payload - start, 8); + + enum ring_variant this_ring_variant = ring_variant; + if (id == EC_GPTR || id == EC_TIME) + this_ring_variant = RV_BASE; + + if (!tor_access_ring(ring_data->rings_buf, id, PT_CME, + this_ring_variant, EC00_CHIPLET_ID, + ring_dst, &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[i] = ring_dst - start; + payload = ring_dst + ALIGN_UP(ring_size, 8); + } + + if (payload != tmp->payload) + *ring_len += payload - start; + + *ring_len = ALIGN_UP(*ring_len, 8); +} + +static void layout_rings_for_cme(struct homer_st *homer, + struct 
ring_data *ring_data, + enum ring_variant ring_variant) +{ + struct cpmr_header *cpmr_hdr = &homer->cpmr.header; + struct cme_img_header *cme_hdr = (void *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + + uint32_t ring_len = cme_hdr->hcode_offset + cme_hdr->hcode_len; + + assert(cpmr_hdr->magic == CPMR_VDM_PER_QUAD); + + layout_cmn_rings_for_cme(homer, ring_data, ring_variant, &ring_len); + + // TODO: layout_inst_rings_for_cme() +} + /* * This logic is for SMF disabled only! */ @@ -923,6 +982,7 @@ void build_homer_image(void *homer_bar) .work_buf2 = work_buf2, .work_buf2_size = sizeof(work_buf2), .work_buf3 = work_buf3, .work_buf3_size = sizeof(work_buf3), }; + enum ring_variant ring_variant; struct mmap_helper_region_device mdev = {0}; struct homer_st *homer = homer_bar; @@ -966,9 +1026,10 @@ void build_homer_image(void *homer_bar) build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), dd); - get_ppe_scan_rings(hw, dd, PT_CME, &ring_data); + ring_variant = (dd < 0x23 ? RV_BASE : RV_RL4); - // TODO: layoutRingsForCME() + get_ppe_scan_rings(hw, dd, PT_CME, &ring_data); + layout_rings_for_cme(homer, &ring_data, ring_variant); /* Reset buffer sizes to maximum values before reusing the structure */ ring_data.rings_buf_size = sizeof(rings_buf); diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 6e80cd08a7a..ae3b8bad717 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -128,6 +128,8 @@ check_member(qpmr_st, aux, 512 * KiB); /* =================== CPMR =================== */ +#define CPMR_VDM_PER_QUAD 0x43504D525F322E30ull + struct cpmr_header { uint32_t attn_opcodes[2]; uint64_t magic; /* "CPMR_2.0" */ From 4b1a4651ebd6a17521c68d2213ed0e807b525c14 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 19 Aug 2021 02:24:39 +0300 Subject: [PATCH 067/213] soc/power9/homer.c: first pass of layout_inst_rings_for_cme() This introduces layout_inst_rings_for_cm(), which starts by computing maximum size of rings per EX (a pair of cores). 
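For illustration, a self-contained sketch of this first pass of
layout_inst_rings_for_cme(): for every EX it sums the 8-byte-aligned sizes of
the EC_REPR rings of its functional cores and keeps the maximum. The cores
mask, the core_functional() stand-in and the ring sizes below are made-up
values, not real hardware data.

#include <stdint.h>
#include <stdio.h>

#define ALIGN_UP(x, a)		((((x) + (a) - 1) / (a)) * (a))
#define MAX_CORES		24
#define MAX_CORES_PER_EX	2
#define MAX_CMES_PER_CHIP	(MAX_CORES / MAX_CORES_PER_EX)

/* Stand-in for IS_EC_FUNCTIONAL(): core i is functional if bit i of the
 * mask is set (the real macro may use a different bit order). */
static int core_functional(unsigned int core, uint64_t cores)
{
	return (cores >> core) & 1;
}

int main(void)
{
	/* Made-up data: eight functional cores with made-up EC_REPR sizes. */
	const uint64_t cores = 0xFF;
	const uint32_t repr_size[MAX_CORES] = {
		211, 208, 190, 260, 215, 202, 199, 221,
	};

	uint32_t max_ex_len = 0;
	unsigned int ex;

	for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) {
		uint32_t ex_len = 0;
		unsigned int i;

		for (i = 0; i < MAX_CORES_PER_EX; ++i) {
			const unsigned int core = ex * MAX_CORES_PER_EX + i;

			if (!core_functional(core, cores))
				continue;

			/* Rings are stored 8-byte aligned, so the slot
			 * reserved per EX must include the padding. */
			ex_len += ALIGN_UP(repr_size[core], 8);
		}

		if (ex_len > max_ex_len)
			max_ex_len = ex_len;
	}

	printf("max_ex_len = %u bytes\n", (unsigned)max_ex_len);
	return 0;
}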
Change-Id: Idd64a3962e1ff39164c237f897d0f71c11359596 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 58 ++++++++++++++++++++++++++++++++++++-- src/soc/ibm/power9/homer.h | 2 ++ 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 12d0f7a3028..87a74cdea88 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -949,8 +949,47 @@ static void layout_cmn_rings_for_cme(struct homer_st *homer, *ring_len = ALIGN_UP(*ring_len, 8); } +static void layout_inst_rings_for_cme(struct homer_st *homer, + struct ring_data *ring_data, + uint64_t cores, + enum ring_variant ring_variant, + uint32_t *ring_len) +{ + uint32_t max_ex_len = 0; + + uint32_t ex = 0; + + for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { + uint32_t i = 0; + uint32_t ex_len = 0; + + for (i = 0; i < MAX_CORES_PER_EX; ++i) { + const uint32_t core = ex * MAX_CORES_PER_EX + i; + + uint32_t ring_size = 0; + + if (!IS_EC_FUNCTIONAL(core, cores)) + continue; + + ring_size = ring_data->work_buf1_size; + if (!tor_access_ring(ring_data->rings_buf, EC_REPR, + PT_CME, RV_BASE, + EC00_CHIPLET_ID + core, + ring_data->work_buf1, + &ring_size, GET_RING_DATA)) + continue; + + ex_len += ALIGN_UP(ring_size, 8); + } + + if (ex_len > max_ex_len) + max_ex_len = ex_len; + } +} + static void layout_rings_for_cme(struct homer_st *homer, struct ring_data *ring_data, + uint64_t cores, enum ring_variant ring_variant) { struct cpmr_header *cpmr_hdr = &homer->cpmr.header; @@ -962,7 +1001,22 @@ static void layout_rings_for_cme(struct homer_st *homer, layout_cmn_rings_for_cme(homer, ring_data, ring_variant, &ring_len); - // TODO: layout_inst_rings_for_cme() + cme_hdr->common_ring_len = ring_len - (cme_hdr->hcode_offset + cme_hdr->hcode_len); + + // if common ring is empty, force offset to be 0 + if (cme_hdr->common_ring_len == 0) + cme_hdr->common_ring_offset = 0; + + ring_len = ALIGN_UP(ring_len, 32); + + layout_inst_rings_for_cme(homer, ring_data, cores, RV_BASE, &ring_len); + + if (ring_len != 0) { + cme_hdr->max_spec_ring_len = ALIGN_UP(ring_len, 32) / 32; + cme_hdr->core_spec_ring_offset = + ALIGN_UP(cme_hdr->common_ring_offset + cme_hdr->common_ring_len, 32) / + 32; + } } /* @@ -1029,7 +1083,7 @@ void build_homer_image(void *homer_bar) ring_variant = (dd < 0x23 ? RV_BASE : RV_RL4); get_ppe_scan_rings(hw, dd, PT_CME, &ring_data); - layout_rings_for_cme(homer, &ring_data, ring_variant); + layout_rings_for_cme(homer, &ring_data, cores, ring_variant); /* Reset buffer sizes to maximum values before reusing the structure */ ring_data.rings_buf_size = sizeof(rings_buf); diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index ae3b8bad717..276ba536d61 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -193,6 +193,8 @@ struct cme_img_header { }; #define MAX_CORES 24 +#define MAX_CORES_PER_EX 2 +#define MAX_CMES_PER_CHIP (MAX_CORES / MAX_CORES_PER_EX) struct cpmr_st { struct cpmr_header header; From afca72ac7532d268be501ea68373b930bf95e63a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 21 Aug 2021 23:40:15 +0300 Subject: [PATCH 068/213] soc/power9/homer.c: finish layout_inst_rings_for_cme() This is where rings are actually stored in the image. 
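A minimal standalone sketch of how one per-EX slot is filled: a small offset
table with the shape of cme_inst_ring_list, followed by 8-byte-aligned ring
payloads, where ring[i] records where each ring landed relative to the start
of the slot. The slot size and ring contents below are invented for
illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

/* Same shape as cme_inst_ring_list. */
struct inst_ring_list {
	uint16_t ring[4];	/* EC_REPR0, EC_REPR1, 2 reserved */
	uint8_t payload[];
};

int main(void)
{
	/* Stand-in for the per-EX area inside the CPMR. */
	static uint8_t slot[256] __attribute__((aligned(8)));

	struct inst_ring_list *list = (void *)slot;
	uint8_t *start = slot;
	uint8_t *payload = list->payload;

	/* Made-up ring images for the two cores of one EX. */
	const uint8_t repr0[21] = { 0x11 };
	const uint8_t repr1[34] = { 0x22 };
	const uint8_t *rings[2] = { repr0, repr1 };
	const uint32_t sizes[2] = { sizeof(repr0), sizeof(repr1) };
	unsigned int i;

	for (i = 0; i < 2; ++i) {
		/* Every ring starts on an 8-byte boundary within the slot. */
		if ((payload - start) % 8 != 0)
			payload = start + ALIGN_UP(payload - start, 8);

		memcpy(payload, rings[i], sizes[i]);

		/* The header records the ring's offset from the slot start. */
		list->ring[i] = payload - start;
		payload += ALIGN_UP(sizes[i], 8);
	}

	printf("ring[0] at %u, ring[1] at %u, slot uses %ld bytes\n",
	       (unsigned)list->ring[0], (unsigned)list->ring[1],
	       (long)(payload - start));
	return 0;
}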
Change-Id: Ifd666ecff2d38329d86a4272f66231f82ededa07 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 87a74cdea88..55daf6521e2 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -30,6 +30,11 @@ struct cme_cmn_ring_list { uint8_t payload[]; }; +struct cme_inst_ring_list { + uint16_t ring[4]; // In order: EC_REPR0, EC_REPR1, 2 reserved + uint8_t payload[]; +}; + enum operation_type { COPY, FIND @@ -985,6 +990,46 @@ static void layout_inst_rings_for_cme(struct homer_st *homer, if (ex_len > max_ex_len) max_ex_len = ex_len; } + + if (max_ex_len > 0) { + max_ex_len += sizeof(struct cme_inst_ring_list); + max_ex_len = ALIGN_UP(max_ex_len, 32); + } + + for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { + // TODO: update with sizeof(LocalPstateParmBlock) when it's defined + const uint32_t ex_offset = ex * (max_ex_len + ALIGN_UP(616, 32)); + + uint8_t *start = &homer->cpmr.cme_sram_region[*ring_len + ex_offset]; + struct cme_inst_ring_list *tmp = (void *)start; + uint8_t *payload = tmp->payload; + + uint32_t i = 0; + + for (i = 0; i < MAX_CORES_PER_EX; ++i) { + const uint32_t core = ex * MAX_CORES_PER_EX + i; + + uint32_t ring_size = MAX_RING_BUF_SIZE; + + if (!IS_EC_FUNCTIONAL(core, cores)) + continue; + + if ((payload - start) % 8 != 0) + payload = start + ALIGN_UP(payload - start, 8); + + if (!tor_access_ring(ring_data->rings_buf, EC_REPR, + PT_CME, RV_BASE, + EC00_CHIPLET_ID + core, + payload, + &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[i] = payload - start; + payload += ALIGN_UP(ring_size, 8); + } + } + + *ring_len = max_ex_len; } static void layout_rings_for_cme(struct homer_st *homer, From bb723b2204a1ccd523b3f4c8b0d59529091f7207 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 22 Aug 2021 01:38:25 +0300 Subject: [PATCH 069/213] soc/power9/homer.c: implement layout_cmn_rings_for_sgpe() There are also instance rings. 
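One rule in layout_cmn_rings_for_sgpe() is worth spelling out: GPTR and TIME
rings are only kept in the base variant, so the requested risk-level variant
is dropped for them. A standalone sketch of that rule, using a trimmed local
copy of the enums (the values here are illustrative and do not match the real
ring_id numbering):

#include <stdio.h>

enum ring_variant { RV_BASE, RV_RL4 };

enum ring_id {
	EQ_FURE, EQ_GPTR, EQ_TIME,
	EX_L2_FURE, EX_L2_GPTR, EX_L2_TIME,
	EX_L3_FURE, EX_L3_GPTR, EX_L3_TIME,
	EX_L3_REFR_GPTR, EQ_ANA_GPTR, EQ_DPLL_GPTR,
};

/* GPTR and TIME rings exist only as the base variant. */
static enum ring_variant effective_variant(enum ring_id id,
					   enum ring_variant requested)
{
	switch (id) {
	case EQ_GPTR:
	case EQ_ANA_GPTR:
	case EQ_DPLL_GPTR:
	case EX_L2_GPTR:
	case EX_L3_GPTR:
	case EX_L3_REFR_GPTR:
	case EQ_TIME:
	case EX_L2_TIME:
	case EX_L3_TIME:
		return RV_BASE;
	default:
		return requested;
	}
}

static const char *variant_name(enum ring_variant v)
{
	return v == RV_BASE ? "RV_BASE" : "RV_RL4";
}

int main(void)
{
	printf("EQ_FURE    -> %s\n",
	       variant_name(effective_variant(EQ_FURE, RV_RL4)));
	printf("EQ_GPTR    -> %s\n",
	       variant_name(effective_variant(EQ_GPTR, RV_RL4)));
	printf("EX_L2_TIME -> %s\n",
	       variant_name(effective_variant(EX_L2_TIME, RV_RL4)));
	return 0;
}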
Change-Id: I0bb6e6d248111ec917ecc4ee700502949fbce4db Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 112 ++++++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 55daf6521e2..7616ccd0988 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,12 @@ struct cme_inst_ring_list { uint8_t payload[]; }; +struct sgpe_cmn_ring_list { + // See the list in layout_cmn_rings_for_sgpe() skipping EQ_ANA_BNDY, 3 reserved + uint16_t ring[64]; + uint8_t payload[]; +}; + enum operation_type { COPY, FIND @@ -1064,6 +1071,106 @@ static void layout_rings_for_cme(struct homer_st *homer, } } +static enum ring_id resolve_eq_inex_bucket(void) +{ + switch (powerbus_cfg()->core_floor_ratio) { + case FABRIC_CORE_FLOOR_RATIO_RATIO_8_8: + return EQ_INEX_BUCKET_4; + + case FABRIC_CORE_FLOOR_RATIO_RATIO_7_8: + case FABRIC_CORE_FLOOR_RATIO_RATIO_6_8: + case FABRIC_CORE_FLOOR_RATIO_RATIO_5_8: + case FABRIC_CORE_FLOOR_RATIO_RATIO_4_8: + return EQ_INEX_BUCKET_3; + + case FABRIC_CORE_FLOOR_RATIO_RATIO_2_8: + return EQ_INEX_BUCKET_2; + } + + die("Failed to resolve EQ_INEX_BUCKET_*!\n"); +} + +static void layout_cmn_rings_for_sgpe(struct homer_st *homer, + struct ring_data *ring_data, + enum ring_variant ring_variant) +{ + const enum ring_id ring_ids[] = { + EQ_FURE, EQ_GPTR, EQ_TIME, EQ_INEX, EX_L3_FURE, EX_L3_GPTR, EX_L3_TIME, + EX_L2_MODE, EX_L2_FURE, EX_L2_GPTR, EX_L2_TIME, EX_L3_REFR_FURE, + EX_L3_REFR_GPTR, EQ_ANA_FUNC, EQ_ANA_GPTR, EQ_DPLL_FUNC, EQ_DPLL_GPTR, + EQ_DPLL_MODE, EQ_ANA_BNDY_BUCKET_0, EQ_ANA_BNDY_BUCKET_1, + EQ_ANA_BNDY_BUCKET_2, EQ_ANA_BNDY_BUCKET_3, EQ_ANA_BNDY_BUCKET_4, + EQ_ANA_BNDY_BUCKET_5, EQ_ANA_BNDY_BUCKET_6, EQ_ANA_BNDY_BUCKET_7, + EQ_ANA_BNDY_BUCKET_8, EQ_ANA_BNDY_BUCKET_9, EQ_ANA_BNDY_BUCKET_10, + EQ_ANA_BNDY_BUCKET_11, EQ_ANA_BNDY_BUCKET_12, EQ_ANA_BNDY_BUCKET_13, + EQ_ANA_BNDY_BUCKET_14, EQ_ANA_BNDY_BUCKET_15, EQ_ANA_BNDY_BUCKET_16, + EQ_ANA_BNDY_BUCKET_17, EQ_ANA_BNDY_BUCKET_18, EQ_ANA_BNDY_BUCKET_19, + EQ_ANA_BNDY_BUCKET_20, EQ_ANA_BNDY_BUCKET_21, EQ_ANA_BNDY_BUCKET_22, + EQ_ANA_BNDY_BUCKET_23, EQ_ANA_BNDY_BUCKET_24, EQ_ANA_BNDY_BUCKET_25, + EQ_ANA_BNDY_BUCKET_L3DCC, EQ_ANA_MODE, EQ_ANA_BNDY_BUCKET_26, + EQ_ANA_BNDY_BUCKET_27, EQ_ANA_BNDY_BUCKET_28, EQ_ANA_BNDY_BUCKET_29, + EQ_ANA_BNDY_BUCKET_30, EQ_ANA_BNDY_BUCKET_31, EQ_ANA_BNDY_BUCKET_32, + EQ_ANA_BNDY_BUCKET_33, EQ_ANA_BNDY_BUCKET_34, EQ_ANA_BNDY_BUCKET_35, + EQ_ANA_BNDY_BUCKET_36, EQ_ANA_BNDY_BUCKET_37, EQ_ANA_BNDY_BUCKET_38, + EQ_ANA_BNDY_BUCKET_39, EQ_ANA_BNDY_BUCKET_40, EQ_ANA_BNDY_BUCKET_41 + }; + + const enum ring_id eq_index_bucket_id = resolve_eq_inex_bucket(); + + struct qpmr_header *qpmr_hdr = &homer->qpmr.sgpe.header; + struct sgpe_cmn_ring_list *tmp = + (void *)&homer->qpmr.sgpe.sram_image[qpmr_hdr->img_len]; + uint8_t *start = (void *)tmp; + uint8_t *payload = tmp->payload; + + uint32_t i = 0; + + for (i = 0; i < ARRAY_SIZE(ring_ids); ++i) { + enum ring_variant this_ring_variant; + uint32_t ring_size = MAX_RING_BUF_SIZE; + + enum ring_id id = ring_ids[i]; + if (id == EQ_INEX) + id = eq_index_bucket_id; + + this_ring_variant = ring_variant; + if (id == EQ_GPTR || // EQ GPTR + id == EQ_ANA_GPTR || + id == EQ_DPLL_GPTR || + id == EX_L3_GPTR || // EX GPTR + id == EX_L2_GPTR || + id == EX_L3_REFR_GPTR || + id == EQ_TIME || // EQ TIME + id == EX_L3_TIME || // EX TIME + id == EX_L2_TIME) + this_ring_variant 
= RV_BASE; + + if ((payload - start) % 8 != 0) + payload = start + ALIGN_UP(payload - start, 8); + + if (!tor_access_ring(ring_data->rings_buf, id, PT_SGPE, + this_ring_variant, EP00_CHIPLET_ID, + payload, &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[i] = payload - start; + payload += ALIGN_UP(ring_size, 8); + } + + qpmr_hdr->common_ring_len = payload - start; + qpmr_hdr->common_ring_offset = + offsetof(struct qpmr_st, sgpe.sram_image) + qpmr_hdr->img_len; +} + +static void layout_rings_for_sgpe(struct homer_st *homer, + struct ring_data *ring_data, + struct xip_sgpe_header *sgpe, + uint64_t cores, + enum ring_variant ring_variant) +{ + layout_cmn_rings_for_sgpe(homer, ring_data, ring_variant); +} + /* * This logic is for SMF disabled only! */ @@ -1136,8 +1243,9 @@ void build_homer_image(void *homer_bar) ring_data.work_buf2_size = sizeof(work_buf2); ring_data.work_buf3_size = sizeof(work_buf3); get_ppe_scan_rings(hw, dd, PT_SGPE, &ring_data); - - // TODO: layoutRingsForSGPE() + layout_rings_for_sgpe(homer, &ring_data, + (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), + cores, ring_variant); // buildParameterBlock(); // updateCpmrCmeRegion(); From 03f4575c321e5bde9cc45ff8c57d8bbd1b505b3b Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 22 Aug 2021 18:13:22 +0300 Subject: [PATCH 070/213] soc/power9/homer.c: implement layout_inst_rings_for_sgpe() Change-Id: Ifc86aeeb5b93336466eb8b46fa51cd39c6e4f15c Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 87 ++++++++++++++++++++++++++++++++++++++ src/soc/ibm/power9/homer.h | 1 + 2 files changed, 88 insertions(+) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 7616ccd0988..d545181afc3 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -42,6 +42,16 @@ struct sgpe_cmn_ring_list { uint8_t payload[]; }; +struct sgpe_inst_ring_list { + /* For each quad, in order: + EQ_REPR0, EX0_L3_REPR, EX1_L3_REPR, EX0_L2_REPR, EX1_L2_REPR, + EX0_L3_REFR_REPR, EX1_L3_REFR_REPR, EX0_L3_REFR_TIME, + EX1_L3_REFR_TIME, 3 reserved. 
*/ + uint16_t ring[MAX_QUADS_PER_CHIP][12]; + + uint8_t payload[]; +}; + enum operation_type { COPY, FIND @@ -1162,13 +1172,90 @@ static void layout_cmn_rings_for_sgpe(struct homer_st *homer, offsetof(struct qpmr_st, sgpe.sram_image) + qpmr_hdr->img_len; } +static void layout_inst_rings_for_sgpe(struct homer_st *homer, + struct ring_data *ring_data, + uint64_t cores, + enum ring_variant ring_variant) +{ + struct qpmr_header *qpmr_hdr = &homer->qpmr.sgpe.header; + uint32_t inst_rings_offset = qpmr_hdr->img_len + qpmr_hdr->common_ring_len; + + uint8_t *start = &homer->qpmr.sgpe.sram_image[inst_rings_offset]; + struct sgpe_inst_ring_list *tmp = (void *)start; + uint8_t *payload = tmp->payload; + + /* It's EQ_REPR and three pairs of EX rings */ + const enum ring_id ring_ids[] = { + EQ_REPR, EX_L3_REPR, EX_L3_REPR, EX_L2_REPR, EX_L2_REPR, + EX_L3_REFR_REPR, EX_L3_REFR_REPR, EX_L3_REFR_TIME, + EX_L3_REFR_TIME + }; + + uint8_t quad = 0; + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + uint8_t i; + + /* Skip non-functional quads */ + if (!IS_EQ_FUNCTIONAL(quad, cores)) + continue; + + for (i = 0; i < ARRAY_SIZE(ring_ids); ++i) { + const enum ring_id id = ring_ids[i]; + + uint32_t ring_size = MAX_RING_BUF_SIZE; + + /* Despite the constant, this is not a SCOM chiplet ID, + it's just used as a base value */ + uint8_t instance_id = EP00_CHIPLET_ID + quad; + if (i != 0) { + instance_id += quad; + if (i % 2 == 0) + ++instance_id; + } + + if ((payload - start) % 8 != 0) + payload = start + ALIGN_UP(payload - start, 8); + + if (!tor_access_ring(ring_data->rings_buf, id, PT_SGPE, + ring_variant, instance_id, + payload, &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[quad][i] = payload - start; + payload += ALIGN_UP(ring_size, 8); + } + } + + qpmr_hdr->spec_ring_offset = qpmr_hdr->common_ring_offset + qpmr_hdr->common_ring_len; + qpmr_hdr->spec_ring_len = payload - start; +} + static void layout_rings_for_sgpe(struct homer_st *homer, struct ring_data *ring_data, struct xip_sgpe_header *sgpe, uint64_t cores, enum ring_variant ring_variant) { + struct qpmr_header *qpmr_hdr = &homer->qpmr.sgpe.header; + struct sgpe_img_header *sgpe_img_hdr = + (void *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + layout_cmn_rings_for_sgpe(homer, ring_data, ring_variant); + layout_inst_rings_for_sgpe(homer, ring_data, cores, RV_BASE); + + if (qpmr_hdr->common_ring_len == 0) { + /* If quad common rings don't exist, ensure its offset in image + header is zero */ + sgpe_img_hdr->cmn_ring_occ_offset = 0; + } + + if (qpmr_hdr->spec_ring_len > 0) { + sgpe_img_hdr->spec_ring_occ_offset = qpmr_hdr->img_len + + qpmr_hdr->common_ring_len; + sgpe_img_hdr->scom_offset = sgpe_img_hdr->spec_ring_occ_offset + + qpmr_hdr->spec_ring_len; + } } /* diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 276ba536d61..f8c0e292320 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -193,6 +193,7 @@ struct cme_img_header { }; #define MAX_CORES 24 +#define MAX_QUADS_PER_CHIP 6 #define MAX_CORES_PER_EX 2 #define MAX_CMES_PER_CHIP (MAX_CORES / MAX_CORES_PER_EX) From d7dbee7bdb8ec0206b65eaf26a80ead5e33a72dd Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 12 Aug 2021 19:27:30 +0200 Subject: [PATCH 071/213] vendorcode/ibm/pstates/: add pstates data structures Power management requires extensive data structures, which are better to copy as is from the source as they are not clearly defined even in the source code ("packed" attribute is sometimes missing and comments contradict each 
other). Change-Id: I0b624d79affb22f02ae6d8939a54d25b92545fdf Signed-off-by: Krystian Hebel " --- src/vendorcode/ibm/power9/pstates/README.md | 28 ++ .../ibm/power9/pstates/p9_pstates_cmeqm.h | 357 ++++++++++++++ .../ibm/power9/pstates/p9_pstates_common.h | 435 ++++++++++++++++++ .../ibm/power9/pstates/p9_pstates_occ.h | 216 +++++++++ .../ibm/power9/pstates/p9_pstates_pgpe.h | 367 +++++++++++++++ 5 files changed, 1403 insertions(+) create mode 100644 src/vendorcode/ibm/power9/pstates/README.md create mode 100644 src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h create mode 100644 src/vendorcode/ibm/power9/pstates/p9_pstates_common.h create mode 100644 src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h create mode 100644 src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h diff --git a/src/vendorcode/ibm/power9/pstates/README.md b/src/vendorcode/ibm/power9/pstates/README.md new file mode 100644 index 00000000000..01992f68871 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/README.md @@ -0,0 +1,28 @@ +Files in this directory come from talos-hostboot repo, commit a2ddbf3 [1]. No +changes were made, other than: + * converting `#include <...>` to `#include "..."` + * commenting out some #include directives to not bring those headers + * using a differently named constant for maximum number of quads + +In some cases units mentioned in comments in the bigger structure are different +than in comments above internal structure definitions and field names. An +example of such difference is VpdOperatingPoint, defined in p9_pstates_common.h, +where "voltages are specified in units of 1mV, and characterization currents are +specified in units of 100mA", which is consistent with its fields names. When +this structure is used in other structures (each other file uses it), unit for +voltage becomes 5mV, and for currents - 500mA. + +Another issue is poundW_data - it doesn't have 'packed' attribute, but it is +packed in MVPD. Additional fields were added at some point [2], with a comment +that additional reserved field was added to keep the size the same. The problem +is that new field (the important one, not reserved) was added in the middle of +the structure, between uint64 that is by default naturally aligned to 8B, and +that is the biggest alignment used in that structure. Modifying anything after +that field won't help in keeping the size the same as before. Luckily, offsets +to all of the non-reserved fields are proper. sizeof() or an array of this type +cannot be used. + +There may be other inconsistencies, be advised. + +[1]: https://git.raptorcs.com/git/talos-hostboot/tree/src/import/chips/p9/procedures/hwp/lib?id=a2ddbf3150e2c02ccc904b25d6650c9932a8a841 +[2]: https://git.raptorcs.com/git/talos-hostboot/commit/src/import/chips/p9/procedures/hwp/lib/p9_pstates_cmeqm.h?id=2ab88987e5fed942b71b757e0c2972adee5b8e1b diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h new file mode 100644 index 00000000000..0f8deee8fb8 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h @@ -0,0 +1,357 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_cmeqm.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. 
*/ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates_cmeqm.h +/// @brief Pstate structures and support routines for CME Hcode +/// +// *HWP HW Owner : Rahul Batra +// *HWP HW Owner : Michael Floyd +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : CME:PGPE + +#ifndef __P9_PSTATES_CME_H__ +#define __P9_PSTATES_CME_H__ + +#include "p9_pstates_common.h" +//#include + + +/// @} + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/// LocalParmsBlock Magic Number +/// +/// This magic number identifies a particular version of the +/// PstateParmsBlock and its substructures. The version number should be +/// kept up to date as changes are made to the layout or contents of the +/// structure. + +#define LOCAL_PARMSBLOCK_MAGIC 0x434d455050423030ull /* CMEPPB00 */ + +/// Quad Manager Flags +/// + +typedef union +{ + uint16_t value; + struct + { +#ifdef _BIG_ENDIAN + uint16_t resclk_enable : 1; + uint16_t ivrm_enable : 1; + uint16_t vdm_enable : 1; + uint16_t wof_enable : 1; + uint16_t dpll_dynamic_fmax_enable : 1; + uint16_t dpll_dynamic_fmin_enable : 1; + uint16_t dpll_droop_protect_enable : 1; + uint16_t reserved : 9; +#else + uint16_t reserved : 9; + uint16_t dpll_droop_protect_enable : 1; + uint16_t dpll_dynamic_fmin_enable : 1; + uint16_t dpll_dynamic_fmax_enable : 1; + uint16_t wof_enable : 1; + uint16_t vdm_enable : 1; + uint16_t ivrm_enable : 1; + uint16_t resclk_enable : 1; +#endif // _BIG_ENDIAN + } fields; + +} QuadManagerFlags; + +/// Resonant Clock Stepping Entry +/// +typedef union +{ + uint16_t value; + struct + { +#ifdef _BIG_ENDIAN + uint16_t sector_buffer : 4; + uint16_t spare1 : 1; + uint16_t pulse_enable : 1; + uint16_t pulse_mode : 2; + uint16_t resonant_switch : 4; + uint16_t spare4 : 4; +#else + uint16_t spare4 : 4; + uint16_t resonant_switch : 4; + uint16_t pulse_mode : 2; + uint16_t pulse_enable : 1; + uint16_t spare1 : 1; + uint16_t sector_buffer : 4; +#endif // _BIG_ENDIAN + } fields; + +} ResonantClockingStepEntry; + +#define RESCLK_FREQ_REGIONS 8 +#define RESCLK_STEPS 64 +#define RESCLK_L3_STEPS 4 + +typedef struct ResonantClockControl +{ + uint8_t resclk_freq[RESCLK_FREQ_REGIONS]; // Lower frequency of Resclk Regions + + uint8_t resclk_index[RESCLK_FREQ_REGIONS]; // Index into value array for the + // respective Resclk Region + + /// Array containing the transition steps + ResonantClockingStepEntry steparray[RESCLK_STEPS]; + + /// Delay between steps (in nanoseconds) + /// Maximum delay: 65.536us + uint16_t step_delay_ns; + + /// L3 Clock Stepping Array + uint8_t l3_steparray[RESCLK_L3_STEPS]; + + /// Resonant Clock Voltage Threshold (in millivolts) + /// This value is used to choose the appropriate L3 clock region setting. 
+ uint16_t l3_threshold_mv; + +} ResonantClockingSetup; + +// #W data points (version 2) +typedef struct +{ + uint16_t ivdd_tdp_ac_current_10ma; + uint16_t ivdd_tdp_dc_current_10ma; + uint8_t vdm_overvolt_small_thresholds; + uint8_t vdm_large_extreme_thresholds; + uint8_t vdm_normal_freq_drop; // N_S and N_L Drop + uint8_t vdm_normal_freq_return; // L_S and S_N Return + uint8_t vdm_vid_compare_ivid; + uint8_t vdm_spare; +} poundw_entry_t; + +typedef struct +{ + uint16_t r_package_common; + uint16_t r_quad; + uint16_t r_core; + uint16_t r_quad_header; + uint16_t r_core_header; +} resistance_entry_t; + +typedef struct __attribute__((packed)) +{ + uint16_t r_package_common; + uint16_t r_quad; + uint16_t r_core; + uint16_t r_quad_header; + uint16_t r_core_header; + uint8_t r_vdm_cal_version; + uint8_t r_avg_min_scale_fact; + uint16_t r_undervolt_vmin_floor_limit; + uint8_t r_min_bin_protect_pc_adder; + uint8_t r_min_bin_protect_bin_adder; + uint8_t r_undervolt_allowed; + uint8_t reserve[10]; +} +resistance_entry_per_quad_t; + +typedef struct +{ + poundw_entry_t poundw[NUM_OP_POINTS]; + resistance_entry_t resistance_data; + uint8_t undervolt_tested; + uint8_t reserved; + uint64_t reserved1; + uint8_t reserved2; //This field was added to keep the size of struct same when undervolt_tested field was added +} PoundW_data; + +/// VDM/Droop Parameter Block +/// +typedef struct +{ + PoundW_data vpd_w_data; +} LP_VDMParmBlock; + +typedef struct __attribute__((packed)) +{ + uint16_t ivdd_tdp_ac_current_10ma; + uint16_t ivdd_tdp_dc_current_10ma; + uint8_t vdm_overvolt_small_thresholds; + uint8_t vdm_large_extreme_thresholds; + uint8_t vdm_normal_freq_drop; // N_S and N_L Drop + uint8_t vdm_normal_freq_return; // L_S and S_N Return + uint8_t vdm_vid_compare_per_quad[MAXIMUM_QUADS]; + uint8_t vdm_cal_state_avg_min_per_quad[MAXIMUM_QUADS]; + uint16_t vdm_cal_state_vmin; + uint8_t vdm_cal_state_avg_core_dts; + uint16_t vdm_cal_state_avg_core_current; + uint16_t vdm_spare; +} +poundw_entry_per_quad_t; + +typedef struct __attribute__((packed)) +{ + poundw_entry_per_quad_t poundw[NUM_OP_POINTS]; + resistance_entry_per_quad_t resistance_data; +} +PoundW_data_per_quad; + + +typedef struct +{ + PoundW_data_per_quad vpd_w_data; +} LP_VDMParmBlock_PerQuad; + +/// The layout of the data created by the Pstate table creation firmware for +/// comsumption by the Pstate GPE. This data will reside in the Quad +/// Power Management Region (QPMR). +/// + +/// Standard options controlling Pstate setup procedures + +/// System Power Distribution Paramenters +/// +/// Parameters set by system design that influence the power distribution +/// for a rail to the processor module. This values are typically set in the +/// system machine readable workbook and are used in the generation of the +/// Global Pstate Table. This values are carried in the Pstate SuperStructure +/// for use and/or reference by OCC firmware (eg the WOF algorithm) + + +/// IVRM Parameter Block +/// +/// @todo Major work item. Largely will seed the CME Quad Manager to perform +/// iVRM voltage calculations + +#define IVRM_ARRAY_SIZE 64 +typedef struct iVRMInfo +{ + + /// Pwidth from 0.03125 to 1.96875 in 1/32 increments at Vin=Vin_Max + uint8_t strength_lookup[IVRM_ARRAY_SIZE]; // Each entry is a six bit value, right justified + + /// Scaling factor for the Vin_Adder calculation. + uint8_t vin_multiplier[IVRM_ARRAY_SIZE]; // Each entry is from 0 to 255. 
+ + /// Vin_Max used in Vin_Adder calculation (in millivolts) + uint16_t vin_max_mv; + + /// Delay between steps (in nanoseconds) + /// Maximum delay: 65.536us + uint16_t step_delay_ns; + + /// Stabilization delay once target voltage has been reached (in nanoseconds) + /// Maximum delay: 65.536us + uint16_t stablization_delay_ns; + + /// Deadzone (in millivolts) + /// Maximum: 255mV. If this value is 0, 50mV is assumed. + uint8_t deadzone_mv; + + /// Pad to 8B + uint8_t pad; + +} IvrmParmBlock; + +typedef uint8_t CompareVIDPoints; + +/// The layout of the data created by the Pstate table creation firmware for +/// comsumption by the CME Quad Manager. This data will reside in the Core +/// Power Management Region (CPMR). +/// +typedef struct +{ + + /// Magic Number + uint64_t magic; // the last byte of this number the structure's version. + + // QM Flags + QuadManagerFlags qmflags; + + /// Operating points + /// + /// VPD operating points are stored without load-line correction. Frequencies + /// are in MHz, voltages are specified in units of 5mV, and currents are + /// in units of 500mA. + VpdOperatingPoint operating_points[NUM_OP_POINTS]; + + /// Loadlines and Distribution values for the VDD rail + SysPowerDistParms vdd_sysparm; + + /// External Biases + /// + /// Biases applied to the VPD operating points prior to load-line correction + /// in setting the external voltages. This is used to recompute the Vin voltage + /// based on the Global Actual Pstate . + /// Values in 0.5% + VpdBias ext_biases[NUM_OP_POINTS]; + + /// Internal Biases + /// + /// Biases applied to the VPD operating points that are used for interpolation + /// in setting the internal voltages (eg Vout to the iVRMs) as part of the + /// Local Actual Pstate. + /// Values in 0.5% + VpdBias int_biases[NUM_OP_POINTS]; + + /// IVRM Data + IvrmParmBlock ivrm; + + /// Resonant Clock Grid Management Setup + ResonantClockingSetup resclk; + + /// VDM Data + LP_VDMParmBlock vdm; + + /// DPLL pstate 0 value + uint32_t dpll_pstate0_value; + + // Biased Compare VID operating points + CompareVIDPoints vid_point_set[NUM_OP_POINTS]; + + // Biased Threshold operation points + uint8_t threshold_set[NUM_OP_POINTS][NUM_THRESHOLD_POINTS]; + + //pstate-volt compare slopes + int16_t PsVIDCompSlopes[VPD_NUM_SLOPES_REGION]; + + //pstate-volt threshold slopes + int16_t PsVDMThreshSlopes[VPD_NUM_SLOPES_REGION][NUM_THRESHOLD_POINTS]; + + //Jump value operating points + uint8_t jump_value_set[NUM_OP_POINTS][NUM_JUMP_VALUES]; + + //Jump-value slopes + int16_t PsVDMJumpSlopes[VPD_NUM_SLOPES_REGION][NUM_JUMP_VALUES]; + +} LocalPstateParmBlock; + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_CME_H__ */ diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_common.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_common.h new file mode 100644 index 00000000000..ffee05153de --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_common.h @@ -0,0 +1,435 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_common.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates_common.h +/// @brief Common Pstate definitions +/// +// *HWP HW Owner : Rahul Batra +// *HWP HW Owner : Michael Floyd +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : PGPE:CME:HB:OCC + + +#ifndef __P9_PSTATES_COMMON_H__ +#define __P9_PSTATES_COMMON_H__ + +/// The maximum Pstate (knowing the increasing Pstates numbers represent +/// decreasing frequency) +#define PSTATE_MAX 255 + +/// The minimum Pstate (knowing the increasing Pstates numbers represent +/// decreasing frequency) +#define PSTATE_MIN 0 + + +/// Maximum number of Quads (4 cores plus associated caches) +#define MAXIMUM_QUADS 6 + +// Constants associated with VRM stepping +// @todo Determine what is needed here (eg Attribute mapping) and if any constants +// are warrented + +/// VPD #V Data from keyword (eg VPD order) + +#define NUM_JUMP_VALUES 4 +#define NUM_THRESHOLD_POINTS 4 + +// @todo RTC 181607 +// This is synchronization work-around to avoid a co-req update between CME Hcode +// and the Pstate Parameter Block. The CME uses "IDX" while these use "INDEX". +// In the future, these should be common between the two platforms. +// +// As this file is included in both platforms, the definition below can be used +// in the CME Hcode and the "IDX" versions deprecated once this file version +// is included in both platforms. 
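+// Illustrative note on the Pstate numbering used throughout these definitions:
+// Pstate 0 is the fastest operating point and every +1 step lowers the frequency
+// by one Pstate frequency step. Example with assumed values (not taken from VPD):
+// with a 16666 kHz step and a 4250 MHz Pstate-0 frequency, a core running at
+// 3000 MHz sits at roughly Pstate (4250000 - 3000000) / 16666 = 75.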
+#ifndef __ASSEMBLER__ +typedef enum +{ + VDM_OVERVOLT_INDEX = 0, + VDM_SMALL_INDEX = 1, + VDM_LARGE_INDEX = 2, + VDM_XTREME_INDEX = 3 +} VDM_THRESHOLD_INDEX; + +typedef enum +{ + VDM_N_S_INDEX = 0, + VDM_N_L_INDEX = 1, + VDM_L_S_INDEX = 2, + VDM_S_N_INDEX = 3 +} VDM_JUMP_VALUE_INDEX; +#endif + +#define NUM_OP_POINTS 4 +#define VPD_PV_POWERSAVE 1 +#define VPD_PV_NOMINAL 0 +#define VPD_PV_TURBO 2 +#define VPD_PV_ULTRA 3 +#define VPD_PV_POWERBUS 4 + +#define VPD_PV_ORDER {VPD_PV_POWERSAVE, VPD_PV_NOMINAL, VPD_PV_TURBO, VPD_PV_ULTRA} +#define VPD_PV_ORDER_STR {"Nominal ","PowerSave ", "Turbo ", "UltraTurbo"} +#define VPD_THRESHOLD_ORDER_STR {"Overvolt", "Small", "Large", "Extreme" } + +/// VPD #V Operating Points (eg Natural order) +#define POWERSAVE 0 +#define NOMINAL 1 +#define TURBO 2 +#define ULTRA 3 +#define POWERBUS 4 +#define PV_OP_ORDER {POWERSAVE, NOMINAL, TURBO, ULTRA} +#define PV_OP_ORDER_STR {"PowerSave ", "Nominal ","Turbo ", "UltraTurbo"} + +#define VPD_PV_CORE_FREQ_MHZ 0 +#define VPD_PV_VDD_MV 1 +#define VPD_PV_IDD_100MA 2 +#define VPD_PV_VCS_MV 3 +#define VPD_PV_ICS_100MA 4 +#define VPD_PV_PB_FREQ_MHZ 0 +#define VPD_PV_VDN_MV 1 +#define VPD_PV_IDN_100MA 2 + +#define VPD_NUM_SLOPES_REGION 3 +#define REGION_POWERSAVE_NOMINAL 0 +#define REGION_NOMINAL_TURBO 1 +#define REGION_TURBO_ULTRA 2 +#define VPD_OP_SLOPES_REGION_ORDER {REGION_POWERSAVE_NOMINAL,REGION_NOMINAL_TURBO,REGION_TURBO_ULTRA} +#define VPD_OP_SLOPES_REGION_ORDER_STR {"POWERSAVE_NOMINAL", "NOMINAL_TURBO ","TURBO_ULTRA "} + +// Different points considered for calculating slopes +#define NUM_VPD_PTS_SET 4 +#define VPD_PT_SET_RAW 0 +#define VPD_PT_SET_SYSP 1 +#define VPD_PT_SET_BIASED 2 +#define VPD_PT_SET_BIASED_SYSP 3 +#define VPD_PT_SET_ORDER {VPD_PT_SET_RAW, VPD_PT_SET_SYSP, VPD_PT_SET_BIASED, VPD_PT_SET_BIASED_SYSP} +#define VPD_PT_SET_ORDER_STR {"Raw", "SysParam","Biased", "Biased/SysParam"} + +#define VID_SLOPE_FP_SHIFT 13 //TODO: Remove this. RTC 174743 +#define VID_SLOPE_FP_SHIFT_12 12 +#define THRESH_SLOPE_FP_SHIFT 12 + +// 0 = PowerSave, 1 = Nominal; 2 = Turbo; 3 = UltraTurbo; 4 = Enable +#define VDM_DROOP_OP_POINTS 5 + + +#define PSTATE_LT_PSTATE_MIN 0x00778a03 +#define PSTATE_GT_PSTATE_MAX 0x00778a04 +#define ACTIVE_QUADS 6 + +/// IDDQ readings, +#define IDDQ_MEASUREMENTS 6 +#define IDDQ_ARRAY_VOLTAGES { 0.60 , 0.70 , 0.80 , 0.90 , 1.00 , 1.10} +#define IDDQ_ARRAY_VOLTAGES_STR {"0.60", "0.70", "0.80", "0.90", "1.00", "1.10"} + +/// WOF Items +#define NUM_ACTIVE_CORES 24 +#define MAX_UT_PSTATES 64 // Oversized + + + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +/// A Pstate type +/// +/// Pstates are unsigned but, to avoid bugs, Pstate register fields should +/// always be extracted to a variable of type Pstate. If the size of Pstate +/// variables ever changes we will have to revisit this convention. +typedef uint8_t Pstate; + +/// A DPLL frequency code +/// +/// DPLL frequency codes (Fmax and Fmult) are 15 bits +typedef uint16_t DpllCode; + +/// An AVS VID code +typedef uint16_t VidAVS; + +/// A VPD operating point +/// +/// VPD operating points are stored without load-line correction. Frequencies +/// are in MHz, voltages are specified in units of 1mV, and characterization +/// currents are specified in units of 100mA. 
+/// +typedef struct +{ + uint32_t vdd_mv; + uint32_t vcs_mv; + uint32_t idd_100ma; + uint32_t ics_100ma; + uint32_t frequency_mhz; + uint8_t pstate; // Pstate of this VpdOperating + uint8_t pad[3]; // Alignment padding +} VpdOperatingPoint; + +//Defined same as #V vpd points used to validate +typedef struct +{ + uint32_t frequency_mhz; + uint32_t vdd_mv; + uint32_t idd_100ma; + uint32_t vcs_mv; + uint32_t ics_100ma; +} VpdPoint; +/// VPD Biases. +/// +/// Percent bias applied to VPD operating points prior to interolation +/// +/// All values on in .5 percent (half percent -> hp) +typedef struct +{ + + int8_t vdd_ext_hp; + int8_t vdd_int_hp; + int8_t vdn_ext_hp; + int8_t vcs_ext_hp; + int8_t frequency_hp; + +} VpdBias; + +/// System Power Distribution Paramenters +/// +/// Parameters set by system design that influence the power distribution +/// for a rail to the processor module. This values are typically set in the +/// system machine readable workbook and are used in the generation of the +/// Global Pstate Table. This values are carried in the Pstate SuperStructure +/// for use and/or reference by OCC firmware (eg the WOF algorithm) + +typedef struct +{ + + /// Loadline + /// Impedance (binary microOhms) of the load line from a processor VDD VRM + /// to the Processor Module pins. + uint32_t loadline_uohm; + + /// Distribution Loss + /// Impedance (binary in microOhms) of the VDD distribution loss sense point + /// to the circuit. + uint32_t distloss_uohm; + + /// Distribution Offset + /// Offset voltage (binary in microvolts) to apply to the rail VRM + /// distribution to the processor module. + uint32_t distoffset_uv; + +} SysPowerDistParms; + +/// AVSBUS Topology +/// +/// AVS Bus and Rail numbers for VDD, VDN, VCS, and VIO +/// +typedef struct +{ + uint8_t vdd_avsbus_num; + uint8_t vdd_avsbus_rail; + uint8_t vdn_avsbus_num; + uint8_t vdn_avsbus_rail; + uint8_t vcs_avsbus_num; + uint8_t vcs_avsbus_rail; + uint8_t vio_avsbus_num; + uint8_t vio_avsbus_rail; +} AvsBusTopology_t; + +// +// WOF Voltage, Frequency Ratio Tables +// +//VFRT calculation part +#define SYSTEM_VERSION_FRQUENCY(VFRT) (1000 + (16.67 * VFRT)) +#define SYSTEM_VFRT_VALUE(FREQ) ((FREQ - 1000)/16.67) + + +#define HOMER_VFRT_VALUE(FREQ,BSF) ((BSF - FREQ)/16.67) +#define HOMER_VERSION_FREQUENCY(VFRT,BSF) (BSF - (16.67 * VFRT)) + + +//VFRT Header fields +typedef struct __attribute__((packed)) VFRTHeaderLayout +{ + // VFRT Magic code "VT" + uint16_t magic_number; + + uint16_t reserved; + // 0:System type, 1:Homer type (0:3) + // if version 1: VFRT size is 12 row(voltage) X 11 column(freq) of size uint8_t + // (4:7) + // if version 2: VFRT size is 24 row(Voltage) X 5 column (Freq) of size uint8_t + uint8_t type_version; + //Identifies the Vdn assumptions tht went in this VFRT (0:7) + uint8_t res_vdnId; + //Identifies the Vdd assumptions tht went in this VFRT (0:7) + uint8_t VddId_QAId; + //Identifies the Quad Active assumptions tht went in this VFRT (5:7) + uint8_t rsvd_QAId; +} VFRTHeaderLayout_t;// WOF Tables Header + +typedef enum +{ + WOF_MODE_UNKNOWN = 0, + WOF_MODE_NOMINAL = 1, + WOF_MODE_TURBO = 2 +} WOF_MODE; + +typedef struct __attribute__((packed, aligned(128))) WofTablesHeader +{ + + /// Magic Number + /// Set to ASCII "WFTH___x" where x is the version of the VFRT structure + uint32_t magic_number; + + /// Reserved version + /// version 1 - mode is reserved (0) + /// version 2 - mode is SET to 1 or 2 + union + { + uint32_t reserved_version; + struct + { + unsigned reserved_bits: 20; + unsigned mode: 4; /// new 
to version 2 (1 = Nominal, 2 = Turbo) + uint8_t version; + } PACKED; + }; + + /// VFRT Block Size + /// Length, in bytes, of a VFRT + uint16_t vfrt_block_size; + + /// VFRT block header size + uint16_t vfrt_block_header_size; + + /// VFRT Data Size + /// Length, in bytes, of the data field. + uint16_t vfrt_data_size; + + /// Quad Active Size + /// Total number of Active Quads + uint8_t quads_active_size; + + /// Core count + uint8_t core_count; + + /// Ceff Vdn Start + /// CeffVdn value represented by index 0 (in 0.01%) + uint16_t vdn_start; + + /// Ceff Vdn Step + /// CeffVdn step value for each CeffVdn index (in 0.01%) + uint16_t vdn_step; + + /// Ceff Vdn Size + /// Number of CeffVdn indexes + uint16_t vdn_size; + + /// Ceff Vdd Start + /// CeffVdd value represented by index 0 (in 0.01%) + uint16_t vdd_start; + + /// Ceff Vdd Step + /// CeffVdd step value for each CeffVdd index (in 0.01%) + uint16_t vdd_step; + + /// Ceff Vdd Size + /// Number of CeffVdd indexes + uint16_t vdd_size; + + /// Vratio Start + /// Vratio value represented by index 0 (in 0.01%) + uint16_t vratio_start; + + /// Vratio Step + /// Vratio step value for each CeffVdd index (in 0.01%) + uint16_t vratio_step; + + /// Vratio Size + /// Number of Vratio indexes + uint16_t vratio_size; + + /// Fratio Start + /// Fratio value represented by index 0 (in 0.01%) + uint16_t fratio_start; + + /// Fratio Step + /// Fratio step value for each CeffVdd index (in 0.01%) + uint16_t fratio_step; + + /// Fratio Size + /// Number of Fratio indexes + uint16_t fratio_size; + + /// Future usage + uint16_t Vdn_percent[8]; + + /// Socket Power (in Watts) for the WOF Tables + uint16_t socket_power_w; + + /// Nest Frequency (in MHz) used in building the WOF Tables + uint16_t nest_frequency_mhz; + + /// Core Sort Power Target Frequency (in MHz) - The #V frequency associated + /// with the sort power target for this table set. This will be either the + /// Nominal or Turbo #V frequency + uint16_t sort_power_freq_mhz; + + /// Regulator Design Point Capacity (in Amps) + uint16_t rdp_capacity; + + /// Up to 8 ASCII characters to be defined by the Table generation team to + /// back reference table sources + char wof_table_source_tag[8]; + + /// Up to 16 ASCII characters as a Package designator + char package_name[16]; + + // Padding to 128B is left to the compiler via the following attribute. + +} WofTablesHeader_t; + + +// Data is provided in 1/24ths granularity with adjustments for integer +// representation +#define VFRT_VRATIO_SIZE 24 + +// 5 steps down from 100% is Fratio_step sizes +#define VFRT_FRATIO_SIZE 5 + + +// HOMER VFRT Layout +typedef struct __attribute__((packed, aligned(256))) HomerVFRTLayout +{ + VFRTHeaderLayout_t vfrtHeader; + uint8_t vfrt_data[VFRT_FRATIO_SIZE][VFRT_VRATIO_SIZE]; + uint8_t padding[128]; +} HomerVFRTLayout_t; + + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_COMMON_H__ */ diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h new file mode 100644 index 00000000000..a523bf7dc68 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h @@ -0,0 +1,216 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_occ.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. 
*/ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates.h +/// @brief Pstate structures and support routines for OCC product firmware +/// +// *HWP HW Owner : Greg Still +// *HWP HW Owner : Michael Floyd +// *HWP FW Owner : Martha Broyles +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : HB:OCC + +#ifndef __P9_PSTATES_OCC_H__ +#define __P9_PSTATES_OCC_H__ + +#include "p9_pstates_common.h" +#include "p9_pstates_pgpe.h" + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +/// PstateParmsBlock Magic Number +/// +/// This magic number identifies a particular version of the +/// PstateParmsBlock and its substructures. The version number should be +/// kept up to date as changes are made to the layout or contents of the +/// structure. + +#define OCC_PARMSBLOCK_MAGIC 0x4f43435050423030ull /* OCCPPB00 */ + +/// IDDQ Reading Type +/// Each entry is 2 bytes. The values are in 6.25mA units; this allow for a +/// maximum value of 409.6A to be represented. +/// +typedef uint16_t iddq_entry_t; + +/// AvgTemp Reading Type +/// Each entry is 1 byte. The values are in 0.5degC units; this allow for a +/// maximum value of 127degC to be represented. +/// +typedef uint8_t avgtemp_entry_t; + +/// Iddq Table +/// +/// A set of arrays of leakage values (Iddq) collected at various voltage +/// conditions during manufacturing test that will feed into the Workload +/// Optimized Frequency algorithms on the OCC. These values are not installed +/// in any hardware facilities. 
+/// +typedef struct +{ + + /// IDDQ version + uint8_t iddq_version; + + /// Good Quads per Sort + uint8_t good_quads_per_sort; + + /// Good Normal Cores per Sort + uint8_t good_normal_cores_per_sort; + + /// Good Caches per Sort + uint8_t good_caches_per_sort; + + /// Good Normal Cores + uint8_t good_normal_cores[MAXIMUM_QUADS]; + + /// Good Caches + uint8_t good_caches[MAXIMUM_QUADS]; + + /// RDP to TDP Scaling Factor in 0.01% units + uint16_t rdp_to_tdp_scale_factor; + + /// WOF Iddq Margin (aging factor) in 0.01% units + uint16_t wof_iddq_margin_factor; + + /// VDD Temperature Scale Factor per 10C in 0.01% units + uint16_t vdd_temperature_scale_factor; + + /// VDN Temperature Scale Factor per 10C in 0.01% units + uint16_t vdn_temperature_scale_factor; + + /// Spare + uint8_t spare[8]; + + /// IVDD ALL Good Cores ON; 5mA units + iddq_entry_t ivdd_all_good_cores_on_caches_on[IDDQ_MEASUREMENTS]; + + /// IVDD ALL Cores OFF; 5mA units + iddq_entry_t ivdd_all_cores_off_caches_off[IDDQ_MEASUREMENTS]; + + /// IVDD ALL Good Cores OFF; 5mA units + iddq_entry_t ivdd_all_good_cores_off_good_caches_on[IDDQ_MEASUREMENTS]; + + /// IVDD Quad 0 Good Cores ON, Caches ON; 5mA units + iddq_entry_t ivdd_quad_good_cores_on_good_caches_on[MAXIMUM_QUADS][IDDQ_MEASUREMENTS]; + + /// IVDDN; 5mA units + iddq_entry_t ivdn[IDDQ_MEASUREMENTS]; + + /// IVDD ALL Good Cores ON, Caches ON; 0.5C units + avgtemp_entry_t avgtemp_all_good_cores_on[IDDQ_MEASUREMENTS]; + + /// avgtemp ALL Cores OFF, Caches OFF; 0.5C units + avgtemp_entry_t avgtemp_all_cores_off_caches_off[IDDQ_MEASUREMENTS]; + + /// avgtemp ALL Good Cores OFF, Caches ON; 0.5C units + avgtemp_entry_t avgtemp_all_good_cores_off[IDDQ_MEASUREMENTS]; + + /// avgtemp Quad 0 Good Cores ON, Caches ON; 0.5C units + avgtemp_entry_t avgtemp_quad_good_cores_on[MAXIMUM_QUADS][IDDQ_MEASUREMENTS]; + + /// avgtempN; 0.5C units + avgtemp_entry_t avgtemp_vdn[IDDQ_MEASUREMENTS]; + + /// spare (per MVPD documentation + /// + /// NOTE: The MVPD documentation defines 43 spare bytes to lead to a 255B structure. However, + /// some consuming code already assumed a 250B structure and the correction of this size was disruptive. + /// This is not a problem until the IQ keyword actually defines these bytes at which time a keyword + /// version update will be need. Thus, this structure will remain at 250B. + uint8_t spare_1[38]; +} IddqTable; + + + +/// The layout of the data created by the Pstate table creation firmware for +/// comsumption by the OCC firmware. This data will reside in the Quad +/// Power Management Region (QPMR). +/// +/// This structure is aligned to 128B to allow for easy downloading using the +/// OCC block copy engine +/// +typedef struct +{ + + /// Magic Number + uint64_t magic; // the last byte of this number the structure's version. + + /// Operating points + /// + /// VPD operating points are stored without load-line correction. Frequencies + /// are in MHz, voltages are specified in units of 5mV, and currents are + /// in units of 500mA. 
+ VpdOperatingPoint operating_points[NUM_OP_POINTS]; + + /// Loadlines and Distribution values for the VDD rail + SysPowerDistParms vdd_sysparm; + + /// Loadlines and Distribution values for the VDN rail + SysPowerDistParms vdn_sysparm; + + /// Loadlines and Distribution values for the VCS rail + SysPowerDistParms vcs_sysparm; + + /// Iddq Table + IddqTable iddq; + + /// WOF Controls + WOFElements wof; + + // Frequency Limits + uint32_t frequency_min_khz; // Comes from Safe Mode computation + uint32_t frequency_max_khz; // Comes from UltraTurbo #V point after biases + uint32_t frequency_step_khz; // Comes from refclk/dpll_divider attributes. + + // Minimum Pstate; Maximum is always 0. + uint32_t pstate_min; // Pstate reflecting frequency_min_khz + + /// Nest frequency in Mhz. This is used by FIT interrupt + uint32_t nest_frequency_mhz; + + //Nest leakage percentage used to calculate the Core leakage + uint16_t nest_leakage_percent; + + uint16_t ceff_tdp_vdn; + + // AC tdp vdd turbo + uint16_t lac_tdp_vdd_turbo_10ma; + + // AC tdp vdd nominal + uint16_t lac_tdp_vdd_nominal_10ma; + + AvsBusTopology_t avs_bus_topology; + +} __attribute__((aligned(128))) OCCPstateParmBlock; + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_OCC_H__ */ diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h new file mode 100644 index 00000000000..1f22bf6b6a8 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h @@ -0,0 +1,367 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_pgpe.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates_pgpe.h +/// @brief Pstate structures and support routines for PGPE Hcode +/// +// *HWP HW Owner : Rahul Batra +// *HWP HW Owner : Michael Floyd +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : PGPE:HS + + +#ifndef __P9_PSTATES_PGPE_H__ +#define __P9_PSTATES_PGPE_H__ + +#include "p9_pstates_common.h" +#include "p9_pstates_cmeqm.h" + +/// PstateParmsBlock Magic Number +/// +/// This magic number identifies a particular version of the +/// PstateParmsBlock and its substructures. The version number should be +/// kept up to date as changes are made to the layout or contents of the +/// structure. + +#define PSTATE_PARMSBLOCK_MAGIC 0x5053544154453030ull /* PSTATE00 */ + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +#include + + +/// Pad repurpose structure +typedef union +{ + uint32_t value; + struct + { + // Reserve 3 bytes + uint16_t reserved16; + uint8_t reserved8; + + // The following is used by PGPE for the WOF algorithm that computes + // vratio. 
The placement here is, frankly, a bit of a hack but is + // done to allievate cross-platform dependencies by not changing the + // overall size of the Global Paramter Block structure. In the future, + // this field should be moved into the base Global Paramter Block + // structure. + uint8_t good_cores_in_sort; + } fields; +} GPPBOptionsPadUse; + +/// Standard options controlling Pstate setup and installation procedures +typedef struct +{ + + /// Option flags; See \ref pstate_options + uint32_t options; + + /// Pad structure to 8 bytes. Could also be used for other options later. + uint32_t pad; + +} PGPEOptions; + +/// UltraTurbo Segment VIDs by Core Count +typedef struct +{ + + /// Number of Segment Pstates + uint8_t ut_segment_pstates; + + /// Maximum number of core possibly active + uint8_t ut_max_cores; + + /// VDD VID modification + /// 1 core active = offset 0 + /// 2 cores active = offset 1 + /// ... + /// 12 cores active = offset 11 + uint8_t ut_segment_vdd_vid[MAX_UT_PSTATES][NUM_ACTIVE_CORES]; + + /// VCS VID modification + /// 1 core active = offset 0 + /// 2 cores active = offset 1 + /// ... + /// 12 cores active = offset 11 + uint8_t ut_segment_vcs_vid[MAX_UT_PSTATES][NUM_ACTIVE_CORES]; + +} VIDModificationTable; + +/// Workload Optimized Frequency (WOF) Elements +/// +/// Structure defining various control elements needed by the WOF algorithm +/// firmware running on the OCC. +/// +typedef struct +{ + + /// WOF Enablement + uint8_t wof_enabled; + + /// TDP<>RDP Current Factor + /// Value read from ??? VPD + /// Defines the scaling factor that converts current (amperage) value from + /// the Thermal Design Point to the Regulator Design Point (RDP) as input + /// to the Workload Optimization Frequency (WOF) OCC algorithm. + /// + /// This is a ratio value and has a granularity of 0.01 decimal. Data + /// is held in hexidecimal (eg 1.22 is represented as 122 and then converted + /// to hex 0x7A). + uint32_t tdp_rdp_factor; + + /// UltraTurbo Segment VIDs by Core Count + VIDModificationTable ut_vid_mod; + +} WOFElements; + +/// VDM/Droop Parameter Block +/// +typedef struct +{ + uint8_t vid_compare_override_mv[VDM_DROOP_OP_POINTS]; + uint8_t vdm_response; + + // For the following *_enable fields, bits are defined to indicate + // which of the respective *override* array entries are valid. + // bit 0: UltraTurbo; bit 1: Turbo; bit 2: Nominal; bit 3: PowSave + + // The respecitve *_enable above indicate which index values are valid + uint8_t droop_small_override[VDM_DROOP_OP_POINTS]; + uint8_t droop_large_override[VDM_DROOP_OP_POINTS]; + uint8_t droop_extreme_override[VDM_DROOP_OP_POINTS]; + uint8_t overvolt_override[VDM_DROOP_OP_POINTS]; + uint16_t fmin_override_khz[VDM_DROOP_OP_POINTS]; + uint16_t fmax_override_khz[VDM_DROOP_OP_POINTS]; + + /// Pad structure to 8-byte alignment + /// @todo pad once fully structure is complete. + // uint8_t pad[1]; + +} GP_VDMParmBlock; + +/// Global Pstate Parameter Block +/// +/// The GlobalPstateParameterBlock is an abstraction of a set of voltage/frequency +/// operating points along with hardware limits. Besides the hardware global +/// Pstate table, the abstract table contains enough extra information to make +/// it the self-contained source for setting up and managing voltage and +/// frequency in either Hardware or Firmware Pstate mode. +/// +/// When installed in PMC, Global Pstate table indices are adjusted such that +/// the defined Pstates begin with table entry 0. 
The table need not be full - +/// the \a pmin and \a entries fields define the minimum and maximum Pstates +/// represented in the table. However at least 1 entry must be defined to +/// create a legal table. +/// +/// Note that Global Pstate table structures to be mapped into PMC hardware +/// must be 1KB-aligned. This requirement is fullfilled by ensuring that +/// instances of this structure are 1KB-aligned. +typedef struct +{ + + + /// Magic Number + uint64_t magic; // the last byte of this number the structure's version. + + /// Pstate options + /// + /// The options are included as part of the GlobalPstateTable so that they + /// are available to upon PGPE initialization. + PGPEOptions options; + + /// The frequency associated with Pstate[0] in KHz + uint32_t reference_frequency_khz; + + /// The frequency step in KHz + uint32_t frequency_step_khz; + + /// Operating points + /// + /// VPD operating points are stored without load-line correction. Frequencies + /// are in MHz, voltages are specified in units of 5mV, and currents are + /// in units of 500mA. + /// \todo Remove this. RTC: 174743 + VpdOperatingPoint operating_points[NUM_OP_POINTS]; + + /// Biases + /// + /// Biases applied to the VPD operating points prior to load-line correction + /// in setting the external voltages. + /// Values in 0.5% + VpdBias ext_biases[NUM_OP_POINTS]; + + /// Loadlines and Distribution values for the VDD rail + SysPowerDistParms vdd_sysparm; + + /// Loadlines and Distribution values for the VCS rail + SysPowerDistParms vcs_sysparm; + + /// Loadlines and Distribution values for the VDN rail + SysPowerDistParms vdn_sysparm; + + /// The "Safe" Voltage + /// + /// A voltage to be used when safe-mode is activated + uint32_t safe_voltage_mv; + + /// The "Safe" Frequency + /// + /// A voltage to be used when safe-mode is activated + uint32_t safe_frequency_khz; + + /// The exponent of the exponential encoding of Pstate stepping delay + uint8_t vrm_stepdelay_range; + + /// The significand of the exponential encoding of Pstate stepping delay + uint8_t vrm_stepdelay_value; + + /// VDM Data + GP_VDMParmBlock vdm; + + /// The following are needed to generated the Pstate Table to HOMER. + + /// Internal Biases + /// + /// Biases applied to the VPD operating points that are used for interpolation + /// in setting the internal voltages (eg Vout to the iVRMs) as part of the + /// Local Actual Pstate. + /// Values in 0.5% + VpdBias int_biases[NUM_OP_POINTS]; + + /// IVRM Data + IvrmParmBlock ivrm; + + /// Resonant Clock Grid Management Setup + ResonantClockingSetup resclk; + + /// Time b/w ext VRM detects write voltage cmd and when voltage begins to move + uint32_t ext_vrm_transition_start_ns; + + /// Transition rate for an increasing VDD voltage excursion + uint32_t ext_vrm_transition_rate_inc_uv_per_us; + + /// Transition rate for an decreasing VDD voltage excursion + uint32_t ext_vrm_transition_rate_dec_uv_per_us; + + /// Delay to account for VDD rail setting + uint32_t ext_vrm_stabilization_time_us; + + /// External VRM transition step size + uint32_t ext_vrm_step_size_mv; + + /// Nest frequency in Mhz. 
This is used by FIT interrupt + uint32_t nest_frequency_mhz; + + //Maximum performance loss threshold when undervolting(in 0.1%, tenths of percent) + uint8_t wov_underv_perf_loss_thresh_pct; + + //WOV undervolting increment percentage(in 0.1%, tenths of percent) + uint8_t wov_underv_step_incr_pct; + + //WOV undervolting decrement percentage(in 0.1%, tenths of percent) + uint8_t wov_underv_step_decr_pct; + + //WOV undervolting max percentage(in 0.1%, tenths of percent) + uint8_t wov_underv_max_pct; + + //When undervolting, if this value is non-zero, then voltage will never be set + //below this value. If it is zero, then the minimum voltage is only bounded by + //wov_underv_max_pct. + uint16_t wov_underv_vmin_mv; + + //When overvolting, then voltage will never be set above this value + uint16_t wov_overv_vmax_mv; + + //WOV overvolting increment percentage(in 0.1%, tenths of percent) + uint8_t wov_overv_step_incr_pct; + + //WOV overvolting decrement percentage(in 0.1%, tenths of percent) + uint8_t wov_overv_step_decr_pct; + + //WOV overvolting max percentage(in 0.1%, tenths of percent) + uint8_t wov_overv_max_pct; + + uint8_t pad; + + //Determine how often to call the wov algorithm with respect + //to PGPE FIT ticks + uint32_t wov_sample_125us; + + //Maximum performance loss(in 0.1%, tenths of percent). We should never be at + //this level, but we check using this value inside PGPE to make sure that this + //is reported if it ever happens + uint32_t wov_max_droop_pct; + + uint32_t pad1; + + /// All operating points + VpdOperatingPoint operating_points_set[NUM_VPD_PTS_SET][NUM_OP_POINTS]; + + //DPLL pstate 0 value + uint32_t dpll_pstate0_value; + + /// Precalculated Pstate-Voltage Slopes + uint16_t PStateVSlopes[NUM_VPD_PTS_SET][VPD_NUM_SLOPES_REGION]; + + /// Precalculated Voltage-Pstates Slopes + uint16_t VPStateSlopes[NUM_VPD_PTS_SET][VPD_NUM_SLOPES_REGION]; + + // Biased Compare VID operating points + CompareVIDPoints vid_point_set[NUM_OP_POINTS]; + + // Biased Threshold operation points + uint8_t threshold_set[NUM_OP_POINTS][NUM_THRESHOLD_POINTS]; + + //pstate-volt compare slopes + int16_t PsVIDCompSlopes[VPD_NUM_SLOPES_REGION]; + + //pstate-volt threshold slopes + int16_t PsVDMThreshSlopes[VPD_NUM_SLOPES_REGION][NUM_THRESHOLD_POINTS]; + + //Jump value operating points + uint8_t jump_value_set[NUM_OP_POINTS][NUM_JUMP_VALUES]; + + //Jump-value slopes + int16_t PsVDMJumpSlopes[VPD_NUM_SLOPES_REGION][NUM_JUMP_VALUES]; + + uint8_t pad2[2]; + + //AvsBusTopology + AvsBusTopology_t avs_bus_topology; + + // @todo DPLL Droop Settings. 
These need communication to SGPE for STOP + +} __attribute__((packed, aligned(1024))) GlobalPstateParmBlock; + + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_PGPE_H__ */ From d4a0a9b1f5189594c8218a8084464daf07d64d1d Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 12 Aug 2021 19:27:30 +0200 Subject: [PATCH 072/213] soc/power9/pstates.c: start filling PPMR in HOMER Signed-off-by: Krystian Hebel Change-Id: I8a39f0104a48508e934c7d849e83a468b50686f1 --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/homer.c | 13 +- src/soc/ibm/power9/homer.h | 7 + src/soc/ibm/power9/pstates.c | 873 ++++++++++++++++++++++++++++++++ 4 files changed, 887 insertions(+), 7 deletions(-) create mode 100644 src/soc/ibm/power9/pstates.c diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 7466971c1c3..1da693c2efc 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -35,5 +35,6 @@ ramstage-y += mvpd.c ramstage-y += vpd.c ramstage-y += tor.c ramstage-y += powerbus.c +ramstage-y += pstates.c endif diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index d545181afc3..b8a7166e6fa 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -9,6 +9,8 @@ #include #include // memset, memcpy #include +#include +#include #include "chip.h" #include "homer.h" @@ -705,10 +707,6 @@ static uint64_t get_available_cores(int *me) return ret; } -#define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) -#define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2*(ex), 2*(ex) + 1))) -#define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) - /* TODO: similar is used in 13.3. Add missing parameters and make it public? 
*/ static void psu_command(uint8_t flags, long time) { @@ -1014,8 +1012,8 @@ static void layout_inst_rings_for_cme(struct homer_st *homer, } for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { - // TODO: update with sizeof(LocalPstateParmBlock) when it's defined - const uint32_t ex_offset = ex * (max_ex_len + ALIGN_UP(616, 32)); + const uint32_t ex_offset = + ex * (max_ex_len + ALIGN_UP(sizeof(LocalPstateParmBlock), 32)); uint8_t *start = &homer->cpmr.cme_sram_region[*ring_len + ex_offset]; struct cme_inst_ring_list *tmp = (void *)start; @@ -1334,7 +1332,8 @@ void build_homer_image(void *homer_bar) (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), cores, ring_variant); - // buildParameterBlock(); + build_parameter_blocks(homer, cores); + // updateCpmrCmeRegion(); // Update QPMR Header area in HOMER diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index f8c0e292320..f2369c4c032 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -3,6 +3,7 @@ #ifndef __SOC_IBM_POWER9_HOMER_H #define __SOC_IBM_POWER9_HOMER_H +#include // PPC_BIT(), PPC_BITMASK() #include /* All fields are big-endian */ @@ -321,4 +322,10 @@ check_member(homer_st, qpmr, 1 * MiB); check_member(homer_st, cpmr, 2 * MiB); check_member(homer_st, ppmr, 3 * MiB); +#define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) +#define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2*(ex), 2*(ex) + 1))) +#define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) + +void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores); + #endif /* __SOC_IBM_POWER9_HOMER_H */ diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c new file mode 100644 index 00000000000..1154a11da52 --- /dev/null +++ b/src/soc/ibm/power9/pstates.c @@ -0,0 +1,873 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "homer.h" +#include +#include +#include +#include // memcpy +#include +#include + +#define IDDQ_MEASUREMENTS 6 +#define MAX_UT_PSTATES 64 // Oversized +#define FREQ_STEP_KHZ 16666 + +#ifndef _BIG_ENDIAN +#error "_BIG_ENDIAN not defined" +#endif + +/* Comes from p9_resclk_defines.H */ +static const int resclk_freq_mhz[] = + {0, 1500, 2000, 3000, 3400, 3700, 3900, 4100}; + +static ResonantClockingSetup resclk = +{ + { }, // pstates - filled by code + { 3, 3, 21, 23, 24, 22, 20, 19}, // idx + { + {0x2000}, {0x3000}, {0x1000}, {0x0000}, + {0x0010}, {0x0030}, {0x0020}, {0x0060}, + {0x0070}, {0x0050}, {0x0040}, {0x00C0}, + {0x00D0}, {0x00F0}, {0x00E0}, {0x00A0}, + {0x00B0}, {0x0090}, {0x0080}, {0x8080}, + {0x9080}, {0xB080}, {0xA080}, {0xE080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080} + }, // Array containing the transition steps + 0, // Delay between steps (in nanoseconds) + { 0, 1, 3, 2}, // L3 clock stepping array + 580 // L3 voltage threshold +}; + +static void copy_poundW_v2_to_v3(PoundW_data_per_quad *v3, PoundW_data *v2) +{ + memset(v3, 0, sizeof(PoundW_data_per_quad)); + + /* Copy poundW */ + for (int i = 0; i < NUM_OP_POINTS; i++) { + v3->poundw[i].ivdd_tdp_ac_current_10ma = + v2->poundw[i].ivdd_tdp_ac_current_10ma; + v3->poundw[i].ivdd_tdp_dc_current_10ma = + 
v2->poundw[i].ivdd_tdp_dc_current_10ma; + v3->poundw[i].vdm_overvolt_small_thresholds = + v2->poundw[i].vdm_overvolt_small_thresholds; + v3->poundw[i].vdm_large_extreme_thresholds = + v2->poundw[i].vdm_large_extreme_thresholds; + v3->poundw[i].vdm_normal_freq_drop = + v2->poundw[i].vdm_normal_freq_drop; + v3->poundw[i].vdm_normal_freq_return = + v2->poundw[i].vdm_normal_freq_return; + v3->poundw[i].vdm_vid_compare_per_quad[0] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[1] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[2] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[3] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[4] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[5] = + v2->poundw[i].vdm_vid_compare_ivid; + } + + /* Copy resistance data */ + memcpy(&v3->resistance_data, &v2->resistance_data, + sizeof(v2->resistance_data)); + + v3->resistance_data.r_undervolt_allowed = v2->undervolt_tested; +} + +static void check_valid_poundV(struct voltage_bucket_data *bucket) +{ + struct voltage_data *data = &bucket->nominal; + assert(bucket != NULL); + + for (int i = 0; i < NUM_OP_POINTS; i++) { // skip powerbus + if (data[i].freq == 0 || data[i].vdd_voltage == 0 || + data[i].idd_current == 0 || data[i].vcs_voltage == 0 || + data[i].ics_current == 0) + die("Bad #V data\n"); + } + // TODO: check if values increase with operating points +} + +static void check_valid_poundW(PoundW_data_per_quad *poundW_bucket, + uint64_t functional_cores) +{ + uint8_t prev_vid_compare_per_quad[MAXIMUM_QUADS] = {}; + /* + * TODO: If the #W version is less than 3, validate Turbo VDM large + * threshold not larger than -32mV to filter out parts that have bad VPD. + */ + + for (int op = 0; op < NUM_OP_POINTS; op++) { + /* Assuming WOF is enabled - check that TDP VDD currents are nonzero */ + if (poundW_bucket->poundw[op].ivdd_tdp_ac_current_10ma == 0 || + poundW_bucket->poundw[op].ivdd_tdp_dc_current_10ma == 0) + die("TDP VDD current equals zero\n"); + + /* Assuming VDM is enabled - validate threshold values */ + for (int quad = 0; quad < MAXIMUM_QUADS; quad++) { + if (!IS_EQ_FUNCTIONAL(quad, functional_cores)) + continue; + + if (poundW_bucket->poundw[op].vdm_vid_compare_per_quad[quad] == 0) + die("VID compare per quad is zero for quad %d\n", quad); + + if (poundW_bucket->poundw[op].vdm_vid_compare_per_quad[quad] < + prev_vid_compare_per_quad[quad]) + die("VID compare per quad is decreasing for quad %d\n", quad); + + prev_vid_compare_per_quad[quad] = + poundW_bucket->poundw[op].vdm_vid_compare_per_quad[quad]; + } + + /* For threshold to be valid... 
*/ + if (/* overvolt threshold must be <= 7 or == 0xC */ + ((poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0xF0) > 0x70 && + (poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0xF0) != 0xC0) || + /* small threshold must be != 8 and != 9 */ + ((poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0x0F) == 0x08 || + (poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0x0F) == 0x09) || + /* large threshold must be != 8 and != 9 */ + ((poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0xF0) == 0x80 || + (poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0xF0) == 0x90) || + /* extreme threshold must be != 8 and != 9 */ + ((poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0x0F) == 0x08 || + (poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0x0F) == 0x09) || + /* N_L must be <= 7 */ + (poundW_bucket->poundw[op].vdm_normal_freq_drop & 0x0F) > 7 || + /* N_S must be <= N_L */ + (((poundW_bucket->poundw[op].vdm_normal_freq_drop & 0xF0) >> 4) > + (poundW_bucket->poundw[op].vdm_normal_freq_drop & 0x0F)) || + /* S_N must be <= N_S */ + ((poundW_bucket->poundw[op].vdm_normal_freq_return & 0x0F) > + ((poundW_bucket->poundw[op].vdm_normal_freq_drop & 0xF0) >> 4)) || + /* L_S must be <= N_L - S_N */ + (((poundW_bucket->poundw[op].vdm_normal_freq_return & 0xF0) >> 4) > + ((poundW_bucket->poundw[op].vdm_normal_freq_drop & 0x0F) - + (poundW_bucket->poundw[op].vdm_normal_freq_return & 0x0F)))) + die("Bad #W threshold values\n"); + } +} + +static void check_valid_iddq(IddqTable *iddq) +{ + if (iddq->iddq_version == 0 || + iddq->good_quads_per_sort == 0 || + iddq->good_normal_cores_per_sort == 0 || + iddq->good_caches_per_sort == 0) + die("Bad IDDQ data\n"); + + for (int i = 0; i < IDDQ_MEASUREMENTS; i++) { + if (iddq->ivdd_all_cores_off_caches_off[i] & 0x8000) + iddq->ivdd_all_cores_off_caches_off[i] = 0; + } +} + +static inline +uint32_t sysp_mv_offset(uint32_t i_100ma, SysPowerDistParms sysparams) +{ + // 100mA*uOhm/10 -> uV + return (i_100ma * (sysparams.loadline_uohm + sysparams.distloss_uohm) / 10 + + sysparams.distoffset_uv) / 1000; +} + +static const +uint8_t grey_map [] = +{ + /* 0mV 0x00*/ 0, + /* - 8mV 0x01*/ 1, + /* -24mV 0x02*/ 3, + /* -16mV 0x03*/ 2, + /* -56mV 0x04*/ 7, + /* -48mV 0x05*/ 6, + /* -32mV 0x06*/ 4, + /* -40mV 0x07*/ 5, + /* -96mV 0x08*/ 12, + /* -96mV 0x09*/ 12, + /* -96mV 0x0a*/ 12, + /* -96mV 0x0b*/ 12, + /* -64mV 0x0c*/ 8, + /* -72mV 0x0d*/ 9, + /* -88mV 0x0e*/ 11, + /* -80mV 0x0f*/ 10 +}; + +/* + * Hostboot has two versions of this function - one for unsigned values and one + * for signed. Usually we are passing smaller types, the only time uint32_t is + * passed is for 'vdd_mv'. As long as these voltages are below 2^31 mV (~2 MV) + * signed type doesn't matter. + */ +static int16_t calc_slope(int32_t y1, int32_t y0, int32_t x1, int32_t x0) +{ + int32_t half = (x1 - x0) / 2; + return (((y1 - y0) << 12) + half) / (x1 - x0); +} + +static void calculate_slopes(GlobalPstateParmBlock *gppb, + PoundW_data_per_quad *pW) +{ + VpdOperatingPoint *ops = gppb->operating_points_set[VPD_PT_SET_BIASED]; + + for (int op = 0; op < NUM_OP_POINTS; op++) { + /* + * Even though vid_point_set doesn't have space for per-quad data, + * Hostboot still writes to the same field for each functional quad. 
+ */ + gppb->vid_point_set[op] = pW->poundw[op].vdm_vid_compare_per_quad[0]; + + gppb->threshold_set[op][VDM_OVERVOLT_INDEX] = + grey_map[(pW->poundw[op].vdm_overvolt_small_thresholds >> 4) & 0x0F]; + gppb->threshold_set[op][VDM_SMALL_INDEX] = + grey_map[pW->poundw[op].vdm_overvolt_small_thresholds & 0x0F]; + gppb->threshold_set[op][VDM_LARGE_INDEX] = + grey_map[(pW->poundw[op].vdm_large_extreme_thresholds >> 4) & 0x0F]; + gppb->threshold_set[op][VDM_XTREME_INDEX] = + grey_map[pW->poundw[op].vdm_large_extreme_thresholds & 0x0F]; + + gppb->jump_value_set[op][VDM_N_S_INDEX] = + (pW->poundw[op].vdm_normal_freq_drop >> 4) & 0x0F; + gppb->jump_value_set[op][VDM_N_L_INDEX] = + pW->poundw[op].vdm_normal_freq_drop & 0x0F; + gppb->jump_value_set[op][VDM_L_S_INDEX] = + (pW->poundw[op].vdm_normal_freq_return >> 4) & 0x0F; + gppb->jump_value_set[op][VDM_S_N_INDEX] = + pW->poundw[op].vdm_normal_freq_return & 0x0F; + } + + /* Slopes are saved in 4.12 fixed point format */ + for (int sl = 0; sl < VPD_NUM_SLOPES_REGION; sl++) { + gppb->PsVIDCompSlopes[sl] = calc_slope(gppb->vid_point_set[sl+1], + gppb->vid_point_set[sl], + ops[sl].pstate, + ops[sl+1].pstate); + + for (int i = 0; i < NUM_THRESHOLD_POINTS; i++) { + gppb->PsVDMThreshSlopes[sl][i] = calc_slope(gppb->threshold_set[sl+1][i], + gppb->threshold_set[sl][i], + ops[sl].pstate, + ops[sl+1].pstate); + } + + for (int i = 0; i < NUM_JUMP_VALUES; i++) { + gppb->PsVDMJumpSlopes[sl][i] = calc_slope(gppb->jump_value_set[sl+1][i], + gppb->jump_value_set[sl][i], + ops[sl].pstate, + ops[sl+1].pstate); + } + } + + #define OPS gppb->operating_points_set + for (int set = 0; set < NUM_VPD_PTS_SET; set++) { + for (int sl = 0; sl < VPD_NUM_SLOPES_REGION; sl++) { + gppb->PStateVSlopes[set][sl] = calc_slope(OPS[set][sl+1].vdd_mv, + OPS[set][sl].vdd_mv, + OPS[set][sl].pstate, + OPS[set][sl+1].pstate); + gppb->VPStateSlopes[set][sl] = calc_slope(OPS[set][sl].pstate, + OPS[set][sl+1].pstate, + OPS[set][sl+1].vdd_mv, + OPS[set][sl].vdd_mv); + } + } + #undef OPS +} + +static uint32_t calculate_sm_voltage(uint8_t sm_pstate, + GlobalPstateParmBlock *gppb) +{ + int op = NUM_OP_POINTS - 1; + VpdOperatingPoint *ops = gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP]; + uint16_t *slopes = gppb->PStateVSlopes[VPD_PT_SET_BIASED_SYSP]; + + while (op >= 0 && (ops[op].pstate < sm_pstate)) + op--; + + assert(ops[op].pstate >= sm_pstate); + + /* sm_pstate is somewhere between op and op+1 */ + return ops[op].vdd_mv + ((ops[op].pstate - sm_pstate) * slopes[op] >> 12); +} + +/* resclk is always sorted */ +static void update_resclk(int ref_freq_khz) +{ + uint8_t prev_idx = resclk.resclk_index[0]; + for (int i = 0; i < RESCLK_FREQ_REGIONS; i++) { + /* If freq == 0 round pstate down - can't have negative frequency */ + if (resclk_freq_mhz[i] == 0) { + resclk.resclk_freq[i] = ref_freq_khz / FREQ_STEP_KHZ; + continue; + } + + /* If freq > ref_freq - cap and use previous index */ + if (resclk_freq_mhz[i] * 1000 > ref_freq_khz) { + resclk.resclk_freq[i] = 0; + resclk.resclk_index[i] = prev_idx; + continue; + } + + /* Otherwise always round pstate up */ + resclk.resclk_freq[i] = (ref_freq_khz - resclk_freq_mhz[i] * 1000 + + FREQ_STEP_KHZ - 1) / FREQ_STEP_KHZ; + + prev_idx = resclk.resclk_index[i]; + } +} + +/* Assumption: no bias is applied to operating points */ +void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) +{ + uint8_t buf[512]; + uint32_t size = sizeof(buf); + struct voltage_kwd *poundV = (struct voltage_kwd *)&buf; + struct voltage_bucket_data *bucket = 
NULL; + struct voltage_bucket_data poundV_bucket = {}; + PoundW_data_per_quad poundW_bucket = {}; + OCCPstateParmBlock *oppb = (OCCPstateParmBlock *)homer->ppmr.occ_parm_block; + GlobalPstateParmBlock *gppb = (GlobalPstateParmBlock *) + &homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len]; + char record[] = "LRP0"; + + oppb->magic = OCC_PARMSBLOCK_MAGIC; // "OCCPPB00" + oppb->frequency_step_khz = FREQ_STEP_KHZ; + oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR 0 from talos.xml + oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml + + gppb->magic = PSTATE_PARMSBLOCK_MAGIC; // "PSTATE00" + gppb->options.options = 0; + gppb->frequency_step_khz = FREQ_STEP_KHZ; + + /* + * VpdBias External and Internal Biases for Global and Local parameter + * blocks - assumed no bias, fill with 0. + */ + memset(gppb->ext_biases, 0, sizeof(gppb->ext_biases)); + memset(gppb->int_biases, 0, sizeof(gppb->int_biases)); + + /* Default values are from talos.xml */ + gppb->vdd_sysparm.loadline_uohm = 254; + gppb->vdd_sysparm.distloss_uohm = 0; + gppb->vdd_sysparm.distoffset_uv = 0; + + gppb->vcs_sysparm.loadline_uohm = 0; + gppb->vcs_sysparm.distloss_uohm = 64; + gppb->vcs_sysparm.distoffset_uv = 0; + + gppb->vdn_sysparm.loadline_uohm = 0; + gppb->vdn_sysparm.distloss_uohm = 50; + gppb->vdn_sysparm.distoffset_uv = 0; + + /* External VRM parameters - values are internal defaults */ + gppb->ext_vrm_transition_start_ns = 8000; + gppb->ext_vrm_transition_rate_inc_uv_per_us = 10000; + gppb->ext_vrm_transition_rate_dec_uv_per_us = 10000; + gppb->ext_vrm_stabilization_time_us = 5; + gppb->ext_vrm_step_size_mv = 50; + + /* WOV parameters - values are internal defaults */ + gppb->wov_sample_125us = 2; + gppb->wov_max_droop_pct = 125; + gppb->wov_underv_perf_loss_thresh_pct = 5; + gppb->wov_underv_step_incr_pct = 5; + gppb->wov_underv_step_decr_pct = 5; + gppb->wov_underv_max_pct = 100; + gppb->wov_overv_vmax_mv = 1150; + gppb->wov_overv_step_incr_pct = 5; + gppb->wov_overv_step_decr_pct = 5; + gppb->wov_overv_max_pct = 0; + + /* Avs Bus topology - values come from talos.xml */ + gppb->avs_bus_topology.vdd_avsbus_num = 0; + gppb->avs_bus_topology.vdd_avsbus_rail = 0; + gppb->avs_bus_topology.vdn_avsbus_num = 1; + gppb->avs_bus_topology.vdn_avsbus_rail = 0; + gppb->avs_bus_topology.vcs_avsbus_num = 0; + gppb->avs_bus_topology.vcs_avsbus_rail = 1; + + for (int quad = 0; quad < MAXIMUM_QUADS; quad++) { + if (!IS_EQ_FUNCTIONAL(quad, functional_cores)) + continue; + + record[3] = '0' + quad; + size = sizeof(buf); + if (!mvpd_extract_keyword(record, "#V", buf, &size)) { + die("Failed to read %s record from MVPD", record); + } + + assert(poundV->version == VOLTAGE_DATA_VERSION); + assert(size >= sizeof(struct voltage_kwd)); + + /* + * Q: How does Hostboot decide which bucket to use? + * A: It checks if bucket's PB freq equals PB freq saved in attribute. + * Q: Where does PB freq attribute value come from? + * A: #V - it is first non-zero value. + * + * Given that in any case we would have to iterate over all buckets, + * there is no need to read PB freq again. 
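+ * E.g. if buckets 0 and 1 report a PowerBus frequency of 0 and bucket 2 does not,
+ * bucket 2 is selected here, which should match the bucket Hostboot would arrive
+ * at via the attribute.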
+ */ + for (int i = 0; i < VOLTAGE_BUCKET_COUNT; i++) { + if (poundV->buckets[i].powerbus.freq != 0) { + bucket = &poundV->buckets[i]; + break; + } + } + + check_valid_poundV(bucket); + + if (poundV_bucket.id == 0) { + memcpy(&poundV_bucket, bucket, sizeof(poundV_bucket)); + continue; + } + + /* Frequencies must match */ + if (bucket->nominal.freq != poundV_bucket.nominal.freq || + bucket->powersave.freq != poundV_bucket.powersave.freq || + bucket->turbo.freq != poundV_bucket.turbo.freq || + bucket->ultra_turbo.freq != poundV_bucket.ultra_turbo.freq || + bucket->powerbus.freq != poundV_bucket.powerbus.freq) + die("Frequency mismatch in #V MVPD between quads\n"); + + /* + * Voltages don't have to match, but we want to know the bucket ID for + * the highest voltage. Note: vdd_voltage in powerbus is actually VDN. + */ + if (bucket->nominal.vdd_voltage > poundV_bucket.nominal.vdd_voltage || + bucket->powersave.vdd_voltage > poundV_bucket.powersave.vdd_voltage || + bucket->turbo.vdd_voltage > poundV_bucket.turbo.vdd_voltage || + bucket->ultra_turbo.vdd_voltage > poundV_bucket.ultra_turbo.vdd_voltage || + bucket->powerbus.vdd_voltage > poundV_bucket.powerbus.vdd_voltage) + memcpy(&poundV_bucket, bucket, sizeof(poundV_bucket)); + } + + assert(poundV_bucket.id != 0); + struct voltage_data *vd = &poundV_bucket.nominal; + + /* Save UltraTurbo frequency as reference */ + update_resclk(vd[VPD_PV_ULTRA].freq * 1000); + oppb->frequency_max_khz = vd[VPD_PV_ULTRA].freq * 1000; + oppb->nest_frequency_mhz = vd[VPD_PV_POWERBUS].freq; + + gppb->reference_frequency_khz = oppb->frequency_max_khz; + gppb->nest_frequency_mhz = oppb->nest_frequency_mhz; + // This is Pstate value that would be assigned to frequency of 0 + gppb->dpll_pstate0_value = gppb->reference_frequency_khz / + gppb->frequency_step_khz; + + for (int op = 0; op < NUM_OP_POINTS; op++) { + /* Assuming no bias */ + oppb->operating_points[op].frequency_mhz = vd[op].freq; + oppb->operating_points[op].vdd_mv = vd[op].vdd_voltage; + oppb->operating_points[op].idd_100ma = vd[op].idd_current; + oppb->operating_points[op].vcs_mv = vd[op].vcs_voltage; + oppb->operating_points[op].ics_100ma = vd[op].ics_current; + /* Integer math rounds pstates down (i.e.
towards higher frequency) */ + oppb->operating_points[op].pstate = + (oppb->frequency_max_khz - vd[op].freq * 1000) / oppb->frequency_step_khz; + } + + /* Sort operating points - swap power saving with nominal */ + { + VpdOperatingPoint nom; + nom = oppb->operating_points[VPD_PV_NOMINAL]; + oppb->operating_points[POWERSAVE] = + oppb->operating_points[VPD_PV_POWERSAVE]; + oppb->operating_points[NOMINAL] = nom; + } + + /* TODO: copy operating points to LPPB */ + memcpy(gppb->operating_points, oppb->operating_points, + sizeof(gppb->operating_points)); + { + + memcpy(gppb->operating_points_set[VPD_PT_SET_RAW], oppb->operating_points, + sizeof(gppb->operating_points)); + memcpy(gppb->operating_points_set[VPD_PT_SET_SYSP], oppb->operating_points, + sizeof(gppb->operating_points)); + /* Assuming no bias */ + memcpy(gppb->operating_points_set[VPD_PT_SET_BIASED], + oppb->operating_points, sizeof(gppb->operating_points)); + memcpy(gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP], + oppb->operating_points, sizeof(gppb->operating_points)); + + for (int op = 0; op < NUM_OP_POINTS; op++) { + gppb->operating_points_set[VPD_PT_SET_SYSP][op].vdd_mv += + sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_SYSP][op].idd_100ma, + gppb->vdd_sysparm); + gppb->operating_points_set[VPD_PT_SET_SYSP][op].vcs_mv += + sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_SYSP][op].ics_100ma, + gppb->vcs_sysparm); + gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].vdd_mv += + sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].idd_100ma, + gppb->vdd_sysparm); + gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].vcs_mv += + sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].ics_100ma, + gppb->vcs_sysparm); + } + } + + + /* + * #W is in CRP0, there is no CRP1..5 for other quads. Format of #W: + * - Version: 1 byte + * - #V Bucket ID #1: 1 byte + * - VDM Data for bucket 1: varies by version + * - #V Bucket ID #2: 1 byte + * - ... + * - #V Bucket ID #6: 1 byte + * - VDM Data for bucket 6: varies by version + * + * Size of each VDM data (excluding bucket ID) by version: + * - 0x1 - 0x28 bytes + * - 0x2-0xF - 0x3C bytes + * - 0x30 - 0x87 bytes + * + * The following code supports only the second and third versions. + * + * HOSTBUG: Hostboot reads #W for each (functional) quad, does all the + * parsing and then writes it to one output buffer, overwriting data written + * previously. As there is only one #W, this doesn't make any sense. It also + * first parses/writes, then tests whether the bucket IDs even match. + */ + size = sizeof(buf); + if (!mvpd_extract_keyword("CRP0", "#W", buf, &size)) { + die("Failed to read %s record from MVPD", "CRP0"); + } + + if ((buf[0] < 0x2 || buf[0] > 0xF) && buf[0] != 0x30) + die("Unsupported version (%#x) of #W MVPD\n", buf[0]); + + if (buf[0] == 0x30) { + /* Version 3, just find proper bucket and copy data */ + assert(size >= 1 + VOLTAGE_BUCKET_COUNT * + (1 + sizeof(PoundW_data_per_quad))); + for (int i = 0; i < VOLTAGE_BUCKET_COUNT; i++) { + /* Version + i * (bucket ID + bucket data) */ + int offset = 1 + i * (1 + sizeof(PoundW_data_per_quad)); + if (buf[offset] == poundV_bucket.id) { + memcpy(&poundW_bucket, &buf[offset + 1], sizeof(poundW_bucket)); + break; + } + } + } + else { + /* Version 2, different data size (0x3C) and format */ + /* + * HOSTBUG: we should be able to use sizeof(PoundW_data), but we can't. + * #W is packed in MVPD, but not in the type's definition.
+ */ + assert(size >= 1 + VOLTAGE_BUCKET_COUNT * (1 + 0x3C)); + for (int i = 0; i < VOLTAGE_BUCKET_COUNT; i++) { + /* Version + i * (bucket ID + bucket data) */ + int offset = 1 + i * (1 + 0x3C); + if (buf[offset] == poundV_bucket.id) { + copy_poundW_v2_to_v3(&poundW_bucket, + (PoundW_data *)&buf[offset + 1]); + break; + } + } + } + + /* Sort operating points - swap power saving with nominal */ + { + poundw_entry_per_quad_t nom; + nom = poundW_bucket.poundw[VPD_PV_NOMINAL]; + poundW_bucket.poundw[POWERSAVE] = + poundW_bucket.poundw[VPD_PV_POWERSAVE]; + poundW_bucket.poundw[NOMINAL] = nom; + } + + check_valid_poundW(&poundW_bucket, functional_cores); + calculate_slopes(gppb, &poundW_bucket); + + /* Calculate safe mode frequency/pstate/voltage */ + { + /* + * Assumption: N_L values are the same for PS and N operating points. + * Not sure if this is always true so assert just in case. + * + * This makes calculation of jump value much easier. + */ + assert((poundW_bucket.poundw[POWERSAVE].vdm_normal_freq_drop & 0x0F) == + (poundW_bucket.poundw[NOMINAL].vdm_normal_freq_drop & 0x0F)); + uint8_t jump_value = + poundW_bucket.poundw[POWERSAVE].vdm_normal_freq_drop & 0x0F; + + uint32_t sm_freq = (oppb->frequency_max_khz - + (oppb->operating_points[POWERSAVE].pstate * + oppb->frequency_step_khz)) + * 32 / (32 - jump_value); + + uint8_t sm_pstate = (oppb->frequency_max_khz - sm_freq) / + oppb->frequency_step_khz; + + assert(sm_pstate < oppb->operating_points[POWERSAVE].pstate); + + oppb->pstate_min = sm_pstate; + /* Reverse calculation to deal with rounding caused by integer math */ + oppb->frequency_min_khz = oppb->frequency_max_khz - + sm_pstate * oppb->frequency_step_khz; + gppb->safe_frequency_khz = oppb->frequency_min_khz; + + assert(oppb->frequency_min_khz < oppb->frequency_max_khz); + /* TODO: safe mode voltage will be needed for GPPB - requires sysparams values */ + gppb->safe_voltage_mv = calculate_sm_voltage(sm_pstate, gppb); + gppb->wov_underv_vmin_mv = gppb->safe_voltage_mv; + + printk(BIOS_DEBUG, "Safe mode freq = %d kHz, voltage = %d mV\n", + oppb->frequency_min_khz, gppb->safe_voltage_mv); + } + + /* + * IDDQ - can't read straight to IddqTable, see comment before spare bytes + * in struct definition.
+ */ + size = sizeof(buf); + if (!mvpd_extract_keyword("CRP0", "IQ", buf, &size)) { + die("Failed to read %s record from MVPD", "CRP0"); + } + assert(size >= sizeof(IddqTable)); + memcpy(&oppb->iddq, buf, sizeof(IddqTable)); + + check_valid_iddq(&oppb->iddq); + +//~ // Update P State parameter block info in HOMER +//~ buildParameterBlock(homer, proc, ppmrHdr = homer.ppmr.header, imgType, buf1, buf1s): + //~ p9_pstate_parameter_block(proc, &stateSupStruct /*13K struct */, buf1, wofTableSize = buf1s): + //~ // Instantiate pstate object + //~ PlatPmPPB l_pmPPB(proc) + //~ - this constructor makes a local copy of attributes and prints them + + //~ // ----------------------------------------------------------- + //~ // Clear the PstateSuperStructure and install the magic number + //~ //---------------------------------------------------------- + //~ memset(stateSupStruct, 0, sizeof(stateSupStruct)) + + //~ *stateSupStruct.magic = PSTATE_PARMSBLOCK_MAGIC // 0x5053544154453030ull, PSTATE00 + + //~ // ---------------- + //~ // get Resonant clocking attributes + //~ // ---------------- + //~ l_pmPPB.resclk_init(): + //~ - assuming Resonant Clocks are enabled + //~ set_resclk_table_attrs(): + //~ - reads data from p9_resclk_defines.H and writes it to attributes: + //~ - ATTR_SYSTEM_RESCLK_L3_VALUE + //~ - ATTR_SYSTEM_RESCLK_FREQ_REGIONS + //~ - ATTR_SYSTEM_RESCLK_FREQ_REGION_INDEX + //~ - ATTR_SYSTEM_RESCLK_VALUE + //~ - ATTR_SYSTEM_RESCLK_L3_VOLTAGE_THRESHOLD_MV + //~ res_clock_setup(): + //~ - reads data from attributes and saves it to iv_resclk_setup + //~ - all of the p9_resclk_defines.H + //~ - ATTR_SYSTEM_RESCLK_STEP_DELAY (= 0?) + //~ - none of these is used anywhere else + //~ - pstates and indices are capped at ultra turbo, but all entries are still written + + //~ // ---------------- + //~ // Initialize GPPB structure + //~ // ---------------- + //~ l_pmPPB.gppb_init(&l_globalppb): + //~ // LHS fields are members of l_globalppb + //~ // This function is basically one big unnecessary memcpy... + + //~ // Struct definition in p9_pstates_pgpe.h + //~ vdm = {ATTR_VDM_VID_COMPARE_OVERRIDE_MV, ATTR_DPLL_VDM_RESPONSE, + //~ ATTR_VDM_DROOP_SMALL_OVERRIDE, ATTR_VDM_DROOP_LARGE_OVERRIDE, ATTR_VDM_DROOP_EXTREME_OVERRIDE, + //~ ATTR_VDM_OVERVOLT_OVERRIDE, ATTR_VDM_FMIN_OVERRIDE_KHZ, ATTR_VDM_FMAX_OVERRIDE_KHZ} + + //~ // Struct definition in p9_pstates_cmeqm.h + //~ ivrm = {ATTR_IVRM_STRENGTH_LOOKUP, ATTR_IVRM_VIN_MULTIPLIER, ATTR_IVRM_VIN_MAX_MV, + //~ ATTR_IVRM_STEP_DELAY_NS, ATTR_IVRM_STABILIZATION_DELAY_NS, ATTR_IVRM_DEADZONE_MV} + + //~ // Initialize res clk data + //~ resclk = iv_resclk_setup // from res_clock_setup() + + //~ // Put the good_normal_cores value into the GPPB for PGPE + //~ // This is u8 -> u32 conversion + //~ options.pad = iv_iddqt.good_normal_cores_per_sort // from get_mvpd_iddq() + + //~ // ---------------- + //~ // Initialize LPPB structure + //~ // ---------------- + //~ // l_localppb is an array of 6 (quads per CPU) + //~ l_pmPPB.lppb_init(&l_localppb[0]): + //~ for each functional quad: + //~ // LHS is l_localppb[quad] + //~ magic = LOCAL_PARMSBLOCK_MAGIC // 0x434d455050423030, "CMEPPB00" + + //~ // VpdBias External and Internal Biases for Global and Local parameter + //~ // block + //~ for each OP point: + //~ ext_biases[op] = iv_bias[op] // = 0? 
+ //~ int_biases[op] = iv_bias[op] + + //~ // Load vpd operating points - use biased values from compute_vpd_pts() + //~ for each OP point: + //~ operating_points[op].frequency_mhz = iv_operating_points[BIASED][op].frequency_mhz + //~ operating_points[op].vdd_mv = iv_operating_points[BIASED][op].vdd_mv + //~ operating_points[op].idd_100ma = iv_operating_points[BIASED][op].idd_100ma + //~ operating_points[op].vcs_mv = iv_operating_points[BIASED][op].vcs_mv + //~ operating_points[op].ics_100ma = iv_operating_points[BIASED][op].ics_100ma + //~ operating_points[op].pstate = iv_operating_points[BIASED][op].pstate + + //~ // Defaul values are from talos.xml + //~ vdd_sysparm = {ATTR_PROC_R_LOADLINE_VDD_UOHM, ATTR_PROC_R_DISTLOSS_VDD_UOHM, ATTR_PROC_VRM_VOFFSET_VDD_UV} = {254, 0, 0} + + //~ // IvrmParmBlock + //~ // Struct definition in p9_pstates_cmeqm.h + //~ ivrm = {ATTR_IVRM_STRENGTH_LOOKUP, ATTR_IVRM_VIN_MULTIPLIER, ATTR_IVRM_VIN_MAX_MV, + //~ ATTR_IVRM_STEP_DELAY_NS, ATTR_IVRM_STABILIZATION_DELAY_NS, ATTR_IVRM_DEADZONE_MV} + + //~ // VDMParmBlock + //~ // WARNING: this is different than in GPPB + //~ memset(vdm, 0, sizeof(vdm)) + + //~ dpll_pstate0_value = reference_frequency_khz / frequency_step_khz + + //~ resclk = iv_resclk_setup // from res_clock_setup() + + //~ // Code memcpies always from data for first quad, seems like a bug + //~ for each OP point: + //~ vid_point_set[op] = iv_vid_point_set[0][op] // from compute_vdm_threshold_pts() + + //~ threshold_set = iv_threshold_set // from compute_vdm_threshold_pts() + //~ jump_value_set = iv_jump_value_set // from compute_vdm_threshold_pts() + + //~ // Code memcpies always from data for first quad, seems like a bug + //~ for each Pstate segment: + //~ PsVIDCompSlopes[segment] = iv_PsVIDCompSlopes[0][segment] // from compute_PsVIDCompSlopes_slopes() + + //~ PsVDMThreshSlopes = iv_PsVDMThreshSlopes // from compute_PsVDMThreshSlopes() + //~ PsVDMJumpSlopes = iv_PsVDMJumpSlopes // from compute_PsVDMJumpSlopes() + + //~ // ---------------- + //~ // WOF initialization + //~ // ---------------- + //~ l_pmPPB.wof_init(o_buf /* will be homer->ppmr.wof_tables after few more memcpies */, o_size): + //~ - Search for proper data in WOFDATA PNOR partition + //~ - WOFDATA is 3M, make sure CBFS_CACHE is big enough + //~ - search until match is found: + //~ - core count + //~ - socket power (nominal, as read from #V) + //~ - frequency (nominal, as read from #V) + //~ - if version >= WOF_TABLE_VERSION_POWERMODE (2): + //~ - mode matches current mode (WOF_MODE_NOMINAL = 1) or wildcard (WOF_MODE_UNKNOWN = 0) + //~ - structures used: + //~ - wofImageHeader_t from plat_wof_access.C + //~ - check magic and version + //~ - wofSectionTableEntry_t from plat_wof_access.C + //~ - WofTablesHeader_t from p9_pstates_common.h + //~ memcpy(o_buf, &WofTablesHeader_t /* for found entry */, wofSectionTableEntry_t[found_entry_idx].size) + + //~ // Just the header, rest needs parsing + //~ memcpy(homer->ppmr.wof_tables, o_buf, sizeof(WofTablesHeader_t)) + + //~ for vfrt_index in 0..((WofTablesHeader_t*)o_buf->vdn_size * (WofTablesHeader_t*)o_buf->vdd_size * ACTIVE_QUADS) -1: + //~ src = o_buf + sizeof(WofTablesHeader_t) + vfrt_index * 128 /* vRTF size */ + //~ dst = homer->ppmr.wof_tables + sizeof(WofTablesHeader_t) + vfrt_index * sizeof(HomerVFRTLayout_t) /* 256B */ + //~ update_vfrt (src, dst): + //~ - Assumption: no bias, makes this function so much easier + //~ // Data in src has 8B header followed by 5*24 bytes of frequency information, such that freq = value*step_size + 
1GHz. + //~ // Data in dst has (almost) the same header followed by 5*24 bytes of Pstates. + //~ // Copy header + //~ memcpy(dst, src, 8) + //~ // Flip type from System to Homer + //~ dst.type_version |= 0x10 + //~ assert(dst.magic = "VT") + //~ for idx in 0..5*24 -1: + //~ dst[8+idx] = freq_to_pstate(src[8+idx]) // rounded properly + + //~ // ---------------- + //~ //Initialize OPPB structure + //~ // ---------------- + //~ l_pmPPB.oppb_init(&l_occppb): + //~ // LHS is l_occppb, it eventually will be homer->ppmr.occ_parm_block + //~ magic = OCC_PARMSBLOCK_MAGIC // 0x4f43435050423030, "OCCPPB00" + + //~ wof.wof_enabled = 1 // Assuming wof_init() succeeded + + //~ vdd_sysparm = {ATTR_PROC_R_LOADLINE_VDD_UOHM, ATTR_PROC_R_DISTLOSS_VDD_UOHM, ATTR_PROC_VRM_VOFFSET_VDD_UV} = {254, 0, 0} + //~ vcs_sysparm = {ATTR_PROC_R_LOADLINE_VCS_UOHM, ATTR_PROC_R_DISTLOSS_VCS_UOHM, ATTR_PROC_VRM_VOFFSET_VCS_UV} = {0, 64, 0} + //~ vdn_sysparm = {ATTR_PROC_R_LOADLINE_VDN_UOHM, ATTR_PROC_R_DISTLOSS_VDN_UOHM, ATTR_PROC_VRM_VOFFSET_VDN_UV} = {0, 50, 0} + + //~ // Load vpd operating points - use biased values from compute_vpd_pts() + //~ for each OP point: + //~ operating_points[op].frequency_mhz = iv_operating_points[BIASED][op].frequency_mhz + //~ operating_points[op].vdd_mv = iv_operating_points[BIASED][op].vdd_mv + //~ operating_points[op].idd_100ma = iv_operating_points[BIASED][op].idd_100ma + //~ operating_points[op].vcs_mv = iv_operating_points[BIASED][op].vcs_mv + //~ operating_points[op].ics_100ma = iv_operating_points[BIASED][op].ics_100ma + //~ operating_points[op].pstate = iv_operating_points[BIASED][op].pstate + + + //~ // The minimum Pstate must be rounded down so that core floor constraints are not violated. + //~ pstate_min = freq_to_pstate(ATTR_SAFE_MODE_FREQUENCY_MHZ * 1000) // from safe_mode_computation() + + //~ frequency_min_khz = iv_reference_frequency_khz - (pstate_min * iv_frequency_step_khz) + //~ frequency_max_khz = iv_reference_frequency_khz + //~ frequency_step_khz = iv_frequency_step_khz + + + //~ // Iddq Table + //~ iddq = iv_iddqt // from get_mvpd_iddq() + + //~ wof.tdp_rdp_factor = ATTR_TDP_RDP_CURRENT_FACTOR // 0 from talos.xml + //~ nest_leakage_percent = ATTR_NEST_LEAKAGE_PERCENT // 60 (0x3C) from hb_temp_defaults.xml + + //~ lac_tdp_vdd_turbo_10ma = + //~ iv_poundW_data.poundw[TURBO].ivdd_tdp_ac_current_10ma + //~ lac_tdp_vdd_nominal_10ma = + //~ iv_poundW_data.poundw[NOMINAL].ivdd_tdp_ac_current_10ma + + //~ // As the Vdn dimension is not supported in the WOF tables, + //~ // hardcoding this value to the OCC as non-zero to keep it happy. 
+ //~ ceff_tdp_vdn = 1; + + //~ //Update nest frequency in OPPB + //~ nest_frequency_mhz = ATTR_FREQ_PB_MHZ // 1866 from talos.xml + + //~ // Assuming >= CPMR_2.0 + //~ buildCmePstateInfo(homer, proc, imgType, &stateSupStruct): + //~ CmeHdr = &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE] + //~ CmeHdr->pstate_offset = CmeHdr->core_spec_ring_offset + CmeHdr->max_spec_ring_len + //~ CmeHdr->custom_length = ROUND_UP(sizeof(LocalPstateParmBlock), 32) / 32 + CmeHdr->max_spec_ring_len + //~ for each functional CME: + //~ memcpy(&homer->cpmr.cme_sram_region[cme * CmeHdr->custom_length * 32 + CmeHdr->pstate_offset], stateSupStruct->localppb[cme/2], sizeof(LocalPstateParmBlock)) + + //~ memcpy(&homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len], stateSupStruct->globalppb, sizeof(GlobalPstateParmBlock)) + //~ homer->ppmr.header.gppb_offset = homer->ppmr.header.hcode_offset + homer->ppmr.header.hcode_len + //~ homer->ppmr.header.gppb_len = ALIGN_UP(sizeof(GlobalPstateParmBlock), 8) + + //~ memcpy(&homer->ppmr.occ_parm_block, stateSupStruct->occppb, sizeof(OCCPstateParmBlock)) + //~ homer->ppmr.header.oppb_offset = offsetof(ppmr, ppmr.occ_parm_block) + //~ homer->ppmr.header.oppb_len = ALIGN_UP(sizeof(OCCPstateParmBlock), 8) + + //~ // Assuming >= CPMR_2.0 + //~ homer->ppmr.header.lppb_offset = 0 + //~ homer->ppmr.header.lppb_len = 0 + + //~ homer->ppmr.header.pstables_offset = offsetof(ppmr, ppmr.pstate_table) + //~ homer->ppmr.header.pstables_len = PSTATE_OUTPUT_TABLES_SIZE // 16 KiB + + //~ homer->ppmr.header.wof_table_offset = OCC_WOF_TABLES_OFFSET + //~ homer->ppmr.header.wof_table_len = OCC_WOF_TABLES_SIZE + //~ // Instead of this memcpy write it directly to its final destination in wof_init() + //~ memcpy(homer->ppmr.wof_tables, o_buf/* see wof_init() */, o_size/* see wof_init() */) + + //~ homer->ppmr.header.sram_img_size = homer->ppmr.header.hcode_len + homer->ppmr.header.gppb_len + + +} From 9ec310a9d5f1d4ef514263dd6878eef2a9d6ed65 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 16 Aug 2021 17:05:23 +0200 Subject: [PATCH 073/213] soc/power9/pstates.c: add code for Local Pstate Parameters Block Signed-off-by: Krystian Hebel Change-Id: I85df7b2df924c9f214adf632140421da82fbd7e9 --- src/soc/ibm/power9/pstates.c | 454 ++++++++++++++++------------------- 1 file changed, 209 insertions(+), 245 deletions(-) diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c index 1154a11da52..776f38e0052 100644 --- a/src/soc/ibm/power9/pstates.c +++ b/src/soc/ibm/power9/pstates.c @@ -335,26 +335,78 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) struct voltage_bucket_data *bucket = NULL; struct voltage_bucket_data poundV_bucket = {}; PoundW_data_per_quad poundW_bucket = {}; + char record[] = "LRP0"; + struct cme_img_header *cme_hdr = (struct cme_img_header *) + &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + cme_hdr->pstate_offset = cme_hdr->core_spec_ring_offset + + cme_hdr->max_spec_ring_len; + cme_hdr->custom_length = + ALIGN_UP(cme_hdr->max_spec_ring_len * 32 + sizeof(LocalPstateParmBlock), 32) / + 32; + + /* + * OCC Pstate Parameter Block and Global Pstate Parameter Block are filled + * directly in their final place as we go. + * + * Local Pstate Parameter Block in Hostboot uses an array with entry for + * each quad (note there are two CMEs per quad, those are written with the + * same entry). 
Nevertheless, data written to LPPB for each quad (and CME) + * is identical - the only field that could have per-quad data is VID comp, + * but it is filled with data for quad 0. It looks as if the code was made + * with anticipation of #W v3, but that version is not yet used. + * + * Here, we use CME 0 on quad 0 as a template that is filled as we go. This + * structure is then copied to other functional CMEs. Note that the first + * CME doesn't have to be functional, but always writing to its region is + * much easier than finding out proper source for memcpy later. + */ OCCPstateParmBlock *oppb = (OCCPstateParmBlock *)homer->ppmr.occ_parm_block; GlobalPstateParmBlock *gppb = (GlobalPstateParmBlock *) &homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len]; - char record[] = "LRP0"; + LocalPstateParmBlock *lppb = (LocalPstateParmBlock *) + &homer->cpmr.cme_sram_region[cme_hdr->pstate_offset * 32]; + + /* OPPB - constant fields */ oppb->magic = OCC_PARMSBLOCK_MAGIC; // "OCCPPB00" oppb->frequency_step_khz = FREQ_STEP_KHZ; oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR 0 from talos.xml oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml + /* FIXME: uncomment after WOF_DATA is prepared */ + //oppb->wof.wof_enabled = 1; // Assuming wof_init() succeeds + oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR from talos.xml + oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml + /* + * As the Vdn dimension is not supported in the WOF tables, hardcoding this + * value to the OCC as non-zero to keep it happy. + */ + oppb->ceff_tdp_vdn = 1; + + /* Default values are from talos.xml */ + oppb->vdd_sysparm.loadline_uohm = 254; + oppb->vdd_sysparm.distloss_uohm = 0; + oppb->vdd_sysparm.distoffset_uv = 0; + + oppb->vcs_sysparm.loadline_uohm = 0; + oppb->vcs_sysparm.distloss_uohm = 64; + oppb->vcs_sysparm.distoffset_uv = 0; + + oppb->vdn_sysparm.loadline_uohm = 0; + oppb->vdn_sysparm.distloss_uohm = 50; + oppb->vdn_sysparm.distoffset_uv = 0; + + /* GPPB -constant fields */ + gppb->magic = PSTATE_PARMSBLOCK_MAGIC; // "PSTATE00" gppb->options.options = 0; gppb->frequency_step_khz = FREQ_STEP_KHZ; /* * VpdBias External and Internal Biases for Global and Local parameter - * blocks - assumed no bias, fill with 0. + * blocks - assumed no bias, filled with 0. HOMER was already cleared so + * no need to repeat it. 
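+	 * As a consequence, ext_biases[] and int_biases[] stay all-zero for every
+	 * operating point.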
*/ - memset(gppb->ext_biases, 0, sizeof(gppb->ext_biases)); - memset(gppb->int_biases, 0, sizeof(gppb->int_biases)); /* Default values are from talos.xml */ gppb->vdd_sysparm.loadline_uohm = 254; @@ -396,6 +448,17 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) gppb->avs_bus_topology.vcs_avsbus_num = 0; gppb->avs_bus_topology.vcs_avsbus_rail = 1; + /* LPPB - constant fields */ + lppb->magic = LOCAL_PARMSBLOCK_MAGIC; // "CMEPPB00" + + /* Default values are from talos.xml */ + lppb->vdd_sysparm.loadline_uohm = 254; + lppb->vdd_sysparm.distloss_uohm = 0; + lppb->vdd_sysparm.distoffset_uv = 0; + + + /* Read and validate #V */ + for (int quad = 0; quad < MAXIMUM_QUADS; quad++) { if (!IS_EQ_FUNCTIONAL(quad, functional_cores)) continue; @@ -455,17 +518,13 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) assert(poundV_bucket.id != 0); struct voltage_data *vd = £V_bucket.nominal; + + /* OPPB - #V data */ + /* Save UltraTurbo frequency as reference */ - update_resclk(vd[VPD_PV_ULTRA].freq * 1000); oppb->frequency_max_khz = vd[VPD_PV_ULTRA].freq * 1000; oppb->nest_frequency_mhz = vd[VPD_PV_POWERBUS].freq; - gppb->reference_frequency_khz = oppb->frequency_max_khz; - gppb->nest_frequency_mhz = oppb->nest_frequency_mhz; - // This is Pstate value that would be assigned to frequency of 0 - gppb->dpll_pstate0_value = gppb->reference_frequency_khz / - gppb->frequency_step_khz; - for (int op = 0; op < NUM_OP_POINTS; op++) { /* Assuming no bias */ oppb->operating_points[op].frequency_mhz = vd[op].freq; @@ -487,7 +546,25 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) oppb->operating_points[NOMINAL] = nom; } - /* TODO: copy operating points to LPPB */ + /* GPPB - #V data */ + + gppb->reference_frequency_khz = oppb->frequency_max_khz; + gppb->nest_frequency_mhz = oppb->nest_frequency_mhz; + /* This is Pstate value that would be assigned to frequency of 0 */ + gppb->dpll_pstate0_value = gppb->reference_frequency_khz / + gppb->frequency_step_khz; + + update_resclk(gppb->reference_frequency_khz); + memcpy(&gppb->resclk, &resclk, sizeof(ResonantClockingSetup)); + + /* + * Global PPB VDM iVRM are set based on attributes, but all of them are by + * default 0. HOMER was memset to 0, so no need to do anything more. + * + * For Local PPBs, VDM is explicitly set to 0 even when attributes have + * different values. iVRM are still set based on attributes. + */ + memcpy(gppb->operating_points, oppb->operating_points, sizeof(gppb->operating_points)); { @@ -518,6 +595,14 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) } } + /* LPPB - #V data */ + + /* LPPB has neither reference frequency nor step size, use GPPB values */ + lppb->dpll_pstate0_value = gppb->reference_frequency_khz / + gppb->frequency_step_khz; + memcpy(lppb->operating_points, oppb->operating_points, + sizeof(lppb->operating_points)); + memcpy(&lppb->resclk, &resclk, sizeof(ResonantClockingSetup)); /* * #W is in CRP0, there is no CRP1..5 for other quads. 
Format of #W: @@ -590,7 +675,14 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) } check_valid_poundW(£W_bucket, functional_cores); - calculate_slopes(gppb, £W_bucket); + + + /* OPPB - #W data */ + + oppb->lac_tdp_vdd_turbo_10ma = + poundW_bucket.poundw[TURBO].ivdd_tdp_ac_current_10ma; + oppb->lac_tdp_vdd_nominal_10ma = + poundW_bucket.poundw[NOMINAL].ivdd_tdp_ac_current_10ma; /* Calculate safe mode frequency/pstate/voltage */ { @@ -619,17 +711,39 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) /* Reverse calculation to deal with rounding caused by integer math */ oppb->frequency_min_khz = oppb->frequency_max_khz - sm_pstate * oppb->frequency_step_khz; - gppb->safe_frequency_khz = oppb->frequency_min_khz; assert(oppb->frequency_min_khz < oppb->frequency_max_khz); - /* TODO: safe mode voltage will be needed for GPPB - requires sysparams values */ - gppb->safe_voltage_mv = calculate_sm_voltage(sm_pstate, gppb); - gppb->wov_underv_vmin_mv = gppb->safe_voltage_mv; - - printk(BIOS_DEBUG, "Safe mode freq = %d kHZ, voltage = %d mv\n", - oppb->frequency_min_khz, gppb->safe_voltage_mv); } + /* GPPB - #W data */ + + calculate_slopes(gppb, £W_bucket); + gppb->safe_frequency_khz = oppb->frequency_min_khz; + gppb->safe_voltage_mv = calculate_sm_voltage(oppb->pstate_min, gppb); + gppb->wov_underv_vmin_mv = gppb->safe_voltage_mv; + + printk(BIOS_DEBUG, "Safe mode freq = %d kHZ, voltage = %d mv\n", + gppb->safe_frequency_khz, gppb->safe_voltage_mv); + + /* LPPB - #W data */ + /* + * This basically repeats calculate_slopes() for LPPB. Unfortunately, the + * structures aren't compatible. + */ + memcpy(lppb->vid_point_set, gppb->vid_point_set, + sizeof(lppb->vid_point_set)); + memcpy(lppb->threshold_set, gppb->threshold_set, + sizeof(lppb->threshold_set)); + memcpy(lppb->jump_value_set, gppb->jump_value_set, + sizeof(lppb->jump_value_set)); + memcpy(lppb->PsVIDCompSlopes, gppb->PsVIDCompSlopes, + sizeof(lppb->PsVIDCompSlopes)); + memcpy(lppb->PsVDMThreshSlopes, gppb->PsVDMThreshSlopes, + sizeof(lppb->PsVDMThreshSlopes)); + memcpy(lppb->PsVDMJumpSlopes, gppb->PsVDMJumpSlopes, + sizeof(lppb->PsVDMJumpSlopes)); + + /* * IDDQ - can't read straight to IddqTable, see comment before spare bytes * in struct definition. 
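The hunk above only regroups the safe-mode computation between OPPB and GPPB; the arithmetic itself is easier to follow in isolation. The sketch below is illustrative only and not part of the patch (the helper names are invented), but it mirrors the same integer math: the PowerSave operating point is widened by the 4-bit #W jump value N_L, the result is rounded down to a whole Pstate, and that Pstate can then be converted back to a frequency so rounding stays consistent.

/* Illustrative sketch only, not patch content. */
#include <stdint.h>

/* Pstate 0 is the reference (UltraTurbo) frequency; each Pstate step lowers it. */
static uint32_t pstate_to_freq_khz(uint8_t pstate, uint32_t max_khz, uint32_t step_khz)
{
	return max_khz - pstate * step_khz;
}

static uint8_t safe_mode_pstate(uint32_t max_khz, uint32_t step_khz,
				uint8_t powersave_pstate, uint8_t jump_value)
{
	/* Undo the N_L drop: f_safe = f_powersave * 32 / (32 - N_L), N_L is 4 bits. */
	uint32_t ps_khz = pstate_to_freq_khz(powersave_pstate, max_khz, step_khz);
	uint32_t sm_khz = ps_khz * 32 / (32 - jump_value);

	/* Truncation rounds the Pstate down, i.e. towards a higher frequency. */
	return (max_khz - sm_khz) / step_khz;
}

With the values used by the patch (frequency_step_khz of 16666 and frequencies expressed in kHz) this fits comfortably in 32-bit arithmetic.
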
@@ -643,231 +757,81 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) check_valid_iddq(&oppb->iddq); -//~ // Update P State parameter block info in HOMER -//~ buildParameterBlock(homer, proc, ppmrHdr = homer.ppmr.header, imgType, buf1, buf1s): - //~ p9_pstate_parameter_block(proc, &stateSupStruct /*13K struct */, buf1, wofTableSize = buf1s): - //~ // Instantiate pstate object - //~ PlatPmPPB l_pmPPB(proc) - //~ - this constructor makes a local copy of attributes and prints them - - //~ // ----------------------------------------------------------- - //~ // Clear the PstateSuperStructure and install the magic number - //~ //---------------------------------------------------------- - //~ memset(stateSupStruct, 0, sizeof(stateSupStruct)) - - //~ *stateSupStruct.magic = PSTATE_PARMSBLOCK_MAGIC // 0x5053544154453030ull, PSTATE00 - - //~ // ---------------- - //~ // get Resonant clocking attributes - //~ // ---------------- - //~ l_pmPPB.resclk_init(): - //~ - assuming Resonant Clocks are enabled - //~ set_resclk_table_attrs(): - //~ - reads data from p9_resclk_defines.H and writes it to attributes: - //~ - ATTR_SYSTEM_RESCLK_L3_VALUE - //~ - ATTR_SYSTEM_RESCLK_FREQ_REGIONS - //~ - ATTR_SYSTEM_RESCLK_FREQ_REGION_INDEX - //~ - ATTR_SYSTEM_RESCLK_VALUE - //~ - ATTR_SYSTEM_RESCLK_L3_VOLTAGE_THRESHOLD_MV - //~ res_clock_setup(): - //~ - reads data from attributes and saves it to iv_resclk_setup - //~ - all of the p9_resclk_defines.H - //~ - ATTR_SYSTEM_RESCLK_STEP_DELAY (= 0?) - //~ - none of these is used anywhere else - //~ - pstates and indices are capped at ultra turbo, but all entries are still written - - //~ // ---------------- - //~ // Initialize GPPB structure - //~ // ---------------- - //~ l_pmPPB.gppb_init(&l_globalppb): - //~ // LHS fields are members of l_globalppb - //~ // This function is basically one big unnecessary memcpy... - - //~ // Struct definition in p9_pstates_pgpe.h - //~ vdm = {ATTR_VDM_VID_COMPARE_OVERRIDE_MV, ATTR_DPLL_VDM_RESPONSE, - //~ ATTR_VDM_DROOP_SMALL_OVERRIDE, ATTR_VDM_DROOP_LARGE_OVERRIDE, ATTR_VDM_DROOP_EXTREME_OVERRIDE, - //~ ATTR_VDM_OVERVOLT_OVERRIDE, ATTR_VDM_FMIN_OVERRIDE_KHZ, ATTR_VDM_FMAX_OVERRIDE_KHZ} - - //~ // Struct definition in p9_pstates_cmeqm.h - //~ ivrm = {ATTR_IVRM_STRENGTH_LOOKUP, ATTR_IVRM_VIN_MULTIPLIER, ATTR_IVRM_VIN_MAX_MV, - //~ ATTR_IVRM_STEP_DELAY_NS, ATTR_IVRM_STABILIZATION_DELAY_NS, ATTR_IVRM_DEADZONE_MV} - - //~ // Initialize res clk data - //~ resclk = iv_resclk_setup // from res_clock_setup() - - //~ // Put the good_normal_cores value into the GPPB for PGPE - //~ // This is u8 -> u32 conversion - //~ options.pad = iv_iddqt.good_normal_cores_per_sort // from get_mvpd_iddq() - - //~ // ---------------- - //~ // Initialize LPPB structure - //~ // ---------------- - //~ // l_localppb is an array of 6 (quads per CPU) - //~ l_pmPPB.lppb_init(&l_localppb[0]): - //~ for each functional quad: - //~ // LHS is l_localppb[quad] - //~ magic = LOCAL_PARMSBLOCK_MAGIC // 0x434d455050423030, "CMEPPB00" - - //~ // VpdBias External and Internal Biases for Global and Local parameter - //~ // block - //~ for each OP point: - //~ ext_biases[op] = iv_bias[op] // = 0? 
- //~ int_biases[op] = iv_bias[op] - - //~ // Load vpd operating points - use biased values from compute_vpd_pts() - //~ for each OP point: - //~ operating_points[op].frequency_mhz = iv_operating_points[BIASED][op].frequency_mhz - //~ operating_points[op].vdd_mv = iv_operating_points[BIASED][op].vdd_mv - //~ operating_points[op].idd_100ma = iv_operating_points[BIASED][op].idd_100ma - //~ operating_points[op].vcs_mv = iv_operating_points[BIASED][op].vcs_mv - //~ operating_points[op].ics_100ma = iv_operating_points[BIASED][op].ics_100ma - //~ operating_points[op].pstate = iv_operating_points[BIASED][op].pstate - - //~ // Defaul values are from talos.xml - //~ vdd_sysparm = {ATTR_PROC_R_LOADLINE_VDD_UOHM, ATTR_PROC_R_DISTLOSS_VDD_UOHM, ATTR_PROC_VRM_VOFFSET_VDD_UV} = {254, 0, 0} - - //~ // IvrmParmBlock - //~ // Struct definition in p9_pstates_cmeqm.h - //~ ivrm = {ATTR_IVRM_STRENGTH_LOOKUP, ATTR_IVRM_VIN_MULTIPLIER, ATTR_IVRM_VIN_MAX_MV, - //~ ATTR_IVRM_STEP_DELAY_NS, ATTR_IVRM_STABILIZATION_DELAY_NS, ATTR_IVRM_DEADZONE_MV} - - //~ // VDMParmBlock - //~ // WARNING: this is different than in GPPB - //~ memset(vdm, 0, sizeof(vdm)) - - //~ dpll_pstate0_value = reference_frequency_khz / frequency_step_khz - - //~ resclk = iv_resclk_setup // from res_clock_setup() - - //~ // Code memcpies always from data for first quad, seems like a bug - //~ for each OP point: - //~ vid_point_set[op] = iv_vid_point_set[0][op] // from compute_vdm_threshold_pts() - - //~ threshold_set = iv_threshold_set // from compute_vdm_threshold_pts() - //~ jump_value_set = iv_jump_value_set // from compute_vdm_threshold_pts() - - //~ // Code memcpies always from data for first quad, seems like a bug - //~ for each Pstate segment: - //~ PsVIDCompSlopes[segment] = iv_PsVIDCompSlopes[0][segment] // from compute_PsVIDCompSlopes_slopes() - - //~ PsVDMThreshSlopes = iv_PsVDMThreshSlopes // from compute_PsVDMThreshSlopes() - //~ PsVDMJumpSlopes = iv_PsVDMJumpSlopes // from compute_PsVDMJumpSlopes() - - //~ // ---------------- - //~ // WOF initialization - //~ // ---------------- - //~ l_pmPPB.wof_init(o_buf /* will be homer->ppmr.wof_tables after few more memcpies */, o_size): - //~ - Search for proper data in WOFDATA PNOR partition - //~ - WOFDATA is 3M, make sure CBFS_CACHE is big enough - //~ - search until match is found: - //~ - core count - //~ - socket power (nominal, as read from #V) - //~ - frequency (nominal, as read from #V) - //~ - if version >= WOF_TABLE_VERSION_POWERMODE (2): - //~ - mode matches current mode (WOF_MODE_NOMINAL = 1) or wildcard (WOF_MODE_UNKNOWN = 0) - //~ - structures used: - //~ - wofImageHeader_t from plat_wof_access.C - //~ - check magic and version - //~ - wofSectionTableEntry_t from plat_wof_access.C - //~ - WofTablesHeader_t from p9_pstates_common.h - //~ memcpy(o_buf, &WofTablesHeader_t /* for found entry */, wofSectionTableEntry_t[found_entry_idx].size) - - //~ // Just the header, rest needs parsing - //~ memcpy(homer->ppmr.wof_tables, o_buf, sizeof(WofTablesHeader_t)) - - //~ for vfrt_index in 0..((WofTablesHeader_t*)o_buf->vdn_size * (WofTablesHeader_t*)o_buf->vdd_size * ACTIVE_QUADS) -1: - //~ src = o_buf + sizeof(WofTablesHeader_t) + vfrt_index * 128 /* vRTF size */ - //~ dst = homer->ppmr.wof_tables + sizeof(WofTablesHeader_t) + vfrt_index * sizeof(HomerVFRTLayout_t) /* 256B */ - //~ update_vfrt (src, dst): - //~ - Assumption: no bias, makes this function so much easier - //~ // Data in src has 8B header followed by 5*24 bytes of frequency information, such that freq = value*step_size + 
1GHz. - //~ // Data in dst has (almost) the same header followed by 5*24 bytes of Pstates. - //~ // Copy header - //~ memcpy(dst, src, 8) - //~ // Flip type from System to Homer - //~ dst.type_version |= 0x10 - //~ assert(dst.magic = "VT") - //~ for idx in 0..5*24 -1: - //~ dst[8+idx] = freq_to_pstate(src[8+idx]) // rounded properly - - //~ // ---------------- - //~ //Initialize OPPB structure - //~ // ---------------- - //~ l_pmPPB.oppb_init(&l_occppb): - //~ // LHS is l_occppb, it eventually will be homer->ppmr.occ_parm_block - //~ magic = OCC_PARMSBLOCK_MAGIC // 0x4f43435050423030, "OCCPPB00" - - //~ wof.wof_enabled = 1 // Assuming wof_init() succeeded - - //~ vdd_sysparm = {ATTR_PROC_R_LOADLINE_VDD_UOHM, ATTR_PROC_R_DISTLOSS_VDD_UOHM, ATTR_PROC_VRM_VOFFSET_VDD_UV} = {254, 0, 0} - //~ vcs_sysparm = {ATTR_PROC_R_LOADLINE_VCS_UOHM, ATTR_PROC_R_DISTLOSS_VCS_UOHM, ATTR_PROC_VRM_VOFFSET_VCS_UV} = {0, 64, 0} - //~ vdn_sysparm = {ATTR_PROC_R_LOADLINE_VDN_UOHM, ATTR_PROC_R_DISTLOSS_VDN_UOHM, ATTR_PROC_VRM_VOFFSET_VDN_UV} = {0, 50, 0} - - //~ // Load vpd operating points - use biased values from compute_vpd_pts() - //~ for each OP point: - //~ operating_points[op].frequency_mhz = iv_operating_points[BIASED][op].frequency_mhz - //~ operating_points[op].vdd_mv = iv_operating_points[BIASED][op].vdd_mv - //~ operating_points[op].idd_100ma = iv_operating_points[BIASED][op].idd_100ma - //~ operating_points[op].vcs_mv = iv_operating_points[BIASED][op].vcs_mv - //~ operating_points[op].ics_100ma = iv_operating_points[BIASED][op].ics_100ma - //~ operating_points[op].pstate = iv_operating_points[BIASED][op].pstate - - - //~ // The minimum Pstate must be rounded down so that core floor constraints are not violated. - //~ pstate_min = freq_to_pstate(ATTR_SAFE_MODE_FREQUENCY_MHZ * 1000) // from safe_mode_computation() - - //~ frequency_min_khz = iv_reference_frequency_khz - (pstate_min * iv_frequency_step_khz) - //~ frequency_max_khz = iv_reference_frequency_khz - //~ frequency_step_khz = iv_frequency_step_khz - - - //~ // Iddq Table - //~ iddq = iv_iddqt // from get_mvpd_iddq() - - //~ wof.tdp_rdp_factor = ATTR_TDP_RDP_CURRENT_FACTOR // 0 from talos.xml - //~ nest_leakage_percent = ATTR_NEST_LEAKAGE_PERCENT // 60 (0x3C) from hb_temp_defaults.xml - - //~ lac_tdp_vdd_turbo_10ma = - //~ iv_poundW_data.poundw[TURBO].ivdd_tdp_ac_current_10ma - //~ lac_tdp_vdd_nominal_10ma = - //~ iv_poundW_data.poundw[NOMINAL].ivdd_tdp_ac_current_10ma - - //~ // As the Vdn dimension is not supported in the WOF tables, - //~ // hardcoding this value to the OCC as non-zero to keep it happy. 
- //~ ceff_tdp_vdn = 1; - - //~ //Update nest frequency in OPPB - //~ nest_frequency_mhz = ATTR_FREQ_PB_MHZ // 1866 from talos.xml - - //~ // Assuming >= CPMR_2.0 - //~ buildCmePstateInfo(homer, proc, imgType, &stateSupStruct): - //~ CmeHdr = &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE] - //~ CmeHdr->pstate_offset = CmeHdr->core_spec_ring_offset + CmeHdr->max_spec_ring_len - //~ CmeHdr->custom_length = ROUND_UP(sizeof(LocalPstateParmBlock), 32) / 32 + CmeHdr->max_spec_ring_len - //~ for each functional CME: - //~ memcpy(&homer->cpmr.cme_sram_region[cme * CmeHdr->custom_length * 32 + CmeHdr->pstate_offset], stateSupStruct->localppb[cme/2], sizeof(LocalPstateParmBlock)) - - //~ memcpy(&homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len], stateSupStruct->globalppb, sizeof(GlobalPstateParmBlock)) - //~ homer->ppmr.header.gppb_offset = homer->ppmr.header.hcode_offset + homer->ppmr.header.hcode_len - //~ homer->ppmr.header.gppb_len = ALIGN_UP(sizeof(GlobalPstateParmBlock), 8) - - //~ memcpy(&homer->ppmr.occ_parm_block, stateSupStruct->occppb, sizeof(OCCPstateParmBlock)) - //~ homer->ppmr.header.oppb_offset = offsetof(ppmr, ppmr.occ_parm_block) - //~ homer->ppmr.header.oppb_len = ALIGN_UP(sizeof(OCCPstateParmBlock), 8) - - //~ // Assuming >= CPMR_2.0 - //~ homer->ppmr.header.lppb_offset = 0 - //~ homer->ppmr.header.lppb_len = 0 - - //~ homer->ppmr.header.pstables_offset = offsetof(ppmr, ppmr.pstate_table) - //~ homer->ppmr.header.pstables_len = PSTATE_OUTPUT_TABLES_SIZE // 16 KiB - - //~ homer->ppmr.header.wof_table_offset = OCC_WOF_TABLES_OFFSET - //~ homer->ppmr.header.wof_table_len = OCC_WOF_TABLES_SIZE - //~ // Instead of this memcpy write it directly to its final destination in wof_init() - //~ memcpy(homer->ppmr.wof_tables, o_buf/* see wof_init() */, o_size/* see wof_init() */) - - //~ homer->ppmr.header.sram_img_size = homer->ppmr.header.hcode_len + homer->ppmr.header.gppb_len + /* + * Pad was re-purposed, Hostboot developers created additional union. The + * new union is in the same header file, few lines above the structure, but + * the original field still uses 'uint32_t pad', instead of new type. This + * leads to the following monstrosity. 
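+	 * GPPBOptionsPadUse overlays that pad with bit-fields; only its
+	 * good_cores_in_sort field is written here, from the IDDQ data.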
+ */ + ((GPPBOptionsPadUse *)&gppb->options.pad)->fields.good_cores_in_sort = + oppb->iddq.good_normal_cores_per_sort; + + /* TODO: WOF */ + //~ // ---------------- + //~ // WOF initialization + //~ // ---------------- + //~ wof_init(o_buf = &homer->ppmr.wof_tables): + //~ - Search for proper data in WOFDATA PNOR partition + //~ - WOFDATA is 3M, make sure CBFS_CACHE is big enough + //~ - search until match is found: + //~ - core count + //~ - socket power (nominal, as read from #V) + //~ - frequency (nominal, as read from #V) + //~ - if version >= WOF_TABLE_VERSION_POWERMODE (2): + //~ - mode matches current mode (WOF_MODE_NOMINAL = 1) or wildcard (WOF_MODE_UNKNOWN = 0) + //~ - structures used: + //~ - wofImageHeader_t from plat_wof_access.C + //~ - check magic and version + //~ - wofSectionTableEntry_t from plat_wof_access.C + //~ - WofTablesHeader_t from p9_pstates_common.h + //~ memcpy(o_buf, &WofTablesHeader_t /* for found entry */, wofSectionTableEntry_t[found_entry_idx].size) + + //~ // Just the header, rest needs parsing + //~ memcpy(homer->ppmr.wof_tables, o_buf, sizeof(WofTablesHeader_t)) + + //~ for vfrt_index in 0..((WofTablesHeader_t*)o_buf->vdn_size * (WofTablesHeader_t*)o_buf->vdd_size * ACTIVE_QUADS) -1: + //~ src = o_buf + sizeof(WofTablesHeader_t) + vfrt_index * 128 /* vRTF size */ + //~ dst = homer->ppmr.wof_tables + sizeof(WofTablesHeader_t) + vfrt_index * sizeof(HomerVFRTLayout_t) /* 256B */ + //~ update_vfrt (src, dst): + //~ - Assumption: no bias, makes this function so much easier + //~ // Data in src has 8B header followed by 5*24 bytes of frequency information, such that freq = value*step_size + 1GHz. + //~ // Data in dst has (almost) the same header followed by 5*24 bytes of Pstates. + //~ // Copy header + //~ memcpy(dst, src, 8) + //~ // Flip type from System to Homer + //~ dst.type_version |= 0x10 + //~ assert(dst.magic = "VT") + //~ for idx in 0..5*24 -1: + //~ dst[8+idx] = freq_to_pstate(src[8+idx]) // rounded properly + + /* Copy LPPB to functional CMEs */ + for (int cme = 1; cme < MAX_CMES_PER_CHIP; cme++) { + if (!IS_EX_FUNCTIONAL(cme, functional_cores)) + continue; + + memcpy(&homer->cpmr.cme_sram_region[cme * cme_hdr->custom_length * 32 + + cme_hdr->pstate_offset * 32], + lppb, sizeof(LocalPstateParmBlock)); + } + + /* Finally, update headers */ + homer->ppmr.header.gppb_offset = homer->ppmr.header.hcode_offset + + homer->ppmr.header.hcode_len; + homer->ppmr.header.gppb_len = ALIGN_UP(sizeof(GlobalPstateParmBlock), 8); + + homer->ppmr.header.oppb_offset = offsetof(struct ppmr_st, occ_parm_block); + homer->ppmr.header.oppb_len = ALIGN_UP(sizeof(OCCPstateParmBlock), 8); + + /* Assuming >= CPMR_2.0 */ + homer->ppmr.header.lppb_offset = 0; + homer->ppmr.header.lppb_len = 0; + + homer->ppmr.header.pstables_offset = offsetof(struct ppmr_st, pstate_table); + homer->ppmr.header.pstables_len = PSTATE_OUTPUT_TABLES_SIZE; // 16 KiB + homer->ppmr.header.wof_table_offset = OCC_WOF_TABLES_OFFSET; + homer->ppmr.header.wof_table_len = OCC_WOF_TABLES_SIZE; + homer->ppmr.header.sram_img_size = homer->ppmr.header.hcode_len + + homer->ppmr.header.gppb_len; } From 759f369185b07dec6f2951d380d695846662f5fe Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 12 May 2022 14:38:16 +0300 Subject: [PATCH 074/213] soc/power9/homer.c: make MAX_* macros consistent Signed-off-by: Krystian Hebel Change-Id: If4bc3f5248b59ec44e565ac1ea9c6574c0c22afa --- src/soc/ibm/power9/homer.c | 14 +++++++------- src/soc/ibm/power9/homer.h | 21 ++++++++------------- 2 files changed, 15 
insertions(+), 20 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index b8a7166e6fa..56c9d740946 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -338,7 +338,7 @@ static void build_self_restore(struct homer_st *homer, * TODO: check if we can skip both cpureg and save_self for nonfunctional * cores */ - for (int core = 0; core < MAX_CORES; core++) { + for (int core = 0; core < MAX_CORES_PER_CHIP; core++) { struct smf_core_self_restore *csr = &homer->cpmr.core_self_restore[core]; uint32_t *csa = csr->core_save_area; @@ -433,7 +433,7 @@ static void build_cme(struct homer_st *homer, struct xip_cme_header *cme, hdr->common_ring_len = 0; hdr->scom_offset = 0; - hdr->scom_len = CORE_SCOM_RESTORE_SIZE / MAX_CORES / 2; + hdr->scom_len = CORE_SCOM_RESTORE_SIZE / MAX_CORES_PER_CHIP / 2; hdr->core_spec_ring_offset = 0; hdr->max_spec_ring_len = 0; @@ -681,7 +681,7 @@ static void stop_gpe_init(struct homer_st *homer) static uint64_t get_available_cores(int *me) { uint64_t ret = 0; - for (int i = 0; i < MAX_CORES; i++) { + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { uint64_t val = read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0040); if (val & PPC_BIT(0)) { printk(BIOS_SPEW, "Core %d is functional%s\n", i, @@ -1365,7 +1365,7 @@ void build_homer_image(void *homer_bar) // customizeMagicWord( pChipHomer ); /* Set up wakeup mode */ - for (int i = 0; i < MAX_CORES; i++) { + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { if (!IS_EC_FUNCTIONAL(i, cores)) continue; @@ -1390,7 +1390,7 @@ void build_homer_image(void *homer_bar) /* 15.3 establish EX chiplet */ report_istep(15, 3); /* Multicast groups for cores were assigned in get_available_cores() */ - for (int i = 0; i < MAX_CORES/4; i++) { + for (int i = 0; i < MAX_QUADS_PER_CHIP; i++) { if (IS_EQ_FUNCTIONAL(i, cores) && (read_scom_for_chiplet(EP00_CHIPLET_ID + i, 0xF0001) & PPC_BITMASK(3,5)) == PPC_BITMASK(3,5)) @@ -1404,7 +1404,7 @@ void build_homer_image(void *homer_bar) /* Writing OCC QCSR */ uint64_t qcsr = 0; - for (int i = 0; i < MAX_CORES/2; i++) { + for (int i = 0; i < MAX_CMES_PER_CHIP; i++) { if (IS_EX_FUNCTIONAL(i, cores)) qcsr |= PPC_BIT(i); } @@ -1414,7 +1414,7 @@ void build_homer_image(void *homer_bar) report_istep(15, 4); /* Initialize the PFET controllers */ - for (int i = 0; i < MAX_CORES; i++) { + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { if (IS_EC_FUNCTIONAL(i, cores)) { // Periodic core quiesce workaround /* diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index f2369c4c032..64d8d8af828 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -47,6 +47,11 @@ #define OCC_WOF_TABLES_OFFSET (768 * KiB) #define PPMR_HEADER_SIZE (1 * KiB) +#define MAX_CORES_PER_CHIP 24 +#define MAX_CORES_PER_EX 2 +#define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) +#define MAX_CMES_PER_CHIP (MAX_CORES_PER_CHIP / MAX_CORES_PER_EX) + /* =================== QPMR =================== */ struct qpmr_header { @@ -154,12 +159,7 @@ struct cpmr_header { uint32_t core_self_restore_offset; uint32_t core_self_restore_len; uint32_t core_max_scom_entry; - uint32_t quad0_pstate_offset; - uint32_t quad1_pstate_offset; - uint32_t quad2_pstate_offset; - uint32_t quad3_pstate_offset; - uint32_t quad4_pstate_offset; - uint32_t quad5_pstate_offset; + uint32_t quad_pstate_offset[MAX_QUADS_PER_CHIP]; } __attribute__((packed, aligned(256))); struct smf_core_self_restore { @@ -193,18 +193,13 @@ struct cme_img_header { uint32_t custom_length; // = real length / 32 }; -#define 
MAX_CORES 24 -#define MAX_QUADS_PER_CHIP 6 -#define MAX_CORES_PER_EX 2 -#define MAX_CMES_PER_CHIP (MAX_CORES / MAX_CORES_PER_EX) - struct cpmr_st { struct cpmr_header header; uint8_t exe[SELF_RESTORE_REGION_SIZE - sizeof(struct cpmr_header)]; - struct smf_core_self_restore core_self_restore[MAX_CORES]; + struct smf_core_self_restore core_self_restore[MAX_CORES_PER_CHIP]; uint8_t pad[CORE_SCOM_RESTORE_OFFSET - (SELF_RESTORE_REGION_SIZE + - MAX_CORES * sizeof(struct smf_core_self_restore))]; + MAX_CORES_PER_CHIP * sizeof(struct smf_core_self_restore))]; uint8_t core_scom[CORE_SCOM_RESTORE_SIZE]; uint8_t cme_sram_region[CME_SRAM_IMG_SIZE]; }; From 65cb63053ed929d00bfaf8c66922cc2a3be4748b Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 17 Aug 2021 15:51:02 +0200 Subject: [PATCH 075/213] soc/power9/homer.c: update HOMER headers Signed-off-by: Krystian Hebel Signed-off-by: Sergii Dmytruk Change-Id: If4bc3f5248b59ec44e565ac1ea9c6574c0c22afa --- src/soc/ibm/power9/homer.c | 135 ++++++++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 9 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 56c9d740946..d23a525c4a7 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -1256,6 +1256,127 @@ static void layout_rings_for_sgpe(struct homer_st *homer, } } +static void update_headers(struct homer_st *homer, uint64_t cores) +{ + /* + * Update CPMR Header with Scan Ring details + * This function for each entry does one of: + * - write constant value + * - copy value form other field + * - one or both of the above with arithmetic operations + * Consider writing these fields in previous functions instead. + */ + struct cpmr_header *cpmr_hdr = &homer->cpmr.header; + struct cme_img_header *cme_hdr = (struct cme_img_header *) + &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + cpmr_hdr->img_offset = offsetof(struct cpmr_st, cme_sram_region) / 32; + cpmr_hdr->cme_pstate_offset = offsetof(struct cpmr_st, cme_sram_region) + cme_hdr->pstate_region_offset; + cpmr_hdr->cme_pstate_len = cme_hdr->pstate_region_len; + cpmr_hdr->img_len = cme_hdr->hcode_len; + cpmr_hdr->core_scom_offset = offsetof(struct cpmr_st, core_scom); + cpmr_hdr->core_scom_len = CORE_SCOM_RESTORE_SIZE; // 6k + cpmr_hdr->core_max_scom_entry = 15; + + if (cme_hdr->common_ring_len) { + cpmr_hdr->cme_common_ring_offset = offsetof(struct cpmr_st, cme_sram_region) + + cme_hdr->common_ring_offset; + cpmr_hdr->cme_common_ring_len = cme_hdr->common_ring_len; + } + + if (cme_hdr->max_spec_ring_len) { + cpmr_hdr->core_spec_ring_offset = ALIGN_UP(cpmr_hdr->img_offset * 32 + + cpmr_hdr->img_len + + cpmr_hdr->cme_pstate_len + + cpmr_hdr->cme_common_ring_len, + 32) / 32; + cpmr_hdr->core_spec_ring_len = cme_hdr->max_spec_ring_len; + } + + cme_hdr->custom_length = + ALIGN_UP(cme_hdr->max_spec_ring_len * 32 + sizeof(LocalPstateParmBlock), 32) / 32; + + for (int cme = 0; cme < MAX_CORES_PER_CHIP/2; cme++) { + /* + * CME index/position is the same as EX, however this means that Pstate + * offset is overwritten when there are 2 functional CMEs in one quad. + * Maybe we can use "for each functional quad" instead, but maybe + * 'cme * cme_hdr->custom_length' points to different data, based on + * whether there is one or two functional CMEs (is that even possible?). 
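+	 * Since the same LocalPstateParmBlock is copied to every functional CME,
+	 * whichever CME writes the offset last still points at identical data.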
+ */ + if (!IS_EX_FUNCTIONAL(cme, cores)) + continue; + + /* Assuming >= CPMR_2.0 */ + cpmr_hdr->quad_pstate_offset [cme/2] = cpmr_hdr->core_spec_ring_offset + + cpmr_hdr->core_spec_ring_len + + cme * cme_hdr->custom_length; + } + + /* Updating CME Image header */ + /* Assuming >= CPMR_2.0 */ + cme_hdr->scom_offset = + ALIGN_UP(cme_hdr->pstate_offset * 32 + sizeof(LocalPstateParmBlock), 32) / 32; + + /* Adding to it instance ring length which is already a multiple of 32B */ + cme_hdr->scom_len = 512; + + /* Timebase frequency */ + cme_hdr->timebase_hz = powerbus_cfg()->fabric_freq * MHz / 64; + + /* + * Update QPMR Header area in HOMER + * In Hostboot, qpmrHdr is a copy of the header, it doesn't operate on HOMER + * directly until now - it fills the following fields in the copy and then + * does memcpy() to HOMER. As BAR is set up in next istep, I don't see why. + */ + homer->qpmr.sgpe.header.sram_img_size = + homer->qpmr.sgpe.header.img_len + + homer->qpmr.sgpe.header.common_ring_len + + homer->qpmr.sgpe.header.spec_ring_len; + homer->qpmr.sgpe.header.max_quad_restore_entry = 255; + homer->qpmr.sgpe.header.build_ver = 3; + struct sgpe_img_header *sgpe_hdr = (struct sgpe_img_header *) + &homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + sgpe_hdr->scom_mem_offset = offsetof(struct homer_st, qpmr.cache_scom_region); + + /* Update PPMR Header area in HOMER */ + struct pgpe_img_header *pgpe_hdr = (struct pgpe_img_header *) + &homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE]; + pgpe_hdr->core_throttle_assert_cnt = 0; + pgpe_hdr->core_throttle_deassert_cnt = 0; + pgpe_hdr->ivpr_addr = 0xFFF20000; // OCC_SRAM_PGPE_BASE_ADDR + // = homer->ppmr.header.sram_region_start + pgpe_hdr->gppb_sram_addr = 0; // set by PGPE Hcode (or not?) + pgpe_hdr->hcode_len = homer->ppmr.header.hcode_len; + /* FIXME: remove hardcoded HOMER in OCI PBA */ + pgpe_hdr->gppb_mem_offset = 0x80000000 + offsetof(struct homer_st, ppmr) + + homer->ppmr.header.gppb_offset; + pgpe_hdr->gppb_len = homer->ppmr.header.gppb_len; + pgpe_hdr->gen_pstables_mem_offset = 0x80000000 + offsetof(struct homer_st, ppmr) + + homer->ppmr.header.pstables_offset; + pgpe_hdr->gen_pstables_len = homer->ppmr.header.pstables_len; + pgpe_hdr->occ_pstables_sram_addr = 0; + pgpe_hdr->occ_pstables_len = 0; + pgpe_hdr->beacon_addr = 0; + pgpe_hdr->quad_status_addr = 0; + pgpe_hdr->wof_state_address = 0; + pgpe_hdr->wof_values_address = 0; + pgpe_hdr->req_active_quad_address = 0; + pgpe_hdr->wof_table_addr = homer->ppmr.header.wof_table_offset; + pgpe_hdr->wof_table_len = homer->ppmr.header.wof_table_len; + pgpe_hdr->timebase_hz = 1866 * MHz / 64; + pgpe_hdr->doptrace_offset = homer->ppmr.header.doptrace_offset; + pgpe_hdr->doptrace_len = homer->ppmr.header.doptrace_len; + + /* Update magic numbers */ + homer->qpmr.sgpe.header.magic = 0x51504d525f312e30; // QPMR_1.0 + homer->cpmr.header.magic = 0x43504d525f322e30; // CPMR_2.0 + homer->ppmr.header.magic = 0x50504d525f312e30; // PPMR_1.0 + sgpe_hdr->magic = 0x534750455f312e30; // SGPE_1.0 + cme_hdr->magic = 0x434d455f5f312e30; // CME__1.0 + pgpe_hdr->magic = 0x504750455f312e30; // PGPE_1.0 +} + /* * This logic is for SMF disabled only! 
*/ @@ -1334,13 +1455,7 @@ void build_homer_image(void *homer_bar) build_parameter_blocks(homer, cores); - // updateCpmrCmeRegion(); - - // Update QPMR Header area in HOMER - // updateQpmrHeader(); - - // Update PPMR Header area in HOMER - // updatePpmrHeader(); + update_headers(homer, cores); // Update L2 Epsilon SCOM Registers // populateEpsilonL2ScomReg( pChipHomer ); @@ -1354,8 +1469,10 @@ void build_homer_image(void *homer_bar) // Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register // populateNcuRngBarScomReg( pChipHomer, i_procTgt ); - // Update CME/SGPE Flags in respective image header. - // updateImageFlags( pChipHomer, i_procTgt ); + /* Update flag fields in image headers */ + ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; + ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xF100; + ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xF032; // Set the Fabric IDs // setFabricIds( pChipHomer, i_procTgt ); From e897b937bd657400653e25f10527f4f853546fbb Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 20 Aug 2021 17:53:37 +0300 Subject: [PATCH 076/213] soc/power9/rom_media.c: add WOFDATA partition Change-Id: I41a7bfe2b23d5c25932a15e4317e239a2d6b780e Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/rom_media.c | 28 ++++++++++++++++++++++++++-- src/soc/ibm/power9/wof.h | 14 ++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 src/soc/ibm/power9/wof.h diff --git a/src/soc/ibm/power9/rom_media.c b/src/soc/ibm/power9/rom_media.c index ccc877da1cf..eb38e388473 100644 --- a/src/soc/ibm/power9/rom_media.c +++ b/src/soc/ibm/power9/rom_media.c @@ -12,15 +12,15 @@ #include #include #include "../../../../3rdparty/ffs/ffs/ffs.h" +#include "wof.h" #define LPC_FLASH_MIN (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_FW_SPACE) #define LPC_FLASH_TOP (LPC_FLASH_MIN + FW_SPACE_SIZE) #define CBFS_PARTITION_NAME "HBI" - #define MEMD_PARTITION_NAME "MEMD" - #define MVPD_PARTITION_NAME "MVPD" +#define WOF_PARTITION_NAME "WOFDATA" /* ffs_entry is not complete in included ffs.h, it lacks user data layout. 
* See https://github.com/open-power/skiboot/blob/master/libflash/ffs.h */ @@ -489,6 +489,30 @@ const struct region_device *mvpd_device_ro(void) return &mvpd_mdev.rdev; } +static struct mmap_helper_region_device wof_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void wof_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(WOF_PARTITION_NAME, &wof_mdev); + + init_done = 1; +} + +void wof_device_unmount(void) +{ + wof_mdev.rdev.ops = &no_rdev_ops; +} + +const struct region_device *wof_device_ro(void) +{ + return &wof_mdev.rdev; +} + static struct mmap_helper_region_device boot_mdev = MMAP_HELPER_DEV_INIT( &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); diff --git a/src/soc/ibm/power9/wof.h b/src/soc/ibm/power9/wof.h new file mode 100644 index 00000000000..9bf4f51d5f9 --- /dev/null +++ b/src/soc/ibm/power9/wof.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_WOF_H +#define __SOC_IBM_POWER9_WOF_H + +struct region_device; + +void wof_device_init(void); + +void wof_device_unmount(void); + +const struct region_device *wof_device_ro(void); + +#endif /* __SOC_IBM_POWER9_WOF_H */ From 556e97fe09d92c068394be08e21dd13890cad399 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 May 2022 15:38:53 +0300 Subject: [PATCH 077/213] soc/power9/pstates.c: populate PPMR WOF tables from WOFDATA This finds suitable table in WOFDATA partition and pulls VFRT entries out of it performing necessary conversions on them. Change-Id: Icb956e9be5ed811a9e18bf2dd114771dce182971 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/pstates.c | 291 ++++++++++++++++++++++++++++++----- 1 file changed, 252 insertions(+), 39 deletions(-) diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c index 776f38e0052..2a21ae53d12 100644 --- a/src/soc/ibm/power9/pstates.c +++ b/src/soc/ibm/power9/pstates.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include "homer.h" +#include "wof.h" +#include #include #include #include @@ -12,6 +14,8 @@ #define MAX_UT_PSTATES 64 // Oversized #define FREQ_STEP_KHZ 16666 +#define SYSTEM_VFRT_SIZE 128 + #ifndef _BIG_ENDIAN #error "_BIG_ENDIAN not defined" #endif @@ -47,6 +51,102 @@ static ResonantClockingSetup resclk = 580 // L3 voltage threshold }; +#define WOF_IMAGE_MAGIC_VALUE ((uint32_t)0x57544948) // "WTIH" +#define WOF_IMAGE_VERSION ((uint32_t)1) + +#define WOF_TABLES_MAGIC_VALUE ((uint32_t)0x57465448) // "WFTH" +#define WOF_TABLES_VERSION ((uint32_t)2) +#define WOF_TABLES_MAX_VERSION WOF_TABLES_VERSION + +/* + * WOF image: + * - header (struct wof_image_hdr) + * - section table + * - array of WOF tables + * + * WOF table: + * - wof_tables_hdr for header + * - data begins with vfrt_hdr + */ + +/* Top-level header for WOF */ +struct wof_image_hdr { + uint32_t magic_number; // WOF_IMAGE_MAGIC_VALUE + uint8_t version; // WOF_IMAGE_VERSION + uint8_t entry_count; // Number of entries in section table + uint32_t offset; // BE offset to section table from image start +} __attribute__((__packed__)); + +/* Entry of WOF's section table */ +struct wof_image_entry { + uint32_t offset; // BE offset to section from image start + uint32_t size; // BE size of the section +} __attribute__((__packed__)); + +/* Header of WOF's section */ +struct wof_tables_hdr { + uint32_t magic_number; // WOF_TABLES_MAGIC_VALUE + + uint16_t reserved; + uint8_t mode; // version 1 = 0; version 2 = 1 or 2; WOF_MODE_* + uint8_t version; + + uint16_t vfrt_block_size; + 
uint16_t vfrt_block_header_size; + uint16_t vfrt_data_size; + uint8_t quads_active_size; + uint8_t core_count; + uint16_t vdn_start; // CeffVdn value represented by index 0 (in 0.01%) + uint16_t vdn_step; // CeffVdn step value for each CeffVdn index (in 0.01%) + uint16_t vdn_size; // Number of CeffVdn indexes + uint16_t vdd_start; // CeffVdd value represented by index 0 (in 0.01%) + uint16_t vdd_step; // CeffVdd step value for each CeffVdd index (in 0.01%) + uint16_t vdd_size; // Number of CeffVdd indexes + uint16_t vratio_start; // Vratio value represented by index 0 (in 0.01%) + uint16_t vratio_step; // Vratio step value for each CeffVdd index (in 0.01%) + uint16_t vratio_size; // Number of Vratio indexes + uint16_t fratio_start; // Fratio value represented by index 0 (in 0.01%) + uint16_t fratio_step; // Fratio step value for each CeffVdd index (in 0.01%) + uint16_t fratio_size; // Number of Fratio indexes + + uint16_t vdn_percent[8]; // Currently unused + + uint16_t socket_power_w; + uint16_t nest_frequency_mhz; + uint16_t sort_power_freq_mhz; // Either the Nominal or Turbo #V frequency + uint16_t rdp_capacity; // Regulator Design Point Capacity (in Amps) + + char wof_table_source_tag[8]; + char package_name[16]; +} __attribute__((packed, aligned(128))); + +#define VFRT_HDR_MAGIC 0x5654 // "VT" +#define VFRT_HDR_VERSION 2 + +/* Header of data within a WOF table */ +struct vfrt_hdr { + uint16_t magic_number; // VFRT_HDR_MAGIC + uint16_t reserved; + // bits 4-7 are type: 0 -- "System", 1 -- "Homer" + // bits 0-3 are version: 1 -- 12 row(voltage) X 11 column(freq) + // 2 -- 24 row(Voltage) X 5 column (Freq) + uint8_t type_version; + uint8_t res_vdnId; // Vdn assumptions + uint8_t vddId_QAId; // Vdd assumptions + uint8_t rsvd_QAId; // bits 0-2: Quad Active assumptions +} __attribute__((packed)); + +/* Data is provided in 1/24ths granularity with adjustments for integer representation */ +#define VFRT_VRATIO_SIZE 24 +/* 5 steps down from 100% is Fratio_step sizes */ +#define VFRT_FRATIO_SIZE 5 + +/* Form of VFRT data as stored in HOMER */ +struct homer_vfrt_entry { + struct vfrt_hdr vfrt_hdr; + uint8_t pstate[VFRT_FRATIO_SIZE * VFRT_VRATIO_SIZE]; +} __attribute__((packed, aligned(256))); + static void copy_poundW_v2_to_v3(PoundW_data_per_quad *v3, PoundW_data *v2) { memset(v3, 0, sizeof(PoundW_data_per_quad)); @@ -326,6 +426,153 @@ static void update_resclk(int ref_freq_khz) } } +static int32_t wof_find(struct wof_image_entry *entries, uint8_t entry_count, + uint32_t core_count, + const struct voltage_bucket_data *poundV_bucket) +{ + const struct region_device *wof_device = wof_device_ro(); + + const uint16_t socket_power_w = poundV_bucket->sort_power_turbo; + const uint16_t sort_power_freq_mhz = poundV_bucket->turbo.freq; + + int32_t i = 0; + + for (i = 0; i < entry_count; ++i) { + uint8_t tbl_hdr_buf[sizeof(struct wof_tables_hdr)]; + struct wof_tables_hdr *tbl_hdr = (void *)tbl_hdr_buf; + uint8_t mode = 0; + + if (rdev_readat(wof_device, tbl_hdr_buf, entries[i].offset, + sizeof(tbl_hdr_buf)) != sizeof(tbl_hdr_buf)) + die("Failed to read a WOF tables header!\n"); + + if (tbl_hdr->magic_number != WOF_TABLES_MAGIC_VALUE) + die("Incorrect magic value of WOF table header!\n"); + + if (tbl_hdr->version == 0 || tbl_hdr->version > WOF_TABLES_MAX_VERSION) + die("Unsupported version of WOF table header: %d!\n", + tbl_hdr->version); + + mode = (tbl_hdr->mode & 0x0F); + if (tbl_hdr->version >= WOF_TABLES_VERSION && + mode != WOF_MODE_UNKNOWN && + mode != WOF_MODE_TURBO) + continue; + + if 
(tbl_hdr->core_count == core_count && + tbl_hdr->socket_power_w == socket_power_w && + tbl_hdr->sort_power_freq_mhz == sort_power_freq_mhz) + /* Found a suitable WOF tables entry */ + return i; + } + + return -1; +} + +static void import_vfrt(const struct vfrt_hdr *src, struct homer_vfrt_entry *dst, + const OCCPstateParmBlock *oppb) +{ + const uint32_t ref_freq = oppb->frequency_max_khz; + const uint32_t freq_step = oppb->frequency_step_khz; + + uint16_t i = 0; + uint8_t *freq = NULL; + + if (src->magic_number != VFRT_HDR_MAGIC) + die("Invalid magic value of a VFRT header: %d!\n", src->magic_number); + + if ((src->type_version & 0x0F) != VFRT_HDR_VERSION) { + die("Expected VFRT header version %d, got %d!", + VFRT_HDR_VERSION, (src->type_version & 0x0F)); + } + + dst->vfrt_hdr = *src; + /* Flip type from "System" to "Homer" */ + dst->vfrt_hdr.type_version |= 0x10; + + freq = (uint8_t *)src + sizeof(*src); + for (i = 0; i < VFRT_FRATIO_SIZE * VFRT_VRATIO_SIZE; ++i) { + const uint32_t freq_khz = freq[i] * freq_step + 1000000; + + /* Round towards zero */ + dst->pstate[i] = (ref_freq - freq_khz) / freq_step; + } +} + +static void wof_extract(uint8_t *buf, struct wof_image_entry entry, + const OCCPstateParmBlock *oppb) +{ + const struct region_device *wof_device = wof_device_ro(); + + struct wof_tables_hdr *tbl_hdr = NULL; + + uint32_t i; + + uint8_t *table_data = NULL; + uint8_t *wof_vfrt_entry = NULL; + struct homer_vfrt_entry *homer_vfrt_entry = NULL; + + table_data = rdev_mmap(wof_device, entry.offset, entry.size); + if (!table_data) + die("Failed to map WOF section!\n"); + + tbl_hdr = (void *)table_data; + memcpy(buf, tbl_hdr, sizeof(*tbl_hdr)); + + wof_vfrt_entry = table_data + sizeof(*tbl_hdr); + homer_vfrt_entry = (struct homer_vfrt_entry *)(buf + sizeof(*tbl_hdr)); + + for (i = 0; i < tbl_hdr->vdn_size * tbl_hdr->vdd_size * MAX_QUADS_PER_CHIP; ++i) { + import_vfrt((const struct vfrt_hdr *)wof_vfrt_entry, homer_vfrt_entry, oppb); + + wof_vfrt_entry += SYSTEM_VFRT_SIZE; + ++homer_vfrt_entry; + } + + if (rdev_munmap(wof_device, table_data)) + die("Failed to unmap WOF section!\n"); +} + +static void wof_init(uint8_t *buf, uint32_t core_count, + const OCCPstateParmBlock *oppb, + const struct voltage_bucket_data *poundV_bucket) +{ + const struct region_device *wof_device = NULL; + + uint8_t hdr_buf[sizeof(struct wof_image_hdr)]; + struct wof_image_hdr *hdr = (void *)hdr_buf; + + struct wof_image_entry *entries = NULL; + int32_t entry_idx = 0; + + wof_device_init(); + wof_device = wof_device_ro(); + + if (rdev_readat(wof_device, hdr_buf, 0, sizeof(hdr_buf)) != sizeof(hdr_buf)) + die("Failed to read WOF header!\n"); + + if (hdr->magic_number != WOF_IMAGE_MAGIC_VALUE) + die("Incorrect magic value of WOF header!\n"); + + if (hdr->version != WOF_IMAGE_VERSION) { + die("Expected WOF header version %d, got %d!", + WOF_IMAGE_VERSION, hdr->version); + } + + entries = rdev_mmap(wof_device, hdr->offset, hdr->entry_count * sizeof(entries)); + if (!entries) + die("Failed to map section table of WOF!\n"); + + entry_idx = wof_find(entries, hdr->entry_count, core_count, poundV_bucket); + if (entry_idx == -1) + die("Failed to find a matching WOF tables section!\n"); + + wof_extract(buf, entries[entry_idx], oppb); + + if (rdev_munmap(wof_device, entries)) + die("Failed to unmap section table of WOF!\n"); +} + /* Assumption: no bias is applied to operating points */ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) { @@ -373,8 +620,7 @@ void build_parameter_blocks(struct homer_st 
*homer, uint64_t functional_cores) oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR 0 from talos.xml oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml - /* FIXME: uncomment after WOF_DATA is prepared */ - //oppb->wof.wof_enabled = 1; // Assuming wof_init() succeeds + oppb->wof.wof_enabled = 1; // Assuming wof_init() succeeds or dies oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR from talos.xml oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml /* @@ -766,43 +1012,10 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) ((GPPBOptionsPadUse *)&gppb->options.pad)->fields.good_cores_in_sort = oppb->iddq.good_normal_cores_per_sort; - /* TODO: WOF */ - //~ // ---------------- - //~ // WOF initialization - //~ // ---------------- - //~ wof_init(o_buf = &homer->ppmr.wof_tables): - //~ - Search for proper data in WOFDATA PNOR partition - //~ - WOFDATA is 3M, make sure CBFS_CACHE is big enough - //~ - search until match is found: - //~ - core count - //~ - socket power (nominal, as read from #V) - //~ - frequency (nominal, as read from #V) - //~ - if version >= WOF_TABLE_VERSION_POWERMODE (2): - //~ - mode matches current mode (WOF_MODE_NOMINAL = 1) or wildcard (WOF_MODE_UNKNOWN = 0) - //~ - structures used: - //~ - wofImageHeader_t from plat_wof_access.C - //~ - check magic and version - //~ - wofSectionTableEntry_t from plat_wof_access.C - //~ - WofTablesHeader_t from p9_pstates_common.h - //~ memcpy(o_buf, &WofTablesHeader_t /* for found entry */, wofSectionTableEntry_t[found_entry_idx].size) - - //~ // Just the header, rest needs parsing - //~ memcpy(homer->ppmr.wof_tables, o_buf, sizeof(WofTablesHeader_t)) - - //~ for vfrt_index in 0..((WofTablesHeader_t*)o_buf->vdn_size * (WofTablesHeader_t*)o_buf->vdd_size * ACTIVE_QUADS) -1: - //~ src = o_buf + sizeof(WofTablesHeader_t) + vfrt_index * 128 /* vRTF size */ - //~ dst = homer->ppmr.wof_tables + sizeof(WofTablesHeader_t) + vfrt_index * sizeof(HomerVFRTLayout_t) /* 256B */ - //~ update_vfrt (src, dst): - //~ - Assumption: no bias, makes this function so much easier - //~ // Data in src has 8B header followed by 5*24 bytes of frequency information, such that freq = value*step_size + 1GHz. - //~ // Data in dst has (almost) the same header followed by 5*24 bytes of Pstates. 
- //~ // Copy header
- //~ memcpy(dst, src, 8)
- //~ // Flip type from System to Homer
- //~ dst.type_version |= 0x10
- //~ assert(dst.magic = "VT")
- //~ for idx in 0..5*24 -1:
- //~ dst[8+idx] = freq_to_pstate(src[8+idx]) // rounded properly
+ wof_init(homer->ppmr.wof_tables,
+ __builtin_popcount((uint32_t)functional_cores)
+ + __builtin_popcount(functional_cores >> 32),
+ oppb, &poundV_bucket);

 /* Copy LPPB to functional CMEs */
 for (int cme = 1; cme < MAX_CMES_PER_CHIP; cme++) {

From 2a2f56b15dd89f35848a8164a94efe376bcf6b3d Mon Sep 17 00:00:00 2001
From: Sergii Dmytruk
Date: Mon, 16 May 2022 20:14:21 +0300
Subject: [PATCH 078/213] soc/power9/homer.c: implement populate_ncu_rng_bar_scom_reg()

Change-Id: Ie1de9551f752d239df5a618020106cb7e566a25c
Signed-off-by: Sergii Dmytruk
---
 src/soc/ibm/power9/homer.c | 150 ++++++++++++++++++++++++++++++++++++-
 src/soc/ibm/power9/homer.h | 8 ++
 2 files changed, 156 insertions(+), 2 deletions(-)

diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c
index d23a525c4a7..4159febd48f 100644
--- a/src/soc/ibm/power9/homer.c
+++ b/src/soc/ibm/power9/homer.c
@@ -17,6 +17,23 @@
 #include "tor.h"
 #include "xip.h"

+#define EX_0_NCU_DARN_BAR_REG 0x10011011
+
+#define MAX_EQ_SCOM_ENTRIES 31
+#define MAX_L2_SCOM_ENTRIES 16
+#define MAX_L3_SCOM_ENTRIES 16
+
+#define PPC_PLACE(val, pos, len) \
+ PPC_SHIFT((val) & ((1 << ((len) + 1)) - 1), ((pos) + ((len) - 1)))
+
+/* Subsections of STOP image that contain SCOM entries */
+enum scom_section {
+ STOP_SECTION_CORE_SCOM,
+ STOP_SECTION_EQ_SCOM,
+ STOP_SECTION_L2,
+ STOP_SECTION_L3,
+};
+
 struct ring_data {
 void *rings_buf;
 void *work_buf1;
@@ -54,6 +71,23 @@ struct sgpe_inst_ring_list {
 uint8_t payload[];
 };

+struct scom_entry_t {
+ uint32_t hdr;
+ uint32_t address;
+ uint64_t data;
+};
+
+struct stop_cache_section_t {
+ struct scom_entry_t non_cache_area[MAX_EQ_SCOM_ENTRIES];
+ struct scom_entry_t l2_cache_area[MAX_L2_SCOM_ENTRIES];
+ struct scom_entry_t l3_cache_area[MAX_L3_SCOM_ENTRIES];
+};
+
+enum scom_operation {
+ SCOM_APPEND,
+ SCOM_REPLACE
+};
+
 enum operation_type {
 COPY,
 FIND
@@ -183,6 +217,7 @@ static const uint32_t _SMF = 0x5F534D46; // "_SMF"

 static const uint32_t ATTN_OP = 0x00000200;
 static const uint32_t BLR_OP = 0x4E800020;
+static const uint32_t ORI_OP = 0x60000000;
 static const uint32_t SKIP_SPR_REST_INST = 0x4800001C;
 static const uint32_t MR_R0_TO_R10_OP = 0x7C0A0378;
 static const uint32_t MR_R0_TO_R21_OP = 0x7C150378;
@@ -1256,6 +1291,117 @@ static void layout_rings_for_sgpe(struct homer_st *homer,
 }
 }

+static void stop_save_scom(struct homer_st *homer, uint32_t scom_address,
+ uint64_t scom_data, enum scom_section section,
+ enum scom_operation operation)
+{
+ enum {
+ STOP_API_VER = 0x00,
+ SCOM_ENTRY_START = 0xDEADDEAD,
+ };
+
+ chiplet_id_t chiplet_id = (scom_address >> 24) & 0x3F;
+ uint32_t max_scom_restore_entries = 0;
+ struct stop_cache_section_t *stop_cache_scom = NULL;
+ struct scom_entry_t *scom_entry = NULL;
+ struct scom_entry_t *nop_entry = NULL;
+ struct scom_entry_t *matching_entry = NULL;
+ struct scom_entry_t *end_entry = NULL;
+ struct scom_entry_t *entry = NULL;
+ uint32_t entry_limit = 0;
+
+ if (chiplet_id >= EC00_CHIPLET_ID) {
+ uint32_t offset = (chiplet_id - EC00_CHIPLET_ID)
+ * CORE_SCOM_RESTORE_SIZE_PER_CORE;
+ scom_entry = (struct scom_entry_t *)&homer->cpmr.core_scom[offset];
+ max_scom_restore_entries = homer->cpmr.header.core_max_scom_entry;
+ } else {
+ uint32_t offset = (chiplet_id - EP00_CHIPLET_ID)
+ * QUAD_SCOM_RESTORE_SIZE_PER_QUAD;
+ stop_cache_scom =
+ (struct stop_cache_section_t *)&homer->qpmr.cache_scom_region[offset]; + max_scom_restore_entries = homer->qpmr.sgpe.header.max_quad_restore_entry; + } + + if (stop_cache_scom == NULL) + die("Failed to prepare for updating STOP SCOM\n"); + + switch (section) { + case STOP_SECTION_CORE_SCOM: + entry_limit = max_scom_restore_entries; + break; + case STOP_SECTION_EQ_SCOM: + scom_entry = stop_cache_scom->non_cache_area; + entry_limit = MAX_EQ_SCOM_ENTRIES; + break; + default: + die("Unhandled STOP image section.\n"); + break; + } + + for (uint32_t i = 0; i < entry_limit; ++i) { + uint32_t entry_address = scom_entry[i].address; + uint32_t entry_hdr = scom_entry[i].hdr; + + if (entry_address == scom_address && matching_entry == NULL) + matching_entry = &scom_entry[i]; + + if ((entry_address == ORI_OP || entry_address == ATTN_OP || + entry_address == BLR_OP) && nop_entry == NULL) + nop_entry = &scom_entry[i]; + + /* If entry is either 0xDEADDEAD or has SCOM entry limit in LSB of its header, + * the place is already occupied */ + if (entry_hdr == SCOM_ENTRY_START || (entry_hdr & 0x000000FF)) + continue; + + end_entry = &scom_entry[i]; + break; + } + + if (matching_entry == NULL && end_entry == NULL) + die("Failed to find SCOM entry in STOP image.\n"); + + entry = end_entry; + if (operation == SCOM_APPEND && nop_entry != NULL) + entry = nop_entry; + else if (operation == SCOM_REPLACE && matching_entry != NULL) + entry = matching_entry; + + if (entry == NULL) + die("Failed to insert SCOM entry in STOP image.\n"); + + entry->hdr = (0x000000FF & max_scom_restore_entries) + | ((STOP_API_VER & 0x7) << 28); + entry->address = scom_address; + entry->data = scom_data; +} + +static void populate_ncu_rng_bar_scom_reg(struct homer_st *homer) +{ + enum { NX_RANGE_BAR_ADDR_OFFSET = 0x00000302031D0000 }; + + uint8_t ex = 0; + + uint64_t regNcuRngBarData = PPC_PLACE(0x0, 8, 5) // system ID + | PPC_PLACE(0x3, 13, 2) // msel + | PPC_PLACE(0x0, 15, 4) // group ID + | PPC_PLACE(0x0, 19, 3); // chip ID + + regNcuRngBarData += NX_RANGE_BAR_ADDR_OFFSET; + + for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { + /* Create restore entry for NCU RNG register */ + + uint32_t scom_addr = EX_0_NCU_DARN_BAR_REG + | ((ex / 2) << 24) + | ((ex % 2) ? 
0x0400 : 0x0000); + + stop_save_scom(homer, scom_addr, regNcuRngBarData, + STOP_SECTION_EQ_SCOM, SCOM_REPLACE); + } +} + static void update_headers(struct homer_st *homer, uint64_t cores) { /* @@ -1466,8 +1612,8 @@ void build_homer_image(void *homer_bar) // Update L3 Refresh Timer Control SCOM Registers // populateL3RefreshScomReg( pChipHomer, i_procTgt); - // Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register - // populateNcuRngBarScomReg( pChipHomer, i_procTgt ); + /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ + populate_ncu_rng_bar_scom_reg(homer); /* Update flag fields in image headers */ ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 64d8d8af828..f7d6b7d8435 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -47,6 +47,14 @@ #define OCC_WOF_TABLES_OFFSET (768 * KiB) #define PPMR_HEADER_SIZE (1 * KiB) +#define SCOM_RESTORE_ENTRY_SIZE 16 // 4B pad, 4B address, 8B data +#define QUAD_SCOM_RESTORE_REGS_PER_QUAD 256 +#define QUAD_SCOM_RESTORE_SIZE_PER_QUAD \ + (SCOM_RESTORE_ENTRY_SIZE * QUAD_SCOM_RESTORE_REGS_PER_QUAD) +#define CORE_SCOM_RESTORE_REGS_PER_CORE 16 +#define CORE_SCOM_RESTORE_SIZE_PER_CORE \ + (SCOM_RESTORE_ENTRY_SIZE * CORE_SCOM_RESTORE_REGS_PER_CORE) + #define MAX_CORES_PER_CHIP 24 #define MAX_CORES_PER_EX 2 #define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) From 8d6c3d5f6978116b01dfd80ff34afbfc327797c3 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 8 Sep 2021 18:03:57 +0300 Subject: [PATCH 079/213] soc/power9/homer.c: implement 3 populate_*_scom_reg() * populate_epsilon_l2_scom_reg() * populate_epsilon_l3_scom_reg() * populate_l3_refresh_scom_reg() Change-Id: Ibc9c36fcee79fa4c720f9f9cbc09bfb2130c7a36 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 138 +++++++++++++++++++++++++++++++++++-- 1 file changed, 131 insertions(+), 7 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 4159febd48f..99cc72c4012 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -17,12 +17,24 @@ #include "tor.h" #include "xip.h" +#define L2_EPS_DIVIDER 1 +#define L3_EPS_DIVIDER 1 + +#define EX_L2_RD_EPS_REG 0x10010810 +#define EX_L2_WR_EPS_REG 0x10010811 +#define EX_L3_RD_EPS_REG 0x10011829 +#define EX_L3_WR_EPS_REG 0x1001182A +#define EX_DRAM_REF_REG 0x1001180F #define EX_0_NCU_DARN_BAR_REG 0x10011011 +#define ODD_EVEN_EX_POS 0x00000400 + #define MAX_EQ_SCOM_ENTRIES 31 #define MAX_L2_SCOM_ENTRIES 16 #define MAX_L3_SCOM_ENTRIES 16 +#define QUAD_BIT_POS 24 + #define PPC_PLACE(val, pos, len) \ PPC_SHIFT((val) & ((1 << ((len) + 1)) - 1), ((pos) + ((len) - 1))) @@ -1377,6 +1389,121 @@ static void stop_save_scom(struct homer_st *homer, uint32_t scom_address, entry->data = scom_data; } +static void populate_epsilon_l2_scom_reg(struct homer_st *homer) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + + uint32_t eps_r_t0 = pb_cfg->eps_r[0] / 8 / L2_EPS_DIVIDER + 1; + uint32_t eps_r_t1 = pb_cfg->eps_r[1] / 8 / L2_EPS_DIVIDER + 1; + uint32_t eps_r_t2 = pb_cfg->eps_r[2] / 8 / L2_EPS_DIVIDER + 1; + + uint32_t eps_w_t0 = pb_cfg->eps_w[0] / 8 / L2_EPS_DIVIDER + 1; + uint32_t eps_w_t1 = pb_cfg->eps_w[1] / 8 / L2_EPS_DIVIDER + 1; + + uint64_t eps_r = PPC_PLACE(eps_r_t0, 0, 12) + | PPC_PLACE(eps_r_t1, 12, 12) + | PPC_PLACE(eps_r_t2, 24, 12); + + uint64_t eps_w = PPC_PLACE(eps_w_t0, 0, 12) + | PPC_PLACE(eps_w_t1, 12, 12) + | 
PPC_PLACE(L2_EPS_DIVIDER, 24, 4); + + uint8_t quad = 0; + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + uint32_t scom_addr; + + /* Create restore entry for epsilon L2 RD register */ + + scom_addr = (EX_L2_RD_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + /* Create restore entry for epsilon L2 WR register */ + + scom_addr = (EX_L2_WR_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + } +} + +static void populate_epsilon_l3_scom_reg(struct homer_st *homer) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + + uint32_t eps_r_t0 = pb_cfg->eps_r[0] / 8 / L3_EPS_DIVIDER + 1; + uint32_t eps_r_t1 = pb_cfg->eps_r[1] / 8 / L3_EPS_DIVIDER + 1; + uint32_t eps_r_t2 = pb_cfg->eps_r[2] / 8 / L3_EPS_DIVIDER + 1; + + uint32_t eps_w_t0 = pb_cfg->eps_w[0] / 8 / L3_EPS_DIVIDER + 1; + uint32_t eps_w_t1 = pb_cfg->eps_w[1] / 8 / L3_EPS_DIVIDER + 1; + + uint64_t eps_r = PPC_PLACE(eps_r_t0, 0, 12) + | PPC_PLACE(eps_r_t1, 12, 12) + | PPC_PLACE(eps_r_t2, 24, 12); + + uint64_t eps_w = PPC_PLACE(eps_w_t0, 0, 12) + | PPC_PLACE(eps_w_t1, 12, 12) + | PPC_PLACE(L2_EPS_DIVIDER, 30, 4); + + uint8_t quad = 0; + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + uint32_t scom_addr; + + /* Create restore entry for epsilon L2 RD register */ + + scom_addr = (EX_L3_RD_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + /* Create restore entry for epsilon L2 WR register */ + + scom_addr = (EX_L3_WR_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + } +} + +static void populate_l3_refresh_scom_reg(struct homer_st *homer, uint8_t dd) +{ + uint64_t refresh_val = 0x2000000000000000ULL; + + uint8_t quad = 0; + + /* ATTR_CHIP_EC_FEATURE_HW408892 === (DD <= 0x20) */ + if (powerbus_cfg()->fabric_freq >= 2000 && dd > 0x20) + refresh_val |= PPC_PLACE(0x2, 8, 4); + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + /* Create restore entry for L3 Refresh Timer Divider register */ + + uint32_t scom_addr = (EX_DRAM_REF_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, refresh_val, + STOP_SECTION_EQ_SCOM, SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, refresh_val, + STOP_SECTION_EQ_SCOM, SCOM_APPEND); + } +} + static void populate_ncu_rng_bar_scom_reg(struct homer_st *homer) { enum { NX_RANGE_BAR_ADDR_OFFSET = 0x00000302031D0000 }; @@ -1603,14 +1730,11 @@ void build_homer_image(void *homer_bar) update_headers(homer, cores); - // Update L2 Epsilon SCOM Registers - // populateEpsilonL2ScomReg( pChipHomer ); - - // Update L3 Epsilon SCOM Registers - // populateEpsilonL3ScomReg( pChipHomer ); + populate_epsilon_l2_scom_reg(homer); + populate_epsilon_l3_scom_reg(homer); - // Update L3 Refresh Timer Control SCOM Registers - // populateL3RefreshScomReg( pChipHomer, i_procTgt); + /* Update L3 Refresh Timer Control SCOM Registers */ + 
populate_l3_refresh_scom_reg(homer, dd); /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ populate_ncu_rng_bar_scom_reg(homer); From 22d0d8eb571b73307cceee173bca6857d37ef787 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 8 Sep 2021 15:42:43 +0200 Subject: [PATCH 080/213] soc/power9/homer.c: fill self-restore register values Signed-off-by: Krystian Hebel Change-Id: If8f528f30a46521e3594b6c39f840e9ae1f6b5c6 --- src/soc/ibm/power9/homer.c | 66 ++++++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 99cc72c4012..9ebcef3d649 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -332,11 +332,27 @@ static const uint16_t core_sprs[] = { }; static void build_self_restore(struct homer_st *homer, - struct xip_restore_header *rest, uint8_t dd) + struct xip_restore_header *rest, uint8_t dd, + uint64_t functional_cores) { /* Assumptions: SMT4 only, SMF available but disabled. */ size_t size; uint32_t *ptr; + const uint64_t hrmor = read_spr(SPR_HRMOR); + /* See cpu_winkle() */ + const uint64_t lpcr = + (read_spr(SPR_LPCR) + & ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE)) + | (SPR_LPCR_HVICE | SPR_LPCR_HVEE); + /* + * Timing facilities may be lost. During their restoration Large Decrementer + * in LPCR may be initially turned off, which may result in a spurious + * Decrementer Exception. Disable External Interrupts on self-restore, they + * will be re-enabled later by coreboot. + */ + const uint64_t msr = read_msr() & ~PPC_BIT(48); + /* Clear en_attn for HID */ + const uint64_t hid = read_spr(SPR_HID) & ~PPC_BIT(3); /* * Data in XIP has its first 256 bytes zeroed, reserved for header, so even @@ -372,20 +388,26 @@ static void build_self_restore(struct homer_st *homer, die("No _SMF magic number in self restore region\n"); ptr = (uint32_t *)homer->cpmr.core_self_restore; - for (size = 0; size < (96 * KiB) / sizeof(uint32_t); size++) { + for (size = 0; size < (192 * KiB) / sizeof(uint32_t); size++) { ptr[size] = ATTN_OP; } /* - * This loop combines two functions from hostboot: - * initSelfRestoreRegion() and initSelfSaveRestoreEntries(). The second one - * writes only sections for functional cores, code below does it for all. - * This will take more time, but makes the code easier to understand. - * - * TODO: check if we can skip both cpureg and save_self for nonfunctional - * cores + * This loop combines three functions from hostboot: + * initSelfRestoreRegion(), initSelfSaveRestoreEntries() and + * applyHcodeGenCpuRegs(). There is inconsistency as for calling them for + * all cores vs only functional ones. As far as I can tell, cores are waken + * based on OCC CCSR register, so nonfunctional ones should be skipped and + * don't need any self-restore code. 
*/ for (int core = 0; core < MAX_CORES_PER_CHIP; core++) { + /* + * TODO: test if we can skip both cpureg and save_self for nonfunctional + * cores + */ + if (!IS_EC_FUNCTIONAL(core, functional_cores)) + continue; + struct smf_core_self_restore *csr = &homer->cpmr.core_self_restore[core]; uint32_t *csa = csr->core_save_area; @@ -409,8 +431,17 @@ static void build_self_restore(struct homer_st *homer, if (i > 7) tsa_key += 0x14; - add_init_cpureg_entry(csr->thread_restore_area[thread], - thread_sprs[i], 0, 1); + if (thread_sprs[i] == SPR_LPCR) { + add_init_cpureg_entry(csr->thread_restore_area[thread], + thread_sprs[i], lpcr, 0); + } else if (thread_sprs[i] == SPR_MSR && thread == 0) { + /* One MSR per core, restored last so must (?) be here */ + add_init_cpureg_entry(csr->thread_restore_area[thread], + thread_sprs[i], msr, 0); + } else { + add_init_cpureg_entry(csr->thread_restore_area[thread], + thread_sprs[i], 0, 1); + } add_init_save_self_entry(&tsa, tsa_key); } @@ -421,7 +452,16 @@ static void build_self_restore(struct homer_st *homer, csr->core_restore_area[0] = BLR_OP; *csa++ = MFLR_R30_OP; for (int i = 0; i < ARRAY_SIZE(core_sprs); i++) { - add_init_cpureg_entry(csr->core_restore_area, core_sprs[i], 0, 1); + if (core_sprs[i] == SPR_HRMOR || core_sprs[i] == SPR_URMOR) { + add_init_cpureg_entry(csr->core_restore_area, core_sprs[i], + hrmor, 0); + } else if (core_sprs[i] == SPR_HID) { + add_init_cpureg_entry(csr->core_restore_area, core_sprs[i], + hid, 0); + } else { + add_init_cpureg_entry(csr->core_restore_area, core_sprs[i], + 0, 1); + } /* * HID through PTCR: key = 0x15..0x18 * HRMOR and URMOR are skipped. @@ -1704,7 +1744,7 @@ void build_homer_image(void *homer_bar) build_self_restore(homer, (struct xip_restore_header *)(homer_bar + hw->restore.offset), - dd); + dd, cores); build_cme(homer, (struct xip_cme_header *)(homer_bar + hw->cme.offset), dd); From 9351b512bf2d08733d990c92231f065832ed8ca2 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 2 Sep 2021 16:35:08 +0200 Subject: [PATCH 081/213] soc/power9/xive.c: implement interrupt controller initialization Signed-off-by: Krystian Hebel Change-Id: I02ea5dc146a39e90d5885e98ecb795150f16d864 --- src/arch/ppc64/Makefile.inc | 2 +- src/soc/ibm/power9/Makefile.inc | 2 + src/soc/ibm/power9/homer.c | 2 +- src/soc/ibm/power9/homer.h | 1 + src/soc/ibm/power9/int_vectors.S | 139 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/xive.c | 112 +++++++++++++++++++++++++ 6 files changed, 256 insertions(+), 2 deletions(-) create mode 100644 src/soc/ibm/power9/int_vectors.S create mode 100644 src/soc/ibm/power9/xive.c diff --git a/src/arch/ppc64/Makefile.inc b/src/arch/ppc64/Makefile.inc index 9a89e00d73f..144a04ec091 100644 --- a/src/arch/ppc64/Makefile.inc +++ b/src/arch/ppc64/Makefile.inc @@ -2,7 +2,7 @@ ppc64_flags = -I$(src)/arch/ppc64/ -mbig-endian -mcpu=power8 -mtune=power8 -mno-pointers-to-nested-functions -ppc64_asm_flags = +ppc64_asm_flags = -Wa,--fatal-warnings ################################################################################ ## bootblock diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 1da693c2efc..46b46104783 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -36,5 +36,7 @@ ramstage-y += vpd.c ramstage-y += tor.c ramstage-y += powerbus.c ramstage-y += pstates.c +ramstage-y += xive.c +ramstage-y += int_vectors.S endif diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 9ebcef3d649..7c8c5387276 100644 --- 
a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -946,7 +946,7 @@ static void istep_16_1(int this_core) */ *(volatile uint32_t *)0xE40 = 0x48000000; - /* TODO: configure_xive(this_core); */ + configure_xive(this_core); printk(BIOS_ERR, "XIVE configured, enabling External Interrupt\n"); write_msr(read_msr() | PPC_BIT(48)); diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index f7d6b7d8435..1c190bdd090 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -330,5 +330,6 @@ check_member(homer_st, ppmr, 3 * MiB); #define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores); +void configure_xive(int tgt_core); #endif /* __SOC_IBM_POWER9_HOMER_H */ diff --git a/src/soc/ibm/power9/int_vectors.S b/src/soc/ibm/power9/int_vectors.S new file mode 100644 index 00000000000..4a7b58db1fc --- /dev/null +++ b/src/soc/ibm/power9/int_vectors.S @@ -0,0 +1,139 @@ +## SPDX-License-Identifier: GPL-2.0-only + +/* Load an immediate 64-bit value into a register */ +#define LOAD_IMM64(r, e) \ + lis r,(e)@highest; \ + ori r,r,(e)@higher; \ + rldicr r,r, 32, 31; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + +/* + * Macro to check if code fills in the specified size. For proper behavior + * requires -Wa,--fatal-warnings. Value is saved into ".comment" section to be + * discarded during linking. + */ +.macro int_vector_check_size vec, max_size +.pushsection ".comment" +.byte 255 - \max_size + (\vec\()_end - \vec) +.popsection +.endm + +.section ".text", "ax", %progbits +/* + * System reset vector (0x100) + * + * - reload r1 and r2 from saved state + * - add saved TB value to the current value (self-restore took some time) + * - TB can't be written with one mtspr + * - have to use 3 writes to deal with possible overflow of lower half + * - move nia and msr to HSRR0/1 + * - return from hypervisor interrupt + * - due to clobbers in inline assembly in cpu_winkle all other registers are + * reloaded by compiler + * - contents of vector and floating point registers are lost + */ +.globl sys_reset_int +sys_reset_int: + li %r0, 0 + /* WARNING: this assumes that ramstage is not relocatable */ + LOAD_IMM64(%r3, sstate) + /* Time Base */ + ld %r2, 32(%r3) + mftb %r4 + add %r4, %r2, %r4 + rldicl %r5, %r4, 32, 32 + mttbl %r0 + mttbu %r5 + mttbl %r4 + /* Stack */ + ld %r1, 0(%r3) + /* TOC */ + ld %r2, 8(%r3) + /* MSR -> HSRR1 */ + ld %r4, 16(%r3) + mtspr 315, %r4 + /* NIA -> HSRR0 */ + ld %r4, 24(%r3) + mtspr 314, %r4 + /* Link register */ + ld %r4, 40(%r3) + mtlr %r4 + hrfid +.globl sys_reset_int_end +sys_reset_int_end: + +int_vector_check_size sys_reset_int 0x100 + +/* + * External interrupt vector (0x500) + * + * Only one LSI source is enabled - PSU. It has vector 0xD, which is written + * to bits 48:51 in the address below. + * - mask interrupt - read 0xD00 + * - read state (needed?) - read 0x800 + * - send EOI to XIVE IC (read returns 1 if interrupt pending) read 0x0006030203103000 + * - unmask interrupt + * + * No need for eieio: + * - "If two Store instructions or two Load instructions specify storage + * locations that are both Caching Inhibited and Guarded, the corresponding + * storage accesses are performed in program order with respect to any + * processor or mechanism." 
- POWER ISA 3.0B, Book II, 1.7.1 - Storage Access + * Ordering, we have only loads here, and + * - "The storage accesses caused by the instructions described in this section + * are performed as though the specified storage location is Caching + * Inhibited and Guarded." - Book III, 4.4.1 - Fixed-Point Load and Store + * Caching Inhibited Instructions + * - both interrupts and '(h)rfid' instructions are context synchronizing. + */ +.globl ext_int +ext_int: + mtsprg0 %r3 + mtsprg1 %r4 + LOAD_IMM64(%r3, 0x00060302031CDD00) + /* Mask interrupt */ + ldcix %r4, 0, %r3 + subi %r3, %r3, 0x500 + /* Read state */ + ldcix %r4, 0, %r3 + LOAD_IMM64(%r4, 0x0006030203103000) + /* Send EOI */ + ldcix %r4, 0, %r4 + addi %r3, %r3, 0x400 + /* Unmask interrupt */ + ldcix %r4, 0, %r3 + mfsprg0 %r3 + mfsprg1 %r4 + hrfid +.globl ext_int_end +ext_int_end: + +int_vector_check_size ext_int 0x100 + +/* + * Hypervisor Virtualization vector (0xEA0) + * + * Taken when cores are waken up by deadman loop. The occurrence of the + * interrupt does not cause the exception to cease to exist, it must be + * acknowledged by read16(0x0006020000001830). After that, External Exception + * is still asserted and must also be handled. + * + * There are only 0x20 bytes reserved for this handler, which gives just 8 + * instructions. LOAD_IMM64 would use 5 of those, so don't use it. Bits in + * address are set in a way that makes it possible to use two load instructions + * and one rotate/shift operation. + */ +.globl hyp_virt_int +hyp_virt_int: + mtsprg0 %r3 + li %r3, 0x0602 + rldicr %r3, %r3, 40, 23 + ori %r3, %r3, 0x1830 + lhzcix %r3, 0, %r3 + /* Jump to External Interrupt handler, skipping 'mtsprg0 %r3' */ + ba 0x504 +.globl hyp_virt_int_end +hyp_virt_int_end: + +int_vector_check_size hyp_virt_int 0x20 diff --git a/src/soc/ibm/power9/xive.c b/src/soc/ibm/power9/xive.c new file mode 100644 index 00000000000..f31e96b8525 --- /dev/null +++ b/src/soc/ibm/power9/xive.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "homer.h" + +#define CODE_SIZE(x) ((x ## _end) - (x)) + +extern uint8_t sys_reset_int[]; +extern uint8_t sys_reset_int_end[]; +extern uint8_t ext_int[]; +extern uint8_t ext_int_end[]; +extern uint8_t hyp_virt_int[]; +extern uint8_t hyp_virt_int_end[]; + +#define IVPE_BAR 0x0006020000000000 +#define FSP_BAR 0x0006030100000000 +#define PSI_HB_BAR 0x0006030203000000 +#define PSI_HB_ESB_BAR 0x00060302031C0000 +#define XIVE_IC_BAR 0x0006030203100000 + +/* + * XIVE is not officially documented anywhere. There are bits and pieces that + * can be put together in [KVM] and [QEMU], but those are mostly about using + * XIVE for virtualization, not bare metal. Code below was ported from Hostboot + * and probably this is the best that we can hope for without documentation. 
+ * + * [KVM] https://www.kernel.org/doc/html/latest/virt/kvm/devices/xive.html + * [QEMU] https://qemu.readthedocs.io/en/latest/specs/ppc-xive.html + */ +void configure_xive(int core) +{ + uint64_t tmp; + + /* Install handlers */ + memcpy((void *)0x100, sys_reset_int, CODE_SIZE(sys_reset_int)); + memcpy((void *)0x500, ext_int, CODE_SIZE(ext_int)); + memcpy((void *)0xEA0, hyp_virt_int, CODE_SIZE(hyp_virt_int)); + + /* IVPE BAR + enable bit */ + write_scom(0x05013012, IVPE_BAR | PPC_BIT(0)); + + /* FSP BAR */ + write_scom(0x0501290B, FSP_BAR); + + /* PSI HB BAR + enable bit */ + /* TODO: check if 2 separate writes are required */ + write_scom(0x0501290A, PSI_HB_BAR); + write_scom(0x0501290A, PSI_HB_BAR | PPC_BIT(63)); + + /* Disable VPC Pull error */ + scom_and(0x05013179, ~PPC_BIT(30)); + + /* PSI HB ESB BAR + enable bit */ + /* TODO: check if 2 separate writes are required */ + write_scom(0x05012916, PSI_HB_ESB_BAR); + write_scom(0x05012916, PSI_HB_ESB_BAR | PPC_BIT(63)); + + /* XIVE IC BAR + enable bit */ + write_scom(0x05013010, XIVE_IC_BAR | PPC_BIT(0)); + + /* Set HB mode on P3PC register */ + scom_or(0x05013110, PPC_BIT(33)); + + /* Disable PSI interrupts */ + write_scom(0x05012913, PPC_BIT(3)); + + void *esb_bar = (void *)PSI_HB_ESB_BAR; + /* Mask all interrupt sources */ + for (int i = 0; i < 14; i++) { + tmp = read64(esb_bar + i*0x1000 + 0xD00); + eieio(); + tmp = read64(esb_bar + i*0x1000 + 0x800); + assert(tmp == 1); + } + + /* Route interrupts to CEC (whatever that is) instead of FSP */ + void *hb_bar = (void *)PSI_HB_BAR; + write64(hb_bar + 0x20, read64(hb_bar + 0x20) | PPC_BIT(3)); + + /* Enable PSIHB interrupts */ + write64(hb_bar + 0x58, read64(hb_bar + 0x58) | PPC_BIT(0)); + + /* Route interrupts to first thread of active core */ + int offset = (core < 16) ? 0x48 : 0x68; + void *xive_ic_bar = (void *)XIVE_IC_BAR; + write64(xive_ic_bar + 0x400 + offset, PPC_BIT(4 * (core % 16))); + eieio(); + + /* Configure LSI mode for HB CEC interrupts */ + void *ivpe_bar = (void *)IVPE_BAR; + write8(ivpe_bar + 0x38, 0x81); + eieio(); + + /* Route LSI to master processor */ + /* TODO: check if 2 separate writes are required */ + write64(hb_bar + 0x68, 0x0006030203102000); + write64(hb_bar + 0x68, 0x0006030203102001); + write64(hb_bar + 0x58, 0); + + /* Enable LSI interrupts */ + tmp = read64(xive_ic_bar + 0x3000 + 0xC00); + + /* Unmask PSU interrupts */ + tmp = read64(esb_bar + 0xD*0x1000 + 0xC00); + eieio(); + tmp = read64(esb_bar + 0xD*0x1000 + 0x800); + assert(tmp == 0); +} From 7c929eb74f60313a619ce751c9b923fdd4666400 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 7 Sep 2021 13:50:24 +0200 Subject: [PATCH 082/213] soc/power9/chip.c: clear IPMI attention bits using IPMI BT driver BMC sends event telling us that HIOMAP (access to flash, either real or emulated, through LPC) daemon has been started. This sets the mentioned bit. Skiboot enables interrupts, but because those are triggered on 0->1 transition and bit is already set, they do not arrive. 
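
For reference, the register-level effect can be sketched as follows. This is
illustrative only: the patch itself goes through coreboot's existing
ipmi_bt_clear() helper, and the bit values below are taken from the IPMI 2.0
BT interface description rather than from this tree, so treat them as
assumptions.

  #include <stdint.h>

  #define BT_CTRL_CLR_WR_PTR 0x01 /* write 1: reset host-to-BMC write pointer */
  #define BT_CTRL_CLR_RD_PTR 0x02 /* write 1: reset BMC-to-host read pointer */
  #define BT_CTRL_SMS_ATN    0x10 /* aka EVT_ATN, write 1 to clear */

  /* bt_ctrl is assumed to point at the memory-mapped BT_CTRL register */
  static inline void bt_drop_stale_attention(volatile uint8_t *bt_ctrl)
  {
          /* Writing 1 to these bits drops the stale attention and resets
           * both buffer pointers, so a single store is enough */
          *bt_ctrl = BT_CTRL_SMS_ATN | BT_CTRL_CLR_RD_PTR | BT_CTRL_CLR_WR_PTR;
  }
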
Change-Id: Ia73a132a724be2c2f9ec7398e4bcae0372c0e715
Signed-off-by: Krystian Hebel
Signed-off-by: Sergii Dmytruk
---
 src/mainboard/raptor-cs/talos-2/Kconfig | 1 +
 src/mainboard/raptor-cs/talos-2/devicetree.cb | 4 ++++
 src/soc/ibm/power9/chip.c | 24 +++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig
index 54c763131d0..898159be1db 100644
--- a/src/mainboard/raptor-cs/talos-2/Kconfig
+++ b/src/mainboard/raptor-cs/talos-2/Kconfig
@@ -23,6 +23,7 @@ config BOARD_SPECIFIC_OPTIONS
 select BOOT_DEVICE_NOT_SPI_FLASH
 select MISSING_BOARD_RESET
 select HAVE_DEBUG_RAM_SETUP
+ select IPMI_BT
 config MEMLAYOUT_LD_FILE
 string
diff --git a/src/mainboard/raptor-cs/talos-2/devicetree.cb b/src/mainboard/raptor-cs/talos-2/devicetree.cb
index 85440064fa4..eec041b2f88 100644
--- a/src/mainboard/raptor-cs/talos-2/devicetree.cb
+++ b/src/mainboard/raptor-cs/talos-2/devicetree.cb
@@ -2,4 +2,8 @@ chip soc/ibm/power9
 device cpu_cluster 0 on end
+
+ chip drivers/ipmi
+ device pnp e4.0 on end # IPMI BT
+ end
 end
diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c
index d7e7b152f0f..f0df55a026c 100644
--- a/src/soc/ibm/power9/chip.c
+++ b/src/soc/ibm/power9/chip.c
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include
+#include
+#include
 #include
 #include
@@ -66,6 +68,28 @@ static void enable_soc_dev(struct device *dev)
 istep_18_12();
 }

+void platform_prog_run(struct prog *prog)
+{
+ /*
+ * TODO: do what 16.2 did now, when the payload and its interrupt
+ * vectors are already loaded
+ */
+
+ /*
+ * Clear SMS_ATN aka EVT_ATN in BT_CTRL - Block Transfer IPMI protocol
+ *
+ * BMC sends event telling us that HIOMAP (access to flash, either real or
+ * emulated, through LPC) daemon has been started. This sets the mentioned bit.
+ * Skiboot enables interrupts, but because those are triggered on 0->1
+ * transition and bit is already set, they do not arrive.
+ *
+ * While we're at it, clear read and write pointers, in case circular buffer
+ * rolls over.
+ */
+ if (ipmi_bt_clear(CONFIG_BMC_BT_BASE))
+ die("ipmi_bt_clear() has failed.\n");
+}
+
 struct chip_operations soc_ibm_power9_ops = {
 CHIP_NAME("POWER9")
 .enable_dev = enable_soc_dev,

From 78a94fca4f29b45d246debdcc95349ec511d35c8 Mon Sep 17 00:00:00 2001
From: Krystian Hebel
Date: Wed, 15 Sep 2021 19:27:24 +0200
Subject: [PATCH 083/213] soc/power9/homer.c: additionally enable HDEE in self-restore

Hypervisor Doorbell Exit Enable is required for waking threads with
doorbell messages.
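
As a rough sketch of how the pieces fit together (the LPCR expression is the
one this patch touches, the doorbell send mirrors activate_slave_cores()
added later in this series, and read_spr()/SPR_LPCR_* are the helpers from
<cpu/power/spr.h> in this tree):

  #include <stdint.h>
  #include <cpu/power/spr.h>

  /* Sketch: LPCR image placed in the self-restore area */
  static uint64_t self_restore_lpcr(void)
  {
          uint64_t lpcr = read_spr(SPR_LPCR);

          lpcr &= ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE);
          /* HDEE is the bit this patch adds on top of cpu_winkle()'s value */
          return lpcr | SPR_LPCR_HVICE | SPR_LPCR_HVEE | SPR_LPCR_HDEE;
  }

  /* Sketch: the matching wake-up, a directed hypervisor doorbell; without
   * LPCR[HDEE] this message would not take the target thread out of stop */
  static void wake_thread(unsigned int core, unsigned int thread)
  {
          uint64_t msg = 0x0000000028000000ULL | (core << 2) | thread;
          asm volatile("msgsnd %0" :: "r" (msg));
  }
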
Signed-off-by: Krystian Hebel Change-Id: I0cbbe2c5d029b3cae5337a6fd5fcf3dbd5321b1a --- src/include/cpu/power/spr.h | 1 + src/soc/ibm/power9/homer.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index ffc6c2cfdac..67687dc2486 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -30,6 +30,7 @@ #define SPR_LPCR 0x13E #define SPR_LPCR_HVEE PPC_BIT(17) #define SPR_LPCR_LD PPC_BIT(46) +#define SPR_LPCR_HDEE PPC_BIT(48) #define SPR_LPCR_EEE PPC_BIT(49) #define SPR_LPCR_DEE PPC_BIT(50) #define SPR_LPCR_OEE PPC_BIT(51) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 7c8c5387276..e13df8b8f14 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -339,11 +339,11 @@ static void build_self_restore(struct homer_st *homer, size_t size; uint32_t *ptr; const uint64_t hrmor = read_spr(SPR_HRMOR); - /* See cpu_winkle() */ + /* See cpu_winkle(), additionally set Hypervisor Doorbell Exit Enable */ const uint64_t lpcr = (read_spr(SPR_LPCR) & ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE)) - | (SPR_LPCR_HVICE | SPR_LPCR_HVEE); + | (SPR_LPCR_HVICE | SPR_LPCR_HVEE | SPR_LPCR_HDEE); /* * Timing facilities may be lost. During their restoration Large Decrementer * in LPCR may be initially turned off, which may result in a spurious From d6bda37a8d8017053504cb06bb2c4e7e2b8e3693 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 25 Sep 2021 20:22:57 +0300 Subject: [PATCH 084/213] soc/power9/chip.c: activate slave cores right before starting payload Change-Id: I8d4fb5292902b1f12a4f6dff97d341778affdd06 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 63 +++++++++++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index f0df55a026c..b3fbb048e02 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -6,6 +6,7 @@ #include #include +#include "homer.h" #include "istep_13_scom.h" #include "chip.h" @@ -68,13 +69,55 @@ static void enable_soc_dev(struct device *dev) istep_18_12(); } -void platform_prog_run(struct prog *prog) +static void activate_slave_cores(void) { - /* - * TODO: do what 16.2 did now, when the payload and its interrupt - * vectors are already loaded - */ + enum { DOORBELL_MSG_TYPE = 0x0000000028000000 }; + + uint8_t i; + + /* Read OCC CCSR written by the code earlier */ + const uint64_t functional_cores = read_scom(0x0006C090); + + /* Find and process the first core in a separate loop to slightly + * simplify processing of all the other cores by removing a conditional */ + for (i = 0; i < MAX_CORES_PER_CHIP; ++i) { + uint8_t thread; + uint64_t core_msg; + + if (!IS_EC_FUNCTIONAL(i, functional_cores)) + continue; + + /* Message value for thread 0 of the current core */ + core_msg = DOORBELL_MSG_TYPE | (i << 2); + + /* Skip sending doorbell to the current thread of the current core */ + for (thread = 1; thread < 4; ++thread) { + register uint64_t msg = core_msg | thread; + asm volatile("msgsnd %0" :: "r" (msg)); + } + + break; + } + + for (++i; i < MAX_CORES_PER_CHIP; ++i) { + uint8_t thread; + uint64_t core_msg; + if (!IS_EC_FUNCTIONAL(i, functional_cores)) + continue; + + /* Message value for thread 0 of the i-th core */ + core_msg = DOORBELL_MSG_TYPE | (i << 2); + + for (thread = 0; thread < 4; ++thread) { + register uint64_t msg = core_msg | thread; + asm volatile("msgsnd %0" :: "r" (msg)); + } + } +} + +void 
platform_prog_run(struct prog *prog) +{ /* * Clear SMS_ATN aka EVT_ATN in BT_CTRL - Block Transfer IPMI protocol * @@ -88,6 +131,16 @@ void platform_prog_run(struct prog *prog) */ if (ipmi_bt_clear(CONFIG_BMC_BT_BASE)) die("ipmi_bt_clear() has failed.\n"); + + /* + * Now that the payload and its interrupt vectors are already loaded + * perform 16.2. + * + * This MUST be done as late as possible so that none of the newly + * activated threads start execution before current thread jumps into + * the payload. + */ + activate_slave_cores(); } struct chip_operations soc_ibm_power9_ops = { From f4d3c2550d5e5d0dc63fecaa48cd235462f8dfa8 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 16 May 2022 21:14:35 +0300 Subject: [PATCH 085/213] soc/power9/mvpd.c: construct MVPD partition in memory on startup Reading it from MVPD partition assumes that it was populated by Hostboot for current system, which is not a requirement we want to have. Building MVPD is much slower than just reading PNOR and in the future MVPD partition should be used again to cache built MVPD in PNOR. It requires figuring out how Hostboot determines if MVPD is up to date and how to write to PNOR. Change-Id: Iec1eadcfe95d3f1c68cb307b4bc662c24aaf9eb6 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/mvpd.c | 319 +++++++++++++++++++++++++++----- 2 files changed, 277 insertions(+), 43 deletions(-) diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 46b46104783..c7efb063f13 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -38,5 +38,6 @@ ramstage-y += powerbus.c ramstage-y += pstates.c ramstage-y += xive.c ramstage-y += int_vectors.S +ramstage-y += i2c.c endif diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 1660e1c8186..86c969b4c10 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +17,8 @@ #define MVPD_TOC_ENTRIES 32 #define MVPD_TOC_SIZE (MVPD_TOC_ENTRIES*sizeof(struct mvpd_toc_entry)) +#define EEPROM_CHIP_SIZE (64 * KiB) + /* Each entry points to a VPD record */ struct mvpd_toc_entry { char name[4]; // Name without trailing NUL byte @@ -23,6 +26,271 @@ struct mvpd_toc_entry { uint8_t reserved[2]; // Unused } __attribute__((packed)); +struct pt_record { + char record_name[4]; + /* All of these fields are in little endian */ + uint16_t record_type; + uint16_t record_offset; + uint16_t record_length; + uint16_t ecc_offset; + uint16_t ecc_length; +} __attribute__((packed)); + +/* + * Configuration of EEPROM with VPD data in talos.xml: + * + * + * EEPROM_VPD_PRIMARY_INFO + * + * + * i2cMasterPath + * + * /sys-0/node-0/motherboard-0/proc_socket-0/sforza-0/p9_proc_s/i2c-master-prom0-mvpd-primary/ + * + * + * port0 + * devAddr0xA0 + * engine1 + * byteAddrOffset0x02 + * maxMemorySizeKB0x80 + * chipCount0x02 + * writePageSize0x80 + * writeCycleTime0x0A + * + * + */ + +/* Reads from a single EEPROM chip, which is deduced from offset. Returns zero + on success. 
*/ +static int read_eeprom_chip(uint32_t offset, void *data, uint16_t len) +{ + const unsigned int bus = 1; + uint16_t addr = 0xA0; + uint16_t slave = 0; + uint16_t actual_offset = 0; + + struct i2c_msg seg[2]; + + /* Two chips at two different addresses */ + if (offset >= EEPROM_CHIP_SIZE) { + offset -= EEPROM_CHIP_SIZE; + addr += 0x02; + } + + assert(offset < EEPROM_CHIP_SIZE); + actual_offset = offset; + + /* Most-significant bit is port number */ + slave = addr >> 1; + + seg[0].flags = 0; + seg[0].slave = slave; + seg[0].buf = (uint8_t *)&actual_offset; + seg[0].len = sizeof(actual_offset); + seg[1].flags = I2C_M_RD; + seg[1].slave = slave; + seg[1].buf = data; + seg[1].len = len; + + return i2c_transfer(bus, seg, ARRAY_SIZE(seg)); +} + +/* Reads from EEPROM handling accesses across chip boundaries (64 KiB). Returns + zero on success. */ +static int read_eeprom(uint32_t offset, void *data, uint32_t len) +{ + uint16_t len1 = 0; + uint16_t len2 = 0; + + assert(len != 0); + if (offset / EEPROM_CHIP_SIZE == (offset + len - 1) / EEPROM_CHIP_SIZE) + return read_eeprom_chip(offset, data, len); + + len1 = EEPROM_CHIP_SIZE - offset; + len2 = len - len1; + + if (read_eeprom_chip(offset, data, len1)) + return 1; + if (read_eeprom_chip(EEPROM_CHIP_SIZE, (uint8_t *)data + len1, len2)) + return 1; + + return 0; +} + +/* Finds and extracts i-th keyword (`index` specifies which one) from a record + in EEPROM that starts at specified offset */ +static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, + const char *record_name, const char *kwd_name, + uint8_t *buf, size_t *size) +{ + uint16_t record_size = 0; + uint8_t name[VPD_RECORD_NAME_LEN]; + + if (strlen(record_name) != VPD_RECORD_NAME_LEN) + die("Record name has wrong length: %s!\n", record_name); + if (strlen(kwd_name) != VPD_KWD_NAME_LEN) + die("Keyword name has wrong length: %s!\n", kwd_name); + + if (read_eeprom(offset, &record_size, sizeof(record_size))) + die("Failed to read record size from EEPROM\n"); + + offset += VPD_RECORD_SIZE_LEN; + record_size = le16toh(record_size); + + /* Skip mandatory "RT" and one byte of keyword size (always 4) */ + offset += VPD_KWD_NAME_LEN + 1; + + if (read_eeprom(offset, name, sizeof(name))) + die("Failed to read record name from EEPROM\n"); + + if (memcmp(name, record_name, VPD_RECORD_NAME_LEN)) + die("Expected to be working with %s record, got %.4s!\n", + record_name, name); + + offset += VPD_RECORD_NAME_LEN; + + while (offset < record_size) { + uint8_t name_buf[VPD_KWD_NAME_LEN]; + uint16_t kwd_size = 0; + + if (read_eeprom(offset, name_buf, sizeof(name_buf))) + die("Failed to read keyword name from EEPROM\n"); + + /* This is always the last keyword */ + if (!memcmp(name_buf, "PF", VPD_KWD_NAME_LEN)) + break; + + offset += VPD_KWD_NAME_LEN; + + if (name_buf[0] == '#') { + /* This is a large (two-byte size) keyword */ + if (read_eeprom(offset, &kwd_size, sizeof(kwd_size))) + die("Failed to read large keyword size from EEPROM\n"); + kwd_size = le16toh(kwd_size); + offset += 2; + } else { + uint8_t small_size; + if (read_eeprom(offset, &small_size, sizeof(small_size))) + die("Failed to read small keyword size from EEPROM\n"); + kwd_size = small_size; + offset += 1; + } + + if (!memcmp(name_buf, kwd_name, VPD_KWD_NAME_LEN) && index-- == 0) { + if (*size < kwd_size) { + die("Keyword buffer is too small: %llu instead of %llu\n", + (unsigned long long)*size, (unsigned long long)kwd_size); + } + + if (read_eeprom(offset, buf, kwd_size)) + die("Failed to read keyword body from EEPROM\n"); + + *size = 
kwd_size; + return true; + } + + offset += kwd_size; + } + + return false; +} + +/* Builds MVPD partition for a single processor (64 KiB per chip) or returns an + already built one */ +static const uint8_t *mvpd_get(void) +{ + enum { SECTION_SIZE = 64 * KiB }; + + static uint8_t mvpd_buf[SECTION_SIZE]; + + static const char *const mvpd_records[] = { + "CRP0", "CP00", "VINI", + "LRP0", "LRP1", "LRP2", "LRP3", "LRP4", "LRP5", + "LWP0", "LWP1", "LWP2", "LWP3", "LWP4", "LWP5", + "VRML", "VWML", "VER0", "MER0", "VMSC", + }; + + struct mvpd_toc_entry *toc = (void *)&mvpd_buf[0]; + uint16_t mvpd_offset = MVPD_TOC_SIZE; + + uint8_t pt_buf[256]; + struct pt_record *pt_record = (void *)pt_buf; + size_t pt_size = sizeof(struct pt_record); + + uint8_t i = 0; + + /* Skip the ECC data + "large resource" byte (0x84) in the VHDR */ + uint64_t offset = 12; + + /* Partition is already constructed (filled one can't be empty) */ + if (mvpd_buf[0] != '\0') + return mvpd_buf; + + if (!eeprom_extract_kwd(offset, 0, "VHDR", "PT", pt_buf, &pt_size)) + die("Failed to find PT keyword of VHDR record in EEPROM.\n"); + + if (memcmp(pt_record->record_name, "VTOC", VPD_RECORD_NAME_LEN)) + die("VHDR in EEPROM is invalid (got %.4s instead of VTOC.\n", + pt_record->record_name); + + /* Move to the TOC record, skip "large resource" byte (0x84) */ + offset = le16toh(pt_record->record_offset) + 1; + + /* Fill whole TOC with 0xFF */ + memset(toc, 0xFF, MVPD_TOC_SIZE); + + /* Up to three PT keywords in VTOC record */ + for (i = 0; i < 3; ++i) { + uint8_t j; + uint8_t entry_count; + + pt_size = sizeof(pt_buf); + if (!eeprom_extract_kwd(offset, i, "VTOC", "PT", pt_buf, &pt_size)) { + if (i == 0) + die("Failed to find any PT keyword of VTOC record in EEPROM\n"); + break; + } + + entry_count = pt_size / sizeof(struct pt_record); + + for (j = 0; j < entry_count; ++j) { + const char *record_name = pt_record[j].record_name; + /* Skip "large resource" byte (0x84) */ + const uint16_t record_offset = le16toh(pt_record[j].record_offset) + 1; + const uint16_t record_size = le16toh(pt_record[j].record_length); + + uint8_t k; + for (k = 0; k < ARRAY_SIZE(mvpd_records); ++k) { + if (!memcmp(record_name, mvpd_records[k], 4)) + break; + } + + if (k == ARRAY_SIZE(mvpd_records)) + continue; + + if (mvpd_offset + record_size > SECTION_SIZE) { + die("MVPD section doesn't have space for %.4s record of " + "size %d\n", record_name, record_size); + } + + /* Store this record to MVPD */ + + memcpy(toc->name, record_name, VPD_RECORD_NAME_LEN); + toc->offset = htole16(mvpd_offset); + toc->reserved[0] = 0x5A; + toc->reserved[1] = 0x5A; + + if (read_eeprom(record_offset, mvpd_buf + mvpd_offset, record_size)) + die("Failed to read %.4s record from EEPROM\n", record_name); + + ++toc; + mvpd_offset += record_size; + } + } + + return mvpd_buf; +} + static struct mvpd_toc_entry *find_record(struct mvpd_toc_entry *toc, const char *name) { @@ -105,58 +373,37 @@ static struct ring_hdr *find_ring(uint8_t chiplet_id, uint8_t even_odd, static const uint8_t *mvpd_get_keyword(const char *record_name, const char *kwd_name, - size_t *kwd_size, void **mmaped_data) + size_t *kwd_size) { - const struct region_device *mvpd_device = mvpd_device_ro(); - - uint8_t mvpd_buf[MVPD_TOC_SIZE]; - struct mvpd_toc_entry *mvpd_toc = (struct mvpd_toc_entry *)mvpd_buf; + const uint8_t *mvpd = mvpd_get(); + struct mvpd_toc_entry *mvpd_toc = (void *)mvpd; struct mvpd_toc_entry *toc_entry = NULL; - uint16_t record_offset = 0; - uint8_t *record_data = NULL; - uint16_t record_size = 0; + const 
uint8_t *record_data = NULL; const uint8_t *kwd = NULL; - /* Copy all TOC at once */ - if (rdev_readat(mvpd_device, mvpd_buf, 0, sizeof(mvpd_buf)) != sizeof(mvpd_buf)) - die("Failed to read MVPD TOC!\n"); - toc_entry = find_record(mvpd_toc, record_name); if (toc_entry == NULL) die("Failed to find %s MVPD record!\n", record_name); - record_offset = le16toh(toc_entry->offset); - - /* Read size of the record */ - if (rdev_readat(mvpd_device, &record_size, record_offset, - sizeof(record_size)) != sizeof(record_size)) - die("Failed to read size of %s!\n", record_name); - record_data = rdev_mmap(mvpd_device, record_offset, record_size); - if (!record_data) - die("Failed to map %s record!\n", record_name); + record_data = mvpd + le16toh(toc_entry->offset); kwd = vpd_find_kwd(record_data, record_name, kwd_name, kwd_size); if (kwd == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); - *mmaped_data = record_data; return kwd; } bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, uint8_t *buf, uint32_t *size) { - void *mmaped_data = NULL; - const uint8_t *kwd = NULL; size_t kwd_size = 0; bool copied_data = false; - mvpd_device_init(); - - kwd = mvpd_get_keyword(record_name, kwd_name, &kwd_size, &mmaped_data); + kwd = mvpd_get_keyword(record_name, kwd_name, &kwd_size); if (kwd == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); @@ -168,9 +415,6 @@ bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, *size = kwd_size; - if (rdev_munmap(mvpd_device_ro(), mmaped_data)) - die("Failed to unmap %s record!\n", record_name); - return copied_data; } @@ -209,34 +453,23 @@ bool mvpd_extract_ring(const char *record_name, const char *kwd_name, uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, uint8_t *buf, uint32_t buf_size) { - void *mmaped_data = NULL; - const uint8_t *rings = NULL; size_t rings_size = 0; struct ring_hdr *ring = NULL; uint32_t ring_size = 0; - mvpd_device_init(); - - rings = mvpd_get_keyword(record_name, kwd_name, &rings_size, &mmaped_data); + rings = mvpd_get_keyword(record_name, kwd_name, &rings_size); if (rings == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); ring = find_ring(chiplet_id, even_odd, ring_id, rings, rings_size); - if (ring == NULL) { - if (rdev_munmap(mvpd_device_ro(), mmaped_data)) - die("Failed to unmap %s record!\n", record_name); - + if (ring == NULL) return false; - } ring_size = ring->size; if (buf_size >= ring_size) memcpy(buf, ring, ring_size); - if (rdev_munmap(mvpd_device_ro(), mmaped_data)) - die("Failed to unmap %s record!\n", record_name); - return (buf_size >= ring_size); } From 123fc7a27406e76748a21185e68823ebafe4e2e7 Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Thu, 26 Aug 2021 10:01:47 +0200 Subject: [PATCH 086/213] soc/power9/occ.c: start adding code for managing OCC This mainly introduces code for OCB channels and directly related functionality. OCB channels are used to interact with OCC. 
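
The access pattern the OCB code is built around can be condensed into a short
sketch (register numbers are the ones this patch defines; write_scom() and
read_scom() are the SCOM accessors already used throughout this port, and the
channel is assumed to have been put into stream mode by pm_ocb_setup()):

  #include <stddef.h>
  #include <stdint.h>

  #define PU_OCB_PIB_OCBAR0 0x0006D010 /* channel 0 address register */
  #define PU_OCB_PIB_OCBDR0 0x0006D015 /* channel 0 data register */

  /* Program the OCI address once, then stream 64-bit words through the data
   * register; in stream mode consecutive accesses are expected to walk
   * through OCC SRAM without touching the address register again */
  static void ocb_read(uint32_t oci_address, uint64_t *buf, size_t words)
  {
          write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32);

          for (size_t i = 0; i < words; i++)
                  buf[i] = read_scom(PU_OCB_PIB_OCBDR0);
  }
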
Change-Id: I96978530582c6fc39adb8f953172c9771dd77cb3 Signed-off-by: Igor Bagnucki Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/occ.h | 17 ++ src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/homer.c | 11 +- src/soc/ibm/power9/occ.c | 379 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/ops.h | 69 ++++++ 5 files changed, 467 insertions(+), 10 deletions(-) create mode 100644 src/include/cpu/power/occ.h create mode 100644 src/soc/ibm/power9/occ.c create mode 100644 src/soc/ibm/power9/ops.h diff --git a/src/include/cpu/power/occ.h b/src/include/cpu/power/occ.h new file mode 100644 index 00000000000..7f29a94c1d2 --- /dev/null +++ b/src/include/cpu/power/occ.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_OCC_H +#define CPU_PPC64_OCC_H + +#include + +void read_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length); +void write_occ_command(uint64_t write_data); +void clear_occ_special_wakeups(uint64_t cores); +void special_occ_wakeup_disable(uint64_t cores); +void occ_start_from_mem(void); + +void pm_occ_fir_init(void); +void pm_pba_fir_init(void); + +#endif /* CPU_PPC64_OCC_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index c7efb063f13..efa9ddd7064 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -39,5 +39,6 @@ ramstage-y += pstates.c ramstage-y += xive.c ramstage-y += int_vectors.S ramstage-y += i2c.c +ramstage-y += occ.c endif diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index e13df8b8f14..7ce0f521efc 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -14,6 +14,7 @@ #include "chip.h" #include "homer.h" +#include "ops.h" #include "tor.h" #include "xip.h" @@ -227,16 +228,6 @@ static void build_sgpe(struct homer_st *homer, struct xip_sgpe_header *sgpe, static const uint32_t _SMF = 0x5F534D46; // "_SMF" -static const uint32_t ATTN_OP = 0x00000200; -static const uint32_t BLR_OP = 0x4E800020; -static const uint32_t ORI_OP = 0x60000000; -static const uint32_t SKIP_SPR_REST_INST = 0x4800001C; -static const uint32_t MR_R0_TO_R10_OP = 0x7C0A0378; -static const uint32_t MR_R0_TO_R21_OP = 0x7C150378; -static const uint32_t MR_R0_TO_R9_OP = 0x7C090378; -static const uint32_t MTLR_R30_OP = 0x7FC803A6; -static const uint32_t MFLR_R30_OP = 0x7FC802A6; - static const uint32_t init_cpureg_template[] = { 0x63000000, /* ori %r24, %r0, 0 */ /* |= spr, key for lookup */ 0x7C000278, /* xor %r0, %r0, %r0 */ diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c new file mode 100644 index 00000000000..c224b51d7db --- /dev/null +++ b/src/soc/ibm/power9/occ.c @@ -0,0 +1,379 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "homer.h" +#include "ops.h" + +#define OCB_PIB_OCBCSR0_OCB_STREAM_MODE 4 +#define OCB_PIB_OCBCSR0_OCB_STREAM_TYPE 5 + +#define OCB_OCI_OCBSHCS0_PUSH_ENABLE 31 +#define OCB_OCI_OCBSHCS0_PUSH_FULL 0 + +#define PU_OCB_PIB_OCBCSR0_RO 0x0006D011 +#define PU_OCB_PIB_OCBCSR1_RO 0x0006D031 + +#define PU_OCB_OCI_OCBSHCS0_SCOM 0x0006C204 +#define PU_OCB_OCI_OCBSHCS1_SCOM 0x0006C214 + +#define EX_PPM_SPWKUP_OCC 0x200F010C +#define PU_OCB_PIB_OCBAR0 0x0006D010 + +#define PU_OCB_PIB_OCBDR0 0x0006D015 +#define PU_OCB_PIB_OCBDR1 0x0006D035 + +#define PU_OCB_PIB_OCBCSR0_OR 0x0006D013 +#define PU_OCB_PIB_OCBCSR0_CLEAR 0x0006D012 + +/* FIR register offset from base */ +enum fir_offset { + BASE_WAND_INCR = 1, + BASE_WOR_INCR = 2, + MASK_INCR = 3, + MASK_WAND_INCR = 4, + 
MASK_WOR_INCR = 5, + ACTION0_INCR = 6, + ACTION1_INCR = 7 +}; + +static void pm_ocb_setup(uint32_t ocb_bar) +{ + write_scom(PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + write_scom(PU_OCB_PIB_OCBCSR0_CLEAR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE)); + write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)ocb_bar << 32); +} + +static void check_ocb_mode(uint64_t ocb_csr_address, uint64_t ocb_shcs_address) +{ + uint64_t ocb_pib = read_scom(ocb_csr_address); + + /* + * The following check for circular mode is an additional check + * performed to ensure a valid data access. + */ + if ((ocb_pib & PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)) && + (ocb_pib & PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE))) { + /* + * Check if push queue is enabled. If not, let the store occur + * anyway to let the PIB error response return occur. (That is + * what will happen if this checking code were not here.) + */ + uint64_t stream_push_ctrl = read_scom(ocb_shcs_address); + + if (stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_ENABLE)) { + uint8_t counter = 0; + for (counter = 0; counter < 4; counter++) { + /* Proceed if the OCB_OCI_OCBSHCS0_PUSH_FULL is clear */ + if (!(stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_FULL))) + break; + + stream_push_ctrl = read_scom(ocb_shcs_address); + } + + if (counter == 4) + die("Failed to write to circular buffer.\n"); + } + } +} + +static void put_ocb_indirect(uint32_t ocb_req_length, uint32_t oci_address, + uint64_t *ocb_buffer) +{ + write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + + check_ocb_mode(PU_OCB_PIB_OCBCSR0_RO, PU_OCB_OCI_OCBSHCS0_SCOM); + + for (uint32_t index = 0; index < ocb_req_length; index++) + write_scom(PU_OCB_PIB_OCBDR0, ocb_buffer[index]); +} + +static void get_ocb_indirect(uint32_t ocb_req_length, uint32_t oci_address, + uint64_t *ocb_buffer) +{ + write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + for (uint32_t loopCount = 0; loopCount < ocb_req_length; loopCount++) + ocb_buffer[loopCount] = read_scom(PU_OCB_PIB_OCBDR0); +} + +static void write_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length) +{ + pm_ocb_setup(address); + put_ocb_indirect(data_length / 8, address, buffer); +} + +void read_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length) +{ + pm_ocb_setup(address); + get_ocb_indirect(data_length / 8, address, buffer); +} + +void write_occ_command(uint64_t write_data) +{ + check_ocb_mode(PU_OCB_PIB_OCBCSR1_RO, PU_OCB_OCI_OCBSHCS1_SCOM); + write_scom(PU_OCB_PIB_OCBDR1, write_data); +} + +void clear_occ_special_wakeups(uint64_t cores) +{ + for (size_t i = 0; i < MAX_CORES_PER_CHIP; i += 2) { + if (!IS_EX_FUNCTIONAL(i, cores)) + continue; + scom_and_for_chiplet(EC00_CHIPLET_ID + i, EX_PPM_SPWKUP_OCC, ~PPC_BIT(0)); + } +} + +void special_occ_wakeup_disable(uint64_t cores) +{ + enum { PPM_SPWKUP_FSP = 0x200F010B }; + + for (int i = 0; i < MAX_CORES_PER_CHIP; ++i) { + if (!IS_EC_FUNCTIONAL(i, cores)) + continue; + + write_scom_for_chiplet(EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP, 0); + /* This puts an inherent delay in the propagation of the reset transition */ + (void)read_scom_for_chiplet(EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP); + } +} + +/* Sets up boot loader in SRAM and returns 32-bit jump instruction to it */ +static uint64_t setup_memory_boot(void) +{ + enum { + OCC_BOOT_OFFSET = 0x40, + CTR = 9, + OCC_SRAM_BOOT_ADDR = 0xFFF40000, + OCC_SRAM_BOOT_ADDR2 = 0xFFF40002, + }; + + uint64_t sram_program[2]; + + /* lis r1, 0x8000 */ + sram_program[0] = ((uint64_t)ppc_lis(1, 0x8000) << 32); + + /* ori r1, r1, 
OCC_BOOT_OFFSET */ + sram_program[0] |= ppc_ori(1, 1, OCC_BOOT_OFFSET); + + /* mtctr (mtspr r1, CTR) */ + sram_program[1] = ((uint64_t)ppc_mtspr(1, CTR) << 32); + + /* bctr */ + sram_program[1] |= ppc_bctr(); + + /* Write to SRAM */ + write_occ_sram(OCC_SRAM_BOOT_ADDR, sram_program, sizeof(sram_program)); + + return ((uint64_t)ppc_b(OCC_SRAM_BOOT_ADDR2) << 32); +} + +void occ_start_from_mem(void) +{ + enum { + OCB_PIB_OCR_CORE_RESET_BIT = 0, + JTG_PIB_OJCFG_DBG_HALT_BIT = 6, + + PU_SRAM_SRBV0_SCOM = 0x0006A004, + + PU_JTG_PIB_OJCFG_AND = 0x0006D005, + PU_OCB_PIB_OCR_CLEAR = 0x0006D001, + PU_OCB_PIB_OCR_OR = 0x0006D002, + }; + + write_scom(PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + + /* + * Set up Boot Vector Registers in SRAM: + * - set bv0-2 to all 0's (illegal instructions) + * - set bv3 to proper branch instruction + */ + write_scom(PU_SRAM_SRBV0_SCOM, 0); + write_scom(PU_SRAM_SRBV0_SCOM + 1, 0); + write_scom(PU_SRAM_SRBV0_SCOM + 2, 0); + write_scom(PU_SRAM_SRBV0_SCOM + 3, setup_memory_boot()); + + write_scom(PU_JTG_PIB_OJCFG_AND, ~PPC_BIT(JTG_PIB_OJCFG_DBG_HALT_BIT)); + write_scom(PU_OCB_PIB_OCR_OR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); + write_scom(PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); +} + +void pm_occ_fir_init(void) +{ + enum { + PERV_TP_OCC_SCOM_OCCLFIR = 0x01010800, + + /* Bits of OCC LFIR */ + OCC_FW0 = 0, + OCC_FW1 = 1, + CME_ERR_NOTIFY = 2, + STOP_RCV_NOTIFY_PRD = 3, + OCC_HB_NOTIFY = 4, + GPE0_WD_TIMEOUT = 5, + GPE1_WD_TIMEOUT = 6, + GPE2_WD_TIMEOUT = 7, + GPE3_WD_TIMEOUT = 8, + GPE0_ERR = 9, + GPE1_ERR = 10, + GPE2_ERR = 11, + GPE3_ERR = 12, + OCB_ERR = 13, + SRAM_UE = 14, + SRAM_CE = 15, + SRAM_READ_ERR = 16, + SRAM_WRITE_ERR = 17, + SRAM_DATAOUT_PERR = 18, + SRAM_OCI_WDATA_PARITY = 19, + SRAM_OCI_BE_PARITY_ERR = 20, + SRAM_OCI_ADDR_PARITY_ERR = 21, + GPE0_HALTED = 22, + GPE1_HALTED = 23, + GPE2_HALTED = 24, + GPE3_HALTED = 25, + EXT_TRAP = 26, + PPC405_CORE_RESET = 27, + PPC405_CHIP_RESET = 28, + PPC405_SYS_RESET = 29, + PPC405_WAIT_STATE = 30, + PPC405_DBGSTOPACK = 31, + OCB_DB_OCI_TIMEOUT = 32, + OCB_DB_OCI_RDATA_PARITY = 33, + OCB_DB_OCI_SLVERR = 34, + OCB_PIB_ADDR_PARITY_ERR = 35, + OCB_DB_PIB_DATA_PARITY_ERR = 36, + OCB_IDC0_ERR = 37, + OCB_IDC1_ERR = 38, + OCB_IDC2_ERR = 39, + OCB_IDC3_ERR = 40, + SRT_FSM_ERR = 41, + JTAGACC_ERR = 42, + SPARE_ERR_38 = 43, + C405_ECC_UE = 44, + C405_ECC_CE = 45, + C405_OCI_MC_CHK = 46, + SRAM_SPARE_DIRERR0 = 47, + SRAM_SPARE_DIRERR1 = 48, + SRAM_SPARE_DIRERR2 = 49, + SRAM_SPARE_DIRERR3 = 50, + GPE0_OCISLV_ERR = 51, + GPE1_OCISLV_ERR = 52, + GPE2_OCISLV_ERR = 53, + GPE3_OCISLV_ERR = 54, + C405ICU_M_TIMEOUT = 55, + C405DCU_M_TIMEOUT = 56, + OCC_CMPLX_FAULT = 57, + OCC_CMPLX_NOTIFY = 58, + SPARE_59 = 59, + SPARE_60 = 60, + SPARE_61 = 61, + FIR_PARITY_ERR_DUP = 62, + FIR_PARITY_ERR = 63, + }; + + const uint64_t action0_bits = 0; + const uint64_t action1_bits = + PPC_BIT(C405_ECC_CE) | PPC_BIT(C405_OCI_MC_CHK) + | PPC_BIT(C405DCU_M_TIMEOUT) | PPC_BIT(GPE0_ERR) + | PPC_BIT(GPE0_OCISLV_ERR) | PPC_BIT(GPE1_ERR) + | PPC_BIT(GPE1_OCISLV_ERR) | PPC_BIT(GPE2_OCISLV_ERR) + | PPC_BIT(GPE3_OCISLV_ERR) | PPC_BIT(JTAGACC_ERR) + | PPC_BIT(OCB_DB_OCI_RDATA_PARITY) | PPC_BIT(OCB_DB_OCI_SLVERR) + | PPC_BIT(OCB_DB_OCI_TIMEOUT) | PPC_BIT(OCB_DB_PIB_DATA_PARITY_ERR) + | PPC_BIT(OCB_IDC0_ERR) | PPC_BIT(OCB_IDC1_ERR) + | PPC_BIT(OCB_IDC2_ERR) | PPC_BIT(OCB_IDC3_ERR) + | PPC_BIT(OCB_PIB_ADDR_PARITY_ERR) | PPC_BIT(OCC_CMPLX_FAULT) + | PPC_BIT(OCC_CMPLX_NOTIFY) | PPC_BIT(SRAM_CE) + | PPC_BIT(SRAM_DATAOUT_PERR) | 
PPC_BIT(SRAM_OCI_ADDR_PARITY_ERR) + | PPC_BIT(SRAM_OCI_BE_PARITY_ERR) | PPC_BIT(SRAM_OCI_WDATA_PARITY) + | PPC_BIT(SRAM_READ_ERR) | PPC_BIT(SRAM_SPARE_DIRERR0) + | PPC_BIT(SRAM_SPARE_DIRERR1) | PPC_BIT(SRAM_SPARE_DIRERR2) + | PPC_BIT(SRAM_SPARE_DIRERR3) | PPC_BIT(SRAM_UE) + | PPC_BIT(SRAM_WRITE_ERR) | PPC_BIT(SRT_FSM_ERR) + | PPC_BIT(STOP_RCV_NOTIFY_PRD) | PPC_BIT(C405_ECC_UE); + + uint64_t mask = read_scom(PERV_TP_OCC_SCOM_OCCLFIR + MASK_INCR); + mask &= ~action0_bits; + mask &= ~action1_bits; + + write_scom(PERV_TP_OCC_SCOM_OCCLFIR, 0); + write_scom(PERV_TP_OCC_SCOM_OCCLFIR + ACTION0_INCR, action0_bits); + write_scom(PERV_TP_OCC_SCOM_OCCLFIR + ACTION1_INCR, action1_bits); + write_scom(PERV_TP_OCC_SCOM_OCCLFIR + MASK_WOR_INCR, mask); + write_scom(PERV_TP_OCC_SCOM_OCCLFIR + MASK_WAND_INCR, mask); +} + +void pm_pba_fir_init(void) +{ + enum { + PU_PBAFIR = 0x05012840, + + /* Bits of PBA LFIR. */ + PBAFIR_OCI_APAR_ERR = 0, + PBAFIR_PB_RDADRERR_FW = 1, + PBAFIR_PB_RDDATATO_FW = 2, + PBAFIR_PB_SUE_FW = 3, + PBAFIR_PB_UE_FW = 4, + PBAFIR_PB_CE_FW = 5, + PBAFIR_OCI_SLAVE_INIT = 6, + PBAFIR_OCI_WRPAR_ERR = 7, + PBAFIR_SPARE = 8, + PBAFIR_PB_UNEXPCRESP = 9, + PBAFIR_PB_UNEXPDATA = 10, + PBAFIR_PB_PARITY_ERR = 11, + PBAFIR_PB_WRADRERR_FW = 12, + PBAFIR_PB_BADCRESP = 13, + PBAFIR_PB_ACKDEAD_FW_RD = 14, + PBAFIR_PB_CRESPTO = 15, + PBAFIR_BCUE_SETUP_ERR = 16, + PBAFIR_BCUE_PB_ACK_DEAD = 17, + PBAFIR_BCUE_PB_ADRERR = 18, + PBAFIR_BCUE_OCI_DATERR = 19, + PBAFIR_BCDE_SETUP_ERR = 20, + PBAFIR_BCDE_PB_ACK_DEAD = 21, + PBAFIR_BCDE_PB_ADRERR = 22, + PBAFIR_BCDE_RDDATATO_ERR = 23, + PBAFIR_BCDE_SUE_ERR = 24, + PBAFIR_BCDE_UE_ERR = 25, + PBAFIR_BCDE_CE = 26, + PBAFIR_BCDE_OCI_DATERR = 27, + PBAFIR_INTERNAL_ERR = 28, + PBAFIR_ILLEGAL_CACHE_OP = 29, + PBAFIR_OCI_BAD_REG_ADDR = 30, + PBAFIR_AXPUSH_WRERR = 31, + PBAFIR_AXRCV_DLO_ERR = 32, + PBAFIR_AXRCV_DLO_TO = 33, + PBAFIR_AXRCV_RSVDATA_TO = 34, + PBAFIR_AXFLOW_ERR = 35, + PBAFIR_AXSND_DHI_RTYTO = 36, + PBAFIR_AXSND_DLO_RTYTO = 37, + PBAFIR_AXSND_RSVTO = 38, + PBAFIR_AXSND_RSVERR = 39, + PBAFIR_PB_ACKDEAD_FW_WR = 40, + PBAFIR_RESERVED_41 = 41, + PBAFIR_RESERVED_42 = 42, + PBAFIR_RESERVED_43 = 43, + PBAFIR_FIR_PARITY_ERR2 = 44, + PBAFIR_FIR_PARITY_ERR = 45, + }; + + const uint64_t action0_bits = 0; + const uint64_t action1_bits = + PPC_BIT(PBAFIR_OCI_APAR_ERR) | PPC_BIT(PBAFIR_PB_UE_FW) + | PPC_BIT(PBAFIR_PB_CE_FW) | PPC_BIT(PBAFIR_OCI_SLAVE_INIT) + | PPC_BIT(PBAFIR_OCI_WRPAR_ERR) | PPC_BIT(PBAFIR_PB_UNEXPCRESP) + | PPC_BIT(PBAFIR_PB_UNEXPDATA) | PPC_BIT(PBAFIR_PB_PARITY_ERR) + | PPC_BIT(PBAFIR_PB_WRADRERR_FW) | PPC_BIT(PBAFIR_PB_BADCRESP) + | PPC_BIT(PBAFIR_PB_CRESPTO) | PPC_BIT(PBAFIR_INTERNAL_ERR) + | PPC_BIT(PBAFIR_ILLEGAL_CACHE_OP) | PPC_BIT(PBAFIR_OCI_BAD_REG_ADDR); + + uint64_t mask = PPC_BITMASK(0, 63); + mask &= ~action0_bits; + mask &= ~action1_bits; + + write_scom(PU_PBAFIR, 0); + write_scom(PU_PBAFIR + ACTION0_INCR, action0_bits); + write_scom(PU_PBAFIR + ACTION1_INCR, action1_bits); + write_scom(PU_PBAFIR + MASK_WOR_INCR, mask); + write_scom(PU_PBAFIR + MASK_WAND_INCR, mask); +} diff --git a/src/soc/ibm/power9/ops.h b/src/soc/ibm/power9/ops.h new file mode 100644 index 00000000000..e66df58a90a --- /dev/null +++ b/src/soc/ibm/power9/ops.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_OPS_H +#define CPU_PPC64_OPS_H + +#include + +static const uint32_t ATTN_OP = 0x00000200; +static const uint32_t BLR_OP = 0x4E800020; +static const uint32_t BR_OP = 0x48000000; +static const uint32_t BCCTR_OP = 0x4C000420; +static const 
uint32_t ORI_OP = 0x60000000; +static const uint32_t LIS_OP = 0x3C000000; +static const uint32_t MTSPR_OP = 0x7C0003A6; +static const uint32_t SKIP_SPR_REST_INST = 0x4800001C; +static const uint32_t MR_R0_TO_R10_OP = 0x7C0A0378; +static const uint32_t MR_R0_TO_R21_OP = 0x7C150378; +static const uint32_t MR_R0_TO_R9_OP = 0x7C090378; +static const uint32_t MTLR_R30_OP = 0x7FC803A6; +static const uint32_t MFLR_R30_OP = 0x7FC802A6; + +static inline uint32_t ppc_lis(uint16_t rt, uint16_t data) +{ + uint32_t inst; + inst = LIS_OP; + inst |= rt << (31 - 10); + inst |= data; + return inst; +} + +static inline uint32_t ppc_ori(uint16_t rs, uint16_t ra, uint16_t data) +{ + uint32_t inst; + inst = ORI_OP; + inst |= rs << (31 - 10); + inst |= ra << (31 - 15); + inst |= data; + return inst; +} + +static inline uint32_t ppc_mtspr(uint16_t rs, uint16_t spr) +{ + uint32_t temp = ((spr & 0x03FF) << (31 - 20)); + + uint32_t inst; + inst = MTSPR_OP; + inst |= rs << (31 - 10); + inst |= (temp & 0x0000F800) << 5; // Perform swizzle + inst |= (temp & 0x001F0000) >> 5; // Perform swizzle + return inst; +} + +static inline uint32_t ppc_bctr(void) +{ + uint32_t inst; + inst = BCCTR_OP; + inst |= 20 << (31 - 10); // BO + return inst; +} + +static inline uint32_t ppc_b(uint32_t target_addr) +{ + uint32_t inst; + inst = BR_OP; + inst |= (target_addr & 0x03FFFFFF); + return inst; +} + +#endif /* CPU_PPC64_OPS_H */ From 1e281e7f2023d194f19616148896043c6ea6bbb5 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 17 May 2022 19:38:30 +0300 Subject: [PATCH 087/213] soc/power9/homer.c: improve PBA reset code Also touches setting PU_OCB_OCI_OCCFLG2_CLEAR, which was added in the same earlier commit and is similarly undocumented. Change-Id: Iecd371f8f72367c15db6806899a21f50d3fae2ce Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 139 ++++++++++++++++++++++++++++++++----- 1 file changed, 120 insertions(+), 19 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 7ce0f521efc..c6d549dfcb9 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -47,6 +47,10 @@ enum scom_section { STOP_SECTION_L3, }; +/* Undocumented */ +#define PU_OCB_OCI_OCCFLG2_CLEAR 0x0006C18B +#define PU_PBAXCFG_SCOM 0x00068021 + struct ring_data { void *rings_buf; void *work_buf1; @@ -576,6 +580,117 @@ static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe, hdr->aux_controls = 1 << 24; } +static void pba_slave_setup_runtime_phase(void) +{ + enum { + OCI_MASTER_ID_GPE2 = 0x2, + OCI_MASTER_ID_GPE3 = 0x3, + OCI_MASTER_ID_ICU = 0x5, + OCI_MASTER_ID_PGPE = OCI_MASTER_ID_GPE2, + OCI_MASTER_ID_SGPE = OCI_MASTER_ID_GPE3, + OCI_MASTER_ID_MASK_ALL = 0x7, + + PBA_READ_TTYPE_CL_RD_NC = 0x0, /// Cache line read + PBA_WRITE_GATHER_TIMEOUT_2_PULSES = 0x4, + PBA_READ_PREFETCH_NONE = 0x1, /// No prefetch + PBA_WRITE_TTYPE_DMA_PR_WR = 0x0, /// DMA Partial Write + + /* Values for PBA Mode register fields */ + PBA_OCI_REGION = 0x2, + PBA_BCE_OCI_TRANSACTION_64_BYTES = 0x1, + + PU_PBAMODE_SCOM = 0x00068000, + PU_PBASLVCTL0_SCOM = 0x00068004, + PU_PBASLVCTL1_SCOM = 0x00068005, + PU_PBASLVCTL2_SCOM = 0x00068006, + }; + + uint64_t data; + + /* + * Set the PBA_MODECTL register. It's not yet clear how PBA BCE + * transaction size will affect performance - for now we go with the + * largest size. The HTM marker space is enabled and configured. Slave + * fairness is enabled. 
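(For reading the field comments below: PPC_BIT(n) uses IBM big-endian bit
+	 * numbering, with bit 0 as the MSB of the 64-bit register, and
+	 * PPC_PLACE(v, pos, len) is assumed to place v into the len-bit field
+	 * starting at big-endian bit pos.)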
The setting 'dis_slvmatch_order' ensures that PBA + * will correctly flush write data before allowing a read of the same + * address from a different master on a different slave. The second write + * buffer is enabled. + */ + + data = 0; + data |= PPC_PLACE(PBA_OCI_REGION, 16, 2); // pba_region + data |= PPC_PLACE(PBA_BCE_OCI_TRANSACTION_64_BYTES, 21, 2); // bcde_ocitrans + data |= PPC_PLACE(PBA_BCE_OCI_TRANSACTION_64_BYTES, 23, 2); // bcue_ocitrans + data |= PPC_BIT(8); // en_marker_ack + data |= PPC_PLACE(0x7, 18, 3); // oci_marker_space + data |= PPC_BIT(27); // en_slv_fairness + data |= PPC_BIT(10); // en_second_wrbuf + + write_scom(PU_PBAMODE_SCOM, data); + + /* + * Slave 0 (SGPE STOP). This is a read/write slave in the event that + * the STOP function needs to write to memory. + */ + + data = 0; + data |= PPC_BIT(0); // enable + data |= PPC_PLACE(OCI_MASTER_ID_SGPE, 1, 3); // mid_match_value + data |= PPC_PLACE(OCI_MASTER_ID_MASK_ALL, 5, 3); // mid_care_mask + data |= PPC_PLACE(PBA_READ_TTYPE_CL_RD_NC, 15, 1); // read_ttype + data |= PPC_PLACE(PBA_READ_PREFETCH_NONE, 16, 2); // read_prefetch_ctl + data |= PPC_PLACE(PBA_WRITE_TTYPE_DMA_PR_WR, 8, 3); // write_ttype + data |= PPC_BIT(20); // buf_alloc_a + data |= PPC_BIT(21); // buf_alloc_b + data |= PPC_BIT(22); // buf_alloc_c + data |= PPC_BIT(19); // buf_alloc_w + + write_scom(PU_PBASLVCTL0_SCOM, data); + + /* + * Slave 1 (GPE 1, PPC405 booting). This is a read/write slave. Write gathering is + * allowed, but with the shortest possible timeout. + */ + + data = 0; + data |= PPC_BIT(0); // enable + data |= PPC_PLACE(OCI_MASTER_ID_ICU, 1, 3); // mid_match_value + data |= PPC_PLACE(OCI_MASTER_ID_ICU, 5, 3); // mid_care_mask + data |= PPC_PLACE(PBA_READ_TTYPE_CL_RD_NC, 15, 1); // read_ttype + data |= PPC_PLACE(PBA_READ_PREFETCH_NONE, 16, 2); // read_prefetch_ctl + data |= PPC_PLACE(PBA_WRITE_TTYPE_DMA_PR_WR, 8, 3); // write_ttype + data |= PPC_PLACE(PBA_WRITE_GATHER_TIMEOUT_2_PULSES, 25, 3); // wr_gather_timeout + data |= PPC_BIT(20); // buf_alloc_a + data |= PPC_BIT(21); // buf_alloc_b + data |= PPC_BIT(22); // buf_alloc_c + data |= PPC_BIT(19); // buf_alloc_w + + write_scom(PU_PBASLVCTL1_SCOM, data); + + /* + * Slave 2 (PGPE Boot, Pstates/WOF). This is a read/write slave. Write gathering is + * allowed, but with the shortest possible timeout. This slave is + * effectively disabled soon after IPL. + */ + + data = 0; + data |= PPC_BIT(0); // enable + data |= PPC_PLACE(OCI_MASTER_ID_PGPE, 1, 3); // mid_match_value + data |= PPC_PLACE(OCI_MASTER_ID_MASK_ALL, 5, 3); // mid_care_mask + data |= PPC_PLACE(PBA_READ_TTYPE_CL_RD_NC, 15, 1); // read_ttype + data |= PPC_PLACE(PBA_READ_PREFETCH_NONE, 16, 2); // read_prefetch_ctl + data |= PPC_PLACE(PBA_WRITE_TTYPE_DMA_PR_WR, 8, 3); // write_ttype + data |= PPC_PLACE(PBA_WRITE_GATHER_TIMEOUT_2_PULSES, 25, 3); // wr_gather_timeout + data |= PPC_BIT(20); // buf_alloc_a + data |= PPC_BIT(21); // buf_alloc_b + data |= PPC_BIT(22); // buf_alloc_c + data |= PPC_BIT(19); // buf_alloc_w + + write_scom(PU_PBASLVCTL2_SCOM, data); + + /* Slave 3 is not modified by this function, because it is owned by SBE */ +} + static void pba_reset(void) { long time; @@ -674,28 +789,14 @@ static void pba_reset(void) write_scom(0x0501284B, PPC_BIT(38)); /* - *0x00068021 // undocumented, PU_PBAXCFG_SCOM + *0x00068021 // Undocumented, PU_PBAXCFG_SCOM [all] 0 [2] 1 // PBAXCFG_SND_RESET? [3] 1 // PBAXCFG_RCV_RESET? */ - write_scom(0x00068021, PPC_BIT(2) | PPC_BIT(3)); - - /* - * The following registers are undocumented. 
Their fields can be decoded - * from hostboot, but the values are always the same, so why bother... - */ - /* Set the PBA_MODECTL register */ - write_scom(0x00068000, 0x00A0BA9000000000); - - /* Slave 0 (SGPE and OCC boot) */ - write_scom(0x00068004, 0xB7005E0000000000); - - /* Slave 1 (405 ICU/DCU) */ - write_scom(0x00068005, 0xD5005E4000000000); + write_scom(PU_PBAXCFG_SCOM, PPC_BIT(2) | PPC_BIT(3)); - /* Slave 2 (PGPE Boot) */ - write_scom(0x00068006, 0xA7005E4000000000); + pba_slave_setup_runtime_phase(); } static void stop_gpe_init(struct homer_st *homer) @@ -1917,11 +2018,11 @@ void build_homer_image(void *homer_bar) write_scom(0x00066000, PPC_SHIFT(0x1, 3) | PPC_SHIFT(0xA, 7)); /* Clear error injection bits - *0x0006C18B // undocumented, PU_OCB_OCI_OCCFLG2_CLEAR + *0x0006C18B // Undocumented, PU_OCB_OCI_OCCFLG2_CLEAR [all] 0 [30] 1 // OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ */ - write_scom(0x0006C18B, PPC_BIT(30)); + write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); // Boot the STOP GPE stop_gpe_init(homer); From 13a21a9c0910c2b62bbae98929d81c086ca448d6 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 17 May 2022 21:08:30 +0300 Subject: [PATCH 088/213] soc/power9/occ.c: implement OCC activation Change-Id: I4d97f0fa6c465d0792597c99224c8d95483c7a44 Signed-off-by: Sergii Dmytruk Signed-off-by: Krystian Hebel --- src/include/cpu/power/occ.h | 6 +- src/soc/ibm/power9/homer.c | 23 + src/soc/ibm/power9/homer.h | 3 + src/soc/ibm/power9/occ.c | 972 +++++++++++++++++++++++++++++++++++- 4 files changed, 1000 insertions(+), 4 deletions(-) diff --git a/src/include/cpu/power/occ.h b/src/include/cpu/power/occ.h index 7f29a94c1d2..49fb6231ec9 100644 --- a/src/include/cpu/power/occ.h +++ b/src/include/cpu/power/occ.h @@ -5,11 +5,13 @@ #include -void read_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length); -void write_occ_command(uint64_t write_data); +struct homer_st; + void clear_occ_special_wakeups(uint64_t cores); void special_occ_wakeup_disable(uint64_t cores); void occ_start_from_mem(void); +/* Moves OCC to active state */ +void activate_occ(struct homer_st *homer); void pm_occ_fir_init(void); void pm_pba_fir_init(void); diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index c6d549dfcb9..f19f3d35311 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -1782,6 +1783,28 @@ static void update_headers(struct homer_st *homer, uint64_t cores) pgpe_hdr->magic = 0x504750455f312e30; // PGPE_1.0 } +const struct voltage_bucket_data * get_voltage_data(void) +{ + const struct voltage_kwd *voltage = NULL; + const struct voltage_bucket_data *bucket = NULL; + + uint8_t i = 0; + + /* Using LRP0 because frequencies are the same in all LRP records */ + voltage = mvpd_get_voltage_data(0); + + for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { + bucket = &voltage->buckets[i]; + if (bucket->powerbus.freq != 0) + break; + } + + if (bucket == NULL) + die("Failed to find a valid voltage data bucket.\n"); + + return bucket; +} + /* * This logic is for SMF disabled only! 
*/ diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 1c190bdd090..80cd4bfa2b7 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -329,7 +329,10 @@ check_member(homer_st, ppmr, 3 * MiB); #define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2*(ex), 2*(ex) + 1))) #define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) +struct voltage_bucket_data; + void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores); void configure_xive(int tgt_core); +const struct voltage_bucket_data * get_voltage_data(void); #endif /* __SOC_IBM_POWER9_HOMER_H */ diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index c224b51d7db..0e65f69bb00 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -1,8 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include +#include +#include +#include // memset, memcpy +#include +#include #include "homer.h" #include "ops.h" @@ -28,6 +34,29 @@ #define PU_OCB_PIB_OCBCSR0_OR 0x0006D013 #define PU_OCB_PIB_OCBCSR0_CLEAR 0x0006D012 +#define OCC_CMD_ADDR 0x000E0000 +#define OCC_RSP_ADDR 0x000E1000 + +#define OCC_CMD_POLL 0x00 +#define OCC_CMD_CLEAR_ERROR_LOG 0x12 +#define OCC_CMD_SET_STATE 0x20 +#define OCC_CMD_SETUP_CFG_DATA 0x21 +#define OCC_CMD_SET_POWER_CAP 0x22 + +#define OCC_RC_SUCCESS 0x00 +#define OCC_RC_INIT_FAILURE 0xE5 +#define OCC_RC_OCC_INIT_CHECKPOINT 0xE1 + +#define OCC_CFGDATA_FREQ_POINT 0x02 +#define OCC_CFGDATA_OCC_ROLE 0x03 +#define OCC_CFGDATA_APSS_CONFIG 0x04 +#define OCC_CFGDATA_MEM_CONFIG 0x05 +#define OCC_CFGDATA_PCAP_CONFIG 0x07 +#define OCC_CFGDATA_SYS_CONFIG 0x0F +#define OCC_CFGDATA_TCT_CONFIG 0x13 +#define OCC_CFGDATA_AVSBUS_CONFIG 0x14 +#define OCC_CFGDATA_GPU_CONFIG 0x15 + /* FIR register offset from base */ enum fir_offset { BASE_WAND_INCR = 1, @@ -39,6 +68,31 @@ enum fir_offset { ACTION1_INCR = 7 }; +struct occ_cfg_info { + const char *name; + void (*func)(struct homer_st *homer, uint8_t *data, uint16_t *size); +}; + +struct occ_poll_response { + uint8_t status; + uint8_t ext_status; + uint8_t occs_present; + uint8_t requested_cfg; + uint8_t state; + uint8_t mode; + uint8_t ips_status; + uint8_t error_id; + uint32_t error_address; + uint16_t error_length; + uint8_t error_source; + uint8_t gpu_cfg; + uint8_t code_level[16]; + uint8_t sensor[6]; + uint8_t num_blocks; + uint8_t version; + uint8_t sensor_data[]; // 4049 bytes +} __attribute__((packed)); + static void pm_ocb_setup(uint32_t ocb_bar) { write_scom(PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); @@ -104,13 +158,13 @@ static void write_occ_sram(uint32_t address, uint64_t *buffer, size_t data_lengt put_ocb_indirect(data_length / 8, address, buffer); } -void read_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length) +static void read_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length) { pm_ocb_setup(address); get_ocb_indirect(data_length / 8, address, buffer); } -void write_occ_command(uint64_t write_data) +static void write_occ_command(uint64_t write_data) { check_ocb_mode(PU_OCB_PIB_OCBCSR1_RO, PU_OCB_OCI_OCBSHCS1_SCOM); write_scom(PU_OCB_PIB_OCBDR1, write_data); @@ -199,6 +253,920 @@ void occ_start_from_mem(void) write_scom(PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); } +/* Wait for OCC to reach communications checkpoint */ +static void wait_for_occ_checkpoint(void) +{ + enum { + /* Wait up to 15 seconds for OCC to be ready (1500 * 10ms = 15s) */ + US_BETWEEN_READ = 10000, + 
READ_RETRY_LIMIT = 1500, + + OCC_COMM_INIT_COMPLETE = 0x0EFF, + OCC_INIT_FAILURE = 0xE000, + + OCC_RSP_SRAM_ADDR = 0xFFFBF000, + }; + + int retry_count = 0; + + while (retry_count++ < READ_RETRY_LIMIT) { + uint8_t response[8] = { 0x0 }; + uint8_t status; + uint16_t checkpoint; + + udelay(US_BETWEEN_READ); + + /* Read SRAM response buffer to check for OCC checkpoint */ + read_occ_sram(OCC_RSP_SRAM_ADDR, (uint64_t *)response, sizeof(response)); + + /* Pull status from response (byte 2) */ + status = response[2]; + + /* Pull checkpoint from response (bytes 6-7) */ + checkpoint = (response[6] << 8) | response[7]; + + if (status == OCC_RC_OCC_INIT_CHECKPOINT && + checkpoint == OCC_COMM_INIT_COMPLETE) + /* Success */ + return; + + if ((checkpoint & OCC_INIT_FAILURE) == OCC_INIT_FAILURE || + status == OCC_RC_INIT_FAILURE) + die("OCC initialization has failed\n"); + } + + die("Waiting for OCC initialization checkpoint has timed out.\n"); +} + +static void build_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, uint8_t seq_num, + const uint8_t *data, uint16_t data_len) +{ + uint8_t *cmd_buf = &homer->occ_host_area[OCC_CMD_ADDR]; + uint16_t cmd_len = 0; + uint16_t checksum = 0; + uint16_t i = 0; + + cmd_buf[cmd_len++] = seq_num; + cmd_buf[cmd_len++] = occ_cmd; + cmd_buf[cmd_len++] = (data_len >> 8) & 0xFF; + cmd_buf[cmd_len++] = data_len & 0xFF; + memcpy(&cmd_buf[cmd_len], data, data_len); + cmd_len += data_len; + + for (i = 0; i < cmd_len; ++i) + checksum += cmd_buf[i]; + cmd_buf[cmd_len++] = (checksum >> 8) & 0xFF; + cmd_buf[cmd_len++] = checksum & 0xFF; + + /* + * When the P8 processor writes to memory (such as the HOMER) there is + * no certainty that the writes happen in order or that they have + * actually completed by the time the instructions complete. 'sync' + * is a memory barrier to ensure the HOMER data has actually been made + * consistent with respect to memory, so that if the OCC were to read + * it they would see all of the data. Otherwise, there is potential + * for them to get stale or incomplete data. + */ + asm volatile("sync" ::: "memory"); +} + +static void wait_for_occ_response(struct homer_st *homer, uint32_t timeout_sec, + uint8_t seq_num) +{ + enum { + /* + * With two CPUs OCC polls were failing with this set to 10 or 20 us. + * Apparently, checks performed by the code might not guarantee + * that poll data is available in full (checksum doesn't match). + * + * With one CPU wait_for_occ_status() reports OCC is asking for PCAP + * configuration data if *this* delay (not the one in wait_for_occ_status) + * is small (50 us or smaller), 100 us seems fine. + * + * Something is wrong with synchronization and huge delays in Hostboot + * might be hiding the issue. + */ + OCC_RSP_SAMPLE_TIME_US = 100, + OCC_COMMAND_IN_PROGRESS = 0xFF, + }; + + const uint8_t *rsp_buf = &homer->occ_host_area[OCC_RSP_ADDR]; + + long timeout_us = timeout_sec * USECS_PER_SEC; + if (timeout_sec == 0) + timeout_us = OCC_RSP_SAMPLE_TIME_US; + + while (timeout_us >= 0) { + /* + * 1. When OCC receives the command, it will set the status to + * COMMAND_IN_PROGRESS. + * 2. When the response is ready OCC will update the full + * response buffer (except the status) + * 3. 
The status field is updated last to indicate response ready + * + * Note: Need to check the sequence number to be sure we are + * processing the expected response + */ + if (rsp_buf[2] != OCC_COMMAND_IN_PROGRESS && rsp_buf[0] == seq_num) { + /* + * Need an 'isync' here to ensure that previous instructions + * have completed before the code continues on. This is a type + * of read-barrier. Without this the processor can do + * speculative reads of the HOMER data and you can actually + * get stale data as part of the instructions that happen + * afterwards. Another 'weak consistency' issue. + */ + asm volatile("isync" ::: "memory"); + + /* OCC must have processed the command */ + break; + } + + if (timeout_us > 0) { + /* Delay before the next check */ + long sleep_us = OCC_RSP_SAMPLE_TIME_US; + if (timeout_us < sleep_us) + sleep_us = timeout_us; + + udelay(sleep_us); + timeout_us -= sleep_us; + } else { + /* Time expired */ + die("Timed out while waiting for OCC response\n"); + } + } +} + +static bool parse_occ_response(struct homer_st *homer, uint8_t occ_cmd, + uint8_t *status, uint8_t *seq_num, + uint8_t *response, uint32_t *response_len) +{ + uint16_t index = 0; + uint16_t data_len = 0; + uint16_t checksum = 0; + uint16_t i = 0; + + const uint8_t *rsp_buf = &homer->occ_host_area[OCC_RSP_ADDR]; + + *seq_num = rsp_buf[index++]; + index += 1; /* command */ + *status = rsp_buf[index++]; + + data_len = *(uint16_t *)&rsp_buf[index]; + index += 2; + + if (data_len > 0) { + uint16_t copy_size = data_len; + if (copy_size > *response_len) + copy_size = *response_len; + + memcpy(response, &rsp_buf[index], copy_size); + *response_len = copy_size; + + index += data_len; + } + + for (i = 0; i < index; ++i) + checksum += rsp_buf[i]; + + if (checksum != *(uint16_t *)&rsp_buf[index]) { + printk(BIOS_WARNING, "OCC response for 0x%02x has invalid checksum\n", + occ_cmd); + return false; + } + + if (*status != OCC_RC_SUCCESS) { + printk(BIOS_WARNING, "0x%02x OCC command failed with an error code: 0x%02x\n", + occ_cmd, *status); + return false; + } + + return true; +} + +static bool write_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, + const uint8_t *data, uint16_t data_len, + uint8_t *response, uint32_t *response_len) +{ + static uint8_t cmd_seq_num; + + uint8_t status = 0; + uint8_t rsp_seq_num = 0; + + ++cmd_seq_num; + /* Do not use 0 for sequence number */ + if (cmd_seq_num == 0) + ++cmd_seq_num; + + build_occ_cmd(homer, occ_cmd, cmd_seq_num, data, data_len); + /* Sender: HTMGT; command: Command Write Attention */ + write_occ_command(0x1001000000000000); + + /* Wait for OCC to process command and send response (timeout is the + same for all commands) */ + wait_for_occ_response(homer, 20, cmd_seq_num); + + if (!parse_occ_response(homer, occ_cmd, &status, &rsp_seq_num, response, + response_len)) { + /* Statuses of 0xE0-EF are reserved for OCC exceptions */ + if ((status & 0xF0) == 0xE0) { + printk(BIOS_WARNING, + "OCC exception occurred while running 0x%02x command\n", + occ_cmd); + } + + printk(BIOS_WARNING, "Received OCC response:\n"); + hexdump(response, *response_len); + printk(BIOS_WARNING, "Failed to parse OCC response\n"); + return false; + } + + if (rsp_seq_num != cmd_seq_num) { + printk(BIOS_WARNING, + "Received OCC response for a wrong command while running 0x%02x\n", + occ_cmd); + return false; + } + + return true; +} + +static void send_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, + const uint8_t *data, uint16_t data_len, + uint8_t *response, uint32_t *response_len) +{ + enum { MAX_TRIES 
= 2 }; + + uint8_t i = 0; + + for (i = 0; i < MAX_TRIES; ++i) { + if (write_occ_cmd(homer, occ_cmd, data, data_len, response, response_len)) + break; + + if (i < MAX_TRIES - 1) + printk(BIOS_WARNING, "Retrying running OCC command 0x%02x\n", occ_cmd); + } + + if (i == MAX_TRIES) + die("Failed running OCC command 0x%02x %d times\n", occ_cmd, MAX_TRIES); +} + +/* Reports OCC error to the user and clears it on OCC's side */ +static void handle_occ_error(struct homer_st *homer, + const struct occ_poll_response *response) +{ + static uint8_t error_log_buf[4096]; + + uint16_t error_length = response->error_length; + + const uint8_t clear_log_data[4] = { + 0x01, // Version + response->error_id, + response->error_source, + 0x00 // Reserved + }; + uint32_t response_len = 0; + + if (error_length > sizeof(error_log_buf)) { + printk(BIOS_WARNING, "Truncating OCC error log from %d to %ld bytes\n", + error_length, sizeof(error_log_buf)); + error_length = sizeof(error_log_buf); + } + + read_occ_sram(response->error_address, (uint64_t *)error_log_buf, error_length); + + printk(BIOS_WARNING, "OCC error log:\n"); + hexdump(error_log_buf, error_length); + + /* Confirm to OCC that we've read the log */ + send_occ_cmd(homer, OCC_CMD_CLEAR_ERROR_LOG, + clear_log_data, sizeof(clear_log_data), + NULL, &response_len); +} + +static void poll_occ(struct homer_st *homer, bool flush_all_errors, + struct occ_poll_response *response) +{ + enum { OCC_POLL_DATA_MIN_SIZE = 40 }; + + uint8_t max_more_errors = 10; + while (true) { + const uint8_t poll_data[1] = { 0x20 /*version*/ }; + uint32_t response_len = sizeof(*response); + + send_occ_cmd(homer, OCC_CMD_POLL, poll_data, sizeof(poll_data), + (uint8_t *)response, &response_len); + + if (response_len < OCC_POLL_DATA_MIN_SIZE) + die("Invalid data length"); + + if (!flush_all_errors) + break; + + if (response->error_id == 0) + break; + + handle_occ_error(homer, response); + + --max_more_errors; + if (max_more_errors == 0) { + printk(BIOS_WARNING, "Last OCC poll response:\n"); + hexdump(response, response_len); + die("Hit too many errors on polling OCC\n"); + } + } +} + +static void get_freq_point_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_FREQ_POINT_VERSION = 0x20 }; + OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; + + const struct voltage_bucket_data *bucket = get_voltage_data(); + + uint16_t index = 0; + uint16_t min_freq = 0; + + data[index++] = OCC_CFGDATA_FREQ_POINT; + data[index++] = OCC_CFGDATA_FREQ_POINT_VERSION; + + /* Nominal Frequency in MHz */ + memcpy(&data[index], &bucket->nominal.freq, 2); + index += 2; + + /* Turbo Frequency in MHz */ + memcpy(&data[index], &bucket->turbo.freq, 2); + index += 2; + + /* Minimum Frequency in MHz */ + min_freq = oppb->frequency_min_khz / 1000; + memcpy(&data[index], &min_freq, 2); + index += 2; + + /* Ultra Turbo Frequency in MHz */ + memcpy(&data[index], &bucket->ultra_turbo.freq, 2); + index += 2; + + /* Reserved (Static Power Save in PowerVM) */ + memset(&data[index], 0, 2); + index += 2; + + /* Reserved (FFO in PowerVM) */ + memset(&data[index], 0, 2); + index += 2; + + *size = index; +} + +static void get_occ_role_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { OCC_ROLE_MASTER = 0x01 }; + + data[0] = OCC_CFGDATA_OCC_ROLE; + data[1] = OCC_ROLE_MASTER; + + *size = 2; +} + +static void get_apss_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_APSS_VERSION = 0x20 }; + + /* ATTR_APSS_GPIO_PORT_PINS */ + 
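/* Note: this table and the three after it are zero-filled; the full
+	 * 16-channel APSS/ADC layout is still sent to the OCC below. */
+	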
uint8_t function[16] = { 0x0 }; + + /* ATTR_ADC_CHANNEL_GNDS */ + uint8_t ground[16] = { 0x0 }; + + /* ATTR_ADC_CHANNEL_GAINS */ + uint32_t gain[16] = { 0x0 }; + + /* ATTR_ADC_CHANNEL_OFFSETS */ + uint32_t offset[16] = { 0x0 }; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_APSS_CONFIG; + data[index++] = OCC_CFGDATA_APSS_VERSION; + data[index++] = 0; + data[index++] = 0; + + for (uint64_t channel = 0; channel < sizeof(function); ++channel) { + data[index++] = function[channel]; // ADC Channel assignment + + memset(&data[index], 0, sizeof(uint32_t)); // Sensor ID + index += 4; + + data[index++] = ground[channel]; // Ground Select + + memcpy(&data[index], &gain[channel], sizeof(uint32_t)); + index += 4; + + memcpy(&data[index], &offset[channel], sizeof(uint32_t)); + index += 4; + } + + /* ATTR_APSS_GPIO_PORT_MODES */ + uint8_t gpio_mode[2] = { 0x0 }; + /* ATTR_APSS_GPIO_PORT_PINS */ + uint8_t gpio_pin[16] = { 0x0 }; + + uint64_t pins_per_port = sizeof(gpio_pin) / sizeof(gpio_mode); + uint64_t pin_idx = 0; + + for (uint64_t port = 0; port < sizeof(gpio_mode); ++port) { + data[index++] = gpio_mode[port]; + data[index++] = 0; + + memcpy(&data[index], gpio_pin + pin_idx, pins_per_port); + index += pins_per_port; + + pin_idx += pins_per_port; + } + + *size = index; +} + +static void get_mem_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_MEM_CONFIG_VERSION = 0x21 }; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_MEM_CONFIG; + data[index++] = OCC_CFGDATA_MEM_CONFIG_VERSION; + + /* If OPAL then no "Power Control Default" support */ + + /* Byte 3: Memory Power Control Default */ + data[index++] = 0xFF; + /* Byte 4: Idle Power Memory Power Control */ + data[index++] = 0xFF; + + /* Byte 5: Number of data sets */ + data[index++] = 0; // Monitoring is disabled + + *size = index; +} + +static void add_sensor_id(uint8_t *data, uint16_t *index, uint32_t sensor_id) +{ + data[*index + 0] = sensor_id >> 24 & 0xFF; + data[*index + 1] = sensor_id >> 16 & 0xFF; + data[*index + 2] = sensor_id >> 8 & 0xFF; + data[*index + 3] = sensor_id >> 0 & 0xFF; + *index += 4; +} + +/* + * Sensors IDs listed here are valid for Talos II. Values come from talos.xml + * and may or may not be different for other boards. 
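+ * The IDs are passed through unchanged to the OCC by get_sys_cfg_msg_data()
+ * below, so porting to another board should only require updating these
+ * defines.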
+ */ +#define PROC_CALLOUT_ID 0x08 +#define CORE0_TEMP_ID 0x5B +#define CORE0_FREQ_ID 0xA0 +#define BACKPLANE_CALLOUT_ID 0x8C +#define APSS_CALLOUT_ID 0x93 +/* Same as Backplane Callout ID */ +#define VRM_VDD_CALLOUT_ID 0x8C +#define VRM_VDD_TEMP_ID 0xFF + +static void get_sys_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { + OCC_CFGDATA_SYS_CONFIG_VERSION = 0x21, + + /* KVM or OPAL mode + single node */ + OCC_CFGDATA_OPENPOWER_OPALVM = 0x81, + + OCC_CFGDATA_NON_REDUNDANT_PS = 0x02, + OCC_REPORT_THROTTLE_BELOW_NOMINAL = 0x08, + }; + + uint8_t system_type = OCC_CFGDATA_OPENPOWER_OPALVM; + uint16_t index = 0; + int i = 0; + + data[index++] = OCC_CFGDATA_SYS_CONFIG; + data[index++] = OCC_CFGDATA_SYS_CONFIG_VERSION; + + /* System Type */ + + /* ATTR_REPORT_THROTTLE_BELOW_NOMINAL == 0 */ + + /* 0 = OCC report throttling when max frequency lowered below turbo */ + system_type &= ~OCC_REPORT_THROTTLE_BELOW_NOMINAL; + /* Power supply policy is redundant */ + system_type &= ~OCC_CFGDATA_NON_REDUNDANT_PS; + data[index++] = system_type; + + /* Processor Callout Sensor ID */ + add_sensor_id(data, &index, PROC_CALLOUT_ID); + + /* Next 24*2 IDs are for core sensors */ + for (i = 0; i < MAX_CORES_PER_CHIP; ++i) { + /* Core Temp Sensor ID */ + add_sensor_id(data, &index, CORE0_TEMP_ID + i); + + /* Core Frequency Sensor ID */ + add_sensor_id(data, &index, CORE0_FREQ_ID + i); + } + + /* Backplane Callout Sensor ID */ + add_sensor_id(data, &index, BACKPLANE_CALLOUT_ID); + + /* APSS Callout Sensor ID */ + add_sensor_id(data, &index, APSS_CALLOUT_ID); + + /* Format 21 - VRM VDD Callout Sensor ID */ + add_sensor_id(data, &index, VRM_VDD_CALLOUT_ID); + + /* Format 21 - VRM VDD Temperature Sensor ID */ + add_sensor_id(data, &index, VRM_VDD_TEMP_ID); + + *size = index; +} + +static void get_thermal_ctrl_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { + OCC_CFGDATA_TCT_CONFIG_VERSION = 0x20, + + CFGDATA_FRU_TYPE_PROC = 0x00, + CFGDATA_FRU_TYPE_MEMBUF = 0x01, + CFGDATA_FRU_TYPE_DIMM = 0x02, + CFGDATA_FRU_TYPE_VRM = 0x03, + CFGDATA_FRU_TYPE_GPU_CORE = 0x04, + CFGDATA_FRU_TYPE_GPU_MEMORY = 0x05, + CFGDATA_FRU_TYPE_VRM_VDD = 0x06, + + OCC_NOT_DEFINED = 0xFF, + }; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_TCT_CONFIG; + data[index++] = OCC_CFGDATA_TCT_CONFIG_VERSION; + + /* Processor Core Weight, ATTR_OPEN_POWER_PROC_WEIGHT, from talos.xml */ + data[index++] = 9; + + /* Processor Quad Weight, ATTR_OPEN_POWER_QUAD_WEIGHT, from talos.xml */ + data[index++] = 1; + + /* Data sets following (proc, DIMM, etc.), and each will get a FRU type, + DVS temp, error temp and max read timeout */ + data[index++] = 5; + + /* + * Note: Bytes 4 and 5 of each data set represent the PowerVM DVFS and ERROR + * Resending the regular DVFS and ERROR for now. 
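+	 * Each data set below is 6 bytes: FRU type, DVFS/throttle trip temperature
+	 * (deg C), error trip temperature (deg C), PowerVM DVFS, PowerVM error and
+	 * read timeout (seconds); OCC_NOT_DEFINED (0xFF) marks a field as not used.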
+ */ + + /* Processor */ + data[index++] = CFGDATA_FRU_TYPE_PROC; + data[index++] = 85; // DVFS, ATTR_OPEN_POWER_PROC_DVFS_TEMP_DEG_C, from talos.xml + data[index++] = 95; // ERROR, ATTR_OPEN_POWER_PROC_ERROR_TEMP_DEG_C, from talos.xml + data[index++] = OCC_NOT_DEFINED; // PM_DVFS + data[index++] = OCC_NOT_DEFINED; // PM_ERROR + data[index++] = 5; // ATTR_OPEN_POWER_PROC_READ_TIMEOUT_SEC, from talos.xml + + /* DIMM */ + data[index++] = CFGDATA_FRU_TYPE_DIMM; + data[index++] = 84; // DVFS, ATTR_OPEN_POWER_DIMM_THROTTLE_TEMP_DEG_C, from talos.xml + data[index++] = 84; // ERROR, ATTR_OPEN_POWER_DIMM_ERROR_TEMP_DEG_C, from talos.xml + data[index++] = OCC_NOT_DEFINED; // PM_DVFS + data[index++] = OCC_NOT_DEFINED; // PM_ERROR + data[index++] = 30; // TIMEOUT, ATTR_OPEN_POWER_DIMM_READ_TIMEOUT_SEC, from talos.xml + + /* VRM OT monitoring is disabled, because ATTR_OPEN_POWER_VRM_READ_TIMEOUT_SEC == 0 + (default) */ + + /* GPU Cores */ + data[index++] = CFGDATA_FRU_TYPE_GPU_CORE; + // DVFS + data[index++] = OCC_NOT_DEFINED; + // ERROR, ATTR_OPEN_POWER_GPU_ERROR_TEMP_DEG_C, not set + data[index++] = OCC_NOT_DEFINED; + // PM_DVFS + data[index++] = OCC_NOT_DEFINED; + // PM_ERROR + data[index++] = OCC_NOT_DEFINED; + // TIMEOUT, ATTR_OPEN_POWER_GPU_READ_TIMEOUT_SEC, default + data[index++] = OCC_NOT_DEFINED; + + /* GPU Memory */ + data[index++] = CFGDATA_FRU_TYPE_GPU_MEMORY; + data[index++] = OCC_NOT_DEFINED; // DVFS + // ERROR, ATTR_OPEN_POWER_GPU_MEM_ERROR_TEMP_DEG_C, not set + data[index++] = OCC_NOT_DEFINED; + // PM_DVFS + data[index++] = OCC_NOT_DEFINED; + // PM_ERROR + data[index++] = OCC_NOT_DEFINED; + // TIMEOUT, ATTR_OPEN_POWER_GPU_MEM_READ_TIMEOUT_SEC, not set + data[index++] = OCC_NOT_DEFINED; + + /* VRM Vdd */ + data[index++] = CFGDATA_FRU_TYPE_VRM_VDD; + // DVFS, ATTR_OPEN_POWER_VRM_VDD_DVFS_TEMP_DEG_C, default + data[index++] = OCC_NOT_DEFINED; + // ERROR, ATTR_OPEN_POWER_VRM_VDD_ERROR_TEMP_DEG_C, default + data[index++] = OCC_NOT_DEFINED; + // PM_DVFS + data[index++] = OCC_NOT_DEFINED; + // PM_ERROR + data[index++] = OCC_NOT_DEFINED; + // TIMEOUT, ATTR_OPEN_POWER_VRM_VDD_READ_TIMEOUT_SEC, default + data[index++] = OCC_NOT_DEFINED; + + *size = index; +} + +static void get_power_cap_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_PCAP_CONFIG_VERSION = 0x20 }; + + uint16_t index = 0; + + /* Values of the following attributes were taken from Hostboot's log */ + + /* Minimum HARD Power Cap (ATTR_OPEN_POWER_MIN_POWER_CAP_WATTS) */ + uint16_t min_pcap = 2000; + + /* Minimum SOFT Power Cap (ATTR_OPEN_POWER_SOFT_MIN_PCAP_WATTS) */ + uint16_t soft_pcap = 2000; + + /* Quick Power Drop Power Cap (ATTR_OPEN_POWER_N_BULK_POWER_LIMIT_WATTS) */ + uint16_t qpd_pcap = 2000; + + /* System Maximum Power Cap (ATTR_OPEN_POWER_N_PLUS_ONE_HPC_BULK_POWER_LIMIT_WATTS) */ + uint16_t max_pcap = 3000; + + data[index++] = OCC_CFGDATA_PCAP_CONFIG; + data[index++] = OCC_CFGDATA_PCAP_CONFIG_VERSION; + + memcpy(&data[index], &soft_pcap, 2); + index += 2; + + memcpy(&data[index], &min_pcap, 2); + index += 2; + + memcpy(&data[index], &max_pcap, 2); + index += 2; + + memcpy(&data[index], &qpd_pcap, 2); + index += 2; + + *size = index; +} + +static void get_avs_bus_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_AVSBUS_CONFIG_VERSION = 0x01 }; + + /* ATTR_NO_APSS_PROC_POWER_VCS_VIO_WATTS, from talos.xml */ + const uint16_t power_adder = 19; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_AVSBUS_CONFIG; + data[index++] = 
OCC_CFGDATA_AVSBUS_CONFIG_VERSION; + data[index++] = 0; // Vdd Bus, ATTR_VDD_AVSBUS_BUSNUM + data[index++] = 0; // Vdd Rail Sel, ATTR_VDD_AVSBUS_RAIL + data[index++] = 0xFF; // reserved + data[index++] = 0xFF; // reserved + data[index++] = 1; // Vdn Bus, ATTR_VDN_AVSBUS_BUSNUM, from talos.xml + data[index++] = 0; // Vdn Rail sel, ATTR_VDN_AVSBUS_RAIL, from talos.xml + + data[index++] = (power_adder >> 8) & 0xFF; + data[index++] = power_adder & 0xFF; + + /* ATTR_VDD_CURRENT_OVERFLOW_WORKAROUND_ENABLE == 0 */ + + *size = index; +} + +static void get_power_data(struct homer_st *homer, uint16_t *power_max, uint16_t *power_drop) +{ + const struct voltage_bucket_data *bucket = get_voltage_data(); + + /* All processor chips (do not have to be functional) */ + const uint8_t num_procs = 2; // from Hostboot log + + const uint16_t proc_socket_power = 250; // ATTR_PROC_SOCKET_POWER_WATTS, default + const uint16_t misc_power = 0; // ATTR_MISC_SYSTEM_COMPONENTS_MAX_POWER_WATTS, default + + const uint16_t mem_power_min_throttles = 36; // from Hostboot log + const uint16_t mem_power_max_throttles = 23; // from Hostboot log + + /* + * Calculate Total non-GPU maximum power (Watts): + * Maximum system power excluding GPUs when CPUs are at maximum frequency + * (ultra turbo) and memory at maximum power (least throttled) plus + * everything else (fans...) excluding GPUs. + */ + *power_max = proc_socket_power * num_procs; + *power_max += mem_power_min_throttles + misc_power; + + OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; + uint16_t min_freq_mhz = oppb->frequency_min_khz / 1000; + const uint16_t mhz_per_watt = 28; // ATTR_PROC_MHZ_PER_WATT, from talos.xml + /* Drop is always calculated from Turbo to Min (not ultra) */ + uint32_t proc_drop = (bucket->turbo.freq - min_freq_mhz) / mhz_per_watt; + proc_drop *= num_procs; + const uint16_t memory_drop = mem_power_min_throttles - mem_power_max_throttles; + + *power_drop = proc_drop + memory_drop; +} + +static void get_gpu_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +{ + enum { + OCC_CFGDATA_GPU_CONFIG_VERSION = 0x01, + MAX_GPUS = 3, + }; + + uint16_t power_max = 0; + uint16_t power_drop = 0; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_GPU_CONFIG; + data[index++] = OCC_CFGDATA_GPU_CONFIG_VERSION; + + get_power_data(homer, &power_max, &power_drop); + + memcpy(&data[index], &power_max, 2); // Total non-GPU max power (W) + index += 2; + + memcpy(&data[index], &power_drop, 2); // Total proc/mem power drop (W) + index += 2; + data[index++] = 0; // reserved + data[index++] = 0; // reserved + + /* No sensors ID. Might require OBus or just be absent. 
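All three per-GPU sensor ID tables below are therefore sent as zeros (assuming a zero ID marks the sensor as not present).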
*/ + uint32_t gpu_func_sensors[MAX_GPUS] = {0}; + uint32_t gpu_temp_sensors[MAX_GPUS] = {0}; + uint32_t gpu_memtemp_sensors[MAX_GPUS] = {0}; + + /* GPU0 */ + memcpy(&data[index], &gpu_temp_sensors[0], 4); + index += 4; + memcpy(&data[index], &gpu_memtemp_sensors[0], 4); + index += 4; + memcpy(&data[index], &gpu_func_sensors[0], 4); + index += 4; + + /* GPU1 */ + memcpy(&data[index], &gpu_temp_sensors[1], 4); + index += 4; + memcpy(&data[index], &gpu_memtemp_sensors[1], 4); + index += 4; + memcpy(&data[index], &gpu_func_sensors[1], 4); + index += 4; + + /* GPU2 */ + memcpy(&data[index], &gpu_temp_sensors[2], 4); + index += 4; + memcpy(&data[index], &gpu_memtemp_sensors[2], 4); + index += 4; + memcpy(&data[index], &gpu_func_sensors[2], 4); + index += 4; + + *size = index; +} + +static void send_occ_config_data(struct homer_st *homer) +{ + /* + * Order in which these are sent is important! + * Not every order works. + */ + struct occ_cfg_info cfg_info[] = { + { "System config", &get_sys_cfg_msg_data }, + { "APSS config", &get_apss_msg_data }, + { "OCC role", &get_occ_role_msg_data }, + { "Frequency points", &get_freq_point_msg_data }, + { "Memory config", &get_mem_cfg_msg_data }, + { "Power cap", &get_power_cap_msg_data }, + { "Thermal control", &get_thermal_ctrl_msg_data }, + { "AVS", &get_avs_bus_cfg_msg_data }, + { "GPU", &get_gpu_msg_data }, + }; + + uint8_t i; + + for (i = 0; i < ARRAY_SIZE(cfg_info); ++i) { + /* All our messages are short */ + uint8_t data[256]; + uint16_t data_len = 0; + uint32_t response_len = 0; + + /* Poll is sent between configuration packets to flush errors */ + struct occ_poll_response poll_response; + + cfg_info[i].func(homer, data, &data_len); + if (data_len > sizeof(data)) + die("Buffer for OCC data is too small!\n"); + + send_occ_cmd(homer, OCC_CMD_SETUP_CFG_DATA, data, data_len, NULL, + &response_len); + poll_occ(homer, /*flush_all_errors=*/false, &poll_response); + } +} + +static void send_occ_user_power_cap(struct homer_st *homer) +{ + /* No power limit */ + const uint8_t data[2] = { 0x00, 0x00 }; + uint32_t response_len = 0; + send_occ_cmd(homer, OCC_CMD_SET_POWER_CAP, data, sizeof(data), NULL, &response_len); +} + +static void wait_for_occ_status(struct homer_st *homer, uint8_t status_bit) +{ + enum { + MAX_POLLS = 200, + DELAY_BETWEEN_POLLS_US = 50000, + }; + + uint8_t num_polls = 0; + struct occ_poll_response poll_response; + + for (num_polls = 0; num_polls < MAX_POLLS; ++num_polls) { + poll_occ(homer, /*flush_all_errors=*/false, &poll_response); + if (poll_response.status & status_bit) + break; + + if (poll_response.requested_cfg != 0x00) { + die("OCC requests 0x%02x configuration data\n", + poll_response.requested_cfg); + } + + if (num_polls < MAX_POLLS) + udelay(DELAY_BETWEEN_POLLS_US); + } + + if (num_polls == MAX_POLLS) + die("Failed to wait until OCC has reached state 0x%02x\n", status_bit); +} + +static void set_occ_state(struct homer_st *homer, uint8_t state) +{ + struct occ_poll_response poll_response; + + /* Fields: version, state, reserved */ + const uint8_t data[3] = { 0x00, state, 0x00 }; + uint32_t response_len = 0; + + /* Send poll cmd to confirm comm has been established and flush old errors */ + poll_occ(homer, /*flush_all_errors=*/true, &poll_response); + + /* Try to switch to a new state */ + send_occ_cmd(homer, OCC_CMD_SET_STATE, data, sizeof(data), NULL, &response_len); + + /* Send poll to query state of all OCC and flush any errors */ + poll_occ(homer, /*flush_all_errors=*/true, &poll_response); + + if (poll_response.state != 
state) + die("State of OCC is 0x%02x instead of 0x%02x.\n", poll_response.state, state); +} + +static void set_occ_active_state(struct homer_st *homer) +{ + enum { + OCC_STATUS_ACTIVE_READY = 0x01, + OCC_STATE_ACTIVE = 0x03, + }; + + wait_for_occ_status(homer, OCC_STATUS_ACTIVE_READY); + set_occ_state(homer, OCC_STATE_ACTIVE); +} + +void activate_occ(struct homer_st *homer) +{ + struct occ_poll_response poll_response; + + /* Make sure OCCs are ready for communication */ + wait_for_occ_checkpoint(); + + /* Send initial poll to all OCCs to establish communication */ + poll_occ(homer, /*flush_all_errors=*/false, &poll_response); + + /* Send OCC's config data */ + send_occ_config_data(homer); + + /* Set the User PCAP */ + send_occ_user_power_cap(homer); + + /* Switch for OCC to active state */ + set_occ_active_state(homer); + + /* Hostboot sets active sensors for all OCCs here, so BMC can start + communication with OCCs. */ +} + void pm_occ_fir_init(void) { enum { From 3ad36dd4aa5908c9c4d10d62eba5c1e25cc04a0e Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 17 May 2022 21:16:45 +0300 Subject: [PATCH 089/213] soc/power9/homer.c: load OCC data into HOMER and start OCC Change-Id: I15ac1e958d2707a5f396bb7f94df76d09b72bc68 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 481 ++++++++++++++++++++++++++++++++++++- src/soc/ibm/power9/homer.h | 3 + 2 files changed, 482 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index f19f3d35311..f47e295659d 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include @@ -48,10 +50,55 @@ enum scom_section { STOP_SECTION_L3, }; +#define INIT_CONFIG_VALUE 0x8000000C09800000ull +#define QPMR_PROC_CONFIG_POS 0xBFC18 + /* Undocumented */ #define PU_OCB_OCI_OCCFLG2_CLEAR 0x0006C18B #define PU_PBAXCFG_SCOM 0x00068021 +/* Host configuration information passed from host to OCC */ +struct occ_host_config { + uint32_t version; // Version of this structure + + uint32_t nest_freq; // For computation of timebase frequency + + /* + * Interrupt type to the host: + * - 0x00000000 = FSI2HOST Mailbox + * - 0x00000001 = OCC interrupt line through PSIHB complex + */ + uint32_t interrupt_type; + + uint32_t is_fir_master; // If this OCC is the FIR master + + /* FIR collection configuration data needed by FIR Master OCC in the + * event of a checkstop */ + uint8_t firdataConfig[3072]; + + uint32_t is_smf_mode; // Whether SMF mode is enabled +}; + +/* Bit positions for various chiplets in host configuration vector */ +enum { + MCS_POS = 1, + MBA_POS = 9, // This is actually MCA_POS + MEM_BUF_POS = 17, + XBUS_POS = 25, + PHB_POS = 30, + CAPP_POS = 37, + OBUS_POS = 41, + ABUS_POS = 41, + NVLINK_POS = 45, + + OBUS_BRICK_0_POS = 0, + OBUS_BRICK_1_POS = 1, + OBUS_BRICK_2_POS = 2, + OBUS_BRICK_9_POS = 9, + OBUS_BRICK_10_POS = 10, + OBUS_BRICK_11_POS = 11, +}; + struct ring_data { void *rings_buf; void *work_buf1; @@ -1077,6 +1124,436 @@ static void istep_16_1(int this_core) // p9_stop_save_scom() and others } +/* Loads OCC Image from PNOR into HOMER */ +static void load_occ_image_to_homer(struct homer_st *homer) +{ + struct mmap_helper_region_device mdev = {0}; + + /* + * This will work as long as we don't call mmap(). mmap() calls + * mem_poll_alloc() which doesn't check if mdev->pool is valid or at least + * not NULL. + */ + mount_part_from_pnor("OCC", &mdev); + /* + * Common OCC area is located right after HOMER image. 
0x120000 is the + * size of OCC partition in PNOR, last 0x2000 bytes aren't important? + */ + rdev_readat(&mdev.rdev, &homer->occ_host_area, 0, 1 * MiB); +} + +/* Writes information about the host to be read by OCC */ +static void load_host_data_to_homer(struct homer_st *homer) +{ + enum { + OCC_HOST_DATA_VERSION = 0x00000090, + USE_PSIHB_COMPLEX = 0x00000001, + }; + + struct occ_host_config *config_data = + (void *)&homer->occ_host_area[HOMER_OFFSET_TO_OCC_HOST_DATA]; + + config_data->version = OCC_HOST_DATA_VERSION; + config_data->nest_freq = powerbus_cfg()->fabric_freq; + config_data->interrupt_type = USE_PSIHB_COMPLEX; + config_data->is_fir_master = false; + config_data->is_smf_mode = false; +} + +static void load_pm_complex(struct homer_st *homer) +{ + /* + * Hostboot resets OCC here, but we haven't started it yet, so reset + * shouldn't be necessary. + */ + + load_occ_image_to_homer(homer); + load_host_data_to_homer(homer); +} + +static void pm_corequad_init(uint64_t cores) +{ + enum { + EQ_QPPM_QPMMR_CLEAR = 0x100F0104, + EQ_QPPM_ERR = 0x100F0121, + EQ_QPPM_ERRMSK = 0x100F0122, + C_CPPM_CPMMR_CLEAR = 0x200F0107, + C_CPPM_ERR = 0x200F0121, + C_CPPM_CSAR_CLEAR = 0x200F0139, + C_CPPM_ERRMSK = 0x200F0122, + DOORBELLS_COUNT = 4, + }; + + const uint64_t CME_DOORBELL_CLEAR[DOORBELLS_COUNT] = { + 0x200F0191, 0x200F0195, 0x200F0199, 0x200F019D + }; + + /* + * This is supposed to be stored by pm_corequad_reset() in ATTR_QUAD_PPM_ERRMASK + * and ATTR_CORE_PPM_ERRMASK. + * + * If there was no reset, maybe no need to set it? + */ + uint32_t err_mask = 0; + + for (int quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + chiplet_id_t quad_chiplet = EP00_CHIPLET_ID + quad; + + if (!IS_EQ_FUNCTIONAL(quad, cores)) + continue; + + /* + * Setup the Quad PPM Mode Register + * Clear the following bits: + * 0 : Force FSAFE + * 1 - 11 : FSAFE + * 12 : Enable FSAFE on heartbeat loss + * 13 : Enable DROOP protect upon heartbeat loss + * 14 : Enable PFETs upon iVRMs dropout + * 18 - 19 : PCB interrupt + * 20,22,24,26: InterPPM Ivrm/Aclk/Vdata/Dpll enable + */ + write_scom_for_chiplet(quad_chiplet, EQ_QPPM_QPMMR_CLEAR, + PPC_BIT(0) | + PPC_BITMASK(1, 11) | + PPC_BIT(12) | + PPC_BIT(13) | + PPC_BIT(14) | + PPC_BITMASK(18, 19) | + PPC_BIT(20) | + PPC_BIT(22) | + PPC_BIT(24) | + PPC_BIT(26)); + + /* Clear QUAD PPM ERROR Register */ + write_scom_for_chiplet(quad_chiplet, EQ_QPPM_ERR, 0); + + /* Restore Quad PPM Error Mask */ + err_mask = 0xFFFFFF00; // from Hostboot's log + write_scom_for_chiplet(quad_chiplet, EQ_QPPM_ERRMSK, + PPC_SHIFT(err_mask, 31)); + + for (int core = quad * 4; core < (quad + 1) * 4; ++core) { + chiplet_id_t core_chiplet = EC00_CHIPLET_ID + core; + + /* Clear the Core PPM CME DoorBells */ + for (int i = 0; i < DOORBELLS_COUNT; ++i) + write_scom_for_chiplet(core_chiplet, CME_DOORBELL_CLEAR[i], + PPC_BITMASK(0, 63)); + + /* + * Setup Core PPM Mode register + * + * Clear the following bits: + * 1 : PPM Write control override + * 11 : Block interrupts + * 12 : PPM response for CME error + * 14 : enable pece + * 15 : cme spwu done dis + * + * Other bits are Init or Reset by STOP Hcode and, thus, not touched + * here: + * 0 : PPM Write control + * 9 : FUSED_CORE_MODE + * 10 : STOP_EXIT_TYPE_SEL + * 13 : WKUP_NOTIFY_SELECT + */ + write_scom_for_chiplet(core_chiplet, C_CPPM_CPMMR_CLEAR, + PPC_BIT(1) | + PPC_BIT(11) | + PPC_BIT(12) | + PPC_BIT(14) | + PPC_BIT(15)); + + /* Clear Core PPM Errors */ + write_scom_for_chiplet(core_chiplet, C_CPPM_ERR, 0); + + /* + * Clear Hcode Error Injection and other CSAR 
settings: + * 27 : FIT_HCODE_ERROR_INJECT + * 28 : ENABLE_PSTATE_REGISTRATION_INTERLOCK + * 29 : DISABLE_CME_NACK_ON_PROLONGED_DROOP + * 30 : PSTATE_HCODE_ERROR_INJECT + * 31 : STOP_HCODE_ERROR_INJECT + * + * DISABLE_CME_NACK_ON_PROLONGED_DROOP is NOT cleared + * as this is a persistent, characterization setting. + */ + write_scom_for_chiplet(core_chiplet, C_CPPM_CSAR_CLEAR, + PPC_BIT(27) | + PPC_BIT(28) | + PPC_BIT(30) | + PPC_BIT(31)); + + /* Restore CORE PPM Error Mask */ + err_mask = 0xFFF00000; // from Hostboot's log + write_scom_for_chiplet(core_chiplet, C_CPPM_ERRMSK, + PPC_SHIFT(err_mask, 31)); + } + } +} + +static void pstate_gpe_init(struct homer_st *homer, uint64_t cores) +{ + enum { + /* The following constants hold approximate values */ + PGPE_TIMEOUT_MS = 500, + PGPE_POLLTIME_MS = 20, + TIMEOUT_COUNT = PGPE_TIMEOUT_MS / PGPE_POLLTIME_MS, + + EQ_QPPM_QPMMR = 0x100F0103, + + PU_GPE2_PPE_XIXCR = 0x00064010, + PU_GPE2_PPE_XIDBGPRO = 0x00064015, + PU_GPE3_PPE_XIDBGPRO = 0x00066015, + + PU_GPE2_GPEIVPR_SCOM = 0x00064001, + PU_OCB_OCI_OCCS2_SCOM = 0x0006C088, + PU_OCB_OCI_OCCFLG_SCOM2 = 0x0006C08C, + PU_GPE2_GPETSEL_SCOM = 0x00064000, + + /* OCC SCRATCH2 */ + PGPE_ACTIVE = 0, + PGPE_PSTATE_PROTOCOL_ACTIVE = 1, + + /* XSR */ + HALTED_STATE = 0, + + /* XCR */ + RESUME = 2, + TOGGLE_XSR_TRH = 4, + HARD_RESET = 6, + }; + + uint64_t occ_scratch; + /* ATTR_VDD_AVSBUS_BUSNUM */ + uint8_t avsbus_number = 0; + /* ATTR_VDD_AVSBUS_RAIL */ + uint8_t avsbus_rail = 0; + + uint64_t ivpr = 0x80000000 + offsetof(struct homer_st, ppmr.l1_bootloader); + write_scom(PU_GPE2_GPEIVPR_SCOM, ivpr << 32); + + /* Set up the OCC Scratch 2 register before PGPE boot */ + occ_scratch = read_scom(PU_OCB_OCI_OCCS2_SCOM); + occ_scratch &= ~PPC_BIT(PGPE_ACTIVE); + occ_scratch &= ~PPC_BITMASK(27, 31); + occ_scratch |= PPC_PLACE(avsbus_number, 27, 1); + occ_scratch |= PPC_PLACE(avsbus_rail, 28, 4); + write_scom(PU_OCB_OCI_OCCS2_SCOM, occ_scratch); + + write_scom(PU_GPE2_GPETSEL_SCOM, 0x1A00000000000000); + + /* OCCFLG2_PGPE_HCODE_FIT_ERR_INJ | OCCFLG2_PGPE_HCODE_PSTATE_REQ_ERR_INJ */ + write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); + + printk(BIOS_ERR, "Attempting PGPE activation...\n"); + + write_scom(PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); + write_scom(PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); + write_scom(PU_GPE2_PPE_XIXCR, PPC_PLACE(RESUME, 1, 3)); + + wait_ms(PGPE_POLLTIME_MS * TIMEOUT_COUNT, + (read_scom(PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) || + (read_scom(PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); + + if (read_scom(PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) + printk(BIOS_ERR, "PGPE was activated successfully\n"); + else + die("Failed to activate PGPE\n"); + + OCCPstateParmBlock *oppb = (OCCPstateParmBlock *)homer->ppmr.occ_parm_block; + GlobalPstateParmBlock *gppb = (GlobalPstateParmBlock *) + &homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len]; + + uint32_t safe_mode_freq = oppb->frequency_min_khz / gppb->frequency_step_khz; + + for (int quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + if (!IS_EQ_FUNCTIONAL(quad, cores)) + continue; + + scom_and_or_for_chiplet(EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, + ~PPC_BITMASK(1, 11), + PPC_SHIFT(safe_mode_freq, 11)); + } +} + +static void pm_pba_init(void) +{ + enum { + PU_PBACFG = 0x0501284B, + PU_PBAFIR = 0x05012840, + + PU_PBACFG_CHSW_DIS_GROUP_SCOPE = 38, + + /* These don't have corresponding attributes */ + PBAX_DATA_TIMEOUT = 0x0, + PBAX_SND_RETRY_COMMIT_OVERCOMMIT = 0x0, + PBAX_SND_RETRY_THRESHOLD = 0x0, + 
PBAX_SND_TIMEOUT = 0x0, + }; + + uint64_t data = 0; + /* Assuming all these attributes have zero values */ + uint8_t attr_pbax_groupid = 0; + uint8_t attr_pbax_chipid = 0; + uint8_t attr_pbax_broadcast_vector = 0; + + /* Assuming ATTR_CHIP_EC_FEATURE_HW423589_OPTION1 == true */ + write_scom(PU_PBACFG, PPC_BIT(PU_PBACFG_CHSW_DIS_GROUP_SCOPE)); + + write_scom(PU_PBAFIR, 0); + + data |= PPC_PLACE(attr_pbax_groupid, 4, 4); + data |= PPC_PLACE(attr_pbax_chipid, 8, 3); + data |= PPC_PLACE(attr_pbax_broadcast_vector, 12, 8); + data |= PPC_PLACE(PBAX_DATA_TIMEOUT, 20, 5); + data |= PPC_PLACE(PBAX_SND_RETRY_COMMIT_OVERCOMMIT, 27, 1); + data |= PPC_PLACE(PBAX_SND_RETRY_THRESHOLD, 28, 8); + data |= PPC_PLACE(PBAX_SND_TIMEOUT, 36, 5); + write_scom(PU_PBAXCFG_SCOM, data); +} + +static void pm_pstate_gpe_init(struct homer_st *homer, uint64_t cores) +{ + pstate_gpe_init(homer, cores); + pm_pba_init(); +} + +/* Generates host configuration vector and updates the value in HOMER */ +static void check_proc_config(struct homer_st *homer) +{ + uint64_t vector_value = INIT_CONFIG_VALUE; + uint64_t *conf_vector = (void *)((uint8_t *)&homer->qpmr + QPMR_PROC_CONFIG_POS); + + int mcs_i = 0; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + /* MCS_MCFGP and MCS_MCFGPM registers are undocumented, see istep 14.5. */ + if ((read_scom_for_chiplet(nest, 0x0501080A) & PPC_BIT(0)) || + (read_scom_for_chiplet(nest, 0x0501080C) & PPC_BIT(0))) { + uint8_t pos = MCS_POS + mcs_i; + *conf_vector |= PPC_BIT(pos); + + /* MCS and MBA/MCA seem to have equivalent values */ + pos = MBA_POS + mcs_i; + *conf_vector |= PPC_BIT(pos); + } + } + + /* TODO: set configuration bits for XBUS and PHB when their state is available */ + + *conf_vector = vector_value; +} + +static void pm_pss_init(void) +{ + enum { + PU_SPIPSS_ADC_CTRL_REG0 = 0x00070000, + PU_SPIPSS_ADC_WDATA_REG = 0x00070010, + PU_SPIPSS_P2S_CTRL_REG0 = 0x00070040, + PU_SPIPSS_P2S_WDATA_REG = 0x00070050, + PU_SPIPSS_100NS_REG = 0x00070028, + }; + + /* + * 0-5 frame size + * 12-17 in delay + */ + scom_and_or(PU_SPIPSS_ADC_CTRL_REG0, + ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), + PPC_SHIFT(0x20, 5)); + + /* + * 0 adc_fsm_enable = 1 + * 1 adc_device = 0 + * 2 adc_cpol = 0 + * 3 adc_cpha = 0 + * 4-13 adc_clock_divider = set to 10Mhz + * 14-17 adc_nr_of_frames = 0x10 (for auto 2 mode) + * + * Truncating last value to 4 bits gives 0. 
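+	 * Hence the write below programs the frame-count field as PPC_SHIFT(0, 17)
+	 * rather than 0x10.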
+ */ + scom_and_or(PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17), + PPC_BIT(0) | PPC_SHIFT(10, 13) | PPC_SHIFT(0, 17)); + + /* + * 0-16 inter frame delay + */ + scom_and(PU_SPIPSS_ADC_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); + + write_scom(PU_SPIPSS_ADC_WDATA_REG, 0); + + /* + * 0-5 frame size + * 12-17 in delay + */ + scom_and_or(PU_SPIPSS_P2S_CTRL_REG0, + ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), + PPC_SHIFT(0x20, 5)); + + /* + * 0 p2s_fsm_enable = 1 + * 1 p2s_device = 0 + * 2 p2s_cpol = 0 + * 3 p2s_cpha = 0 + * 4-13 p2s_clock_divider = set to 10Mhz + * 17 p2s_nr_of_frames = 1 (for auto 2 mode) + */ + scom_and_or(PU_SPIPSS_P2S_CTRL_REG0 + 1, + ~(PPC_BITMASK(0, 13) | PPC_BIT(17)), + PPC_BIT(0) | PPC_SHIFT(10, 13) | PPC_BIT(17)); + + /* + * 0-16 inter frame delay + */ + scom_and(PU_SPIPSS_P2S_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); + + write_scom(PU_SPIPSS_P2S_WDATA_REG, 0); + + /* + * 0-31 100ns value + */ + scom_and_or(PU_SPIPSS_100NS_REG, + PPC_BITMASK(0, 31), + PPC_SHIFT(powerbus_cfg()->fabric_freq / 40, 31)); +} + +/* Initializes power-management and starts OCC */ +static void start_pm_complex(struct homer_st *homer, uint64_t cores) +{ + enum { STOP_RECOVERY_TRIGGER_ENABLE = 29 }; + + pm_corequad_init(cores); + pm_pss_init(); + pm_occ_fir_init(); + pm_pba_fir_init(); + stop_gpe_init(homer); + pm_pstate_gpe_init(homer, cores); + + check_proc_config(homer); + clear_occ_special_wakeups(cores); + special_occ_wakeup_disable(cores); + occ_start_from_mem(); + + write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); +} + +static void istep_21_1(struct homer_st *homer, uint64_t cores) +{ + load_pm_complex(homer); + + printk(BIOS_ERR, "Starting PM complex...\n"); + start_pm_complex(homer, cores); + printk(BIOS_ERR, "Done starting PM complex\n"); + + printk(BIOS_ERR, "Activating OCC...\n"); + activate_occ(homer); + printk(BIOS_ERR, "Done activating OCC\n"); +} + /* Extracts rings for a specific Programmable PowerPC-lite Engine */ static void get_ppe_scan_rings(struct xip_hw_header *hw, uint8_t dd, enum ppe_type ppe, struct ring_data *ring_data) @@ -2047,8 +2524,8 @@ void build_homer_image(void *homer_bar) */ write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); - // Boot the STOP GPE - stop_gpe_init(homer); + /* Boot OCC here and activate SGPE at the same time */ + istep_21_1(homer, cores); istep_16_1(this_core); } diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 80cd4bfa2b7..3d6c03bdf4f 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -60,6 +60,9 @@ #define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) #define MAX_CMES_PER_CHIP (MAX_CORES_PER_CHIP / MAX_CORES_PER_EX) +/* Offset from HOMER to OCC Host Data Area */ +#define HOMER_OFFSET_TO_OCC_HOST_DATA (768 * KiB) + /* =================== QPMR =================== */ struct qpmr_header { From ab74488ae70479dc6aa1c85f619a1925ca6c1e4a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 7 Sep 2021 17:40:17 +0200 Subject: [PATCH 090/213] soc/power9/istep_10_13.c: first part of RNG initialization (BIST) Signed-off-by: Krystian Hebel Change-Id: I6efca330f65bfd94e1fc790c0896e0f72b1be103 --- src/include/cpu/power/istep_10.h | 8 +++ src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_10_13.c | 87 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 3 ++ 4 files changed, 99 insertions(+) create mode 100644 src/include/cpu/power/istep_10.h create mode 100644 src/soc/ibm/power9/istep_10_13.c diff --git a/src/include/cpu/power/istep_10.h 
b/src/include/cpu/power/istep_10.h new file mode 100644 index 00000000000..f09f013b450 --- /dev/null +++ b/src/include/cpu/power/istep_10.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP10_H +#define CPU_PPC64_ISTEP10_H + +void istep_10_13(void); + +#endif /* CPU_PPC64_ISTEP10_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index efa9ddd7064..69f1570b68c 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -9,6 +9,7 @@ romstage-y += romstage.c romstage-y += mvpd.c romstage-y += vpd.c romstage-y += powerbus.c +romstage-y += istep_10_13.c romstage-y += istep_13_2.c romstage-y += istep_13_3.c romstage-y += istep_13_4.c diff --git a/src/soc/ibm/power9/istep_10_13.c b/src/soc/ibm/power9/istep_10_13.c new file mode 100644 index 00000000000..b05b6ddef24 --- /dev/null +++ b/src/soc/ibm/power9/istep_10_13.c @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +/* + * 10.13 host_rng_bist: Trigger Built In Self Test for RNG + * + * a) p9_rng_init_phase1.C + * - Trigger the Random Number Generator Built In Self Test (BIST). Results + * are checked later in step 16 when RNG is secured + */ +void istep_10_13(void) +{ + printk(BIOS_EMERG, "starting istep 10.13\n"); + + report_istep(10, 13); + + /* Assume DD2.0 or newer */ + + /* PU_NX_RNG_CFG + [44] COND_STARTUP_TEST_FAIL + */ + if (read_scom_for_chiplet(N0_CHIPLET_ID, 0x020110E0) & PPC_BIT(44)) + die("RNG Conditioner startup test failed\n"); + + /* PU_NX_RNG_ST0 + [0-1] REPTEST_MATCH_TH = 0x1 (3 repeated numbers) + [7-8] ADAPTEST_SAMPLE_SIZE = 0x2 (8b wide sample) + [9-11] ADAPTEST_WINDOW_SIZE = 0x1 (512 size) + [12-23] ADAPTEST_RRN_RNG0_MATCH_TH = 0x32 (50; Assuming H = 5) + [24-35] ADAPTEST_RRN_RNG1_MATCH_TH = 0x32 (50; Assuming H = 5) + [36-47] ADAPTEST_CRN_RNG0_MATCH_TH = 0x32 (50; Assuming H = 5) + [48-59] ADAPTEST_CRN_RNG1_MATCH_TH = 0x32 (50; Assuming H = 5) + */ + scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E1, + ~(PPC_BITMASK(0, 1) | PPC_BITMASK(7, 63)), + PPC_SHIFT(1, 1) | PPC_SHIFT(2, 8) | PPC_SHIFT(1, 11) + | PPC_SHIFT(0x32, 23) | PPC_SHIFT(0x32, 35) + | PPC_SHIFT(0x32, 47) | PPC_SHIFT(0x32, 59)); + + /* PU_NX_RNG_ST1 + [0-6] ADAPTEST_SOFT_FAIL_TH = 2 + [7-22] ADAPTEST_1BIT_MATCH_TH_MIN = 100 + [23-38] ADAPTEST_1BIT_MATCH_TH_MAX = 415 + */ + scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), + PPC_SHIFT(2, 6) | PPC_SHIFT(100, 22) + | PPC_SHIFT(415, 38)); + + /* PU_NX_RNG_ST3 + [0] SAMPTEST_RRN_ENABLE = 1 + [1-3] SAMPTEST_WINDOW_SIZE = 7 (64k -1 size) + [4-19] SAMPTEST_MATCH_TH_MIN = 0x6D60 (28,000) + [20-35] SAMPTEST_MATCH_TH_MAX = 0x988A (39,050) + */ + scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), + PPC_BIT(0) | PPC_SHIFT(7, 3) | PPC_SHIFT(0x6D60, 19) + | PPC_SHIFT(0x988A, 35)); + + /* PU_NX_RNG_RDELAY + [6] LFSR_RESEED_EN = 1 + [7-11] READ_RTY_RATIO = 0x1D (1/16) + */ + scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), + PPC_BIT(6) | PPC_SHIFT(0x1D, 11)); + + /* PU_NX_RNG_CFG + [30-37] ST2_RESET_PERIOD = 0x1B + [39] MASK_TOGGLE_ENABLE = 0 + [40] SAMPTEST_ENABLE = 1 + [41] REPTEST_ENABLE = 1 + [42] ADAPTEST_1BIT_ENABLE = 1 + [43] ADAPTEST_ENABLE = 1 + [46-61] PACE_RATE = 0x07D0 (2000) + [63] ENABLE = 1 + */ + scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E0, + ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) + | PPC_BITMASK(46, 61) | PPC_BIT(63)), + PPC_SHIFT(0x1B, 37) | PPC_BIT(40) | PPC_BIT(41) + | PPC_BIT(42) | 
PPC_BIT(43) | PPC_SHIFT(0x07D0, 61) + | PPC_BIT(63)); + + printk(BIOS_EMERG, "ending istep 10.13\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 0a9b3c084dd..a9fe17e05eb 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -336,6 +337,8 @@ void main(void) console_init(); + istep_10_13(); + timestamp_add_now(TS_INITRAM_START); vpd_pnor_main(); From 812ad19baef9ed91c178ae900573173bb5a603cf Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 14 Sep 2021 16:39:18 +0200 Subject: [PATCH 091/213] soc/power9/chip.c: second phase of RNG initialization Signed-off-by: Krystian Hebel Change-Id: Ic034bb7c4d935f086b7b4889afaecc5455f524fb --- src/soc/ibm/power9/chip.c | 42 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index b3fbb048e02..567fe296c5d 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include #include @@ -28,6 +29,46 @@ static inline unsigned long size_k(uint64_t reg) return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); } +static void rng_init(void) +{ + /* + * RNG is allowed to run for M cycles (M = enough time to complete init; + * recommend 1 second of time). + * + * The only thing that ensures this is delay between istep 10.13 and now. + * 14.1 is the most time-consuming istep, its duration depends on the amount + * of installed RAM under the bigger of MCBISTs (i.e. sides of CPU on the + * board). This is more than enough in Hostboot. + * + * TODO: test if this is enough for coreboot with initial ECC scrubbing + * skipped, low amount of RAM and no debug output. + */ + /* NX.PBI.PBI_RNG.NX_RNG_CFG + * [0-9] FAIL_REG - abort if any of these bits is set + * [17] BIST_COMPLETE - should be 1 at this point + */ + uint64_t rng_status = read_scom(0x020110E0); + assert(rng_status & PPC_BIT(17)); + while (!((rng_status = read_scom(0x020110E0)) & PPC_BIT(17))); + + if (rng_status & PPC_BITMASK(0, 9)) + die("RNG initialization failed, NX_RNG_CFG = %#16.16llx\n", rng_status); + + /* + * Hostboot sets 'enable' bit again even though it was already set. + * Following that behavior just in case. + */ + write_scom(0x020110E0, rng_status | PPC_BIT(63)); + + /* + * This would be the place to set BARs, but it is done as part of quad SCOM + * restore. 
+ */ + + /* Lock NX RNG configuration */ + scom_or(0x00010005, PPC_BIT(9)); +} + static void enable_soc_dev(struct device *dev) { int mcs_i, idx = 0; @@ -65,6 +106,7 @@ static void enable_soc_dev(struct device *dev) reserved_ram_resource_kb(dev, idx++, top, reserved_size); build_homer_image((void *)(top * 1024)); + rng_init(); istep_18_11(); istep_18_12(); } From 0c8307243a270d7a81df4d259a6b514e6512355c Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Tue, 8 Jun 2021 12:52:40 +0200 Subject: [PATCH 092/213] soc/power9: add DT to CBFS, load and pass it to payload Signed-off-by: Igor Bagnucki Signed-off-by: Sergii Dmytruk Change-Id: I0bdac7ca3125fe299c6f3fae56fcd9034b567f1c --- src/mainboard/raptor-cs/talos-2/1-cpu.dts | 636 ++++++++++++++++++++++ src/mainboard/raptor-cs/talos-2/Kconfig | 1 + src/soc/ibm/power9/Makefile.inc | 10 + src/soc/ibm/power9/chip.c | 29 + 4 files changed, 676 insertions(+) create mode 100644 src/mainboard/raptor-cs/talos-2/1-cpu.dts diff --git a/src/mainboard/raptor-cs/talos-2/1-cpu.dts b/src/mainboard/raptor-cs/talos-2/1-cpu.dts new file mode 100644 index 00000000000..7b8124ae925 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/1-cpu.dts @@ -0,0 +1,636 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* This is a base DT common to 1- and 2-CPU configurations */ + +/dts-v1/; + +/ { + #address-cells = <0x02>; + #size-cells = <0x02>; + compatible = "ibm,powernv\0ibm,p9-openbmc\0rcs,talos"; + nest-frequency = <0x00 0x6f38e680>; + vendor = "IBM"; + ibm,sw-checkstop-fir = <0x5012000 0x1f>; + model = "T2P9D01 REV 1.00"; /* TODO: unhardcode */ + /* TODO: system-id = "A1234567" */ + model-name = "rcs,talos"; + + bmc { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,ast2500,openbmc"; + + sensors { + #address-cells = <0x01>; + #size-cells = <0x00>; + + sensor@1 { + compatible = "ibm,ipmi-sensor"; + reg = <0x01>; + ipmi-sensor-type = <0x22>; + }; + + sensor@2 { + compatible = "ibm,ipmi-sensor"; + reg = <0x02>; + ipmi-sensor-type = <0x0f>; + }; + + sensor@3 { + compatible = "ibm,ipmi-sensor"; + reg = <0x03>; + ipmi-sensor-type = <0x07>; + ibm,chip-id = <0x00>; + }; + + sensor@6 { + compatible = "ibm,ipmi-sensor"; + reg = <0x06>; + ipmi-sensor-type = <0x01>; + ibm,chip-id = <0x00>; + }; + + sensor@8 { + compatible = "ibm,ipmi-sensor"; + reg = <0x08>; + ipmi-sensor-type = <0x07>; + ibm,chip-id = <0x00>; + }; + + sensor@a { + compatible = "ibm,ipmi-sensor"; + reg = <0x0a>; + ipmi-sensor-type = <0xc1>; + }; + + sensor@b { + compatible = "ibm,ipmi-sensor"; + reg = <0x0b>; + ipmi-sensor-type = <0x0c>; + }; + + sensor@d { + compatible = "ibm,ipmi-sensor"; + reg = <0x0d>; + ipmi-sensor-type = <0x0c>; + }; + + sensor@f { + compatible = "ibm,ipmi-sensor"; + reg = <0x0f>; + ipmi-sensor-type = <0x0c>; + }; + + sensor@11 { + compatible = "ibm,ipmi-sensor"; + reg = <0x11>; + ipmi-sensor-type = <0x0c>; + }; + + sensor@1b { + compatible = "ibm,ipmi-sensor"; + reg = <0x1b>; + ipmi-sensor-type = <0x01>; + }; + + sensor@1d { + compatible = "ibm,ipmi-sensor"; + reg = <0x1d>; + ipmi-sensor-type = <0x01>; + }; + + sensor@1f { + compatible = "ibm,ipmi-sensor"; + reg = <0x1f>; + ipmi-sensor-type = <0x01>; + }; + + sensor@21 { + compatible = "ibm,ipmi-sensor"; + reg = <0x21>; + ipmi-sensor-type = <0x01>; + }; + + sensor@8b { + compatible = "ibm,ipmi-sensor"; + reg = <0x8b>; + ipmi-sensor-type = <0xc3>; + }; + + sensor@8c { + compatible = "ibm,ipmi-sensor"; + reg = <0x8c>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@8d { + compatible = "ibm,ipmi-sensor"; + reg = <0x8d>; + 
ipmi-sensor-type = <0xc7>; + }; + + sensor@8e { + compatible = "ibm,ipmi-sensor"; + reg = <0x8e>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@8f { + compatible = "ibm,ipmi-sensor"; + reg = <0x8f>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@90 { + compatible = "ibm,ipmi-sensor"; + reg = <0x90>; + ipmi-sensor-type = <0x12>; + }; + + sensor@91 { + compatible = "ibm,ipmi-sensor"; + reg = <0x91>; + ipmi-sensor-type = <0x1f>; + }; + + sensor@92 { + compatible = "ibm,ipmi-sensor"; + reg = <0x92>; + ipmi-sensor-type = <0xc4>; + }; + + sensor@93 { + compatible = "ibm,ipmi-sensor"; + reg = <0x93>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@94 { + compatible = "ibm,ipmi-sensor"; + reg = <0x94>; + ipmi-sensor-type = <0xc2>; + }; + + sensor@95 { + compatible = "ibm,ipmi-sensor"; + reg = <0x95>; + ipmi-sensor-type = <0xca>; + }; + + sensor@96 { + compatible = "ibm,ipmi-sensor"; + reg = <0x96>; + ipmi-sensor-type = <0xc8>; + }; + + sensor@97 { + compatible = "ibm,ipmi-sensor"; + reg = <0x97>; + ipmi-sensor-type = <0xc6>; + }; + }; + }; + + cpus { + #address-cells = <0x01>; + #size-cells = <0x00>; + }; + + ibm,opal { + + leds { + led-mode = "lightpath"; + }; + + power-mgt { + ibm,enabled-stop-levels = <0xec000000>; + }; + + }; + + ibm,pcie-slots { + #address-cells = <0x02>; + #size-cells = <0x00>; + + root-complex@0,0 { + reg = <0x00 0x00>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + ibm,slot-label = "SLOT3"; + + pluggable { + mrw-slot-id = <0x03>; + lane-mask = <0xffff>; + ibm,slot-label = "SLOT3"; + }; + }; + + root-complex@0,1 { + reg = <0x00 0x01>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0xff00>; + lanes-reversed = <0x1111>; + + builtin { + }; + }; + + root-complex@0,2 { + reg = <0x00 0x02>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0xff>; + + builtin { + }; + }; + + root-complex@0,3 { + reg = <0x00 0x03>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + ibm,slot-label = "SLOT1"; + + pluggable { + mrw-slot-id = <0x01>; + lane-mask = <0xff00>; + ibm,slot-label = "SLOT1"; + }; + }; + + root-complex@0,4 { + reg = <0x00 0x04>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0xf0>; + + switch-up@10b5,8725 { + reg = <0x10b5 0x8725>; + #address-cells = <0x01>; + #size-cells = <0x00>; + upstream-port = <0x00>; + ibm,pluggable; + + down-port@a { + compatible = "ibm,pcie-port"; + reg = <0x0a>; + ibm,pluggable; + ibm,slot-label = "GPU0"; + + builtin { + ibm,slot-label = "GPU0"; + }; + }; + + down-port@b { + compatible = "ibm,pcie-port"; + reg = <0x0b>; + ibm,pluggable; + ibm,slot-label = "GPU1"; + + builtin { + ibm,slot-label = "GPU1"; + }; + }; + + down-port@c { + compatible = "ibm,pcie-port"; + reg = <0x0c>; + ibm,pluggable; + ibm,slot-label = "GPU2"; + + builtin { + ibm,slot-label = "GPU2"; + }; + }; + }; + }; + + root-complex@0,5 { + reg = <0x00 0x05>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0x0f>; + + builtin { + }; + }; + }; + + lpcm-opb@6030000000000 { + #address-cells = <0x01>; + #size-cells = <0x01>; + compatible = 
"ibm,power9-lpcm-opb\0simple-bus"; + reg = <0x60300 0x00 0x01 0x00>; + ibm,chip-id = <0x00>; + ranges = <0x00 0x60300 0x00 0x80000000 0x80000000 0x60300 0x80000000 0x80000000>; + + lpc-controller@c0012000 { + compatible = "ibm,power9-lpc-controller"; + reg = <0xc0012000 0x100>; + }; + + lpc@0 { + #address-cells = <0x02>; + #size-cells = <0x01>; + compatible = "ibm,power9-lpc\0ibm,power8-lpc"; + ranges = <0x00 0x00 0xe0000000 0x10000000 0x01 0x00 0xd0010000 0x10000 0x03 0x00 0xf0000000 0x10000000>; + + serial@i3f8 { + reg = <0x01 0x3f8 0x01>; + compatible = "ns16550"; + current-speed = <0x1c200>; + clock-frequency = <0x1c2000>; + interrupts = <0x04>; + device_type = "serial"; + }; + }; + + opb-arbiter@c0011000 { + compatible = "ibm,power9-lpcm-opb-arbiter"; + reg = <0xc0011000 0x08>; + }; + + opb-master@c0010000 { + compatible = "ibm,power9-lpcm-opb-master"; + reg = <0xc0010000 0x60>; + }; + }; + + xscom@603fc00000000 { + ibm,chip-id = <0x00>; + ibm,proc-chip-id = <0x00>; + #address-cells = <0x01>; + #size-cells = <0x01>; + scom-controller; + compatible = "ibm,xscom\0ibm,power9-xscom"; + reg = <0x603fc 0x00 0x08 0x00>; + bus-frequency = <0x00 0x1bce39a0>; + primary; + + chiptod@40000 { + reg = <0x40000 0x34>; + compatible = "ibm,power-chiptod\0ibm,power9-chiptod"; + primary; + }; + + i2cm@a1000 { + reg = <0xa1000 0x1000>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + #size-cells = <0x00>; + #address-cells = <0x01>; + chip-engine# = <0x01>; + clock-frequency = <0x6f38e68>; + + i2c-bus@0 { + reg = <0x00>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0xf4240>; + + eeprom@50 { + reg = <0x50>; + link-id = <0x02>; + compatible = "atmel,24c512"; + label = "module-vpd"; + }; + }; + + i2c-bus@2 { + reg = <0x02>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0xf4240>; + + eeprom@50 { + reg = <0x50>; + link-id = <0x04>; + compatible = "atmel,24c128"; + label = "module-vpd"; + }; + }; + }; + + i2cm@a2000 { + reg = <0xa2000 0x1000>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + #size-cells = <0x00>; + #address-cells = <0x01>; + chip-engine# = <0x02>; + clock-frequency = <0x6f38e68>; + + i2c-bus@0 { + reg = <0x00>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0x61a80>; + + eeprom@50 { + reg = <0x50>; + link-id = <0x06>; + compatible = "atmel,24c128"; + label = "module-vpd"; + }; + }; + }; + + i2cm@a3000 { + reg = <0xa3000 0x1000>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + #size-cells = <0x00>; + #address-cells = <0x01>; + chip-engine# = <0x03>; + clock-frequency = <0x6f38e68>; + + i2c-bus@0 { + reg = <0x00>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0x61a80>; + + eeprom@50 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x12>; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x0c>; + reg = <0x51>; + }; + + eeprom@52 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x01>; + reg = <0x52>; + }; + + eeprom@53 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x03>; + reg = <0x53>; + }; + }; + + i2c-bus@1 { + reg = <0x01>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = 
"ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0x61a80>; + + eeprom@54 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x0d>; + reg = <0x54>; + }; + + eeprom@55 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x0e>; + reg = <0x55>; + }; + + eeprom@56 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10>; + reg = <0x56>; + }; + + eeprom@57 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x00>; + reg = <0x57>; + }; + }; + }; + + nmmu@5012c40 { + compatible = "ibm,power9-nest-mmu"; + reg = <0x5012c40 0x20>; + }; + + nx@2010000 { + compatible = "ibm,power9-nx"; + reg = <0x2010000 0x4000>; + }; + + pbcq@4010c00 { + reg = <0x4010c00 0x100 0xd010800 0x200>; + compatible = "ibm,power9-pbcq"; + ibm,pec-index = <0x00>; + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,hub-id = <0x00>; + + stack@0 { + reg = <0x00>; + ibm,phb-index = <0x00>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + }; + + pbcq@4011000 { + reg = <0x4011000 0x100 0xe010800 0x200>; + compatible = "ibm,power9-pbcq"; + ibm,pec-index = <0x01>; + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,hub-id = <0x00>; + + stack@0 { + reg = <0x00>; + ibm,phb-index = <0x01>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + + stack@1 { + reg = <0x01>; + ibm,phb-index = <0x02>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + }; + + pbcq@4011400 { + reg = <0x4011400 0x100 0xf010800 0x200>; + compatible = "ibm,power9-pbcq"; + ibm,pec-index = <0x02>; + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,hub-id = <0x00>; + + stack@0 { + reg = <0x00>; + ibm,phb-index = <0x03>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + + stack@1 { + reg = <0x01>; + ibm,phb-index = <0x04>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + + stack@2 { + reg = <0x02>; + ibm,phb-index = <0x05>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + }; + + psihb@5012900 { + reg = <0x5012900 0x100>; + compatible = "ibm,power9-psihb-x\0ibm,psihb-x"; + }; + + vas@3011800 { + reg = <0x3011800 0x300>; + compatible = "ibm,power9-vas-x"; + ibm,vas-id = <0x00>; + }; + + xive@5013000 { + reg = <0x5013000 0x300>; + compatible = "ibm,power9-xive-x"; + force-assign-bars; + }; + }; +}; diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index 898159be1db..43c1852b1d1 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -24,6 +24,7 @@ config BOARD_SPECIFIC_OPTIONS select MISSING_BOARD_RESET select HAVE_DEBUG_RAM_SETUP 
select IPMI_BT + select FLATTENED_DEVICE_TREE config MEMLAYOUT_LD_FILE string diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 69f1570b68c..6c1ccd48579 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -42,4 +42,14 @@ ramstage-y += int_vectors.S ramstage-y += i2c.c ramstage-y += occ.c +MB_DIR = src/mainboard/$(MAINBOARDDIR) +ONECPU_DTB = 1-cpu.dtb + +$(obj)/%.dtb: $(MB_DIR)/%.dts + dtc -I dts -O dtb -o $@ -i $(MB_DIR) $< + +cbfs-files-y += $(ONECPU_DTB) +$(ONECPU_DTB)-file := $(obj)/$(ONECPU_DTB) +$(ONECPU_DTB)-type := raw + endif diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 567fe296c5d..40094d70f56 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -2,8 +2,10 @@ #include #include +#include #include #include +#include #include #include @@ -158,8 +160,35 @@ static void activate_slave_cores(void) } } +static void *load_fdt(const char *dtb_file) +{ + void *fdt; + void *fdt_rom; + struct device_tree *tree; + + fdt_rom = cbfs_map(dtb_file, NULL); + if (fdt_rom == NULL) + die("Unable to load %s from CBFS\n", dtb_file); + + tree = fdt_unflatten(fdt_rom); + + fdt = malloc(dt_flat_size(tree)); + if (fdt == NULL) + die("Unable to allocate memory for flat device tree\n"); + + dt_flatten(tree, fdt); + return fdt; +} + void platform_prog_run(struct prog *prog) { + void *fdt; + + fdt = load_fdt("1-cpu.dtb"); + + /* See asm/head.S in skiboot where fdt_entry starts at offset 0x10 */ + prog_set_entry(prog, prog_start(prog) + 0x10, fdt); + /* * Clear SMS_ATN aka EVT_ATN in BT_CTRL - Block Transfer IPMI protocol * From 328143f4100e5262739107d7013c5b4c0a691c97 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 14 Sep 2021 16:16:13 +0200 Subject: [PATCH 093/213] soc/power9/chip.c: generate info about available cores for device tree Signed-off-by: Krystian Hebel Change-Id: I6b9b5355464a0637187609e5bb32d01c6175e75d --- src/soc/ibm/power9/chip.c | 303 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 303 insertions(+) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 40094d70f56..89da3e4e121 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -8,11 +8,215 @@ #include #include #include +#include +#include // xzalloc #include "homer.h" #include "istep_13_scom.h" #include "chip.h" +/* + * These are various definitions of the page sizes and segment sizes supported + * by the MMU. Values are the same as dumped from original firmware, comments + * are copied from Hostboot for POWER8. Compared to POWER8, POWER9 doesn't have + * 1M entries in segment page sizes. 
+ */ +static uint32_t page_sizes[4] = { 0xC, 0x10, 0x18, 0x22 }; +static uint32_t segment_sizes[4] = { 0x1C, 0x28, 0xFFFFFFFF, 0xFFFFFFFF }; +static uint32_t segment_page_sizes[] = +{ + 12, 0x0, 3, /* 4k SLB page size, L,LP = 0,x1, 3 page size encodings */ + 12, 0x0, /* 4K PTE page size, L,LP = 0,x0 */ + 16, 0x7, /* 64K PTE page size, L,LP = 1,x7 */ + 24, 0x38, /* 16M PTE page size, L,LP = 1,x38 */ + 16, 0x110, 2, /* 64K SLB page size, L,LP = 1,x1, 2 page size encodings*/ + 16, 0x1, /* 64K PTE page size, L,LP = 1,x1 */ + 24, 0x8, /* 16M PTE page size, L,LP = 1,x8 */ + 24, 0x100, 1, /* 16M SLB page size, L,LP = 1,x0, 1 page size encoding */ + 24, 0x0, /* 16M PTE page size, L,LP = 1,x0 */ + 34, 0x120, 1, /* 16G SLB page size, L,LP = 1,x2, 1 page size encoding */ + 34, 0x3 /* 16G PTE page size, L,LP = 1,x3 */ +}; +static uint32_t radix_AP_enc[4] = { 0x0C, 0xA0000010, 0x20000015, 0x4000001E }; + +/* + * Dumped from Hostboot, might need reviewing. Comment in + * skiboot/external/mambo/skiboot.tcl says that PAPR defines up to byte 63 (plus + * 2 bytes for header), but the newest version I found describes only up to byte + * number 23 (Revision 2.9_pre7 from June 11, 2020). + */ +static uint8_t pa_features[] = +{ + 64, 0, /* Header: size and format, respectively */ + 0xF6, 0x3F, 0xC7, 0xC0, 0x80, 0xD0, 0x80, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 +}; + +static void fill_l3_node(struct device_tree_node *node, uint32_t phandle, + uint32_t pir) +{ + node->phandle = phandle; + dt_add_u32_prop(node, "phandle", phandle); + dt_add_u32_prop(node, "reg", pir); + dt_add_string_prop(node, "device_type", "cache"); + dt_add_bin_prop(node, "cache-unified", NULL, 0); + dt_add_string_prop(node, "status", "okay"); + + /* POWER9 Processor User's Manual, 7.3 */ + dt_add_u32_prop(node, "d-cache-size", 10 * MiB); + dt_add_u32_prop(node, "d-cache-sets", 8); /* Per Hostboot. Why not 20? */ + dt_add_u32_prop(node, "i-cache-size", 10 * MiB); + dt_add_u32_prop(node, "i-cache-sets", 8); /* Per Hostboot. Why not 20? 
*/ +} + +static void fill_l2_node(struct device_tree_node *node, uint32_t phandle, + uint32_t pir, uint32_t next_lvl_phandle) +{ + node->phandle = phandle; + dt_add_u32_prop(node, "phandle", phandle); + /* This is not a typo, "l2-cache" points to the node of L3 cache */ + dt_add_u32_prop(node, "l2-cache", next_lvl_phandle); + dt_add_u32_prop(node, "reg", pir); + dt_add_string_prop(node, "device_type", "cache"); + dt_add_bin_prop(node, "cache-unified", NULL, 0); + dt_add_string_prop(node, "status", "okay"); + + /* POWER9 Processor User's Manual, 6.1 */ + dt_add_u32_prop(node, "d-cache-size", 512 * KiB); + dt_add_u32_prop(node, "d-cache-sets", 8); + dt_add_u32_prop(node, "i-cache-size", 512 * KiB); + dt_add_u32_prop(node, "i-cache-sets", 8); +} + +static void fill_cpu_node(struct device_tree_node *node, uint32_t phandle, + uint32_t pir, uint32_t next_lvl_phandle) +{ + /* Mandatory/standard properties */ + node->phandle = phandle; + dt_add_u32_prop(node, "phandle", phandle); + dt_add_string_prop(node, "device_type", "cpu"); + dt_add_bin_prop(node, "64-bit", NULL, 0); + dt_add_bin_prop(node, "32-64-bridge", NULL, 0); + dt_add_bin_prop(node, "graphics", NULL, 0); + dt_add_bin_prop(node, "general-purpose", NULL, 0); + dt_add_u32_prop(node, "l2-cache", next_lvl_phandle); + + /* + * The "status" property indicate whether the core is functional. It's + * a string containing "okay" for a good core or "bad" for a non-functional + * one. You can also just ommit the non-functional ones from the DT + */ + dt_add_string_prop(node, "status", "okay"); + + /* + * This is the same value as the PIR of thread 0 of that core + * (ie same as the @xx part of the node name) + */ + dt_add_u32_prop(node, "reg", pir); + dt_add_u32_prop(node, "ibm,pir", pir); + + /* Chip ID of this core */ + dt_add_u32_prop(node, "ibm,chip-id", 0); /* FIXME for second CPU */ + + /* + * Interrupt server numbers (aka HW processor numbers) of all threads + * on that core. This should have 4 numbers and the first one should + * have the same value as the above ibm,pir and reg properties + */ + uint32_t int_srvrs[4] = {pir, pir+1, pir+2, pir+3}; + /* + * This will be added to actual FDT later, so local array on stack can't + * be used. + */ + void *int_srvrs_ptr = xmalloc(sizeof(int_srvrs)); + memcpy(int_srvrs_ptr, int_srvrs, sizeof(int_srvrs)); + dt_add_bin_prop(node, "ibm,ppc-interrupt-server#s", int_srvrs_ptr, + sizeof(int_srvrs)); + + /* + * This is the "architected processor version" as defined in PAPR. + */ + dt_add_u32_prop(node, "cpu-version", read_spr(SPR_PVR)); + + /* + * Page sizes and segment sizes supported by the MMU. + */ + dt_add_bin_prop(node, "ibm,processor-page-sizes", &page_sizes, + sizeof(page_sizes)); + dt_add_bin_prop(node, "ibm,processor-segment-sizes", &segment_sizes, + sizeof(segment_sizes)); + dt_add_bin_prop(node, "ibm,segment-page-sizes", &segment_page_sizes, + sizeof(segment_page_sizes)); + dt_add_bin_prop(node, "ibm,processor-radix-AP-encodings", &radix_AP_enc, + sizeof(radix_AP_enc)); + + dt_add_bin_prop(node, "ibm,pa-features", &pa_features, + sizeof(pa_features)); + + /* SLB size, use as-is */ + dt_add_u32_prop(node, "ibm,slb-size", 0x20); + + /* VSX support, use as-is */ + dt_add_u32_prop(node, "ibm,vmx", 0x2); + + /* DFP support, use as-is */ + dt_add_u32_prop(node, "ibm,dfp", 0x2); + + /* PURR/SPURR support, use as-is */ + dt_add_u32_prop(node, "ibm,purr", 0x1); + dt_add_u32_prop(node, "ibm,spurr", 0x1); + + /* + * FIXME: un-hardcode. 
This is either nominal or safe mode frequency, + * depending on whether OCC has been started successfully. + */ + uint64_t clock_freq = 2700ULL * MHz; + /* + * Old-style core clock frequency. Only create this property if the + * frequency fits in a 32-bit number. Do not create it if it doesn't. + */ + if ((clock_freq >> 32) == 0) + dt_add_u32_prop(node, "clock-frequency", clock_freq); + + /* + * Mandatory: 64-bit version of the core clock frequency, always create + * this property. + */ + dt_add_u64_prop(node, "ibm,extended-clock-frequency", clock_freq); + + /* Timebase freq has a fixed value, always use that */ + dt_add_u32_prop(node, "timebase-frequency", 512 * MHz); + /* extended-timebase-frequency will be deprecated at some point */ + dt_add_u64_prop(node, "ibm,extended-timebase-frequency", 512 * MHz); + + /* Use as-is, values dumped from booted system */ + dt_add_u32_prop(node, "reservation-granule-size", 0x80); + dt_add_u64_prop(node, "performance-monitor", 1); + /* POWER9 Processor User's Manual, 2.3.1 */ + dt_add_u32_prop(node, "i-cache-size", 32 * KiB); + dt_add_u32_prop(node, "i-cache-sets", 8); + dt_add_u32_prop(node, "i-cache-block-size", 128); + dt_add_u32_prop(node, "i-cache-line-size", 128); // Makes Linux happier + dt_add_u32_prop(node, "i-tlb-size", 0); + dt_add_u32_prop(node, "i-tlb-sets", 0); + /* POWER9 Processor User's Manual, 2.3.5 */ + dt_add_u32_prop(node, "d-cache-size", 32 * KiB); + dt_add_u32_prop(node, "d-cache-sets", 8); + dt_add_u32_prop(node, "d-cache-block-size", 128); + dt_add_u32_prop(node, "d-cache-line-size", 128); // Makes Linux happier + /* POWER9 Processor User's Manual, 2.3.7 */ + dt_add_u32_prop(node, "d-tlb-size", 1024); + dt_add_u32_prop(node, "d-tlb-sets", 4); + dt_add_u32_prop(node, "tlb-size", 1024); + dt_add_u32_prop(node, "tlb-sets", 4); +} + #define SIZE_MASK PPC_BITMASK(13,23) #define SIZE_SHIFT (63 - 23) #define BASE_MASK PPC_BITMASK(24,47) @@ -31,6 +235,103 @@ static inline unsigned long size_k(uint64_t reg) return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); } +static int dt_platform_update(struct device_tree *tree) +{ + struct device_tree_node *node, *cpus; + uint64_t cores = read_scom(0x0006C090); + assert(cores != 0); + + /* Find "cpus" node, create if necessary */ + cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 1); + assert(cpus != NULL); + + /* + * First remove all existing "cpu" nodes, then add ours. + * + * TODO: check if any other node relies on phandles of "cpu" or "cache" + * nodes + */ + list_for_each(node, cpus->children, list_node) { + list_remove(&node->list_node); + } + + for (int core_id = 0; core_id <= 24; core_id++) { + if (IS_EC_FUNCTIONAL(core_id, cores)) { + /* + * Not sure who is the original author of this comment, it is + * duplicated in Hostboot and Skiboot, and now also here. It + * lacks one important piece of information: PIR is PIR value + * of thread 0 of _first_ core in pair, both for L2 and L3. + */ + /* + * Cache nodes. Those are siblings of the processor nodes under /cpus and + * represent the various level of caches. + * + * The unit address (and reg property) is mostly free-for-all as long as + * there is no collisions. 
On HDAT machines we use the following encoding + * which I encourage you to also follow to limit surprises: + * + * L2 : (0x20 << 24) | PIR (PIR is PIR value of thread 0 of core) + * L3 : (0x30 << 24) | PIR + * L3.5 : (0x35 << 24) | PIR + * + * In addition, each cache points to the next level cache via its + * own "l2-cache" (or "next-level-cache") property, so the core node + * points to the L2, the L2 points to the L3 etc... + */ + uint32_t pir = core_id * 4; + uint32_t l2_pir = (0x20 << 24) | (pir & ~7); + uint32_t l3_pir = (0x30 << 24) | (pir & ~7); + /* "/cpus/l?-cache@12345678" -> 23 characters + terminator */ + char l2_path[24]; + char l3_path[24]; + snprintf(l2_path, sizeof(l2_path), "/cpus/l%d-cache@%x", 2, l2_pir); + snprintf(l3_path, sizeof(l3_path), "/cpus/l%d-cache@%x", 3, l3_pir); + + /* + * 21 for "/cpus/PowerPC,POWER9@", 4 for PIR just in case (2nd CPU), + * 1 for terminator + */ + char cpu_path[26]; + snprintf(cpu_path, sizeof(cpu_path), "/cpus/PowerPC,POWER9@%x", pir); + + struct device_tree_node *l2_node = + dt_find_node_by_path(tree, l2_path, NULL, NULL, 1); + struct device_tree_node *l3_node = + dt_find_node_by_path(tree, l3_path, NULL, NULL, 1); + struct device_tree_node *cpu_node = + dt_find_node_by_path(tree, cpu_path, NULL, NULL, 1); + + /* + * Cache nodes may already be created if this is the second active + * core in a pair. If L3 node doesn't exist, L2 also doesn't - they + * are created at the same time, no need to test both. + */ + if (!l3_node->phandle) { + fill_l3_node(l3_node, ++tree->max_phandle, l3_pir); + fill_l2_node(l2_node, ++tree->max_phandle, l2_pir, + l3_node->phandle); + } + + fill_cpu_node(cpu_node, ++tree->max_phandle, pir, l2_node->phandle); + } + } + + /* Debug for Skiroot's kernel. TODO: make this a config option? */ + node = dt_find_node_by_path(tree, "/chosen", NULL, NULL, 1); + dt_add_string_prop(node, "bootargs", "console=hvc0"); + + /* Will be created by Skiboot. 
TODO: remove from dts */ + node = dt_find_node_by_path(tree, "/ibm,opal", NULL, NULL, 0); + if (node) + list_remove(&node->list_node); + + node = dt_find_node_by_path(tree, "/ibm,opal/power-mgt", NULL, NULL, 1); + dt_add_u32_prop(node, "ibm,enabled-stop-levels", 0xEC000000); + + return 0; +} + static void rng_init(void) { /* @@ -172,6 +473,8 @@ static void *load_fdt(const char *dtb_file) tree = fdt_unflatten(fdt_rom); + dt_platform_update(tree); + fdt = malloc(dt_flat_size(tree)); if (fdt == NULL) die("Unable to allocate memory for flat device tree\n"); From 1cef9258df5f3df86e3c57bd78fe3b620da2f480 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 21 Oct 2021 20:02:31 +0300 Subject: [PATCH 094/213] soc/power9/: implement istep 10.10 Change-Id: I7475668e43f7da1bb7ed9bb4da0f1090e35a9ff0 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_10.h | 3 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_10_10.c | 599 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/pci.h | 20 ++ src/soc/ibm/power9/romstage.c | 6 + 5 files changed, 629 insertions(+) create mode 100644 src/soc/ibm/power9/istep_10_10.c create mode 100644 src/soc/ibm/power9/pci.h diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h index f09f013b450..b4c349e72ac 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -3,6 +3,9 @@ #ifndef CPU_PPC64_ISTEP10_H #define CPU_PPC64_ISTEP10_H +#include + +void istep_10_10(uint8_t *phb_active_mask, uint8_t *iovalid_enable); void istep_10_13(void); #endif /* CPU_PPC64_ISTEP10_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 6c1ccd48579..10bdf3d02a0 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -9,6 +9,7 @@ romstage-y += romstage.c romstage-y += mvpd.c romstage-y += vpd.c romstage-y += powerbus.c +romstage-y += istep_10_10.c romstage-y += istep_10_13.c romstage-y += istep_13_2.c romstage-y += istep_13_3.c diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c new file mode 100644 index 00000000000..5b408ece84d --- /dev/null +++ b/src/soc/ibm/power9/istep_10_10.c @@ -0,0 +1,599 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +#define MAX_LANE_GROUPS_PER_PEC 4 + +#define NUM_PCIE_LANES 16 +#define NUM_PCS_CONFIG 4 + +/* Enum indicating lane width (units = "number of lanes") */ +enum lane_width { + LANE_WIDTH_NC = 0, + LANE_WIDTH_4X = 4, + LANE_WIDTH_8X = 8, + LANE_WIDTH_16X = 16 +}; + +enum lane_mask { + LANE_MASK_X16 = 0xFFFF, + LANE_MASK_X8_GRP0 = 0xFF00, + LANE_MASK_X8_GRP1 = 0x00FF, + LANE_MASK_X4_GRP0 = 0x00F0, + LANE_MASK_X4_GRP1 = 0x000F, +}; + +/* Enumeration of PHB to PCI MAC mappings */ +enum phb_to_mac { + PHB_X16_MAC_MAP = 0x0000, + PHB_X8_X8_MAC_MAP = 0x0050, + PHB_X8_X4_X4_MAC_MAP = 0x0090, +}; + +/* + * Bit position of the PHB with the largest number a given PEC can use + * (see enum phb_active_mask for bit values). + */ +enum pec_phb_shift { + PEC0_PHB_SHIFT = 7, // PHB0 only + PEC1_PHB_SHIFT = 5, // PHB1 - PHB2 + PEC2_PHB_SHIFT = 2, // PHB3 - PHB5 +}; + +/* + * Struct for each row in PCIE IOP configuration table. + * Used by code to compute the IOP config and PHBs active mask. + */ +struct lane_config_row { + /* + * Grouping of lanes under one IOP. + * Value signifies width of each PCIE lane set (0, 4, 8, or 16). 
+ */ + uint8_t lane_set[MAX_LANE_GROUPS_PER_PEC]; // enum lane_width + + /* IOP config value from PCIE IOP configuration table */ + uint8_t lane_config; + + /* PHB active mask (see phb_active_mask enum) */ + uint8_t phb_active; + + uint16_t phb_to_pcie_mac; // enum phb_to_mac +}; + +/* + * Currently there are three PEC config tables for procs with 48 usable PCIE + * lanes. In general, the code accumulates the current configuration of + * the PECs from the MRW and other dynamic information (such as bifurcation) + * then matches that config to one of the rows in the table. Once a match + * is discovered, the PEC config value is pulled from the matching row for + * future use. + * + * Each PEC can control up to 16 lanes: + * - PEC0 can give 16 lanes to PHB0 + * - PEC1 can split 16 lanes between PHB1 & PHB2 + * - PEC2 can split 16 lanes between PHB3, PHB4 & PHB5 + */ +static const struct lane_config_row pec0_lane_cfg[] = { + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X16_MAC_MAP + }, + { + { LANE_WIDTH_16X, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB0_MASK, + PHB_X16_MAC_MAP + }, +}; +static const struct lane_config_row pec1_lane_cfg[] = { + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_8X, LANE_WIDTH_NC }, + 0x00, + PHB1_MASK | PHB2_MASK, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB1_MASK, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_8X, LANE_WIDTH_NC }, + 0x00, + PHB2_MASK, + PHB_X8_X8_MAC_MAP + }, +}; +static const struct lane_config_row pec2_lane_cfg[] = { + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X16_MAC_MAP + }, + { + { LANE_WIDTH_16X, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB3_MASK, + PHB_X16_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_8X, LANE_WIDTH_NC }, + 0x10, + PHB3_MASK | PHB4_MASK, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_4X, LANE_WIDTH_4X }, + 0x20, + PHB3_MASK | PHB4_MASK | PHB5_MASK, + PHB_X8_X4_X4_MAC_MAP + }, +}; + +static const struct lane_config_row *pec_lane_cfgs[] = { + pec0_lane_cfg, + pec1_lane_cfg, + pec2_lane_cfg +}; +static const size_t pec_lane_cfg_sizes[] = { + ARRAY_SIZE(pec0_lane_cfg), + ARRAY_SIZE(pec1_lane_cfg), + ARRAY_SIZE(pec2_lane_cfg) +}; + +/* + * PEC_PCIE_LANE_MASK_NON_BIFURCATED in processed talos.xml for the first + * processor chip. Values correspond to lane_width enumeration. 
+ */ +static uint16_t lane_masks[MAX_PEC_PER_PROC][MAX_LANE_GROUPS_PER_PEC] = { + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X8_GRP1, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X4_GRP0, LANE_MASK_X4_GRP1 }, +}; + +static const uint64_t RX_VGA_CTRL3_REGISTER[NUM_PCIE_LANES] = { + 0x8000008D0D010C3F, + 0x800000CD0D010C3F, + 0x8000018D0D010C3F, + 0x800001CD0D010C3F, + 0x8000028D0D010C3F, + 0x800002CD0D010C3F, + 0x8000038D0D010C3F, + 0x800003CD0D010C3F, + 0x8000088D0D010C3F, + 0x800008CD0D010C3F, + 0x8000098D0D010C3F, + 0x800009CD0D010C3F, + 0x80000A8D0D010C3F, + 0x80000ACD0D010C3F, + 0x80000B8D0D010C3F, + 0x80000BCD0D010C3F, +}; + +static const uint64_t RX_LOFF_CNTL_REGISTER[NUM_PCIE_LANES] = { + 0x800000A60D010C3F, + 0x800000E60D010C3F, + 0x800001A60D010C3F, + 0x800001E60D010C3F, + 0x800002A60D010C3F, + 0x800002E60D010C3F, + 0x800003A60D010C3F, + 0x800003E60D010C3F, + 0x800008A60D010C3F, + 0x800008E60D010C3F, + 0x800009A60D010C3F, + 0x800009E60D010C3F, + 0x80000AA60D010C3F, + 0x80000AE60D010C3F, + 0x80000BA60D010C3F, + 0x80000BE60D010C3F, +}; + +static enum lane_width lane_mask_to_width(uint16_t mask) +{ + enum lane_width width = LANE_WIDTH_NC; + + if (mask == LANE_MASK_X16) + width = LANE_WIDTH_16X; + else if (mask == LANE_MASK_X8_GRP0 || mask == LANE_MASK_X8_GRP1) + width = LANE_WIDTH_8X; + else if (mask == LANE_MASK_X4_GRP0 || mask == LANE_MASK_X4_GRP1) + width = LANE_WIDTH_4X; + + return width; +} + +static void determine_lane_configs(uint8_t *phb_active_mask, + const struct lane_config_row **pec_cfgs) +{ + uint8_t pec = 0; + + *phb_active_mask = 0; + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + uint8_t i; + uint8_t lane_group; + + struct lane_config_row config = { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X16_MAC_MAP, + }; + + /* Transform effective config to match lane config table format */ + for (lane_group = 0; lane_group < MAX_LANE_GROUPS_PER_PEC; ++lane_group) { + config.lane_set[lane_group] = + lane_mask_to_width(lane_masks[pec][lane_group]); + } + + for (i = 0; i < pec_lane_cfg_sizes[pec]; ++i) { + if (memcmp(pec_lane_cfgs[pec][i].lane_set, &config.lane_set, + sizeof(config.lane_set)) == 0) + break; + } + + if (i == pec_lane_cfg_sizes[pec]) + die("Failed to find PCIE IOP configuration for PEC%d\n", pec); + + *phb_active_mask |= pec_lane_cfgs[pec][i].phb_active; + + pec_cfgs[pec] = &pec_lane_cfgs[pec][i]; + + /* + * In the rest of the PCIe-related code the following PEC attributes have these + * values: + * - PEC[ATTR_PROC_PCIE_IOP_CONFIG] := pec_cfgs[pec]->lane_config + * - PEC[ATTR_PROC_PCIE_REFCLOCK_ENABLE] := 1 + * - PEC[ATTR_PROC_PCIE_PCS_SYSTEM_CNTL] := pec_cfgs[pec]->phb_to_pcie_mac + */ + } +} + +static uint64_t pec_val(int pec_id, uint8_t in, + uint32_t pec0_s, uint32_t pec0_c, + uint32_t pec1_s, uint32_t pec1_c, + uint32_t pec2_s, uint32_t pec2_c) +{ + uint64_t out = 0; + + switch (pec_id) { + case 0: + out = PPC_SHIFT(in & ((1 << pec0_c) - 1), pec0_s + pec0_c - 1); + break; + case 1: + out = PPC_SHIFT(in & ((1 << pec1_c) - 1), pec1_s + pec1_c - 1); + break; + case 2: + out = PPC_SHIFT(in & ((1 << pec2_c) - 1), pec2_s + pec2_c - 1); + break; + default: + die("Unknown PEC ID: %d\n", pec_id); + } + + return out; +} + +static void phase1(const struct lane_config_row **pec_cfgs, + const uint8_t *iovalid_enable) +{ + enum { + PEC_CPLT_CONF1_OR = 0x0D000019, + PEC_CPLT_CTRL0_OR = 0x0D000010, + PEC_CPLT_CONF1_CLEAR = 0x0D000029, + + PEC_PCS_RX_ROT_CNTL_REG = 0x800004820D010C3F, + 
PEC_PCS_RX_CONFIG_MODE_REG = 0x800004800D010C3F, + PEC_PCS_RX_CDR_GAIN_REG = 0x800004B30D010C3F, + PEC_PCS_RX_SIGDET_CONTROL_REG = 0x800004A70D010C3F, + + PCI_IOP_FIR_ACTION0_REG = 0x0000000000000000ULL, + PCI_IOP_FIR_ACTION1_REG = 0xE000000000000000ULL, + PCI_IOP_FIR_MASK_REG = 0x1FFFFFFFF8000000ULL, + + PEC_FIR_ACTION0_REG = 0x0D010C06, + PEC_FIR_ACTION1_REG = 0x0D010C07, + PEC_FIR_MASK_REG = 0x0D010C03, + + PEC0_IOP_CONFIG_START_BIT = 13, + PEC1_IOP_CONFIG_START_BIT = 14, + PEC2_IOP_CONFIG_START_BIT = 10, + PEC0_IOP_BIT_COUNT = 1, + PEC1_IOP_BIT_COUNT = 2, + PEC2_IOP_BIT_COUNT = 3, + PEC0_IOP_SWAP_START_BIT = 12, + PEC1_IOP_SWAP_START_BIT = 12, + PEC2_IOP_SWAP_START_BIT = 7, + PEC0_IOP_IOVALID_ENABLE_START_BIT = 4, + PEC1_IOP_IOVALID_ENABLE_START_BIT = 4, + PEC2_IOP_IOVALID_ENABLE_START_BIT = 4, + PEC_IOP_IOVALID_ENABLE_STACK0_BIT = 4, + PEC_IOP_IOVALID_ENABLE_STACK1_BIT = 5, + PEC_IOP_IOVALID_ENABLE_STACK2_BIT = 6, + PEC_IOP_REFCLOCK_ENABLE_START_BIT = 32, + PEC_IOP_PMA_RESET_START_BIT = 29, + PEC_IOP_PIPE_RESET_START_BIT = 28, + + PEC_PCS_PCLCK_CNTL_PLLA_REG = 0x8000050F0D010C3F, + PEC_PCS_PCLCK_CNTL_PLLB_REG = 0x8000054F0D010C3F, + PEC_PCS_TX_DCLCK_ROTATOR_REG = 0x800004450D010C3F, + PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG = 0x8000046C0D010C3F, + PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG = 0x8000046D0D010C3F, + PEC_PCS_TX_POWER_SEQ_ENABLE_REG = 0x800004700D010C3F, + + PEC_SCOM0X0B_EDMOD = 52, + + PEC_PCS_RX_VGA_CONTROL1_REG = 0x8000048B0D010C3F, + PEC_PCS_RX_VGA_CONTROL2_REG = 0x8000048C0D010C3F, + PEC_IOP_RX_DFE_FUNC_REGISTER1 = 0x8000049F0D010C3F, + PEC_PCS_SYS_CONTROL_REG = 0x80000C000D010C3F, + + PEC_PCS_M1_CONTROL_REG = 0x80000C010D010C3F, + PEC_PCS_M2_CONTROL_REG = 0x80000C020D010C3F, + PEC_PCS_M3_CONTROL_REG = 0x80000C030D010C3F, + PEC_PCS_M4_CONTROL_REG = 0x80000C040D010C3F, + }; + + uint8_t pec = 0; + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + long time; + uint8_t i; + uint64_t val; + uint8_t proc_pcie_iop_swap; + + chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; + + /* ATTR_PROC_PCIE_PCS_RX_CDR_GAIN, from talos.xml */ + uint8_t pcs_cdr_gain[] = { 0x56, 0x47, 0x47, 0x47 }; + /* ATTR_PROC_PCIE_PCS_RX_INIT_GAIN, all zeroes by default */ + uint8_t pcs_init_gain = 0; + /* ATTR_PROC_PCIE_PCS_RX_PK_INIT, all zeroes by default */ + uint8_t pcs_pk_init = 0; + /* ATTR_PROC_PCIE_PCS_RX_SIGDET_LVL, defaults and talos.xml */ + uint8_t pcs_sigdet_lvl = 0x0B; + + uint32_t pcs_config_mode[NUM_PCS_CONFIG] = { 0xA006, 0xA805, 0xB071, 0xB870 }; + + /* Phase1 init step 1 (get VPD, no operation here) */ + + /* Phase1 init step 2a */ + val = pec_val(pec, pec_cfgs[pec]->lane_config, + PEC0_IOP_CONFIG_START_BIT, PEC0_IOP_BIT_COUNT * 2, + PEC1_IOP_CONFIG_START_BIT, PEC1_IOP_BIT_COUNT * 2, + PEC2_IOP_CONFIG_START_BIT, PEC2_IOP_BIT_COUNT * 2); + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, val); + + /* Phase1 init step 2b */ + + /* ATTR_PROC_PCIE_IOP_SWAP from processed talos.xml for first proc */ + proc_pcie_iop_swap = (pec == 0); + + val = pec_val(pec, proc_pcie_iop_swap, + PEC0_IOP_SWAP_START_BIT, PEC0_IOP_BIT_COUNT, + PEC1_IOP_SWAP_START_BIT, PEC1_IOP_BIT_COUNT, + PEC2_IOP_SWAP_START_BIT, PEC2_IOP_BIT_COUNT); + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, val); + + /* Phase1 init step 3a */ + + val = pec_val(pec, iovalid_enable[pec], + PEC0_IOP_IOVALID_ENABLE_START_BIT, PEC0_IOP_BIT_COUNT, + PEC1_IOP_IOVALID_ENABLE_START_BIT, PEC1_IOP_BIT_COUNT, + PEC2_IOP_IOVALID_ENABLE_START_BIT, PEC2_IOP_BIT_COUNT); + + /* Set IOVALID for base PHB if PHB2, or PHB4, or PHB5 are set (SW417485) */ + if 
((val & PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK1_BIT)) || + (val & PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK2_BIT))) { + val |= PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK0_BIT); + val |= PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK1_BIT); + } + + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, val); + + /* Phase1 init step 3b (enable clock) */ + /* ATTR_PROC_PCIE_REFCLOCK_ENABLE, all PECs are enabled. */ + write_scom_for_chiplet(chiplet, PEC_CPLT_CTRL0_OR, + PPC_BIT(PEC_IOP_REFCLOCK_ENABLE_START_BIT)); + + /* Phase1 init step 4 (PMA reset) */ + + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + udelay(1); /* at least 400ns */ + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + udelay(1); /* at least 400ns */ + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + + /* + * Poll for PRTREADY status on PLLA and PLLB: + * PEC_IOP_PLLA_VCO_COURSE_CAL_REGISTER1 = 0x800005010D010C3F + * PEC_IOP_PLLB_VCO_COURSE_CAL_REGISTER1 = 0x800005410D010C3F + * PEC_IOP_HSS_PORT_READY_START_BIT = 58 + */ + time = wait_us(40, + (read_scom_for_chiplet(chiplet, 0x800005010D010C3F) & PPC_BIT(58)) || + (read_scom_for_chiplet(chiplet, 0x800005410D010C3F) & PPC_BIT(58))); + if (!time) + die("IOP HSS Port Ready status is not set!"); + + /* Phase1 init step 5 (Set IOP FIR action0) */ + write_scom_for_chiplet(chiplet, PEC_FIR_ACTION0_REG, PCI_IOP_FIR_ACTION0_REG); + + /* Phase1 init step 6 (Set IOP FIR action1) */ + write_scom_for_chiplet(chiplet, PEC_FIR_ACTION1_REG, PCI_IOP_FIR_ACTION1_REG); + + /* Phase1 init step 7 (Set IOP FIR mask) */ + write_scom_for_chiplet(chiplet, PEC_FIR_MASK_REG, PCI_IOP_FIR_MASK_REG); + + /* Phase1 init step 8-11 (Config 0 - 3) */ + + for (i = 0; i < NUM_PCS_CONFIG; ++i) { + uint8_t lane; + + /* RX Config Mode */ + write_scom_for_chiplet(chiplet, PEC_PCS_RX_CONFIG_MODE_REG, + pcs_config_mode[i]); + + /* RX CDR GAIN */ + scom_and_or_for_chiplet(chiplet, PEC_PCS_RX_CDR_GAIN_REG, + ~PPC_BITMASK(56, 63), + pcs_cdr_gain[i]); + + for (lane = 0; lane < NUM_PCIE_LANES; ++lane) { + /* RX INITGAIN */ + scom_and_or_for_chiplet(chiplet, RX_VGA_CTRL3_REGISTER[lane], + ~PPC_BITMASK(48, 52), + PPC_SHIFT(pcs_init_gain, 52)); + + /* RX PKINIT */ + scom_and_or_for_chiplet(chiplet, RX_LOFF_CNTL_REGISTER[lane], + ~PPC_BITMASK(58, 63), + pcs_pk_init); + } + + /* RX SIGDET LVL */ + scom_and_or_for_chiplet(chiplet, PEC_PCS_RX_SIGDET_CONTROL_REG, + ~PPC_BITMASK(59, 63), + pcs_sigdet_lvl); + } + + /* + * Phase1 init step 12 (RX Rot Cntl CDR Lookahead Disabled, SSC Disabled) + * + * All these attributes are zero for Nimbus: + * - ATTR_PROC_PCIE_PCS_RX_ROT_CDR_LOOKAHEAD (55) + * - ATTR_PROC_PCIE_PCS_RX_ROT_CDR_SSC (63) + * - ATTR_PROC_PCIE_PCS_RX_ROT_EXTEL (59) + * - ATTR_PROC_PCIE_PCS_RX_ROT_RST_FW (62) + */ + scom_and_for_chiplet(chiplet, PEC_PCS_RX_ROT_CNTL_REG, + ~(PPC_BIT(55) | PPC_BIT(63) | PPC_BIT(59) | PPC_BIT(62))); + + /* Phase1 init step 13 (RX Config Mode Enable External Config Control) */ + write_scom_for_chiplet(chiplet, PEC_PCS_RX_CONFIG_MODE_REG, 0x8600); + + /* Phase1 init step 14 (PCLCK Control Register - PLLA) */ + /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLA = 0xF8 */ + scom_and_or_for_chiplet(chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, + ~PPC_BITMASK(56, 63), + 0xF8); + + /* Phase1 init step 15 (PCLCK Control Register - PLLB) */ + /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLB = 0xF8 */ + scom_and_or_for_chiplet(chiplet, PEC_PCS_PCLCK_CNTL_PLLB_REG, + ~PPC_BITMASK(56, 63), + 0xF8); + + /* Phase1 init 
step 16 (TX DCLCK Rotator Override) */ + /* ATTR_PROC_PCIE_PCS_TX_DCLCK_ROT = 0x0022 */ + write_scom_for_chiplet(chiplet, PEC_PCS_TX_DCLCK_ROTATOR_REG, 0x0022); + + /* Phase1 init step 17 (TX PCIe Receiver Detect Control Register 1) */ + /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG1 = 0xAA7A */ + write_scom_for_chiplet(chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG, 0xaa7a); + + /* Phase1 init step 18 (TX PCIe Receiver Detect Control Register 2) */ + /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG2 = 0x2000 */ + write_scom_for_chiplet(chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG, 0x2000); + + /* Phase1 init step 19 (TX Power Sequence Enable) */ + /* ATTR_PROC_PCIE_PCS_TX_POWER_SEQ_ENABLE = 0xFF, but field is 7 bits */ + scom_and_or_for_chiplet(chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, + ~PPC_BITMASK(56, 62), + PPC_SHIFT(0x7F, 62)); + + /* Phase1 init step 20 (RX VGA Control Register 1) */ + + /* ATTR_PROC_PCIE_PCS_RX_VGA_CNTL_REG1 = 0 */ + val = 0; + + /* ATTR_CHIP_EC_FEATURE_HW414759 = 0, so not setting PEC_SCOM0X0B_EDMOD */ + + write_scom_for_chiplet(chiplet, PEC_PCS_RX_VGA_CONTROL1_REG, val); + + /* Phase1 init step 21 (RX VGA Control Register 2) */ + /* ATTR_PROC_PCIE_PCS_RX_VGA_CNTL_REG2 = 0 */ + write_scom_for_chiplet(chiplet, PEC_PCS_RX_VGA_CONTROL2_REG, 0); + + /* Phase1 init step 22 (RX DFE Func Control Register 1) */ + /* ATTR_PROC_PCIE_PCS_RX_DFE_FDDC = 1 */ + scom_or_for_chiplet(chiplet, PEC_IOP_RX_DFE_FUNC_REGISTER1, PPC_BIT(50)); + + /* Phase1 init step 23 (PCS System Control) */ + /* ATTR_PROC_PCIE_PCS_SYSTEM_CNTL computed above */ + scom_and_or_for_chiplet(chiplet, PEC_PCS_SYS_CONTROL_REG, + ~PPC_BITMASK(55, 63), + pec_cfgs[pec]->phb_to_pcie_mac); + + /* + * All values in ATTR_PROC_PCIE_PCS_M_CNTL are 0. + * Hostboot has bugs here in that it updates PEC_PCS_M1_CONTROL_REG + * 4 times instead of updating 4 different registers (M1-M4). + */ + + /* Phase1 init step 24 (PCS M1 Control) */ + scom_and_for_chiplet(chiplet, PEC_PCS_M1_CONTROL_REG, ~PPC_BITMASK(55, 63)); + /* Phase1 init step 25 (PCS M2 Control) */ + scom_and_for_chiplet(chiplet, PEC_PCS_M2_CONTROL_REG, ~PPC_BITMASK(55, 63)); + /* Phase1 init step 26 (PCS M3 Control) */ + scom_and_for_chiplet(chiplet, PEC_PCS_M3_CONTROL_REG, ~PPC_BITMASK(55, 63)); + /* Phase1 init step 27 (PCS M4 Control) */ + scom_and_for_chiplet(chiplet, PEC_PCS_M4_CONTROL_REG, ~PPC_BITMASK(55, 63)); + + /* Delay a minimum of 200ns to allow prior SCOM programming to take effect */ + udelay(1); + + /* Phase1 init step 28 */ + write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PIPE_RESET_START_BIT)); + + /* + * Delay a minimum of 300ns for reset to complete. + * Inherent delay before deasserting PCS PIPE Reset is enough here. + */ + } +} + +void istep_10_10(uint8_t *phb_active_mask, uint8_t *iovalid_enable) +{ + const struct lane_config_row *pec_cfgs[MAX_PEC_PER_PROC] = { NULL }; + + printk(BIOS_EMERG, "starting istep 10.10\n"); + report_istep(10, 10); + + determine_lane_configs(phb_active_mask, pec_cfgs); + + /* + * Mask of functional PHBs for each PEC, ATTR_PROC_PCIE_IOVALID_ENABLE in Hostboot. + * LSB is the PHB with the highest number for the given PEC. 
+ */ + iovalid_enable[0] = pec_cfgs[0]->phb_active >> PEC0_PHB_SHIFT; + iovalid_enable[1] = pec_cfgs[1]->phb_active >> PEC1_PHB_SHIFT; + iovalid_enable[2] = pec_cfgs[2]->phb_active >> PEC2_PHB_SHIFT; + + phase1(pec_cfgs, iovalid_enable); + + printk(BIOS_EMERG, "ending istep 10.10\n"); +} diff --git a/src/soc/ibm/power9/pci.h b/src/soc/ibm/power9/pci.h new file mode 100644 index 00000000000..345fbe1505d --- /dev/null +++ b/src/soc/ibm/power9/pci.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_PCI_H +#define __SOC_IBM_POWER9_PCI_H + +#define MAX_PEC_PER_PROC 3 +#define MAX_PHB_PER_PROC 6 + +/* Enum giving bitmask values for enabled PHBs */ +enum phb_active_mask { + PHB_MASK_NA = 0x00, // Sentinel mask (loop terminations) + PHB0_MASK = 0x80, // PHB0 enabled + PHB1_MASK = 0x40, // PHB1 enabled + PHB2_MASK = 0x20, // PHB2 enabled + PHB3_MASK = 0x10, // PHB3 enabled + PHB4_MASK = 0x08, // PHB4 enabled + PHB5_MASK = 0x04, // PHB5 enabled +}; + +#endif /* __SOC_IBM_POWER9_PCI_H */ diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index a9fe17e05eb..89c9cd19b73 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -11,6 +11,8 @@ #include #include +#include "pci.h" + /* DIMM SPD addresses */ #define DIMM0 0x50 #define DIMM1 0x51 @@ -331,12 +333,16 @@ static void prepare_dimm_data(void) void main(void) { + uint8_t phb_active_mask = 0; + uint8_t iovalid_enable[MAX_PEC_PER_PROC] = { 0 }; + init_timer(); timestamp_add_now(TS_ROMSTAGE_START); console_init(); + istep_10_10(&phb_active_mask, iovalid_enable); istep_10_13(); timestamp_add_now(TS_INITRAM_START); From ba6a0afdd23bbfbec2d8633e3c676ff5f2ead023 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 17 May 2022 00:22:49 +0300 Subject: [PATCH 095/213] soc/power9/: implement istep 10.12 Change-Id: Ifcbd7987c37c0f8b2779867e108b0f4223384904 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_10.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_10_12.c | 43 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 46 insertions(+) create mode 100644 src/soc/ibm/power9/istep_10_12.c diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h index b4c349e72ac..c79d0edd0d2 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -6,6 +6,7 @@ #include void istep_10_10(uint8_t *phb_active_mask, uint8_t *iovalid_enable); +void istep_10_12(void); void istep_10_13(void); #endif /* CPU_PPC64_ISTEP10_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 10bdf3d02a0..c499686a1c3 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -10,6 +10,7 @@ romstage-y += mvpd.c romstage-y += vpd.c romstage-y += powerbus.c romstage-y += istep_10_10.c +romstage-y += istep_10_12.c romstage-y += istep_10_13.c romstage-y += istep_13_2.c romstage-y += istep_13_3.c diff --git a/src/soc/ibm/power9/istep_10_12.c b/src/soc/ibm/power9/istep_10_12.c new file mode 100644 index 00000000000..563996bf85f --- /dev/null +++ b/src/soc/ibm/power9/istep_10_12.c @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include + +#include "pci.h" + +/* PCIe only at the moment, should also do other buses */ +static void enable_ridi(void) +{ + enum { + PERV_NET_CTRL0 = 0x000F0040, + PERV_NET_CTRL0_WOR = 0x000F0042, + }; + + uint8_t pec = 0; + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + 
chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; + + /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ + if (read_scom_for_chiplet(chiplet, PERV_NET_CTRL0) & PPC_BIT(0)) { + /* Enable Recievers, Drivers DI1 & DI2 */ + uint64_t val = 0; + val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 + val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 + val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 + write_scom_for_chiplet(chiplet, PERV_NET_CTRL0_WOR, val); + } + } +} + +void istep_10_12(void) +{ + printk(BIOS_EMERG, "starting istep 10.12\n"); + report_istep(10, 12); + + enable_ridi(); + + printk(BIOS_EMERG, "ending istep 10.12\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 89c9cd19b73..f623bc1763d 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -343,6 +343,7 @@ void main(void) console_init(); istep_10_10(&phb_active_mask, iovalid_enable); + istep_10_12(); istep_10_13(); timestamp_add_now(TS_INITRAM_START); From aaa0375afb628f43a7c85449aac7f56e61c07b1e Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Thu, 29 Apr 2021 12:32:44 +0200 Subject: [PATCH 096/213] soc/power9/istep_14_3.c: add istep code Change-Id: Ibb18aa095853eddd1ba964ad719272b076653f66 Signed-off-by: Igor Bagnucki Signed-off-by: Krystian Hebel Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_14.h | 3 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_14_3.c | 512 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 2 + 4 files changed, 518 insertions(+) create mode 100644 src/soc/ibm/power9/istep_14_3.c diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index 5b0b778799b..d79f38dd804 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -3,8 +3,11 @@ #ifndef CPU_PPC64_ISTEP_14_H #define CPU_PPC64_ISTEP_14_H +#include + void istep_14_1(void); void istep_14_2(void); +void istep_14_3(uint8_t phb_active_mask, const uint8_t *iovalid_enable); void istep_14_5(void); #endif /* CPU_PPC64_ISTEP_14_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index c499686a1c3..6bcda75e801 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -23,6 +23,7 @@ romstage-y += istep_13_11.c romstage-y += istep_13_13.c romstage-y += istep_14_1.c romstage-y += istep_14_2.c +romstage-y += istep_14_3.c romstage-y += istep_14_5.c romstage-y += i2c.c romstage-y += ccs.c diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c new file mode 100644 index 00000000000..93ab8efe1ca --- /dev/null +++ b/src/soc/ibm/power9/istep_14_3.c @@ -0,0 +1,512 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include + +#include "pci.h" +#include "scratch.h" + +static uint64_t pec_addr(uint8_t pec, uint64_t addr) +{ + return addr + pec * 0x400; +} + +static void init_pecs(const uint8_t *iovalid_enable) +{ + enum { + P9N2_PEC_ADDREXTMASK_REG = 0x4010C05, + PEC_PBCQHWCFG_REG = 0x4010C00, + PEC_NESTTRC_REG = 0x4010C03, + PEC_PBAIBHWCFG_REG = 0xD010800, + + PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION = 60, + PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN = 30, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT = 42, + PEC_PBCQHWCFG_REG_PE_DISABLE_OOO_MODE = 0x16, + PEC_PBCQHWCFG_REG_PE_DISABLE_WR_SCOPE_GROUP = 42, + PEC_PBCQHWCFG_REG_PE_CHANNEL_STREAMING_EN = 33, + PEC_PBCQHWCFG_REG_PE_DISABLE_WR_VG = 41, + PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_VG = 43, + PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_SCOPE_GROUP = 44, + 
PEC_PBCQHWCFG_REG_PE_DISABLE_RD_SCOPE_GROUP = 51, + PEC_PBCQHWCFG_REG_PE_DISABLE_RD_VG = 54, + PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_SCOPE_GROUP = 56, + PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_VG = 59, + }; + + uint64_t scratch_reg6 = 0; + uint8_t pec = 0; + bool node_pump_mode = false; + uint8_t dd = get_dd(); + + scratch_reg6 = read_scom(MBOX_SCRATCH_REG1 + 5); + + /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ + node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + uint64_t val = 0; + + printk(BIOS_EMERG, "Initializing PEC%d...\n", pec); + + /* + * ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID = 0 + * ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID = 0 + */ + scom_and_or_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG), + ~PPC_BITMASK(0, 6), + PPC_SHIFT(0, 6)); + + /* + * Phase2 init step 1 + * NestBase + 0x00 + * Set bits 00:03 = 0b0001 Set hang poll scale + * Set bits 04:07 = 0b0001 Set data scale + * Set bits 08:11 = 0b0001 Set hang pe scale + * Set bit 22 = 0b1 Disable out of order store behavior + * Set bit 33 = 0b1 Enable Channel Tag streaming behavior + * Set bits 34:35 = 0b11 Set P9 Style cache-inject behavior + * Set bits 46:48 = 0b011 Set P9 Style cache-inject rate, 1/16 cycles + * Set bit 60 = 0b1 only if PEC is bifurcated or trifurcated. + * + * if HW423589_option1, set Disable Group Scope (r/w) and Use Vg(sys) at Vg + * scope + */ + + val = read_scom_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG)); + /* Set hang poll scale */ + val &= ~PPC_BITMASK(0, 3); + val |= PPC_SHIFT(1, 3); + /* Set data scale */ + val &= ~PPC_BITMASK(4, 7); + val |= PPC_SHIFT(1, 7); + /* Set hang pe scale */ + val &= ~PPC_BITMASK(8, 11); + val |= PPC_SHIFT(1, 11); + /* Disable out of order store behavior */ + val |= PPC_BIT(22); + /* Enable Channel Tag streaming behavior */ + val |= PPC_BIT(33); + + /* Set Disable Group Scope (r/w) and Use Vg(sys) at Vg scope */ + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_WR_VG); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_WR_SCOPE_GROUP); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_VG); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_SCOPE_GROUP); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_RD_VG); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_RD_SCOPE_GROUP); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_SCOPE_GROUP); + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_VG); + + /* Disable P9 Style cache injects if chip is node */ + if (!node_pump_mode) { + /* + * ATTR_PROC_PCIE_CACHE_INJ_MODE + * Attribute to control the cache inject mode. + * + * DISABLE_CI = 0x0 - Disable cache inject completely. + * (Reset value default.) + * P7_STYLE_CI = 0x1 - Use cache inject design from Power7. + * PCITLP_STYLE_CI = 0x2 - Use PCI TLP Hint bits in packet to perform + * the cache inject. + * P9_STYLE_CI = 0x3 - Initial attempt as cache inject. Power9 + * style. (Attribute default.) + * + * Different cache inject modes will affect DMA write performance. The + * attribute default was selected based on various workloads and was to + * be the most optimal settings for Power9. + * + * fapi2::ATTR_PROC_PCIE_CACHE_INJ_MODE = 3 by default + */ + val &= ~PPC_BITMASK(34, 35); + val |= PPC_SHIFT(0x3, 35); + + if (dd == 0x21 || dd == 0x22 || dd == 0x23) { + /* + * ATTR_PROC_PCIE_CACHE_INJ_THROTTLE + * Attribute to control the cache inject throttling when cache + * inject is enable. + * + * DISABLE = 0x0 - Disable cache inject throttling. + * (Reset value default.) 
+ * 16_CYCLES = 0x1 - Perform 1 cache inject every 16 clock + * cycles. + * 32_CYCLES = 0x3 - Perform 1 cache inject every 32 clock + * cycles. (Attribute default.) + * 64_CYCLES = 0x7 - Perform 1 cache inject every 32 clock + * cycles. + * + * Different throttle rates will affect DMA write performance. + * The attribute default settings were optimal settings found + * across various workloads. + */ + val &= ~PPC_BITMASK(46, 48); + val |= PPC_SHIFT(0x3, 48); + } + } + + if (pec == 1 || (pec == 2 && iovalid_enable[pec] != 0x4)) + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION); + + write_scom_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG), val); + + /* + * Phase2 init step 2 + * NestBase + 0x01 + * N/A Modify Drop Priority Control Register (DrPriCtl) + */ + + /* + * Phase2 init step 3 + * NestBase + 0x03 + * Set bits 00:03 = 0b1001 Enable trace, and select + * inbound operations with addr information + */ + scom_and_or_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), + ~PPC_BITMASK(0, 3), + PPC_SHIFT(9, 3)); + + /* + * Phase2 init step 4 + * NestBase + 0x05 + * N/A For use of atomics/asb_notify + */ + + /* + * Phase2 init step 5 + * NestBase + 0x06 + * N/A To override scope prediction + */ + + /* + * Phase2 init step 6 + * PCIBase +0x00 + * Set bits 30 = 0b1 Enable Trace + */ + val = 0; + val |= PPC_BIT(PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN); + val |= PPC_SHIFT(7, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT); + write_scom_for_chiplet(PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); + } +} + +/* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ +static void phb_write(uint8_t phb, uint64_t addr, uint64_t data) +{ + chiplet_id_t chiplet; + uint8_t sat_id = (addr >> 6) & 0xF; + + if (phb == 0) { + chiplet = PCI0_CHIPLET_ID; + sat_id = (sat_id < 4 ? 1 : 4); + } else { + chiplet = PCI0_CHIPLET_ID + (phb / 3) + 1; + sat_id = (sat_id < 4 ? 1 : 4) + + ((phb % 2) ? 0 : 1) + + (2 * (phb / 5)); + } + + addr &= ~PPC_BITMASK(54, 57); + addr |= PPC_SHIFT(sat_id & 0xF, 57); + + write_scom_for_chiplet(chiplet, addr, data); +} + +/* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ +static void phb_nest_write(uint8_t phb, uint64_t addr, uint64_t data) +{ + enum { N2_PCIS0_0_RING_ID = 0x3 }; + + uint8_t ring; + uint8_t sat_id = (addr >> 6) & 0xF; + + if (phb == 0) { + ring = (N2_PCIS0_0_RING_ID & 0xF); + sat_id = (sat_id < 4 ? 1 : 4); + } else { + ring = ((N2_PCIS0_0_RING_ID + (phb / 3) + 1) & 0xF); + sat_id = (sat_id < 4 ? 1 : 4) + + (phb % 2 ? 
0 : 1) + + (2 * (phb / 5)); + } + + addr &= ~PPC_BITMASK(50, 53); + addr |= PPC_SHIFT(ring & 0xF, 53); + + addr &= ~PPC_BITMASK(54, 57); + addr |= PPC_SHIFT(sat_id & 0xF, 57); + + write_scom_for_chiplet(N2_CHIPLET_ID, addr, data); +} + +static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) +{ + enum { + PHB_CERR_RPT0_REG = 0x4010C4A, + PHB_CERR_RPT1_REG = 0x4010C4B, + PHB_NFIR_REG = 0x4010C40, + PHB_NFIRWOF_REG = 0x4010C48, + + PHB_NFIRACTION0_REG = 0x4010C46, + PCI_NFIR_ACTION0_REG = 0x5B0F81E000000000, + + PHB_NFIRACTION1_REG = 0x4010C47, + PCI_NFIR_ACTION1_REG = 0x7F0F81E000000000, + + PHB_NFIRMASK_REG = 0x4010C43, + PCI_NFIR_MASK_REG = 0x30001C00000000, + + PHB_PE_DFREEZE_REG = 0x4010C55, + PHB_PBAIB_CERR_RPT_REG = 0xD01084B, + PHB_PFIR_REG = 0xD010840, + PHB_PFIRWOF_REG = 0xD010848, + + PHB_PFIRACTION0_REG = 0xD010846, + PCI_PFIR_ACTION0_REG = 0xB000000000000000, + + PHB_PFIRACTION1_REG = 0xD010847, + PCI_PFIR_ACTION1_REG = 0xB000000000000000, + + PHB_PFIRMASK_REG = 0xD010843, + PCI_PFIR_MASK_REG = 0xE00000000000000, + + P9_PCIE_CONFIG_BAR_SHIFT = 8, + + PHB_MMIOBAR0_REG = 0x4010C4E, + PHB_MMIOBAR0_MASK_REG = 0x4010C4F, + PHB_MMIOBAR1_REG = 0x4010C50, + PHB_MMIOBAR1_MASK_REG = 0x04010C51, + PHB_PHBBAR_REG = 0x4010C52, + PHB_BARE_REG = 0x4010C54, + + PHB_PHBRESET_REG = 0xD01084A, + PHB_ACT0_REG = 0xD01090E, + PHB_ACTION1_REG = 0xD01090F, + PHB_MASK_REG = 0xD01090B, + }; + + /* ATTR_PROC_PCIE_MMIO_BAR0_BASE_ADDR_OFFSET */ + uint64_t mmio_bar0_offsets[MAX_PHB_PER_PROC] = { 0 }; + /* ATTR_PROC_PCIE_MMIO_BAR1_BASE_ADDR_OFFSET */ + uint64_t mmio_bar1_offsets[MAX_PHB_PER_PROC] = { 0 }; + /* ATTR_PROC_PCIE_REGISTER_BAR_BASE_ADDR_OFFSET */ + uint64_t register_bar_offsets[MAX_PHB_PER_PROC] = { 0 }; + /* ATTR_PROC_PCIE_BAR_SIZE */ + uint64_t bar_sizes[3] = { 0 }; + + /* Determine base address of chip MMIO range */ + uint64_t base_addr_mmio = 0; + base_addr_mmio |= PPC_SHIFT(0, 12); // 5 bits, ATTR_PROC_FABRIC_SYSTEM_ID + base_addr_mmio |= PPC_SHIFT(0, 18); // 4 bits, ATTR_PROC_EFF_FABRIC_GROUP_ID + base_addr_mmio |= PPC_SHIFT(0, 21); // 3 bits, ATTR_PROC_EFF_FABRIC_CHIP_ID + base_addr_mmio |= PPC_SHIFT(3, 14); // 2 bits, FABRIC_ADDR_MSEL, + // nm = 0b00/01, m = 0b10, mmio = 0b11 + + uint8_t phb = 0; + for (phb = 0; phb < MAX_PHB_PER_PROC; ++phb) { + /* BAR enable attribute (ATTR_PROC_PCIE_BAR_ENABLE) */ + uint8_t bar_enables[3] = { 0 }; + + uint64_t val = 0; + uint64_t mmio0_bar = base_addr_mmio; + uint64_t mmio1_bar = base_addr_mmio; + uint64_t register_bar = base_addr_mmio; + + if (!(phb_active_mask & (PHB0_MASK >> phb))) + continue; + + printk(BIOS_EMERG, "Initializing PHB%d...\n", phb); + + /* + * Phase2 init step 12_a (yes, out of order) + * NestBase + StackBase + 0xA + * 0xFFFFFFFF_FFFFFFFF + * Clear any spurious cerr_rpt0 bits (cerr_rpt0) + */ + phb_nest_write(phb, PHB_CERR_RPT0_REG, PPC_BITMASK(0, 63)); + + /* + * Phase2 init step 12_b (yes, out of order) + * NestBase + StackBase + 0xB + * 0xFFFFFFFF_FFFFFFFF + * Clear any spurious cerr_rpt1 bits (cerr_rpt1) + */ + phb_nest_write(phb, PHB_CERR_RPT1_REG, PPC_BITMASK(0, 63)); + + /* + * Phase2 init step 7_c + * NestBase + StackBase + 0x0 + * 0x00000000_00000000 + * Clear any spurious FIR + * bits (NFIR)NFIR + */ + phb_nest_write(phb, PHB_NFIR_REG, 0); + + /* + * Phase2 init step 8 + * NestBase + StackBase + 0x8 + * 0x00000000_00000000 + * Clear any spurious WOF bits (NFIRWOF) + */ + phb_nest_write(phb, PHB_NFIRWOF_REG, 0); + + /* + * Phase2 init step 9 + * NestBase + StackBase + 0x6 + * Set the per FIR Bit Action 0 
register + */ + phb_nest_write(phb, PHB_NFIRACTION0_REG, PCI_NFIR_ACTION0_REG); + + /* + * Phase2 init step 10 + * NestBase + StackBase + 0x7 + * Set the per FIR Bit Action 1 register + */ + phb_nest_write(phb, PHB_NFIRACTION1_REG, PCI_NFIR_ACTION1_REG); + + /* + * Phase2 init step 11 + * NestBase + StackBase + 0x3 + * Set FIR Mask Bits to allow errors (NFIRMask) + */ + phb_nest_write(phb, PHB_NFIRMASK_REG, PCI_NFIR_MASK_REG); + + /* + * Phase2 init step 12 + * NestBase + StackBase + 0x15 + * 0x00000000_00000000 + * Set Data Freeze Type Register for SUE handling (DFREEZE) + */ + phb_nest_write(phb, PHB_PE_DFREEZE_REG, 0); + + /* + * Phase2 init step 13_a + * PCIBase + StackBase + 0xB + * 0x00000000_00000000 + * Clear any spurious pbaib_cerr_rpt bits + */ + phb_write(phb, PHB_PBAIB_CERR_RPT_REG, 0); + + /* + * Phase2 init step 13_b + * PCIBase + StackBase + 0x0 + * 0x00000000_00000000 + * Clear any spurious FIR + * bits (PFIR)PFIR + */ + phb_write(phb, PHB_PFIR_REG, 0); + + /* + * Phase2 init step 14 + * PCIBase + StackBase + 0x8 + * 0x00000000_00000000 + * Clear any spurious WOF bits (PFIRWOF) + */ + phb_write(phb, PHB_PFIRWOF_REG, 0); + + /* + * Phase2 init step 15 + * PCIBase + StackBase + 0x6 + * Set the per FIR Bit Action 0 register + */ + phb_write(phb, PHB_PFIRACTION0_REG, PCI_PFIR_ACTION0_REG); + + /* + * Phase2 init step 16 + * PCIBase + StackBase + 0x7 + * Set the per FIR Bit Action 1 register + */ + phb_write(phb, PHB_PFIRACTION1_REG, PCI_PFIR_ACTION1_REG); + + /* + * Phase2 init step 17 + * PCIBase + StackBase + 0x3 + * Set FIR Mask Bits to allow errors (PFIRMask) + */ + phb_write(phb, PHB_PFIRMASK_REG, PCI_PFIR_MASK_REG); + + /* + * Phase2 init step 18 + * NestBase + StackBase + 0xE + * Set MMIO Base Address Register 0 (MMIOBAR0) + */ + mmio0_bar += mmio_bar0_offsets[phb]; + mmio0_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; + phb_nest_write(phb, PHB_MMIOBAR0_REG, mmio0_bar); + + /* + * Phase2 init step 19 + * NestBase + StackBase + 0xF + * Set MMIO BASE Address Register Mask 0 (MMIOBAR0_MASK) + */ + phb_nest_write(phb, PHB_MMIOBAR0_MASK_REG, bar_sizes[0]); + + /* + * Phase2 init step 20 + * NestBase + StackBase + 0x10 + * Set MMIO Base + * Address Register 1 (MMIOBAR1) + */ + mmio1_bar += mmio_bar1_offsets[phb]; + mmio1_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; + phb_nest_write(phb, PHB_MMIOBAR1_REG, mmio1_bar); + + /* + * Phase2 init step 21 + * NestBase + StackBase + 0x11 + * Set MMIO Base Address Register Mask 1 (MMIOBAR1_MASK) + */ + phb_nest_write(phb, PHB_MMIOBAR1_MASK_REG, bar_sizes[1]); + + /* + * Phase2 init step 22 + * NestBase + StackBase + 0x12 + * Set PHB Register Base address Register (PHBBAR) + */ + register_bar += register_bar_offsets[phb]; + register_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; + phb_nest_write(phb, PHB_PHBBAR_REG, register_bar); + + /* + * Phase2 init step 23 + * NestBase + StackBase + 0x14 + * Set Base address Enable Register (BARE) + */ + + val = 0; + + if (bar_enables[0]) + val |= PPC_BIT(0); // PHB_BARE_REG_PE_MMIO_BAR0_EN, bit 0 for BAR0 + if (bar_enables[1]) + val |= PPC_BIT(1); // PHB_BARE_REG_PE_MMIO_BAR1_EN, bit 1 for BAR1 + if (bar_enables[2]) + val |= PPC_BIT(2); // PHB_BARE_REG_PE_PHB_BAR_EN, bit 2 for PHB + + phb_nest_write(phb, PHB_BARE_REG, val); + + /* + * Phase2 init step 24 + * PCIBase + StackBase +0x0A + * 0x00000000_00000000 + * Remove ETU/AIB bus from reset (PHBReset) + */ + phb_write(phb, PHB_PHBRESET_REG, 0); + /* Configure ETU FIR (all masked) */ + phb_write(phb, PHB_ACT0_REG, 0); + phb_write(phb, PHB_ACTION1_REG, 0); + phb_write(phb, PHB_MASK_REG, 
PPC_BITMASK(0, 63)); + } +} + +void istep_14_3(uint8_t phb_active_mask, const uint8_t *iovalid_enable) +{ + printk(BIOS_EMERG, "starting istep 14.3\n"); + report_istep(14, 3); + + init_pecs(iovalid_enable); + init_phbs(phb_active_mask, iovalid_enable); + + printk(BIOS_EMERG, "ending istep 14.3\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index f623bc1763d..fb12dca384b 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -367,6 +367,8 @@ void main(void) istep_14_1(); istep_14_2(); + istep_14_3(phb_active_mask, iovalid_enable); + report_istep(14, 4); // no-op istep_14_5(); timestamp_add_now(TS_INITRAM_END); From e6dfc68cc6b41285d07f19c2efc77ec8026cacc4 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 27 Oct 2021 14:53:17 +0200 Subject: [PATCH 097/213] soc/power9/chip.c: unhardcode nominal core frequency from DT Signed-off-by: Krystian Hebel Change-Id: I81a63a56a3409fc481eb418a9abee68993e9d4c3 --- src/soc/ibm/power9/chip.c | 20 +++++++++++--------- src/soc/ibm/power9/chip.h | 2 +- src/soc/ibm/power9/homer.c | 4 +++- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 89da3e4e121..56acea0753d 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -15,6 +15,8 @@ #include "istep_13_scom.h" #include "chip.h" +static uint64_t nominal_freq; + /* * These are various definitions of the page sizes and segment sizes supported * by the MMU. Values are the same as dumped from original firmware, comments @@ -172,23 +174,18 @@ static void fill_cpu_node(struct device_tree_node *node, uint32_t phandle, dt_add_u32_prop(node, "ibm,purr", 0x1); dt_add_u32_prop(node, "ibm,spurr", 0x1); - /* - * FIXME: un-hardcode. This is either nominal or safe mode frequency, - * depending on whether OCC has been started successfully. - */ - uint64_t clock_freq = 2700ULL * MHz; /* * Old-style core clock frequency. Only create this property if the * frequency fits in a 32-bit number. Do not create it if it doesn't. */ - if ((clock_freq >> 32) == 0) - dt_add_u32_prop(node, "clock-frequency", clock_freq); + if ((nominal_freq >> 32) == 0) + dt_add_u32_prop(node, "clock-frequency", nominal_freq); /* * Mandatory: 64-bit version of the core clock frequency, always create * this property. */ - dt_add_u64_prop(node, "ibm,extended-clock-frequency", clock_freq); + dt_add_u64_prop(node, "ibm,extended-clock-frequency", nominal_freq); /* Timebase freq has a fixed value, always use that */ dt_add_u32_prop(node, "timebase-frequency", 512 * MHz); @@ -407,7 +404,12 @@ static void enable_soc_dev(struct device *dev) reserved_size = 8*1024 + 4*1024 *8 /* * num_of_cpus */; top -= reserved_size; reserved_ram_resource_kb(dev, idx++, top, reserved_size); - build_homer_image((void *)(top * 1024)); + + /* + * Assumption: OCC boots successfully or coreboot die()s, booting in safe + * mode without runtime power management is not supported. 
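+	 * build_homer_image() returns the nominal core frequency, which is later
+	 * exported through the cpu nodes' clock-frequency properties in the
+	 * device tree.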
+ */ + nominal_freq = build_homer_image((void *)(top * 1024)); rng_init(); istep_18_11(); diff --git a/src/soc/ibm/power9/chip.h b/src/soc/ibm/power9/chip.h index d6ce11b3609..4d223ab2fcb 100644 --- a/src/soc/ibm/power9/chip.h +++ b/src/soc/ibm/power9/chip.h @@ -6,6 +6,6 @@ struct soc_ibm_power9_config { }; -void build_homer_image(void *homer_bar); +uint64_t build_homer_image(void *homer_bar); #endif /* __SOC_CAVIUM_CN81XX_CHIP_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index f47e295659d..58cb1e82863 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2285,7 +2285,7 @@ const struct voltage_bucket_data * get_voltage_data(void) /* * This logic is for SMF disabled only! */ -void build_homer_image(void *homer_bar) +uint64_t build_homer_image(void *homer_bar) { static uint8_t rings_buf[300 * KiB]; @@ -2528,4 +2528,6 @@ void build_homer_image(void *homer_bar) istep_21_1(homer, cores); istep_16_1(this_core); + + return (uint64_t)get_voltage_data()->nominal.freq * MHz; } From a4a95f5d7d01603e13c17c951673f88776289992 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 22 Oct 2021 11:09:25 +0200 Subject: [PATCH 098/213] soc/power9/chip.c: add code for filling unique phandles for all nodes Signed-off-by: Krystian Hebel Change-Id: Ie7630762f1d53cdd1f782b03b8eb607c7dbacc17 --- src/mainboard/raptor-cs/talos-2/mainboard.c | 8 -- src/soc/ibm/power9/chip.c | 97 ++++++++++++--------- 2 files changed, 58 insertions(+), 47 deletions(-) diff --git a/src/mainboard/raptor-cs/talos-2/mainboard.c b/src/mainboard/raptor-cs/talos-2/mainboard.c index dbcd9b8f710..a1a35f08339 100644 --- a/src/mainboard/raptor-cs/talos-2/mainboard.c +++ b/src/mainboard/raptor-cs/talos-2/mainboard.c @@ -8,14 +8,6 @@ static void mainboard_enable(struct device *dev) { if (!dev) die("No dev0; die\n"); - - /* - * Smallest reported to be working (but not officially supported) DIMM is - * 4GB. This means that we always have at least as much available. Last - * 256MB of first 4GB are reserved for hostboot/coreboot, which is also - * included in device tree we are currently passing. 
- */ - reserved_ram_resource_kb(dev, 0, 4 * 1024 * 1024 - 256 * 1024, 256 * 1024); } struct chip_operations mainboard_ops = { diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 56acea0753d..9d91ef98ebd 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -60,11 +60,39 @@ static uint8_t pa_features[] = 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; -static void fill_l3_node(struct device_tree_node *node, uint32_t phandle, - uint32_t pir) +static void dt_assign_new_phandle(struct device_tree *tree, + struct device_tree_node *node) { + struct device_tree_property *prop; + uint32_t phandle; + + list_for_each(prop, node->properties, list_node) { + if (!strcmp("phandle", prop->prop.name)) { + /* Node already has phandle set, keep it */ + return; + } + } + + phandle = ++tree->max_phandle; node->phandle = phandle; dt_add_u32_prop(node, "phandle", phandle); +} + +static void dt_fill_all_phandles(struct device_tree *tree, + struct device_tree_node *node) +{ + struct device_tree_node *child; + + dt_assign_new_phandle(tree, node); + + list_for_each(child, node->children, list_node) + dt_fill_all_phandles(tree, child); +} + +static void fill_l3_node(struct device_tree *tree, + struct device_tree_node *node, uint32_t pir) +{ + dt_assign_new_phandle(tree, node); dt_add_u32_prop(node, "reg", pir); dt_add_string_prop(node, "device_type", "cache"); dt_add_bin_prop(node, "cache-unified", NULL, 0); @@ -77,11 +105,11 @@ static void fill_l3_node(struct device_tree_node *node, uint32_t phandle, dt_add_u32_prop(node, "i-cache-sets", 8); /* Per Hostboot. Why not 20? */ } -static void fill_l2_node(struct device_tree_node *node, uint32_t phandle, - uint32_t pir, uint32_t next_lvl_phandle) +static void fill_l2_node(struct device_tree *tree, + struct device_tree_node *node, uint32_t pir, + uint32_t next_lvl_phandle) { - node->phandle = phandle; - dt_add_u32_prop(node, "phandle", phandle); + dt_assign_new_phandle(tree, node); /* This is not a typo, "l2-cache" points to the node of L3 cache */ dt_add_u32_prop(node, "l2-cache", next_lvl_phandle); dt_add_u32_prop(node, "reg", pir); @@ -96,12 +124,12 @@ static void fill_l2_node(struct device_tree_node *node, uint32_t phandle, dt_add_u32_prop(node, "i-cache-sets", 8); } -static void fill_cpu_node(struct device_tree_node *node, uint32_t phandle, - uint32_t pir, uint32_t next_lvl_phandle) +static void fill_cpu_node(struct device_tree *tree, + struct device_tree_node *node, uint32_t pir, + uint32_t next_lvl_phandle) { /* Mandatory/standard properties */ - node->phandle = phandle; - dt_add_u32_prop(node, "phandle", phandle); + dt_assign_new_phandle(tree, node); dt_add_string_prop(node, "device_type", "cpu"); dt_add_bin_prop(node, "64-bit", NULL, 0); dt_add_bin_prop(node, "32-64-bridge", NULL, 0); @@ -112,7 +140,7 @@ static void fill_cpu_node(struct device_tree_node *node, uint32_t phandle, /* * The "status" property indicate whether the core is functional. It's * a string containing "okay" for a good core or "bad" for a non-functional - * one. You can also just ommit the non-functional ones from the DT + * one. You can also just omit the non-functional ones from the DT */ dt_add_string_prop(node, "status", "okay"); @@ -232,25 +260,27 @@ static inline unsigned long size_k(uint64_t reg) return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); } +/* + * Device tree passed to Skiboot has to have phandles set either for all nodes + * or none at all. 
Because relative phandles are set for cpu->l2_cache->l3_cache + * chain, only first option is possible. + */ static int dt_platform_update(struct device_tree *tree) { - struct device_tree_node *node, *cpus; + struct device_tree_node *cpus, *xscom; uint64_t cores = read_scom(0x0006C090); assert(cores != 0); - /* Find "cpus" node, create if necessary */ - cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 1); - assert(cpus != NULL); + /* Find xscom node, halt if not found */ + /* TODO: is the address always the same? */ + xscom = dt_find_node_by_path(tree, "/xscom@603fc00000000", NULL, NULL, 0); + if (xscom == NULL) + die("No 'xscom' node in device tree!\n"); - /* - * First remove all existing "cpu" nodes, then add ours. - * - * TODO: check if any other node relies on phandles of "cpu" or "cache" - * nodes - */ - list_for_each(node, cpus->children, list_node) { - list_remove(&node->list_node); - } + /* Find "cpus" node */ + cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0); + if (cpus == NULL) + die("No 'cpus' node in device tree!\n"); for (int core_id = 0; core_id <= 24; core_id++) { if (IS_EC_FUNCTIONAL(core_id, cores)) { @@ -305,26 +335,15 @@ static int dt_platform_update(struct device_tree *tree) * are created at the same time, no need to test both. */ if (!l3_node->phandle) { - fill_l3_node(l3_node, ++tree->max_phandle, l3_pir); - fill_l2_node(l2_node, ++tree->max_phandle, l2_pir, - l3_node->phandle); + fill_l3_node(tree, l3_node, l3_pir); + fill_l2_node(tree, l2_node, l2_pir, l3_node->phandle); } - fill_cpu_node(cpu_node, ++tree->max_phandle, pir, l2_node->phandle); + fill_cpu_node(tree, cpu_node, pir, l2_node->phandle); } } - /* Debug for Skiroot's kernel. TODO: make this a config option? */ - node = dt_find_node_by_path(tree, "/chosen", NULL, NULL, 1); - dt_add_string_prop(node, "bootargs", "console=hvc0"); - - /* Will be created by Skiboot. TODO: remove from dts */ - node = dt_find_node_by_path(tree, "/ibm,opal", NULL, NULL, 0); - if (node) - list_remove(&node->list_node); - - node = dt_find_node_by_path(tree, "/ibm,opal/power-mgt", NULL, NULL, 1); - dt_add_u32_prop(node, "ibm,enabled-stop-levels", 0xEC000000); + dt_fill_all_phandles(tree, tree->root); return 0; } From f22046eddbbaaa2fa70638388cbe01561ef1bf23 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 3 Nov 2021 01:06:50 +0200 Subject: [PATCH 099/213] soc/power9/romstage.c: setup BMC's watchdog to 2m Change-Id: Iea68cf4747db637b61f9e3c989dac81641c618c2 Signed-off-by: Sergii Dmytruk --- src/mainboard/raptor-cs/talos-2/Kconfig | 1 + src/soc/ibm/power9/romstage.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index 43c1852b1d1..ad7ca077226 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -24,6 +24,7 @@ config BOARD_SPECIFIC_OPTIONS select MISSING_BOARD_RESET select HAVE_DEBUG_RAM_SETUP select IPMI_BT + select IPMI_BT_ROMSTAGE select FLATTENED_DEVICE_TREE config MEMLAYOUT_LD_FILE diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index fb12dca384b..d9dada9aa19 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -342,6 +344,16 @@ void main(void) console_init(); + if (ipmi_premem_init(CONFIG_BMC_BT_BASE, 0) != CB_SUCCESS) + die("Failed to initialize IPMI\n"); + + /* + * Two minutes to load. 
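+	 * If the timer is not stopped or restarted before it expires, the BMC
+	 * acts on TIMEOUT_HARD_RESET and hard-resets the host.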
+ * Not handling return code, because the function itself prints log messages + * and its failure is not a critical error. + */ + (void)ipmi_init_and_start_bmc_wdt(CONFIG_BMC_BT_BASE, 120, TIMEOUT_HARD_RESET); + istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); istep_10_13(); From 370287257ba022e70a51791ba14767ff3d90e10c Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 2 Nov 2021 12:24:33 +0100 Subject: [PATCH 100/213] cpu/power/spr.h: change SPR numbers definitions to decimal ISA lists those values in decimal, this change makes it easier to compare with documentation. It also fixes wrong values for SRR0/SRR1. Signed-off-by: Krystian Hebel Change-Id: I326f9ec73ab63c4c1cef836d3217e7132f49a4f5 --- src/include/cpu/power/spr.h | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index 67687dc2486..2d36a02a5f7 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -5,29 +5,29 @@ #include // PPC_BIT() -#define SPR_DEC 0x16 +#define SPR_DEC 22 #define SPR_DEC_IMPLEMENTED_BITS 56 #define SPR_DEC_LONGEST_TIME ((1ull << (SPR_DEC_IMPLEMENTED_BITS - 1)) - 1) -#define SPR_SRR0 0x2A -#define SPR_SRR1 0x2B +#define SPR_SRR0 26 +#define SPR_SRR1 27 -#define SPR_DAWR 0xB4 -#define SPR_CIABR 0xBB -#define SPR_DAWRX 0xBC -#define SPR_TB 0x10C +#define SPR_DAWR 180 +#define SPR_CIABR 187 +#define SPR_DAWRX 188 +#define SPR_TB 268 -#define SPR_PVR 0x11F +#define SPR_PVR 287 #define SPR_PVR_REV_MASK (PPC_BITMASK(52, 55) | PPC_BITMASK(60, 63)) #define SPR_PVR_REV(maj, min) (PPC_SHIFT((maj), 55) | PPC_SHIFT((min), 63)) -#define SPR_HSPRG0 0x130 -#define SPR_HSPRG1 0x131 +#define SPR_HSPRG0 304 +#define SPR_HSPRG1 305 -#define SPR_HDEC 0x136 -#define SPR_HRMOR 0x139 +#define SPR_HDEC 310 +#define SPR_HRMOR 313 -#define SPR_LPCR 0x13E +#define SPR_LPCR 318 #define SPR_LPCR_HVEE PPC_BIT(17) #define SPR_LPCR_LD PPC_BIT(46) #define SPR_LPCR_HDEE PPC_BIT(48) @@ -38,8 +38,8 @@ #define SPR_LPCR_HVICE PPC_BIT(62) #define SPR_LPCR_HDICE PPC_BIT(63) -#define SPR_HMER 0x150 -#define SPR_HMEER 0x151 +#define SPR_HMER 336 +#define SPR_HMEER 337 /* Bits in HMER/HMEER */ #define SPR_HMER_MALFUNCTION_ALERT PPC_BIT(0) #define SPR_HMER_PROC_RECV_DONE PPC_BIT(2) @@ -57,10 +57,10 @@ #define SPR_HMER_XSCOM_STATUS PPC_BITMASK(21, 23) #define SPR_HMER_XSCOM_OCCUPIED PPC_BIT(23) -#define SPR_PTCR 0x1D0 -#define SPR_PSSCR 0x357 -#define SPR_PMCR 0x374 -#define SPR_PIR 0x3FF +#define SPR_PTCR 464 +#define SPR_PSSCR 855 +#define SPR_PMCR 884 +#define SPR_PIR 1023 #ifndef __ASSEMBLER__ #include From c10acc8978f226493c2767748926700d1ec99330 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 24 Oct 2021 00:59:48 +0300 Subject: [PATCH 101/213] arch/ppc64: more advanced PPC_PLACE macro With compile-time checks. Change-Id: I81cd2d338ff6a89b635cf3e1a77dca302d4572cc Signed-off-by: Sergii Dmytruk --- src/arch/ppc64/include/arch/byteorder.h | 48 +++++++++++++++++++++++++ src/soc/ibm/power9/homer.c | 3 -- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/arch/ppc64/include/arch/byteorder.h b/src/arch/ppc64/include/arch/byteorder.h index 8ff857675cf..a32336a44e7 100644 --- a/src/arch/ppc64/include/arch/byteorder.h +++ b/src/arch/ppc64/include/arch/byteorder.h @@ -11,8 +11,56 @@ #ifndef __ASSEMBLER__ #include + +/* + * The pos parameter specifies MSB/leftmost bit. 
Passing compile-time constants + * (literals or expressions) for parameters allows for the following + * compile-time checks (not all are performed, depends on which parameter values + * are known at compile-time): + * - pos is in range [0; 63] + * - len is in range [1; 64] + * - (pos + len) <= 64 + * - (val & ~len-based-mask) == 0 + */ +#define PPC_PLACE(val, pos, len) \ + /* Incorrect arguments detected in PPC_PLACE */ __builtin_choose_expr( \ + PPC_PLACE_GOOD_ARGS(val, pos, len), \ + PPC_PLACE_IMPL(val, pos, len), \ + (void)0) + +#define PPC_PLACE_GOOD_ARGS(val, pos, len) ( \ + /* pos value */ \ + __builtin_choose_expr( \ + __builtin_constant_p(pos), \ + ((pos) >= 0) && ((pos) <= 63), \ + 1) && \ + /* len value */ \ + __builtin_choose_expr( \ + __builtin_constant_p(len), \ + ((len) >= 1) && ((len) <= 64), \ + 1) && \ + /* range */ \ + __builtin_choose_expr( \ + __builtin_constant_p(pos) && __builtin_constant_p(len), \ + (pos) + (len) <= 64, \ + 1) && \ + /* value */ \ + __builtin_choose_expr( \ + __builtin_constant_p(val) && __builtin_constant_p(len), \ + ((val) & ~(((uint64_t)1 << (len)) - 1)) == 0, \ + 1) \ + ) + +#define PPC_PLACE_IMPL(val, pos, len) \ + PPC_SHIFT((val) & (((uint64_t)1 << (len)) - 1), ((pos) + ((len) - 1))) + #define PPC_SHIFT(val, lsb) (((uint64_t)(val)) << (63 - (lsb))) +/* Sanity checks and usage examples for PPC_PLACE */ +_Static_assert(PPC_PLACE(0x12345, 0, 20) == 0x1234500000000000, ""); +_Static_assert(PPC_PLACE(0x12345, 0, 24) == 0x0123450000000000, ""); +_Static_assert(PPC_PLACE(0x12345, 8, 24) == 0x0001234500000000, ""); + #else #define PPC_SHIFT(val, lsb) ((val) << (63 - (lsb))) #endif diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 58cb1e82863..861d4ae14e2 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -39,9 +39,6 @@ #define QUAD_BIT_POS 24 -#define PPC_PLACE(val, pos, len) \ - PPC_SHIFT((val) & ((1 << ((len) + 1)) - 1), ((pos) + ((len) - 1))) - /* Subsections of STOP image that contain SCOM entries */ enum scom_section { STOP_SECTION_CORE_SCOM, From a1bae6c5b17112ed4bdc9d50d2e3429a65b9b06a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 24 Oct 2021 01:18:01 +0300 Subject: [PATCH 102/213] soc/power9/: PPC_SHIFT w/o constants => PPC_PLACE These are invocations with literals for position and length except for several places with variables or expressions. 
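For example, PPC_SHIFT(0x20, 5) (a 6-bit value ending at bit 5) becomes
PPC_PLACE(0x20, 0, 6) and PPC_SHIFT(0xFFFF, 63) becomes
PPC_PLACE(0xFFFF, 48, 16); in general, PPC_SHIFT(val, lsb) of a len-bit
value corresponds to PPC_PLACE(val, lsb - len + 1, len).
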
Change-Id: I8207a33667edf59b9dd896de96c08962c32b81f3 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/ccs.c | 16 ++++++------ src/soc/ibm/power9/homer.c | 38 +++++++++++++-------------- src/soc/ibm/power9/i2c.c | 10 ++++---- src/soc/ibm/power9/istep_10_10.c | 16 ++++++------ src/soc/ibm/power9/istep_10_13.c | 20 +++++++-------- src/soc/ibm/power9/istep_13_11.c | 2 +- src/soc/ibm/power9/istep_13_13.c | 14 +++++----- src/soc/ibm/power9/istep_13_3.c | 5 ++-- src/soc/ibm/power9/istep_13_8.c | 44 ++++++++++++++++---------------- src/soc/ibm/power9/istep_13_9.c | 22 ++++++++-------- src/soc/ibm/power9/istep_14_3.c | 30 +++++++++++----------- src/soc/ibm/power9/istep_14_5.c | 18 ++++++------- src/soc/ibm/power9/istep_18_11.c | 14 +++++----- src/soc/ibm/power9/istep_18_12.c | 2 +- src/soc/ibm/power9/mcbist.c | 2 +- 15 files changed, 127 insertions(+), 126 deletions(-) diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index 247a11af7ec..332e4b43e2a 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -44,14 +44,14 @@ void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, * [23] A14 */ uint64_t mrs64 = (reverse_bits(mrs) & PPC_BITMASK(0, 13)) | /* A0-A13 */ - PPC_SHIFT(mrs & (1<<14), 23 + 14) | /* A14 */ - PPC_SHIFT(mrs & (1<<15), 22 + 15) | /* A15 */ - PPC_SHIFT(mrs & (1<<16), 21 + 16) | /* A16 */ - PPC_SHIFT(mrs & (1<<17), 14 + 17) | /* A17 */ - PPC_SHIFT(mrs & (1<<20), 17 + 20) | /* BA0 */ - PPC_SHIFT(mrs & (1<<21), 18 + 21) | /* BA1 */ - PPC_SHIFT(mrs & (1<<22), 19 + 22) | /* BG0 */ - PPC_SHIFT(mrs & (1<<23), 15 + 23); /* BA1 */ + PPC_PLACE(mrs >> 14, 23, 1) | /* A14 */ + PPC_PLACE(mrs >> 15, 22, 1) | /* A15 */ + PPC_PLACE(mrs >> 16, 21, 1) | /* A16 */ + PPC_PLACE(mrs >> 17, 14, 1) | /* A17 */ + PPC_PLACE(mrs >> 20, 17, 1) | /* BA0 */ + PPC_PLACE(mrs >> 21, 18, 1) | /* BA1 */ + PPC_PLACE(mrs >> 22, 19, 1) | /* BG0 */ + PPC_PLACE(mrs >> 23, 15, 1); /* BA1 */ /* MC01.MCBIST.CCS.CCS_INST_ARR0_n [all] 0 diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 861d4ae14e2..1aa0761b290 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -785,7 +785,7 @@ static void pba_reset(void) */ for (int sl = 0; sl < 3; sl++) { // Fourth is owned by SBE, do not reset time = wait_us(16, - (write_scom(0x00068001, PPC_BIT(0) | PPC_SHIFT(sl, 2)), + (write_scom(0x00068001, PPC_BIT(0) | PPC_PLACE(sl, 1, 2)), (read_scom(0x00068001) & PPC_BIT(4 + sl)) == 0)); if (!time || read_scom(0x00068001) & PPC_BIT(8 + sl)) @@ -867,7 +867,7 @@ static void stop_gpe_init(struct homer_st *homer) */ uint32_t ivpr = 0x80000000 + homer->qpmr.sgpe.header.l1_offset + offsetof(struct homer_st, qpmr); - write_scom(0x00066001, PPC_SHIFT(ivpr, 31)); + write_scom(0x00066001, PPC_PLACE(ivpr, 0, 32)); /* Program XCR to ACTIVATE SGPE TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 @@ -880,9 +880,9 @@ static void stop_gpe_init(struct homer_st *homer) [all] 0 [1-3] PPE_XIXCR_XCR = 2 // resume */ - write_scom(0x00066010, PPC_SHIFT(6, 3)); - write_scom(0x00066010, PPC_SHIFT(4, 3)); - write_scom(0x00066010, PPC_SHIFT(2, 3)); + write_scom(0x00066010, PPC_PLACE(6, 1, 3)); + write_scom(0x00066010, PPC_PLACE(4, 1, 3)); + write_scom(0x00066010, PPC_PLACE(2, 1, 3)); /* * Now wait for SGPE to not be halted and for the HCode to indicate to be @@ -946,7 +946,7 @@ static void psu_command(uint8_t flags, long time) /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ /* 
REQUIRE_RESPONSE, CLASS_CORE_STATE, CMD_CONTROL_DEADMAN_LOOP, flags */ - write_scom(0x000D0050, 0x000001000000D101 | PPC_SHIFT(flags, 31)); + write_scom(0x000D0050, 0x000001000000D101 | PPC_PLACE(flags, 24, 8)); /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ write_scom(0x000D0051, time); @@ -1228,7 +1228,7 @@ static void pm_corequad_init(uint64_t cores) /* Restore Quad PPM Error Mask */ err_mask = 0xFFFFFF00; // from Hostboot's log write_scom_for_chiplet(quad_chiplet, EQ_QPPM_ERRMSK, - PPC_SHIFT(err_mask, 31)); + PPC_PLACE(err_mask, 0, 32)); for (int core = quad * 4; core < (quad + 1) * 4; ++core) { chiplet_id_t core_chiplet = EC00_CHIPLET_ID + core; @@ -1285,7 +1285,7 @@ static void pm_corequad_init(uint64_t cores) /* Restore CORE PPM Error Mask */ err_mask = 0xFFF00000; // from Hostboot's log write_scom_for_chiplet(core_chiplet, C_CPPM_ERRMSK, - PPC_SHIFT(err_mask, 31)); + PPC_PLACE(err_mask, 0, 32)); } } } @@ -1371,7 +1371,7 @@ static void pstate_gpe_init(struct homer_st *homer, uint64_t cores) scom_and_or_for_chiplet(EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, ~PPC_BITMASK(1, 11), - PPC_SHIFT(safe_mode_freq, 11)); + PPC_PLACE(safe_mode_freq, 1, 11)); } } @@ -1461,7 +1461,7 @@ static void pm_pss_init(void) */ scom_and_or(PU_SPIPSS_ADC_CTRL_REG0, ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), - PPC_SHIFT(0x20, 5)); + PPC_PLACE(0x20, 0, 6)); /* * 0 adc_fsm_enable = 1 @@ -1474,7 +1474,7 @@ static void pm_pss_init(void) * Truncating last value to 4 bits gives 0. */ scom_and_or(PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17), - PPC_BIT(0) | PPC_SHIFT(10, 13) | PPC_SHIFT(0, 17)); + PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_PLACE(0, 14, 4)); /* * 0-16 inter frame delay @@ -1489,7 +1489,7 @@ static void pm_pss_init(void) */ scom_and_or(PU_SPIPSS_P2S_CTRL_REG0, ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), - PPC_SHIFT(0x20, 5)); + PPC_PLACE(0x20, 0, 6)); /* * 0 p2s_fsm_enable = 1 @@ -1501,7 +1501,7 @@ static void pm_pss_init(void) */ scom_and_or(PU_SPIPSS_P2S_CTRL_REG0 + 1, ~(PPC_BITMASK(0, 13) | PPC_BIT(17)), - PPC_BIT(0) | PPC_SHIFT(10, 13) | PPC_BIT(17)); + PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_BIT(17)); /* * 0-16 inter frame delay @@ -1515,7 +1515,7 @@ static void pm_pss_init(void) */ scom_and_or(PU_SPIPSS_100NS_REG, PPC_BITMASK(0, 31), - PPC_SHIFT(powerbus_cfg()->fabric_freq / 40, 31)); + PPC_PLACE(powerbus_cfg()->fabric_freq / 40, 0, 32)); } /* Initializes power-management and starts OCC */ @@ -2447,7 +2447,7 @@ uint64_t build_homer_image(void *homer_bar) [4-7] PPM_PFDLY_POWUP_DLY = 0x9 */ write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011B, - PPC_SHIFT(0x9, 3) | PPC_SHIFT(0x9, 7)); + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); /* TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFOF // 0x200F011D [all] 0 @@ -2455,7 +2455,7 @@ uint64_t build_homer_image(void *homer_bar) [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011D, - PPC_SHIFT(0x8, 3) | PPC_SHIFT(0x8, 7)); + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); } if ((i % 4) == 0 && IS_EQ_FUNCTIONAL(i/4, cores)) { @@ -2466,7 +2466,7 @@ uint64_t build_homer_image(void *homer_bar) [4-7] PPM_PFDLY_POWUP_DLY = 0x9 */ write_scom_for_chiplet(EP00_CHIPLET_ID + i/4, 0x100F011B, - PPC_SHIFT(0x9, 3) | PPC_SHIFT(0x9, 7)); + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); /* TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFOF // 0x100F011D [all] 0 @@ -2474,7 +2474,7 @@ uint64_t build_homer_image(void *homer_bar) [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ write_scom_for_chiplet(EP00_CHIPLET_ID + i/4, 0x100F011D, - 
PPC_SHIFT(0x8, 3) | PPC_SHIFT(0x8, 7)); + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); } } @@ -2512,7 +2512,7 @@ uint64_t build_homer_image(void *homer_bar) [0-3] GPETSEL_FIT_SEL = 0x1 // FIT - fixed interval timer [4-7] GPETSEL_WATCHDOG_SEL = 0xA */ - write_scom(0x00066000, PPC_SHIFT(0x1, 3) | PPC_SHIFT(0xA, 7)); + write_scom(0x00066000, PPC_PLACE(0x1, 0, 4) | PPC_PLACE(0xA, 4, 4)); /* Clear error injection bits *0x0006C18B // Undocumented, PU_OCB_OCI_OCCFLG2_CLEAR diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c index 86477fb2b32..ebbd8f63875 100644 --- a/src/soc/ibm/power9/i2c.c +++ b/src/soc/ibm/power9/i2c.c @@ -15,8 +15,8 @@ #define RES_ERR_REG(bus) (0xA000C | ((bus) << 12)) // CMD register -#define LEN_SHIFT(x) PPC_SHIFT((x), 31) -#define ADDR_SHIFT(x) PPC_SHIFT((x), 14) +#define LEN_PLACE(x) PPC_PLACE((x), 16, 16) +#define ADDR_PLACE(x) PPC_PLACE((x), 8, 7) #define READ_NOT_WRITE 0x0001000000000000 #define START 0x8000000000000000 #define WITH_ADDR 0x4000000000000000 @@ -143,11 +143,11 @@ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, port = segment[i].slave & 0x80 ? 1 : 0; write_scom(MODE_REG(bus), - PPC_SHIFT(bit_rate_div, 15) | PPC_SHIFT(port, 21)); + PPC_PLACE(bit_rate_div, 0, 16) | PPC_PLACE(port, 16, 6)); write_scom(RES_ERR_REG(bus), CLEAR_ERR); write_scom(CMD_REG(bus), START | stop | WITH_ADDR | read_not_write | read_cont | - ADDR_SHIFT(segment[i].slave & 0x7F) | - LEN_SHIFT(segment[i].len)); + ADDR_PLACE(segment[i].slave) | + LEN_PLACE(segment[i].len)); for (len = 0; len < segment[i].len; len++) { r = read_scom(STATUS_REG(bus)); diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c index 5b408ece84d..90894708abf 100644 --- a/src/soc/ibm/power9/istep_10_10.c +++ b/src/soc/ibm/power9/istep_10_10.c @@ -270,21 +270,21 @@ static void determine_lane_configs(uint8_t *phb_active_mask, } static uint64_t pec_val(int pec_id, uint8_t in, - uint32_t pec0_s, uint32_t pec0_c, - uint32_t pec1_s, uint32_t pec1_c, - uint32_t pec2_s, uint32_t pec2_c) + int pec0_s, int pec0_c, + int pec1_s, int pec1_c, + int pec2_s, int pec2_c) { uint64_t out = 0; switch (pec_id) { case 0: - out = PPC_SHIFT(in & ((1 << pec0_c) - 1), pec0_s + pec0_c - 1); + out = PPC_PLACE(in, pec0_s, pec0_c); break; case 1: - out = PPC_SHIFT(in & ((1 << pec1_c) - 1), pec1_s + pec1_c - 1); + out = PPC_PLACE(in, pec1_s, pec1_c); break; case 2: - out = PPC_SHIFT(in & ((1 << pec2_c) - 1), pec2_s + pec2_c - 1); + out = PPC_PLACE(in, pec2_s, pec2_c); break; default: die("Unknown PEC ID: %d\n", pec_id); @@ -465,7 +465,7 @@ static void phase1(const struct lane_config_row **pec_cfgs, /* RX INITGAIN */ scom_and_or_for_chiplet(chiplet, RX_VGA_CTRL3_REGISTER[lane], ~PPC_BITMASK(48, 52), - PPC_SHIFT(pcs_init_gain, 52)); + PPC_PLACE(pcs_init_gain, 48, 5)); /* RX PKINIT */ scom_and_or_for_chiplet(chiplet, RX_LOFF_CNTL_REGISTER[lane], @@ -522,7 +522,7 @@ static void phase1(const struct lane_config_row **pec_cfgs, /* ATTR_PROC_PCIE_PCS_TX_POWER_SEQ_ENABLE = 0xFF, but field is 7 bits */ scom_and_or_for_chiplet(chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, ~PPC_BITMASK(56, 62), - PPC_SHIFT(0x7F, 62)); + PPC_PLACE(0x7F, 56, 7)); /* Phase1 init step 20 (RX VGA Control Register 1) */ diff --git a/src/soc/ibm/power9/istep_10_13.c b/src/soc/ibm/power9/istep_10_13.c index b05b6ddef24..229c1dbdf0d 100644 --- a/src/soc/ibm/power9/istep_10_13.c +++ b/src/soc/ibm/power9/istep_10_13.c @@ -36,9 +36,9 @@ void istep_10_13(void) */ scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E1, ~(PPC_BITMASK(0, 
1) | PPC_BITMASK(7, 63)), - PPC_SHIFT(1, 1) | PPC_SHIFT(2, 8) | PPC_SHIFT(1, 11) - | PPC_SHIFT(0x32, 23) | PPC_SHIFT(0x32, 35) - | PPC_SHIFT(0x32, 47) | PPC_SHIFT(0x32, 59)); + PPC_PLACE(1, 0, 2) | PPC_PLACE(2, 7, 2) | PPC_PLACE(1, 9, 3) + | PPC_PLACE(0x32, 12, 12) | PPC_PLACE(0x32, 24, 12) + | PPC_PLACE(0x32, 36, 12) | PPC_PLACE(0x32, 48, 12)); /* PU_NX_RNG_ST1 [0-6] ADAPTEST_SOFT_FAIL_TH = 2 @@ -46,8 +46,8 @@ void istep_10_13(void) [23-38] ADAPTEST_1BIT_MATCH_TH_MAX = 415 */ scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), - PPC_SHIFT(2, 6) | PPC_SHIFT(100, 22) - | PPC_SHIFT(415, 38)); + PPC_PLACE(2, 0, 7) | PPC_PLACE(100, 7, 16) + | PPC_PLACE(415, 23, 16)); /* PU_NX_RNG_ST3 [0] SAMPTEST_RRN_ENABLE = 1 @@ -56,15 +56,15 @@ void istep_10_13(void) [20-35] SAMPTEST_MATCH_TH_MAX = 0x988A (39,050) */ scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), - PPC_BIT(0) | PPC_SHIFT(7, 3) | PPC_SHIFT(0x6D60, 19) - | PPC_SHIFT(0x988A, 35)); + PPC_BIT(0) | PPC_PLACE(7, 1, 3) | PPC_PLACE(0x6D60, 4, 16) + | PPC_PLACE(0x988A, 20, 16)); /* PU_NX_RNG_RDELAY [6] LFSR_RESEED_EN = 1 [7-11] READ_RTY_RATIO = 0x1D (1/16) */ scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), - PPC_BIT(6) | PPC_SHIFT(0x1D, 11)); + PPC_BIT(6) | PPC_PLACE(0x1D, 7, 5)); /* PU_NX_RNG_CFG [30-37] ST2_RESET_PERIOD = 0x1B @@ -79,8 +79,8 @@ void istep_10_13(void) scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E0, ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) | PPC_BITMASK(46, 61) | PPC_BIT(63)), - PPC_SHIFT(0x1B, 37) | PPC_BIT(40) | PPC_BIT(41) - | PPC_BIT(42) | PPC_BIT(43) | PPC_SHIFT(0x07D0, 61) + PPC_PLACE(0x1B, 30, 8) | PPC_BIT(40) | PPC_BIT(41) + | PPC_BIT(42) | PPC_BIT(43) | PPC_PLACE(0x07D0, 46, 16) | PPC_BIT(63)); printk(BIOS_EMERG, "ending istep 10.13\n"); diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 2558c14185a..9a2b4ab5941 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -658,7 +658,7 @@ static void read_ctr_pre(int mcs_i, int mca_i, int rp, [48-63] VREF_CAL_EN = 0xffff // We already did this in reset_rd_vref() in 13.8 */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, 0, - PPC_SHIFT(0xFFFF, 63)); + PPC_PLACE(0xFFFF, 48, 16)); } /* This also was part of main diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index 266e59870ad..aba4db5e6a3 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -287,17 +287,17 @@ enum mc_rank_config { }; #define MCP0XLT0(D, M0, M1, R17, R16, R15) \ -(PPC_SHIFT((D), 39) | PPC_SHIFT((M0), 43) | PPC_SHIFT((M1), 51) | \ - PPC_SHIFT((R17), 55) | PPC_SHIFT((R16), 59) | PPC_SHIFT((R15), 63)) +(PPC_PLACE((D), 35, 5) | PPC_PLACE((M0), 41, 3) | PPC_PLACE((M1), 47, 5) | \ + PPC_PLACE((R17), 53, 3) | PPC_PLACE((R16), 57, 3) | PPC_PLACE((R15), 61, 3)) #define MCP0XLT1(S0, S1, S2, COL4, COL5, COL6, COL7) \ -(PPC_SHIFT((S0), 7) | PPC_SHIFT((S1), 15) | PPC_SHIFT((S2), 23) | \ - PPC_SHIFT((COL4), 39) | PPC_SHIFT((COL5), 47) | PPC_SHIFT((COL6), 55) | \ - PPC_SHIFT((COL7), 63)) +(PPC_PLACE((S0), 3, 5) | PPC_PLACE((S1), 11, 5) | PPC_PLACE((S2), 19, 5) | \ + PPC_PLACE((COL4), 35, 5) | PPC_PLACE((COL5), 43, 5) | PPC_PLACE((COL6), 51, 5) | \ + PPC_PLACE((COL7), 59, 5)) #define MCP0XLT2(COL8, COL9, BA0, BA1, BG0, BG1) \ -(PPC_SHIFT((COL8), 7) | PPC_SHIFT((COL9), 15) | PPC_SHIFT((BA0), 23) | \ - PPC_SHIFT((BA1), 31) | PPC_SHIFT((BG0), 47) | PPC_SHIFT((BG1), 55)) +(PPC_PLACE((COL8), 3, 5) | 
PPC_PLACE((COL9), 11, 5) | PPC_PLACE((BA0), 19, 5) | \ + PPC_PLACE((BA1), 27, 5) | PPC_PLACE((BG0), 43, 5) | PPC_PLACE((BG1), 51, 5)) /* * xlt_tables[rank_configuration][reg_index] diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index f8ca15a0fb8..b4d2536d324 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -90,8 +90,9 @@ void istep_13_3(void) // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG /* TARGET_TYPE_PERV, chiplet ID = 0x07, ring ID, RING_MODE_SET_PULSE_NSL */ - write_scom(PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | PPC_SHIFT(ring_id, 47) | - PPC_SHIFT(mcs_ids[mcs_i], 31)); + write_scom(PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | + PPC_PLACE(ring_id, 32, 16) | + PPC_PLACE(mcs_ids[mcs_i], 24, 8)); // Ring the host->SBE doorbell // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index 23361273162..a6a1f697b21 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -49,7 +49,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [22-27] = 0x20 // AMO_LIMIT */ scom_and_or_for_chiplet(nest, 0x05010823 + mca_i * mca_mul, - ~PPC_BITMASK(22, 27), PPC_SHIFT(0x20, 27)); + ~PPC_BITMASK(22, 27), PPC_PLACE(0x20, 22, 6)); /* P9N2_MCS_PORT02_MCPERF2 (?) [0-2] = 1 // PF_DROP_VALUE0 @@ -86,10 +86,10 @@ static void p9n_mca_scom(int mcs_i, int mca_i) ~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31) | PPC_BITMASK(28, 31) | PPC_BITMASK(50, 54) | PPC_BIT(61)), /* or */ - PPC_SHIFT(1, 2) | PPC_SHIFT(3, 5) | PPC_SHIFT(5, 8) - | PPC_SHIFT(7, 11) /* PF_DROP_VALUEs */ - | PPC_SHIFT(ref_blk_cfg, 15) | PPC_SHIFT(en_ref_blk, 17) - | PPC_SHIFT(0x4, 31) | PPC_SHIFT(0x1C, 54)); + PPC_PLACE(1, 0, 3) | PPC_PLACE(3, 3, 3) | PPC_PLACE(5, 6, 3) + | PPC_PLACE(7, 9, 3) /* PF_DROP_VALUEs */ + | PPC_PLACE(ref_blk_cfg, 13, 3) | PPC_PLACE(en_ref_blk, 16, 2) + | PPC_PLACE(0x4, 28, 4) | PPC_PLACE(0x1C, 50, 5)); /* P9N2_MCS_PORT02_MCAMOC (?) [1] = 0 // FORCE_PF_DROP0 @@ -98,7 +98,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) */ scom_and_or_for_chiplet(nest, 0x05010825 + mca_i * mca_mul, ~(PPC_BIT(1) | PPC_BITMASK(4, 31)), - PPC_SHIFT(0x19FFFFF, 28) | PPC_SHIFT(1, 31)); + PPC_PLACE(0x19FFFFF, 4, 25) | PPC_PLACE(1, 29, 3)); /* P9N2_MCS_PORT02_MCEPSQ (?) 
[0-7] = 1 // JITTER_EPSILON @@ -112,12 +112,12 @@ static void p9n_mca_scom(int mcs_i, int mca_i) */ #define F(X) (((X) + 6) / 4) scom_and_or_for_chiplet(nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), - PPC_SHIFT(1, 7) - | PPC_SHIFT(F(pb_cfg->eps_r[0]), 15) - | PPC_SHIFT(F(pb_cfg->eps_r[1]), 23) - | PPC_SHIFT(F(pb_cfg->eps_r[1]), 31) - | PPC_SHIFT(F(pb_cfg->eps_r[2]), 39) - | PPC_SHIFT(F(pb_cfg->eps_r[2]), 47)); + PPC_PLACE(1, 0, 8) + | PPC_PLACE(F(pb_cfg->eps_r[0]), 8, 8) + | PPC_PLACE(F(pb_cfg->eps_r[1]), 16, 8) + | PPC_PLACE(F(pb_cfg->eps_r[1]), 24, 8) + | PPC_PLACE(F(pb_cfg->eps_r[2]), 32, 8) + | PPC_PLACE(F(pb_cfg->eps_r[2]), 40, 8)); #undef F //~ static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; // T0, T1 //~ static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; // T2 @@ -130,8 +130,8 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [24-33] = 64 // BUSY_COUNTER_THRESHOLD2 */ scom_and_or_for_chiplet(nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), - PPC_BIT(0) | PPC_SHIFT(1, 3) | PPC_SHIFT(38, 13) - | PPC_SHIFT(51, 23) | PPC_SHIFT(64, 33)); + PPC_BIT(0) | PPC_PLACE(1, 1, 3) | PPC_PLACE(38, 4, 10) + | PPC_PLACE(51, 14, 10) | PPC_PLACE(64, 24, 10)); /* P9N2_MCS_PORT02_MCPERF3 (?) [31] = 1 // ENABLE_CL0 @@ -663,7 +663,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [48-63] = 0xffff */ dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_BIT_ENABLE_P0_ADR0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0xFFFF, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0xFFFF, 48, 16)); } /* IOM0.DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1 = @@ -672,7 +672,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [51] DI_ADR6_ADR7: 1 = Lanes 6 and 7 are a differential clock pair */ mca_and_or(id, mca_i, DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x5000, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x5000, 48, 16)); /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = [48-63] = 0x4040: @@ -680,7 +680,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [57-63] ADR_DELAY3 = 0x40 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x4040, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x4040, 48, 16)); /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = [48-63] = 0x4040: @@ -688,7 +688,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [57-63] ADR_DELAY7 = 0x40 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x4040, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x4040, 48, 16)); for (dp = 0; dp < 2; dp ++) { /* IOM0.DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S{0,1} = @@ -852,7 +852,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) [63] RANK_PAIR1_SEC_V = 1: if (rank_count0 == 4) */ mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR0_P0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(0x1537 & F[mca->dimm[0].mranks], 63)); + PPC_PLACE(0x1537 & F[mca->dimm[0].mranks], 48, 16)); /* IOM0.DDRPHY_PC_RANK_PAIR1_P0 = [48-63] = 0x1537 & F[rank_count1]: // F = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff} @@ -866,7 +866,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) [63] RANK_PAIR3_SEC_V = 1: if (rank_count1 == 4) */ mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR1_P0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(0x1537 & F[mca->dimm[1].mranks], 63)); + PPC_PLACE(0x1537 & F[mca->dimm[1].mranks], 48, 16)); /* IOM0.DDRPHY_PC_RANK_PAIR2_P0 = [48-63] = 0 @@ -886,7 +886,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) [51] CS3_INIT_CAL_VALUE = 1 */ mca_and_or(id, mca_i, DDRPHY_PC_CSID_CFG_P0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(0xF000, 63)); + PPC_PLACE(0xF000, 48, 16)); /* 
IOM0.DDRPHY_PC_MIRROR_CONFIG_P0 = [all] = 0 @@ -1292,7 +1292,7 @@ static void rc_reset(int mcs_i, int mca_i) uint64_t wait_time = mem_data.speed == 1866 ? 0x0804 : mem_data.speed == 2133 ? 0x092A : mem_data.speed == 2400 ? 0x0A50 : 0x0B74; - mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, 0, PPC_SHIFT(wait_time, 63)); + mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, 0, PPC_PLACE(wait_time, 48, 16)); /* IOM0.DDRPHY_RC_RDVREF_CONFIG1_P0 = [all] 0 diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c index dfce64bb82b..f8ba003b335 100644 --- a/src/soc/ibm/power9/istep_13_9.c +++ b/src/soc/ibm/power9/istep_13_9.c @@ -308,7 +308,7 @@ static void fir_unmask(int mcs_i) */ #define TEST_VREF(dp, scom) \ if ((dp_mca_read(mcs_ids[mcs_i], dp, mca_i, scom) & PPC_BITMASK(56, 62)) == \ - PPC_SHIFT(1,62)) { \ + PPC_PLACE(1, 56, 7)) { \ need_dll_workaround = true; \ break; \ } @@ -628,22 +628,22 @@ void istep_13_9(void) /* Has the same stride as DP16 */ dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); for (dp = 0; dp < 4; dp++) { dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR1_P0_0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); } dp_mca_and_or(mcs_ids[mcs_i], 4, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8024, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); } /* @@ -727,22 +727,22 @@ void istep_13_9(void) /* Has the same stride as DP16 */ dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); for (dp = 0; dp < 4; dp++) { dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR1_P0_0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); } dp_mca_and_or(mcs_ids[mcs_i], 4, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, - ~PPC_BITMASK(48, 63), PPC_SHIFT(0x8020, 63)); + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); } /* Wait at least 32 dphy_nclk clock cycles */ diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index 93ab8efe1ca..f7059cc78d4 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -57,7 +57,7 @@ static void init_pecs(const uint8_t *iovalid_enable) */ scom_and_or_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG), ~PPC_BITMASK(0, 6), - PPC_SHIFT(0, 6)); + PPC_PLACE(0, 0, 7)); /* * Phase2 init step 1 @@ -78,13 +78,13 @@ static void init_pecs(const uint8_t *iovalid_enable) val = read_scom_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG)); /* Set hang poll scale */ val &= ~PPC_BITMASK(0, 3); - val |= PPC_SHIFT(1, 3); + val |= 
PPC_PLACE(1, 0, 4); /* Set data scale */ val &= ~PPC_BITMASK(4, 7); - val |= PPC_SHIFT(1, 7); + val |= PPC_PLACE(1, 4, 4); /* Set hang pe scale */ val &= ~PPC_BITMASK(8, 11); - val |= PPC_SHIFT(1, 11); + val |= PPC_PLACE(1, 8, 4); /* Disable out of order store behavior */ val |= PPC_BIT(22); /* Enable Channel Tag streaming behavior */ @@ -121,7 +121,7 @@ static void init_pecs(const uint8_t *iovalid_enable) * fapi2::ATTR_PROC_PCIE_CACHE_INJ_MODE = 3 by default */ val &= ~PPC_BITMASK(34, 35); - val |= PPC_SHIFT(0x3, 35); + val |= PPC_PLACE(0x3, 34, 2); if (dd == 0x21 || dd == 0x22 || dd == 0x23) { /* @@ -143,7 +143,7 @@ static void init_pecs(const uint8_t *iovalid_enable) * across various workloads. */ val &= ~PPC_BITMASK(46, 48); - val |= PPC_SHIFT(0x3, 48); + val |= PPC_PLACE(0x3, 46, 3); } } @@ -166,7 +166,7 @@ static void init_pecs(const uint8_t *iovalid_enable) */ scom_and_or_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), ~PPC_BITMASK(0, 3), - PPC_SHIFT(9, 3)); + PPC_PLACE(9, 0, 4)); /* * Phase2 init step 4 @@ -209,7 +209,7 @@ static void phb_write(uint8_t phb, uint64_t addr, uint64_t data) } addr &= ~PPC_BITMASK(54, 57); - addr |= PPC_SHIFT(sat_id & 0xF, 57); + addr |= PPC_PLACE(sat_id, 54, 4); write_scom_for_chiplet(chiplet, addr, data); } @@ -233,10 +233,10 @@ static void phb_nest_write(uint8_t phb, uint64_t addr, uint64_t data) } addr &= ~PPC_BITMASK(50, 53); - addr |= PPC_SHIFT(ring & 0xF, 53); + addr |= PPC_PLACE(ring, 50, 4); addr &= ~PPC_BITMASK(54, 57); - addr |= PPC_SHIFT(sat_id & 0xF, 57); + addr |= PPC_PLACE(sat_id, 54, 4); write_scom_for_chiplet(N2_CHIPLET_ID, addr, data); } @@ -298,11 +298,11 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) /* Determine base address of chip MMIO range */ uint64_t base_addr_mmio = 0; - base_addr_mmio |= PPC_SHIFT(0, 12); // 5 bits, ATTR_PROC_FABRIC_SYSTEM_ID - base_addr_mmio |= PPC_SHIFT(0, 18); // 4 bits, ATTR_PROC_EFF_FABRIC_GROUP_ID - base_addr_mmio |= PPC_SHIFT(0, 21); // 3 bits, ATTR_PROC_EFF_FABRIC_CHIP_ID - base_addr_mmio |= PPC_SHIFT(3, 14); // 2 bits, FABRIC_ADDR_MSEL, - // nm = 0b00/01, m = 0b10, mmio = 0b11 + base_addr_mmio |= PPC_PLACE(0, 8, 5); // ATTR_PROC_FABRIC_SYSTEM_ID + base_addr_mmio |= PPC_PLACE(0, 15, 4); // ATTR_PROC_EFF_FABRIC_GROUP_ID + base_addr_mmio |= PPC_PLACE(0, 19, 3); // ATTR_PROC_EFF_FABRIC_CHIP_ID + base_addr_mmio |= PPC_PLACE(3, 13, 2); // FABRIC_ADDR_MSEL + // nm = 0b00/01, m = 0b10, mmio = 0b11 uint8_t phb = 0; for (phb = 0; phb < MAX_PHB_PER_PROC; ++phb) { diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c index 8909988dda6..f46897acb39 100644 --- a/src/soc/ibm/power9/istep_14_5.c +++ b/src/soc/ibm/power9/istep_14_5.c @@ -58,7 +58,7 @@ static void revert_mc_hb_dcbz_config(void) */ scom_and_or_for_chiplet(nest, 0x05010812 + i * mul, ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), - PPC_SHIFT(0x40, 51)); + PPC_PLACE(0x40, 33, 19)); /* MCS_MCPERF1 -- enable fast path MCS_n_MCPERF1 // undocumented, 0x05010810, 0x05010890, 0x03010810, 0x03010890 @@ -201,23 +201,23 @@ static void fill_groups(void) */ if (mask & 0x80) { /* MCS = 0, MCA = 0 */ - mcfgp_regs[0][0] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | - PPC_SHIFT(cur_ba, 47); + mcfgp_regs[0][0] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); } if (mask & 0x40) { /* MCS = 0, MCA = 1 */ - mcfgp_regs[0][1] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | - PPC_SHIFT(cur_ba, 47); + mcfgp_regs[0][1] = PPC_BIT(0) | 
PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); } if (mask & 0x08) { /* MCS = 1, MCA = 0 */ - mcfgp_regs[1][0] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | - PPC_SHIFT(cur_ba, 47); + mcfgp_regs[1][0] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); } if (mask & 0x04) { /* MCS = 1, MCA = 1 */ - mcfgp_regs[1][1] = PPC_BIT(0) | PPC_SHIFT(groups[i].group_size, 23) | - PPC_SHIFT(cur_ba, 47); + mcfgp_regs[1][1] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); } cur_ba += groups[i].group_size + 1; diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index 95947ffe7d4..cc364634762 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -190,7 +190,7 @@ void istep_18_11(void) * [26-27] REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0x3 (factor 8) * [28-31] REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xF (93.75%) */ - scom_or(PERV_TOD_S_PATH_CTRL_REG, PPC_SHIFT(0x3, 27) | PPC_SHIFT(0xF, 31)); + scom_or(PERV_TOD_S_PATH_CTRL_REG, PPC_PLACE(0x3, 26, 2) | PPC_PLACE(0xF, 28, 4)); /* * Set PSS_MSS_CTRL_REG for primary configuration, assumptions: @@ -229,10 +229,10 @@ void istep_18_11(void) * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) */ scom_and_or(PERV_TOD_PRI_PORT_0_CTRL_REG, ~PPC_BITMASK(32, 39), - PPC_SHIFT(calculate_topology_delay(), 39)); + PPC_PLACE(calculate_topology_delay(), 32, 8)); scom_and_or(PERV_TOD_I_PATH_CTRL_REG, ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), - PPC_SHIFT(0xF, 11) | PPC_SHIFT(0x3, 15)); + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); /* Configure INIT_CHIP_CTRL_REG (primary) */ /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) @@ -245,7 +245,7 @@ void istep_18_11(void) */ scom_and_or(PERV_TOD_CHIP_CTRL_REG, ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), - PPC_SHIFT(0x3F, 15)); + PPC_PLACE(0x3F, 10, 6)); /* TODO: test if we can skip repeated writes (M_PATH, I_PATH, CHIP) */ @@ -281,10 +281,10 @@ void istep_18_11(void) * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) */ scom_and_or(PERV_TOD_SEC_PORT_0_CTRL_REG, ~PPC_BITMASK(32, 39), - PPC_SHIFT(calculate_topology_delay(), 39)); + PPC_PLACE(calculate_topology_delay(), 32, 8)); scom_and_or(PERV_TOD_I_PATH_CTRL_REG, ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), - PPC_SHIFT(0xF, 11) | PPC_SHIFT(0x3, 15)); + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); /* Configure INIT_CHIP_CTRL_REG (secondary) */ /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) @@ -297,7 +297,7 @@ void istep_18_11(void) */ scom_and_or(PERV_TOD_CHIP_CTRL_REG, ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), - PPC_SHIFT(0x3F, 15)); + PPC_PLACE(0x3F, 10, 6)); printk(BIOS_EMERG, "ending istep 18.11\n"); } diff --git a/src/soc/ibm/power9/istep_18_12.c b/src/soc/ibm/power9/istep_18_12.c index dd638f2066b..eced1e18f74 100644 --- a/src/soc/ibm/power9/istep_18_12.c +++ b/src/soc/ibm/power9/istep_18_12.c @@ -91,7 +91,7 @@ static void init_tod_node(void) /* Chip TOD load value (move TB to TOD) */ write_scom(PERV_TOD_LOAD_TOD_REG, - PPC_SHIFT(0x3FF, 59) | PPC_SHIFT(0xC, 63)); + PPC_PLACE(0x3FF, 0, 60) | PPC_PLACE(0xC, 60, 4)); /* Chip TOD start_tod (switch local Chip TOD to 'Running' state) */ write_scom(PERV_TOD_START_TOD_REG, diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c index f635119cd99..7350de83b25 100644 --- a/src/soc/ibm/power9/mcbist.c +++ 
b/src/soc/ibm/power9/mcbist.c @@ -82,7 +82,7 @@ static void add_mcbist_test(int mcs_i, uint16_t test) commit_mcbist_memreg_cache(mcs_i); /* This assumes cache is properly cleared. */ - mcbist_memreg_cache |= PPC_SHIFT(test, test_i*16 + 15); + mcbist_memreg_cache |= PPC_PLACE(test, test_i*16, 16); tests++; } From a774e00278a007dab81dda0d50b74cfa68012468 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 24 Oct 2021 01:22:20 +0300 Subject: [PATCH 103/213] soc/power9/: PPC_SHIFT with constants => PPC_PLACE Updating these required updating offset constants and introducing corresponding length constants. Change-Id: I1fb347b3b4e94b820500a30cfb90e1b8ed95b7be Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/ccs.c | 42 +- src/soc/ibm/power9/istep_13_10.c | 7 +- src/soc/ibm/power9/istep_13_11.c | 16 +- src/soc/ibm/power9/istep_13_13.c | 4 +- src/soc/ibm/power9/istep_13_2.c | 6 +- src/soc/ibm/power9/istep_13_4.c | 3 +- src/soc/ibm/power9/istep_13_6.c | 3 +- src/soc/ibm/power9/istep_13_8.c | 623 +++++++++++++++++----------- src/soc/ibm/power9/istep_13_scom.h | 635 +++++++++++++++++++---------- src/soc/ibm/power9/istep_14_1.c | 15 +- src/soc/ibm/power9/istep_14_3.c | 6 +- src/soc/ibm/power9/istep_18_11.c | 30 +- 12 files changed, 885 insertions(+), 505 deletions(-) diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index 332e4b43e2a..0117527556a 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -66,9 +66,12 @@ void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, */ write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | - PPC_SHIFT(cke & 0xF, CCS_INST_ARR0_00_CCS_DDR_CKE) | - PPC_SHIFT((csn >> 2) & 3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1) | - PPC_SHIFT(csn & 3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3)); + PPC_PLACE(cke, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(csn >> 2, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(csn, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); /* MC01.MCBIST.CCS.CCS_INST_ARR1_n [all] 0 @@ -76,8 +79,10 @@ void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 */ write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, - PPC_SHIFT(idles, CCS_INST_ARR1_00_IDLES) | - PPC_SHIFT(instr + 1, CCS_INST_ARR1_00_GOTO_CMD)); + PPC_PLACE(idles, CCS_INST_ARR1_00_IDLES, + CCS_INST_ARR1_00_IDLES_LEN) | + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); /* * For the last instruction in the stream we could decrease it by one (final @@ -186,9 +191,12 @@ void ccs_execute(chiplet_id_t id, int mca_i) */ write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | - PPC_SHIFT(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE) | - PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1) | - PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3)); + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, PPC_BIT(CCS_INST_ARR1_00_CCS_END)); @@ -316,10 +324,14 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, [56-59] CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE = 0xc */ write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, - 
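/*
 * [Not part of the patch -- editor's sketch.] The hunks in this series replace
 * PPC_SHIFT(value, right_bit) with PPC_PLACE(value, start_bit, length), which
 * names the field's starting IBM bit and width instead of only its rightmost
 * bit. The definitions below are an assumption inferred from equivalences
 * visible in the diffs (e.g. PPC_SHIFT(x, 63) == PPC_PLACE(x, 48, 16) for a
 * 16-bit field at bits 48-63); the authoritative macros live in the tree's
 * SCOM headers. IBM bit numbering: bit 0 is the most significant bit of a
 * 64-bit register.
 */
#include <stdint.h>

/* Single bit in IBM (big-endian) numbering. */
#define PPC_BIT(bit)		(1ull << (63 - (bit)))
/* Mask covering IBM bits first..last inclusive (first <= last). */
#define PPC_BITMASK(first, last) ((PPC_BIT(first) - PPC_BIT(last)) + PPC_BIT(first))
/* Old style: shift value so its least significant bit lands at IBM bit rsb. */
#define PPC_SHIFT(val, rsb)	((uint64_t)(val) << (63 - (rsb)))
/*
 * New style: put the low 'len' bits of value into the field that starts at
 * IBM bit 'start' and is 'len' bits wide (len < 64 assumed). Unlike
 * PPC_SHIFT, the value is masked to the field width.
 */
#define PPC_PLACE(val, start, len) \
	(((uint64_t)(val) & ((1ull << (len)) - 1)) << (63 - ((start) + (len) - 1)))

/* Sanity checks mirroring conversions seen in these hunks. */
_Static_assert(PPC_SHIFT(0x8024, 63) == PPC_PLACE(0x8024, 48, 16),
	       "16-bit field at IBM bits 48-63");
_Static_assert(PPC_SHIFT(1, 3) == PPC_PLACE(1, 0, 4),
	       "4-bit field at IBM bits 0-3");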
PPC_SHIFT(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE) | - PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1) | - PPC_SHIFT(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3) | - PPC_SHIFT(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE)); + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN) | + PPC_PLACE(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE, + CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN)); /* MC01.MCBIST.CCS.CCS_INST_ARR1_n [all] 0 @@ -328,9 +340,11 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 */ write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, - PPC_SHIFT(rp, CCS_INST_ARR1_00_DDR_CAL_RANK) | + PPC_PLACE(rp, CCS_INST_ARR1_00_DDR_CAL_RANK, + CCS_INST_ARR1_00_DDR_CAL_RANK_LEN) | PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | - PPC_SHIFT(instr + 1, CCS_INST_ARR1_00_GOTO_CMD)); + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); total_cycles += step_cycles; instr++; diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index 11d4a335a34..ff97897ff81 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -426,8 +426,8 @@ void istep_13_10(void) PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | - PPC_SHIFT(0xFFFF, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT) | - PPC_SHIFT(3, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT)); + PPC_PLACE(0xFFFF, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN) | + PPC_PLACE(0x3, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -449,7 +449,8 @@ void istep_13_10(void) ~PPC_BIT(MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE), PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL)); mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BITMASK(0, 3), - PPC_SHIFT(0x6, MBA_FARB5Q_CFG_DDR_DPHY_PCLK)); + PPC_PLACE(0x1, MBA_FARB5Q_CFG_DDR_DPHY_NCLK, MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN) | + PPC_PLACE(0x2, MBA_FARB5Q_CFG_DDR_DPHY_PCLK, MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN)); mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, PPC_BIT(MBA_FARB5Q_CFG_DDR_RESETN)); diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 9a2b4ab5941..546f2a3d215 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -213,7 +213,7 @@ static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) */ /* See note in seq_reset() in 13.8. This may not be necessary. 
*/ mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, ~PPC_BITMASK(60, 63), - PPC_SHIFT(9, TRFC_CYCLES)); + PPC_PLACE(9, TRFC_CYCLES, TRFC_CYCLES_LEN)); /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 // > Hard coded settings provided by Ryan King for this workaround @@ -227,8 +227,10 @@ static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) */ mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~(PPC_BITMASK(48, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(0xF, REFRESH_COUNT) | PPC_SHIFT(3, REFRESH_CONTROL) | - PPC_BIT(REFRESH_ALL_RANKS) | PPC_SHIFT(0x13, REFRESH_INTERVAL)); + PPC_PLACE(0xF, REFRESH_COUNT, REFRESH_COUNT_LEN) | + PPC_PLACE(0x3, REFRESH_CONTROL, REFRESH_CONTROL_LEN) | + PPC_BIT(REFRESH_ALL_RANKS) | + PPC_PLACE(0x13, REFRESH_INTERVAL, REFRESH_INTERVAL_LEN)); } static void wr_level_pre(int mcs_i, int mca_i, int rp, @@ -409,8 +411,8 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, /* 2 DIMMs -> odd vpd_idx */ uint64_t val = 0; if (vpd_idx % 2) - val = PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES0) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES1); + val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) + | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES1, ODT_RD_VALUES1_LEN); mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); @@ -422,8 +424,8 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) */ mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_RD_VALUES0) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_RD_VALUES1)); + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_RD_VALUES1, ODT_RD_VALUES1_LEN)); #undef F /* MR2 = // redo the rest of the bits diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index aba4db5e6a3..cd77fd9f912 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -630,7 +630,7 @@ void istep_13_13(void) */ mca_and_or(id, mca_i, RECR, ~(PPC_BITMASK(6, 8) | PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)), - PPC_SHIFT(1, MBSECCQ_READ_POINTER_DELAY) | + PPC_PLACE(1, MBSECCQ_READ_POINTER_DELAY, MBSECCQ_READ_POINTER_DELAY_LEN) | PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)); enable_pm(mcs_i, mca_i); @@ -707,7 +707,7 @@ void istep_13_13(void) mca_and_or(id, mca_i, RECR, ~(PPC_BITMASK(0, 1) | PPC_BITMASK(29, 31)), PPC_BIT(MBSECCQ_USE_ADDRESS_HASH) | - PPC_SHIFT(3, MBSECCQ_DATA_INVERSION)); + PPC_PLACE(3, MBSECCQ_DATA_INVERSION, MBSECCQ_DATA_INVERSION_LEN)); apply_mark_store(mcs_i, mca_i); } diff --git a/src/soc/ibm/power9/istep_13_2.c b/src/soc/ibm/power9/istep_13_2.c index a92998eabd1..294479e83ab 100644 --- a/src/soc/ibm/power9/istep_13_2.c +++ b/src/soc/ibm/power9/istep_13_2.c @@ -103,8 +103,10 @@ void istep_13_2(void) [52-63] OPCG_WAIT_CYCLES = 0x20 */ write_scom_for_chiplet(mcs_ids[i], MCSLOW_OPCG_ALIGN, - PPC_SHIFT(5, MCSLOW_OPCG_ALIGN_INOP_ALIGN) | - PPC_SHIFT(0x20, MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES)); + PPC_PLACE(5, MCSLOW_OPCG_ALIGN_INOP_ALIGN, + MCSLOW_OPCG_ALIGN_INOP_ALIGN_LEN) | + PPC_PLACE(0x20, MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES, + MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES_LEN)); // scan0 flush PLL boundary ring /* diff --git a/src/soc/ibm/power9/istep_13_4.c b/src/soc/ibm/power9/istep_13_4.c index fed7db98dd3..07602a304fb 100644 --- 
a/src/soc/ibm/power9/istep_13_4.c +++ b/src/soc/ibm/power9/istep_13_4.c @@ -96,7 +96,8 @@ void istep_13_4(void) [47-51] SCAN_RATIO = 3 // 4:1 */ scom_and_or_for_chiplet(mcs_ids[i], MCSLOW_OPCG_ALIGN, ~PPC_BITMASK(47,51), - PPC_SHIFT(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO)); + PPC_PLACE(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO, + MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN)); // > end if diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c index 2f0b80080a3..b2b17982578 100644 --- a/src/soc/ibm/power9/istep_13_6.c +++ b/src/soc/ibm/power9/istep_13_6.c @@ -139,7 +139,8 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id) */ scom_and_or_for_chiplet(id, MCSLOW_CLK_REGION, ~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)), - PPC_SHIFT(1, MCSLOW_CLK_REGION_CLOCK_CMD) | + PPC_PLACE(1, MCSLOW_CLK_REGION_CLOCK_CMD, + MCSLOW_CLK_REGION_CLOCK_CMD_LEN) | PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index a6a1f697b21..206346f932e 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -163,12 +163,18 @@ static void p9n_mca_scom(int mcs_i, int mca_i) uint64_t rdtag_dly = mem_data.speed == 2666 ? 9 : mem_data.speed == 2400 ? 8 : 7; mca_and_or(id, mca_i, MBA_DSM0Q, ~PPC_BITMASK(0, 41), - PPC_SHIFT(mca->cl - mem_data.cwl, MBA_DSM0Q_CFG_RODT_START_DLY) | - PPC_SHIFT(mca->cl - mem_data.cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY) | - PPC_SHIFT(5, MBA_DSM0Q_CFG_WODT_END_DLY) | - PPC_SHIFT(24, MBA_DSM0Q_CFG_WRDONE_DLY) | - PPC_SHIFT(mem_data.cwl + /* 1 */ 3 - 8, MBA_DSM0Q_CFG_WRDATA_DLY) | - PPC_SHIFT(mca->cl + rdtag_dly, MBA_DSM0Q_CFG_RDTAG_DLY)); + PPC_PLACE(mca->cl - mem_data.cwl, MBA_DSM0Q_CFG_RODT_START_DLY, + MBA_DSM0Q_CFG_RODT_START_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data.cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY, + MBA_DSM0Q_CFG_RODT_END_DLY_LEN) | + PPC_PLACE(5, MBA_DSM0Q_CFG_WODT_END_DLY, + MBA_DSM0Q_CFG_WODT_END_DLY_LEN) | + PPC_PLACE(24, MBA_DSM0Q_CFG_WRDONE_DLY, + MBA_DSM0Q_CFG_WRDONE_DLY_LEN) | + PPC_PLACE(mem_data.cwl + /* 1 */ 3 - 8, MBA_DSM0Q_CFG_WRDATA_DLY, + MBA_DSM0Q_CFG_WRDATA_DLY_LEN) | + PPC_PLACE(mca->cl + rdtag_dly, MBA_DSM0Q_CFG_RDTAG_DLY, + MBA_DSM0Q_CFG_RDTAG_DLY_LEN)); /* MC01.PORT0.SRQ.MBA_TMR0Q = [0-3] MBA_TMR0Q_RRDM_DLY = @@ -206,20 +212,26 @@ static void p9n_mca_scom(int mcs_i, int mca_i) mem_data.speed == 2400 ? 10 : mem_data.speed == 2133 ? 
9 : 8; mca_and_or(id, mca_i, MBA_TMR0Q, PPC_BIT(63), - PPC_SHIFT(var_dly, MBA_TMR0Q_RRDM_DLY) | - PPC_SHIFT(4, MBA_TMR0Q_RRSMSR_DLY) | - PPC_SHIFT(4, MBA_TMR0Q_RRSMDR_DLY) | - PPC_SHIFT(mca->nccd_l, MBA_TMR0Q_RROP_DLY) | - PPC_SHIFT(var_dly, MBA_TMR0Q_WWDM_DLY) | - PPC_SHIFT(4, MBA_TMR0Q_WWSMSR_DLY) | - PPC_SHIFT(4, MBA_TMR0Q_WWSMDR_DLY) | - PPC_SHIFT(mca->nccd_l, MBA_TMR0Q_WWOP_DLY) | - PPC_SHIFT(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWDM_DLY) | - PPC_SHIFT(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMSR_DLY) | - PPC_SHIFT(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMDR_DLY) | - PPC_SHIFT(mem_data.cwl - mca->cl + var_dly, MBA_TMR0Q_WRDM_DLY) | - PPC_SHIFT(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMSR_DLY) | - PPC_SHIFT(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMDR_DLY)); + PPC_PLACE(var_dly, MBA_TMR0Q_RRDM_DLY, MBA_TMR0Q_RRDM_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_RRSMSR_DLY, MBA_TMR0Q_RRSMSR_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_RRSMDR_DLY, MBA_TMR0Q_RRSMDR_DLY_LEN) | + PPC_PLACE(mca->nccd_l, MBA_TMR0Q_RROP_DLY, MBA_TMR0Q_RROP_DLY_LEN) | + PPC_PLACE(var_dly, MBA_TMR0Q_WWDM_DLY, MBA_TMR0Q_WWDM_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_WWSMSR_DLY, MBA_TMR0Q_WWSMSR_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_WWSMDR_DLY, MBA_TMR0Q_WWSMDR_DLY_LEN) | + PPC_PLACE(mca->nccd_l, MBA_TMR0Q_WWOP_DLY, MBA_TMR0Q_WWOP_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWDM_DLY, + MBA_TMR0Q_RWDM_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMSR_DLY, + MBA_TMR0Q_RWSMSR_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMDR_DLY, + MBA_TMR0Q_RWSMDR_DLY_LEN) | + PPC_PLACE(mem_data.cwl - mca->cl + var_dly, MBA_TMR0Q_WRDM_DLY, + MBA_TMR0Q_WRDM_DLY_LEN) | + PPC_PLACE(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMSR_DLY, + MBA_TMR0Q_WRSMSR_DLY_LEN) | + PPC_PLACE(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMDR_DLY, + MBA_TMR0Q_WRSMDR_DLY_LEN)); /* MC01.PORT0.SRQ.MBA_TMR1Q = [0-3] MBA_TMR1Q_RRSBG_DLY = ATTR_EFF_DRAM_TCCD_L @@ -239,17 +251,20 @@ static void p9n_mca_scom(int mcs_i, int mca_i) MSS_FREQ_EQ_2666: 11 */ mca_and_or(id, mca_i, MBA_TMR1Q, 0, - PPC_SHIFT(mca->nccd_l, MBA_TMR1Q_RRSBG_DLY) | - PPC_SHIFT(mem_data.cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY) | - PPC_SHIFT(mca->nfaw, MBA_TMR1Q_CFG_TFAW) | - PPC_SHIFT(mca->nrcd, MBA_TMR1Q_CFG_TRCD) | - PPC_SHIFT(mca->nrp, MBA_TMR1Q_CFG_TRP) | - PPC_SHIFT(mca->nras, MBA_TMR1Q_CFG_TRAS) | - PPC_SHIFT(mem_data.cwl + mca->nwr + 4, MBA_TMR1Q_CFG_WR2PRE) | - PPC_SHIFT(mem_data.nrtp, MBA_TMR1Q_CFG_RD2PRE) | - PPC_SHIFT(mca->nrrd_s, MBA_TMR1Q_TRRD) | - PPC_SHIFT(mca->nrrd_l, MBA_TMR1Q_TRRD_SBG) | - PPC_SHIFT(var_dly, MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY)); + PPC_PLACE(mca->nccd_l, MBA_TMR1Q_RRSBG_DLY, MBA_TMR1Q_RRSBG_DLY_LEN) | + PPC_PLACE(mem_data.cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY, + MBA_TMR1Q_WRSBG_DLY_LEN) | + PPC_PLACE(mca->nfaw, MBA_TMR1Q_CFG_TFAW, MBA_TMR1Q_CFG_TFAW_LEN) | + PPC_PLACE(mca->nrcd, MBA_TMR1Q_CFG_TRCD, MBA_TMR1Q_CFG_TRCD_LEN) | + PPC_PLACE(mca->nrp, MBA_TMR1Q_CFG_TRP, MBA_TMR1Q_CFG_TRP_LEN) | + PPC_PLACE(mca->nras, MBA_TMR1Q_CFG_TRAS, MBA_TMR1Q_CFG_TRAS_LEN) | + PPC_PLACE(mem_data.cwl + mca->nwr + 4, MBA_TMR1Q_CFG_WR2PRE, + MBA_TMR1Q_CFG_WR2PRE_LEN) | + PPC_PLACE(mem_data.nrtp, MBA_TMR1Q_CFG_RD2PRE, MBA_TMR1Q_CFG_RD2PRE_LEN) | + PPC_PLACE(mca->nrrd_s, MBA_TMR1Q_TRRD, MBA_TMR1Q_TRRD_LEN) | + PPC_PLACE(mca->nrrd_l, MBA_TMR1Q_TRRD_SBG, MBA_TMR1Q_TRRD_SBG_LEN) | + PPC_PLACE(var_dly, MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY, + MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY_LEN)); /* MC01.PORT0.SRQ.MBA_WRQ0Q = 
[5] MBA_WRQ0Q_CFG_WRQ_FIFO_MODE = 0 // ATTR_MSS_REORDER_QUEUE_SETTING, 0 = reorder @@ -261,7 +276,8 @@ static void p9n_mca_scom(int mcs_i, int mca_i) PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | PPC_BITMASK(55, 58)), PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | - PPC_SHIFT(8, MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING)); + PPC_PLACE(8, MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING, + MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING_LEN)); /* MC01.PORT0.SRQ.MBA_RRQ0Q = [6] MBA_RRQ0Q_CFG_RRQ_FIFO_MODE = 0 // ATTR_MSS_REORDER_QUEUE_SETTING @@ -269,7 +285,8 @@ static void p9n_mca_scom(int mcs_i, int mca_i) */ mca_and_or(id, mca_i, MBA_RRQ0Q, ~(PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE) | PPC_BITMASK(57, 60)), - PPC_SHIFT(8, MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING)); + PPC_PLACE(8, MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING, + MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING_LEN)); /* MC01.PORT0.SRQ.MBA_FARB0Q = if (l_TGT3_ATTR_MSS_MRW_DRAM_2N_MODE == 0x02 || (l_TGT3_ATTR_MSS_MRW_DRAM_2N_MODE == 0x00 && l_TGT2_ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET == 0x02)) @@ -282,7 +299,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) | PPC_BITMASK(61, 63)), PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) | - PPC_SHIFT(3, MBA_FARB0Q_CFG_OPT_RD_SIZE)); + PPC_PLACE(3, MBA_FARB0Q_CFG_OPT_RD_SIZE, MBA_FARB0Q_CFG_OPT_RD_SIZE_LEN)); /* MC01.PORT0.SRQ.MBA_FARB1Q = [0-2] MBA_FARB1Q_CFG_SLOT0_S0_CID = 0 @@ -327,10 +344,10 @@ static void p9n_mca_scom(int mcs_i, int mca_i) cids_4_7 = (cids_4_7 & ~(7ull << 9)) | (4 << 9); mca_and_or(id, mca_i, MBA_FARB1Q, ~PPC_BITMASK(0, 47), - PPC_SHIFT(cids_even, MBA_FARB1Q_CFG_SLOT0_S3_CID) | - PPC_SHIFT(cids_4_7, MBA_FARB1Q_CFG_SLOT0_S7_CID) | - PPC_SHIFT(cids_even, MBA_FARB1Q_CFG_SLOT1_S3_CID) | - PPC_SHIFT(cids_4_7, MBA_FARB1Q_CFG_SLOT1_S7_CID)); + PPC_PLACE(cids_even, MBA_FARB1Q_CFG_SLOT0_S0_CID, 12) | + PPC_PLACE(cids_4_7, MBA_FARB1Q_CFG_SLOT0_S4_CID, 12) | + PPC_PLACE(cids_even, MBA_FARB1Q_CFG_SLOT1_S0_CID, 12) | + PPC_PLACE(cids_4_7, MBA_FARB1Q_CFG_SLOT1_S4_CID, 12)); /* MC01.PORT0.SRQ.MBA_FARB2Q = F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of uint8_t X, big endian numbering @@ -353,14 +370,22 @@ static void p9n_mca_scom(int mcs_i, int mca_i) */ #define F(X) ((((X) >> 4) & 0xc) | (((X) >> 2) & 0x3)) mca_and_or(id, mca_i, MBA_FARB2Q, 0, - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), MBA_FARB2Q_CFG_RANK0_RD_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), MBA_FARB2Q_CFG_RANK1_RD_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), MBA_FARB2Q_CFG_RANK4_RD_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), MBA_FARB2Q_CFG_RANK5_RD_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), MBA_FARB2Q_CFG_RANK0_WR_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), MBA_FARB2Q_CFG_RANK1_WR_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), MBA_FARB2Q_CFG_RANK4_WR_ODT) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), MBA_FARB2Q_CFG_RANK5_WR_ODT) ); + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), + MBA_FARB2Q_CFG_RANK0_RD_ODT, MBA_FARB2Q_CFG_RANK0_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), + MBA_FARB2Q_CFG_RANK1_RD_ODT, MBA_FARB2Q_CFG_RANK1_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), + MBA_FARB2Q_CFG_RANK4_RD_ODT, MBA_FARB2Q_CFG_RANK4_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), + MBA_FARB2Q_CFG_RANK5_RD_ODT, MBA_FARB2Q_CFG_RANK5_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), + MBA_FARB2Q_CFG_RANK0_WR_ODT, 
MBA_FARB2Q_CFG_RANK0_WR_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), + MBA_FARB2Q_CFG_RANK1_WR_ODT, MBA_FARB2Q_CFG_RANK1_WR_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), + MBA_FARB2Q_CFG_RANK4_WR_ODT, MBA_FARB2Q_CFG_RANK4_WR_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), + MBA_FARB2Q_CFG_RANK5_WR_ODT, MBA_FARB2Q_CFG_RANK5_WR_ODT_LEN) ); #undef F /* MC01.PORT0.SRQ.PC.MBAREF0Q = @@ -377,11 +402,21 @@ static void p9n_mca_scom(int mcs_i, int mca_i) * arithmetic. */ mca_and_or(id, mca_i, MBAREF0Q, ~(PPC_BITMASK(5, 18) | PPC_BITMASK(30, 60)), - PPC_SHIFT(3, MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD) | - PPC_SHIFT(mem_data.nrefi / (8 * 2 * log_ranks), MBAREF0Q_CFG_REFRESH_INTERVAL) | - PPC_SHIFT(mca->nrfc, MBAREF0Q_CFG_TRFC) | - PPC_SHIFT(mca->nrfc_dlr, MBAREF0Q_CFG_REFR_TSV_STACK) | - PPC_SHIFT(((mem_data.nrefi / 8) * 6) / 5, MBAREF0Q_CFG_REFR_CHECK_INTERVAL)); + PPC_PLACE(3, + MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD, + MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD_LEN) | + PPC_PLACE(mem_data.nrefi / (8 * 2 * log_ranks), + MBAREF0Q_CFG_REFRESH_INTERVAL, + MBAREF0Q_CFG_REFRESH_INTERVAL_LEN) | + PPC_PLACE(mca->nrfc, + MBAREF0Q_CFG_TRFC, + MBAREF0Q_CFG_TRFC_LEN) | + PPC_PLACE(mca->nrfc_dlr, + MBAREF0Q_CFG_REFR_TSV_STACK, + MBAREF0Q_CFG_REFR_TSV_STACK_LEN) | + PPC_PLACE(((mem_data.nrefi / 8) * 6) / 5, + MBAREF0Q_CFG_REFR_CHECK_INTERVAL, + MBAREF0Q_CFG_REFR_CHECK_INTERVAL_LEN)); /* MC01.PORT0.SRQ.PC.MBARPC0Q = [6-10] MBARPC0Q_CFG_PUP_AVAIL = @@ -410,9 +445,9 @@ static void p9n_mca_scom(int mcs_i, int mca_i) uint64_t p_up_dn = mem_data.speed == 1866 ? 5 : mem_data.speed == 2666 ? 7 : 6; mca_and_or(id, mca_i, MBARPC0Q, ~PPC_BITMASK(6, 21), - PPC_SHIFT(pup_avail, MBARPC0Q_CFG_PUP_AVAIL) | - PPC_SHIFT(p_up_dn, MBARPC0Q_CFG_PDN_PUP) | - PPC_SHIFT(p_up_dn, MBARPC0Q_CFG_PUP_PDN) | + PPC_PLACE(pup_avail, MBARPC0Q_CFG_PUP_AVAIL, MBARPC0Q_CFG_PUP_AVAIL_LEN) | + PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PDN_PUP, MBARPC0Q_CFG_PDN_PUP_LEN) | + PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PUP_PDN, MBARPC0Q_CFG_PUP_PDN_LEN) | (mranks == 4 ? PPC_BIT(MBARPC0Q_RESERVED_21) : 0)); /* MC01.PORT0.SRQ.PC.MBASTR0Q = @@ -440,13 +475,14 @@ static void p9n_mca_scom(int mcs_i, int mca_i) uint64_t txsdll = mem_data.speed == 1866 ? 597 : mem_data.speed == 2666 ? 939 : 768; mca_and_or(id, mca_i, MBASTR0Q, ~(PPC_BITMASK(12, 37) | PPC_BITMASK(46, 56)), - PPC_SHIFT(5, MBASTR0Q_CFG_TCKESR) | - PPC_SHIFT(tcksr_ex, MBASTR0Q_CFG_TCKSRE) | - PPC_SHIFT(tcksr_ex, MBASTR0Q_CFG_TCKSRX) | - PPC_SHIFT(txsdll, MBASTR0Q_CFG_TXSDLL) | - PPC_SHIFT(mem_data.nrefi / + PPC_PLACE(5, MBASTR0Q_CFG_TCKESR, MBASTR0Q_CFG_TCKESR_LEN) | + PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRE, MBASTR0Q_CFG_TCKSRE_LEN) | + PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRX, MBASTR0Q_CFG_TCKSRX_LEN) | + PPC_PLACE(txsdll, MBASTR0Q_CFG_TXSDLL, MBASTR0Q_CFG_TXSDLL_LEN) | + PPC_PLACE(mem_data.nrefi / (8 * (mca->dimm[0].log_ranks + mca->dimm[1].log_ranks)), - MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL)); + MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL, + MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL_LEN)); /* MC01.PORT0.ECC64.SCOM.RECR = [16-18] MBSECCQ_VAL_TO_DATA_DELAY = @@ -489,8 +525,10 @@ static void p9n_mca_scom(int mcs_i, int mca_i) mn_freq_ratio < 1300 ? 0 : mn_freq_ratio < 1400 ? 
1 : 0; mca_and_or(id, mca_i, RECR, ~(PPC_BITMASK(16, 22) | PPC_BIT(MBSECCQ_RESERVED_40)), - PPC_SHIFT(val_to_data, MBSECCQ_VAL_TO_DATA_DELAY) | - PPC_SHIFT(nest_val_to_data, MBSECCQ_NEST_VAL_TO_DATA_DELAY) | + PPC_PLACE(val_to_data, MBSECCQ_VAL_TO_DATA_DELAY, + MBSECCQ_VAL_TO_DATA_DELAY_LEN) | + PPC_PLACE(nest_val_to_data, MBSECCQ_NEST_VAL_TO_DATA_DELAY, + MBSECCQ_NEST_VAL_TO_DATA_DELAY_LEN) | (mn_freq_ratio < 1215 ? 0 : PPC_BIT(MBSECCQ_DELAY_NONBYPASS)) | PPC_BIT(MBSECCQ_RESERVED_40)); @@ -518,7 +556,8 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) [23-32] MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME = 959 */ mca_and_or(id, mca_i, MBARPC0Q, ~(PPC_BITMASK(3, 5) | PPC_BITMASK(22, 32)), - PPC_SHIFT(959, MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME)); + PPC_PLACE(959, MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME, + MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME_LEN )); /* Set STR register */ /* MC01.PORT0.SRQ.PC.MBASTR0Q = @@ -530,7 +569,8 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) [2-11] MBASTR0Q_CFG_ENTER_STR_TIME = 1023 */ mca_and_or(id, mca_i, MBASTR0Q, ~(PPC_BIT(0) | PPC_BITMASK(2, 11)), - PPC_SHIFT(1023, MBASTR0Q_CFG_ENTER_STR_TIME)); + PPC_PLACE(1023, MBASTR0Q_CFG_ENTER_STR_TIME, + MBASTR0Q_CFG_ENTER_STR_TIME_LEN)); /* Set N/M throttling control register */ /* MC01.PORT0.SRQ.MBA_FARB3Q = @@ -552,10 +592,14 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) uint64_t nm_n_per_port = 0x80; uint64_t m_dram_clocks = 0x200; mca_and_or(id, mca_i, MBA_FARB3Q, ~(PPC_BITMASK(0, 50) | PPC_BIT(53)), - PPC_SHIFT(nm_n_per_slot, MBA_FARB3Q_CFG_NM_N_PER_SLOT) | - PPC_SHIFT(nm_n_per_port, MBA_FARB3Q_CFG_NM_N_PER_PORT) | - PPC_SHIFT(m_dram_clocks, MBA_FARB3Q_CFG_NM_M) | - PPC_SHIFT(1, MBA_FARB3Q_CFG_NM_CAS_WEIGHT)); + PPC_PLACE(nm_n_per_slot, MBA_FARB3Q_CFG_NM_N_PER_SLOT, + MBA_FARB3Q_CFG_NM_N_PER_SLOT_LEN) | + PPC_PLACE(nm_n_per_port, MBA_FARB3Q_CFG_NM_N_PER_PORT, + MBA_FARB3Q_CFG_NM_N_PER_PORT_LEN) | + PPC_PLACE(m_dram_clocks, MBA_FARB3Q_CFG_NM_M, + MBA_FARB3Q_CFG_NM_M_LEN) | + PPC_PLACE(1, MBA_FARB3Q_CFG_NM_CAS_WEIGHT, + MBA_FARB3Q_CFG_NM_CAS_WEIGHT_LEN)); /* Set safemode throttles */ /* MC01.PORT0.SRQ.MBA_FARB4Q = @@ -564,8 +608,10 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) */ uint64_t nm_throttled_n_per_port = 0x20; mca_and_or(id, mca_i, MBA_FARB4Q, ~PPC_BITMASK(27, 55), - PPC_SHIFT(nm_throttled_n_per_port, MBA_FARB4Q_EMERGENCY_N) | - PPC_SHIFT(m_dram_clocks, MBA_FARB4Q_EMERGENCY_M)); + PPC_PLACE(nm_throttled_n_per_port, MBA_FARB4Q_EMERGENCY_N, + MBA_FARB4Q_EMERGENCY_N_LEN) | + PPC_PLACE(m_dram_clocks, MBA_FARB4Q_EMERGENCY_M, + MBA_FARB4Q_EMERGENCY_M_LEN)); } /* @@ -599,9 +645,9 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) /* Same as default value after reset? */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0, ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), - PPC_SHIFT(3, RXREG_VREG_COMPCON_DC) | - PPC_SHIFT(7, RXREG_VREG_DRVCON_DC) | - PPC_SHIFT(2, RXREG_VREG_REF_SEL_DC)); + PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | + PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | + PPC_PLACE(2, RXREG_VREG_REF_SEL_DC, RXREG_VREG_REF_SEL_DC_LEN)); /* IOM0.DDRPHY_DP16_DLL_VREG_CONTROL1_P0_{0,1,2,3,4} = [48-50] RXREG_VREG_COMPCON_DC = 3 @@ -615,9 +661,9 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) /* Same as default value after reset? 
*/ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0, ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), - PPC_SHIFT(3, RXREG_VREG_COMPCON_DC) | - PPC_SHIFT(7, RXREG_VREG_DRVCON_DC) | - PPC_SHIFT(2, RXREG_VREG_REF_SEL_DC)); + PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | + PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | + PPC_PLACE(2, RXREG_VREG_REF_SEL_DC, RXREG_VREG_REF_SEL_DC_LEN)); /* IOM0.DDRPHY_DP16_WRCLK_PR_P0_{0,1,2,3,4} = // For zero delay simulations, or simulations where the delay of the SysClk tree and the WrClk tree are equal, @@ -625,7 +671,8 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [49-55] TSYS_WRCLK = 0x60 */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, - ~PPC_BITMASK(49, 55), PPC_SHIFT(0x60, TSYS_WRCLK)); + ~PPC_BITMASK(49, 55), + PPC_PLACE(0x60, TSYS_WRCLK, TSYS_WRCLK_LEN)); /* IOM0.DDRPHY_DP16_IO_TX_CONFIG0_P0_{0,1,2,3,4} = [48-51] STRENGTH = 0x4 // 2400 MT/s @@ -634,7 +681,8 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_CONFIG0_P0_0, ~PPC_BITMASK(48, 52), - PPC_SHIFT(strength, DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH)); + PPC_PLACE(strength, DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH, + DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH_LEN)); /* IOM0.DDRPHY_DP16_DLL_CONFIG1_P0_{0,1,2,3,4} = [48-63] = 0x0006: @@ -649,13 +697,13 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) /* IOM0.DDRPHY_DP16_IO_TX_FET_SLICE_P0_{0,1,2,3,4} = [48-63] = 0x7f7f: - [59-55] EN_SLICE_N_WR = 0x7f + [49-55] EN_SLICE_N_WR = 0x7f [57-63] EN_SLICE_P_WR = 0x7f */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(0x7F, EN_SLICE_N_WR) | - PPC_SHIFT(0x7F, EN_SLICE_P_WR)); + PPC_PLACE(0x7F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN) | + PPC_PLACE(0x7F, EN_SLICE_P_WR, EN_SLICE_P_WR_LEN)); } for (dp = 0; dp < 4; dp++) { @@ -698,7 +746,8 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) */ dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(strength, DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH)); + PPC_PLACE(strength, DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH, + DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH_LEN)); /* IOM0.DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S{0,1} = [48-63] = 0x6000 @@ -709,7 +758,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(0x60, TSYS_WRCLK)); + PPC_PLACE(0x60, TSYS_WRCLK, TSYS_WRCLK_LEN)); /* IOM0.DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S{0,1} = [48-50] RXREG_VREG_COMPCON_DC = 3 @@ -720,9 +769,9 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) */ dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(3, RXREG_VREG_COMPCON_DC) | - PPC_SHIFT(7, RXREG_VREG_DRVCON_DC) | - PPC_SHIFT(2, RXREG_VREG_REF_SEL_DC)); + PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | + PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | + PPC_PLACE(2, RXREG_VREG_REF_SEL_DC, RXREG_VREG_REF_SEL_DC_LEN)); } /* IOM0.DDRPHY_PC_CONFIG0_P0 = @@ -751,35 +800,42 @@ static void p9n_mcbist_scom(int mcs_i) [0-47] WATCFG0AQ_CFG_WAT_EVENT_SEL = 0x400000000000 */ scom_and_or_for_chiplet(id, WATCFG0AQ, ~PPC_BITMASK(0, 47), - PPC_SHIFT(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL)); + PPC_PLACE(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL, + WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0BQ = [0-43] 
WATCFG0BQ_CFG_WAT_MSKA = 0x3fbfff [44-60] WATCFG0BQ_CFG_WAT_CNTL = 0x10000 */ scom_and_or_for_chiplet(id, WATCFG0BQ, ~PPC_BITMASK(0, 60), - PPC_SHIFT(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA) | - PPC_SHIFT(0x10000, WATCFG0BQ_CFG_WAT_CNTL)); + PPC_PLACE(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA, + WATCFG0BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10000, WATCFG0BQ_CFG_WAT_CNTL, + WATCFG0BQ_CFG_WAT_CNTL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0DQ = [0-43] WATCFG0DQ_CFG_WAT_PATA = 0x80200004000 */ scom_and_or_for_chiplet(id, WATCFG0DQ, ~PPC_BITMASK(0, 43), - PPC_SHIFT(0x80200004000, WATCFG0DQ_CFG_WAT_PATA)); + PPC_PLACE(0x80200004000, WATCFG0DQ_CFG_WAT_PATA, + WATCFG0DQ_CFG_WAT_PATA_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3AQ = [0-47] WATCFG3AQ_CFG_WAT_EVENT_SEL = 0x800000000000 */ scom_and_or_for_chiplet(id, WATCFG3AQ, ~PPC_BITMASK(0, 47), - PPC_SHIFT(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL)); + PPC_PLACE(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL, + WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3BQ = [0-43] WATCFG3BQ_CFG_WAT_MSKA = 0xfffffffffff [44-60] WATCFG3BQ_CFG_WAT_CNTL = 0x10400 */ scom_and_or_for_chiplet(id, WATCFG3BQ, ~PPC_BITMASK(0, 60), - PPC_SHIFT(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA) | - PPC_SHIFT(0x10400, WATCFG3BQ_CFG_WAT_CNTL)); + PPC_PLACE(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA, + WATCFG3BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10400, WATCFG3BQ_CFG_WAT_CNTL, + WATCFG3BQ_CFG_WAT_CNTL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ = [36] MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE = 0 @@ -792,7 +848,8 @@ static void p9n_mcbist_scom(int mcs_i) */ scom_and_or_for_chiplet(id, DBGCFG0Q, ~PPC_BITMASK(23, 33), PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | - PPC_SHIFT(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01)); + PPC_PLACE(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01, + DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG1Q = [0] DBGCFG1Q_CFG_WAT_ENABLE = 1 @@ -804,8 +861,10 @@ static void p9n_mcbist_scom(int mcs_i) [20-39] DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL = 0x08000 */ scom_and_or_for_chiplet(id, DBGCFG2Q, ~PPC_BITMASK(0, 39), - PPC_SHIFT(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL) | - PPC_SHIFT(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL)); + PPC_PLACE(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN) | + PPC_PLACE(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG3Q = [20-22] DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL = 0x4 @@ -813,9 +872,12 @@ static void p9n_mcbist_scom(int mcs_i) [37-40] DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE = 0x4 */ scom_and_or_for_chiplet(id, DBGCFG3Q, ~(PPC_BITMASK(20, 25) | PPC_BITMASK(37, 40)), - PPC_SHIFT(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL) | - PPC_SHIFT(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL) | - PPC_SHIFT(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE)); + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE, + DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN)); } static void set_rank_pairs(int mcs_i, int mca_i) @@ -896,7 +958,12 @@ static void set_rank_pairs(int mcs_i, int mca_i) // - the mirror mode attribute is set for the rank's DIMM (SPD[136]) // - We are not in quad encoded mode (so master ranks <= 2) [48] ADDR_MIRROR_RP0_PRI - ... 
+ [49] ADDR_MIRROR_RP0_SEC + [50] ADDR_MIRROR_RP1_PRI + [51] ADDR_MIRROR_RP1_SEC + [52] ADDR_MIRROR_RP2_PRI + [53] ADDR_MIRROR_RP2_SEC + [54] ADDR_MIRROR_RP3_PRI [55] ADDR_MIRROR_RP3_SEC [58] ADDR_MIRROR_A3_A4 = 1 [59] ADDR_MIRROR_A5_A6 = 1 @@ -918,10 +985,10 @@ static void set_rank_pairs(int mcs_i, int mca_i) uint64_t mirr = mca->dimm[0].present ? mca->dimm[0].spd[136] : mca->dimm[1].spd[136]; mca_and_or(id, mca_i, DDRPHY_PC_MIRROR_CONFIG_P0, ~PPC_BITMASK(48, 63), - PPC_SHIFT(mirr, ADDR_MIRROR_RP1_PRI) | - PPC_SHIFT(mirr, ADDR_MIRROR_RP1_SEC) | - PPC_SHIFT(mirr, ADDR_MIRROR_RP3_PRI) | - PPC_SHIFT(mirr, ADDR_MIRROR_RP3_SEC) | + PPC_PLACE(mirr, ADDR_MIRROR_RP1_PRI, 1) | + PPC_PLACE(mirr, ADDR_MIRROR_RP1_SEC, 1) | + PPC_PLACE(mirr, ADDR_MIRROR_RP3_PRI, 1) | + PPC_PLACE(mirr, ADDR_MIRROR_RP3_SEC, 1) | PPC_BITMASK(58, 63)); /* IOM0.DDRPHY_PC_RANK_GROUP_EXT_P0 = // 0x8000C0350701103F @@ -1064,45 +1131,45 @@ static void reset_rd_vref(int mcs_i, int mca_i) /* SCOM addresses are not regular for DAC, so no inner loop. */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_0_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_1_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_2_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_3_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); if (dp == 4) break; dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_4_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_5_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_6_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_7_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), - PPC_SHIFT(vref_bf, BIT0_VREF_DAC) | - PPC_SHIFT(vref_bf, BIT1_VREF_DAC)); + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); } /* IOM0.DDRPHY_DP16_RD_VREF_CAL_EN_P0_{0-4} @@ -1148,9 +1215,12 @@ static void pc_reset(int mcs_i, int mca_i) */ mca_and_or(id, 
mca_i, DDRPHY_PC_CONFIG1_P0, ~(PPC_BITMASK(48, 55) | PPC_BITMASK(59, 62)), - PPC_SHIFT(/* ATTR_MSS_EFF_DPHY_WLO */ 3, WRITE_LATENCY_OFFSET) | - PPC_SHIFT(/* ATTR_MSS_EFF_DPHY_RLO */ 5, READ_LATENCY_OFFSET) | - PPC_SHIFT(0x5, MEMORY_TYPE) | PPC_BIT(DDR4_LATENCY_SW)); + PPC_PLACE(/* ATTR_MSS_EFF_DPHY_WLO */ 3, WRITE_LATENCY_OFFSET, + WRITE_LATENCY_OFFSET_LEN) | + PPC_PLACE(/* ATTR_MSS_EFF_DPHY_RLO */ 5, READ_LATENCY_OFFSET, + READ_LATENCY_OFFSET_LEN) | + PPC_PLACE(0x5, MEMORY_TYPE, MEMORY_TYPE_LEN) | + PPC_BIT(DDR4_LATENCY_SW)); /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = [all] 0 @@ -1208,8 +1278,10 @@ static void wc_reset(int mcs_i, int mca_i) */ uint64_t tWLO_tWLOE = 12 + MAX((tWLDQSEN + tMOD), (tWLO + tWLOE)) + 1 + 1; mca_and_or(id, mca_i, DDRPHY_WC_CONFIG0_P0, 0, - PPC_SHIFT(tWLO_tWLOE, TWLO_TWLOE) | PPC_BIT(WL_ONE_DQS_PULSE) | - PPC_SHIFT(0x20, FW_WR_RD) | PPC_BIT(CUSTOM_INIT_WRITE)); + PPC_PLACE(tWLO_tWLOE, TWLO_TWLOE, TWLO_TWLOE_LEN) | + PPC_BIT(WL_ONE_DQS_PULSE) | + PPC_PLACE(0x20, FW_WR_RD, FW_WR_RD_LEN) | + PPC_BIT(CUSTOM_INIT_WRITE)); /* IOM0.DDRPHY_WC_CONFIG1_P0 = [all] 0 @@ -1218,7 +1290,8 @@ static void wc_reset(int mcs_i, int mca_i) [55-60] WR_PRE_DLY = 0x2a (42) */ mca_and_or(id, mca_i, DDRPHY_WC_CONFIG1_P0, 0, - PPC_SHIFT(7, BIG_STEP) | PPC_SHIFT(0x2A, WR_PRE_DLY)); + PPC_PLACE(7, BIG_STEP, BIG_STEP_LEN) | + PPC_PLACE(0x2A, WR_PRE_DLY, WR_PRE_DLY_LEN)); /* IOM0.DDRPHY_WC_CONFIG2_P0 = [all] 0 @@ -1228,15 +1301,16 @@ static void wc_reset(int mcs_i, int mca_i) */ /* There is no Additive Latency. */ mca_and_or(id, mca_i, DDRPHY_WC_CONFIG2_P0, 0, - PPC_SHIFT(5, NUM_VALID_SAMPLES) | - PPC_SHIFT(MAX(mca->nwtr_s + 11, mem_data.nrtp + 3), FW_RD_WR) | - PPC_SHIFT(5, IPW_WR_WR)); + PPC_PLACE(5, NUM_VALID_SAMPLES, NUM_VALID_SAMPLES_LEN) | + PPC_PLACE(MAX(mca->nwtr_s + 11, mem_data.nrtp + 3), FW_RD_WR, FW_RD_WR_LEN) | + PPC_PLACE(5, IPW_WR_WR, IPW_WR_WR_LEN)); /* IOM0.DDRPHY_WC_CONFIG3_P0 = [all] 0 [55-60] MRS_CMD_DQ_OFF = 0x3f */ - mca_and_or(id, mca_i, DDRPHY_WC_CONFIG3_P0, 0, PPC_SHIFT(0x3F, MRS_CMD_DQ_OFF)); + mca_and_or(id, mca_i, DDRPHY_WC_CONFIG3_P0, 0, + PPC_PLACE(0x3F, MRS_CMD_DQ_OFF, MRS_CMD_DQ_OFF_LEN)); /* IOM0.DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 [48] WL_ENABLE_RTT_SWAP = 0 @@ -1244,7 +1318,8 @@ static void wc_reset(int mcs_i, int mca_i) [50-59] WR_CTR_VREF_COUNTER_RESET_VAL = 150ns in clock cycles // JESD79-4C Table 67 */ mca_and_or(id, mca_i, DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0, ~PPC_BITMASK(48, 59), - PPC_SHIFT(ns_to_nck(150), WR_CTR_VREF_COUNTER_RESET_VAL)); + PPC_PLACE(ns_to_nck(150), WR_CTR_VREF_COUNTER_RESET_VAL, + WR_CTR_VREF_COUNTER_RESET_VAL_LEN)); } static void rc_reset(int mcs_i, int mca_i) @@ -1258,7 +1333,8 @@ static void rc_reset(int mcs_i, int mca_i) [62] PERFORM_RDCLK_ALIGN = 1 */ mca_and_or(id, mca_i, DDRPHY_RC_CONFIG0_P0, 0, - PPC_SHIFT(0x5, GLOBAL_PHY_OFFSET) | PPC_BIT(PERFORM_RDCLK_ALIGN)); + PPC_PLACE(0x5, GLOBAL_PHY_OFFSET, GLOBAL_PHY_OFFSET_LEN) | + PPC_BIT(PERFORM_RDCLK_ALIGN)); /* IOM0.DDRPHY_RC_CONFIG1_P0 [all] 0 @@ -1271,14 +1347,15 @@ static void rc_reset(int mcs_i, int mca_i) [57-58] 3 // not documented, BURST_WINDOW? 
*/ mca_and_or(id, mca_i, DDRPHY_RC_CONFIG2_P0, 0, - PPC_SHIFT(8, CONSEC_PASS) | PPC_SHIFT(3, 58)); + PPC_PLACE(8, CONSEC_PASS, CONSEC_PASS_LEN) | + PPC_PLACE(3, 57, 2)); /* IOM0.DDRPHY_RC_CONFIG3_P0 [all] 0 [51-54] COARSE_CAL_STEP_SIZE = 4 // 5/128 */ mca_and_or(id, mca_i, DDRPHY_RC_CONFIG3_P0, 0, - PPC_SHIFT(4, COARSE_CAL_STEP_SIZE)); + PPC_PLACE(4, COARSE_CAL_STEP_SIZE, COARSE_CAL_STEP_SIZE_LEN)); /* IOM0.DDRPHY_RC_RDVREF_CONFIG0_P0 = [all] 0 @@ -1300,8 +1377,8 @@ static void rc_reset(int mcs_i, int mca_i) [56-59] MPR_LOCATION = 4 // "From R. King." */ mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, 0, - PPC_SHIFT(mca->cl + 15, CMD_PRECEDE_TIME) | - PPC_SHIFT(4, MPR_LOCATION)); + PPC_PLACE(mca->cl + 15, CMD_PRECEDE_TIME, CMD_PRECEDE_TIME_LEN) | + PPC_PLACE(4, MPR_LOCATION, MPR_LOCATION_LEN)); } static inline int log2_up(uint32_t x) @@ -1356,10 +1433,10 @@ static void seq_reset(int mcs_i, int mca_i) * https://github.com/open-power/hostboot/blob/master/src/import/chips/p9/procedures/hwp/memory/lib/phy/seq.C#L142 */ mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, 0, - PPC_SHIFT(5, TMOD_CYCLES) | - PPC_SHIFT(log2_up(mca->nrcd), TRCD_CYCLES) | - PPC_SHIFT(log2_up(mca->nrp), TRP_CYCLES) | - PPC_SHIFT(log2_up(mca->nrfc), TRFC_CYCLES)); + PPC_PLACE(5, TMOD_CYCLES, TMOD_CYCLES_LEN) | + PPC_PLACE(log2_up(mca->nrcd), TRCD_CYCLES, TRCD_CYCLES_LEN) | + PPC_PLACE(log2_up(mca->nrp), TRP_CYCLES, TRP_CYCLES_LEN) | + PPC_PLACE(log2_up(mca->nrfc), TRFC_CYCLES, TRFC_CYCLES_LEN)); /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 = [all] 0 @@ -1369,8 +1446,10 @@ static void seq_reset(int mcs_i, int mca_i) [60-63] TWRMRD_CYCLES = 6 // log2(40) rounded up, JEDEC tables 169 and 170 */ mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM1_P0, 0, - PPC_SHIFT(10, TZQINIT_CYCLES) | PPC_SHIFT(7, TZQCS_CYCLES) | - PPC_SHIFT(6, TWLDQSEN_CYCLES) | PPC_SHIFT(6, TWRMRD_CYCLES)); + PPC_PLACE(10, TZQINIT_CYCLES, TZQINIT_CYCLES_LEN) | + PPC_PLACE(7, TZQCS_CYCLES, TZQCS_CYCLES_LEN) | + PPC_PLACE(6, TWLDQSEN_CYCLES, TWLDQSEN_CYCLES_LEN) | + PPC_PLACE(6, TWRMRD_CYCLES, TWRMRD_CYCLES_LEN)); /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 = [all] 0 @@ -1379,22 +1458,23 @@ static void seq_reset(int mcs_i, int mca_i) */ /* AL and PL are disabled (0) */ mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM2_P0, 0, - PPC_SHIFT(log2_up(mem_data.cwl - 2), TODTLON_OFF_CYCLES) | - PPC_SHIFT(0x777, 63)); + PPC_PLACE(log2_up(mem_data.cwl - 2), TODTLON_OFF_CYCLES, + TODTLON_OFF_CYCLES_LEN) | + PPC_PLACE(0x777, 52, 12)); /* IOM0.DDRPHY_SEQ_RD_WR_DATA0_P0 = [all] 0 [48-63] RD_RW_DATA_REG0 = 0xaa00 */ mca_and_or(id, mca_i, DDRPHY_SEQ_RD_WR_DATA0_P0, 0, - PPC_SHIFT(0xAA00, RD_RW_DATA_REG0)); + PPC_PLACE(0xAA00, RD_RW_DATA_REG0, RD_RW_DATA_REG0_LEN)); /* IOM0.DDRPHY_SEQ_RD_WR_DATA1_P0 = [all] 0 [48-63] RD_RW_DATA_REG1 = 0x00aa */ mca_and_or(id, mca_i, DDRPHY_SEQ_RD_WR_DATA1_P0, 0, - PPC_SHIFT(0x00AA, RD_RW_DATA_REG1)); + PPC_PLACE(0x00AA, RD_RW_DATA_REG1, RD_RW_DATA_REG1_LEN)); /* * For all registers below, assume RDIMM (max 2 ranks). 
@@ -1411,8 +1491,10 @@ static void seq_reset(int mcs_i, int mca_i) [56-59] ODT_RD_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][1]) */ mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG0_P0, 0, - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), ODT_RD_VALUES0) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), ODT_RD_VALUES1)); + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), ODT_RD_VALUES0, + ODT_RD_VALUES0_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), ODT_RD_VALUES1, + ODT_RD_VALUES1_LEN)); /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG1_P0 = F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q @@ -1427,8 +1509,10 @@ static void seq_reset(int mcs_i, int mca_i) /* 2 DIMMs -> odd vpd_idx */ uint64_t val = 0; if (vpd_idx % 2) - val = PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES2) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES3); + val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES2, + ODT_RD_VALUES2_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES3, + ODT_RD_VALUES3_LEN); mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); @@ -1440,8 +1524,10 @@ static void seq_reset(int mcs_i, int mca_i) [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) */ mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_WR_VALUES0) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_WR_VALUES1)); + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_WR_VALUES0, + ODT_WR_VALUES0_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_WR_VALUES1, + ODT_WR_VALUES1_LEN)); /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q @@ -1455,8 +1541,10 @@ static void seq_reset(int mcs_i, int mca_i) */ val = 0; if (vpd_idx % 2) - val = PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), ODT_WR_VALUES2) | - PPC_SHIFT(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), ODT_WR_VALUES3); + val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), ODT_WR_VALUES2, + ODT_WR_VALUES2_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), ODT_WR_VALUES3, + ODT_WR_VALUES3_LEN); mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, 0, val); #undef F @@ -1497,12 +1585,12 @@ static void reset_ac_boost_cntl(int mcs_i, int mca_i) for (dp = 0; dp < 5; dp++) { dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0, ~PPC_BITMASK(48, 56), - PPC_SHIFT(1, S0ACENSLICENDRV_DC) | - PPC_SHIFT(1, S0ACENSLICEPDRV_DC)); + PPC_PLACE(1, S0ACENSLICENDRV_DC, S0ACENSLICENDRV_DC_LEN) | + PPC_PLACE(1, S0ACENSLICEPDRV_DC, S0ACENSLICEPDRV_DC_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0, ~PPC_BITMASK(48, 56), - PPC_SHIFT(1, S1ACENSLICENDRV_DC) | - PPC_SHIFT(1, S1ACENSLICEPDRV_DC)); + PPC_PLACE(1, S1ACENSLICENDRV_DC, S1ACENSLICENDRV_DC_LEN) | + PPC_PLACE(1, S1ACENSLICEPDRV_DC, S1ACENSLICEPDRV_DC_LEN)); } } @@ -1536,12 +1624,16 @@ static void reset_ctle_cntl(int mcs_i, int mca_i) for (dp = 0; dp < 5; dp++) { dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0, ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), - PPC_SHIFT(1, NIB_0_DQSEL_CAP) | PPC_SHIFT(5, NIB_0_DQSEL_RES) | - PPC_SHIFT(1, NIB_1_DQSEL_CAP) | PPC_SHIFT(5, NIB_1_DQSEL_RES)); + PPC_PLACE(1, NIB_0_DQSEL_CAP, NIB_0_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_0_DQSEL_RES, NIB_0_DQSEL_RES_LEN) | + PPC_PLACE(1, 
NIB_1_DQSEL_CAP, NIB_1_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_1_DQSEL_RES, NIB_1_DQSEL_RES_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0, ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), - PPC_SHIFT(1, NIB_2_DQSEL_CAP) | PPC_SHIFT(5, NIB_2_DQSEL_RES) | - PPC_SHIFT(1, NIB_3_DQSEL_CAP) | PPC_SHIFT(5, NIB_3_DQSEL_RES)); + PPC_PLACE(1, NIB_2_DQSEL_CAP, NIB_2_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_2_DQSEL_RES, NIB_2_DQSEL_RES_LEN) | + PPC_PLACE(1, NIB_3_DQSEL_CAP, NIB_3_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_3_DQSEL_RES, NIB_3_DQSEL_RES_LEN)); } } @@ -1571,8 +1663,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR0 = [all] 0 @@ -1580,8 +1674,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR0 = [all] 0 @@ -1589,8 +1685,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR0 = [all] 0 @@ -1598,8 +1696,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR0 = [all] 0 @@ -1607,8 +1707,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + 
PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR0 = [all] 0 @@ -1616,8 +1718,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR1 = @@ -1626,8 +1730,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR1, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = [all] 0 @@ -1635,8 +1741,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR1 = [all] 0 @@ -1644,8 +1752,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR1, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = [all] 0 @@ -1653,8 +1763,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR1 = [all] 0 @@ -1662,8 +1774,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR1, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx][mca_i], ADR_DELAY_EVEN) | - 
PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR1 = [all] 0 @@ -1671,8 +1785,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR1, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR2 = @@ -1681,8 +1797,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR2, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR2 = [all] 0 @@ -1690,8 +1808,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR2, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR2 = [all] 0 @@ -1699,8 +1819,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR2, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR2 = [all] 0 @@ -1708,8 +1830,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR2, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR2 = [all] 0 @@ -1717,8 +1841,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06 */ 
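/*
 * Editor's note (illustrative sketch, not part of the patch): the conversion in
 * these hunks replaces PPC_SHIFT(value, bit), where `bit` named the field's last
 * (highest-numbered) bit in IBM bit order, with PPC_PLACE(value, start, len),
 * which names the field's first bit and its width -- that is why the header
 * constants below move from e.g. ADR_DELAY_EVEN 55 to ADR_DELAY_EVEN 49 plus
 * ADR_DELAY_EVEN_LEN 7. The standalone definitions below show one way such
 * macros could be written so that both forms place the value identically; the
 * project's real macros live in its PPC/SCOM headers and may differ in details
 * such as casts and masking.
 */
#include <stdint.h>

/* IBM bit numbering: bit 0 is the MSB of a 64-bit doubleword. */
#define PPC_BIT(bit)		(1ULL << (63 - (bit)))
#define PPC_BITMASK(first, last)	\
	((PPC_BIT(first) - PPC_BIT(last)) + PPC_BIT(first))

/* Old style: shift `val` so that `lsb` is the field's last (rightmost) bit. */
#define PPC_SHIFT(val, lsb)	((uint64_t)(val) << (63 - (lsb)))

/* New style: place `val` into the `len`-bit field whose first bit is `start`. */
#define PPC_PLACE(val, start, len)			\
	(((uint64_t)(val) << (64 - (start) - (len))) &	\
	 PPC_BITMASK(start, (start) + (len) - 1))

/*
 * Example: ADR_DELAY_EVEN occupies bits 49-55. The old header named bit 55,
 * so PPC_SHIFT(v, 55) shifted left by 8; the new header names bit 49 with
 * length 7, and PPC_PLACE(v, 49, 7) shifts left by 8 as well, additionally
 * masking off any value bits that would overflow the field.
 */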
mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR2, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR2 = [all] 0 @@ -1726,8 +1852,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR2, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR3 = @@ -1736,8 +1864,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR3, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR3 = [all] 0 @@ -1745,8 +1875,10 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR3, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR3 = [all] 0 @@ -1754,15 +1886,18 @@ static void reset_delay(int mcs_i, int mca_i) [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR3, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx][mca_i], ADR_DELAY_EVEN) | - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx][mca_i], ADR_DELAY_ODD)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR3 = [all] 0 [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0 */ mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR3, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx][mca_i], ADR_DELAY_EVEN)); + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN)); } @@ -1784,9 +1919,9 @@ static void reset_tsys_adr(int mcs_i, int mca_i) */ /* Has the same stride as DP16. 
*/ dp_mca_and_or(id, 0, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, - 0, PPC_SHIFT(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK)); + 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); dp_mca_and_or(id, 1, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, - 0, PPC_SHIFT(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK)); + 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); } static void reset_tsys_data(int mcs_i, int mca_i) @@ -1808,7 +1943,8 @@ static void reset_tsys_data(int mcs_i, int mca_i) */ for (dp = 0; dp < 5; dp++) { dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MR_TSYS_DATA[i], TSYS_WRCLK)); + PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_DATA[i], TSYS_WRCLK, + TSYS_WRCLK_LEN)); } } @@ -1830,8 +1966,8 @@ static void reset_io_impedances(int mcs_i, int mca_i) * default value, but set it just to be safe. */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, 0, - PPC_SHIFT(0x7F, EN_SLICE_N_WR) | - PPC_SHIFT(0x7F, EN_SLICE_P_WR)); + PPC_PLACE(0x7F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN) | + PPC_PLACE(0x7F, EN_SLICE_P_WR, EN_SLICE_P_WR_LEN)); /* IOM0.DDRPHY_DP16_IO_TX_PFET_TERM_P0_{0,1,2,3,4} = [all] 0 @@ -1840,7 +1976,7 @@ static void reset_io_impedances(int mcs_i, int mca_i) */ /* 60 Ohms for all configurations, 240/60 = 4 bits set. */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_PFET_TERM_P0_0, 0, - PPC_SHIFT(0x0F, EN_SLICE_N_WR)); + PPC_PLACE(0x0F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN)); } /* IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // yes, ADR1 @@ -1977,8 +2113,10 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_CONFIG0_P0_0, 0, PPC_BIT(WR_CTR_RUN_FULL_1D) | - PPC_SHIFT(1, WR_CTR_2D_BIG_STEP_VAL) | - PPC_SHIFT(7, WR_CTR_NUM_NO_INC_VREF_COMP)); + PPC_PLACE(1, WR_CTR_2D_BIG_STEP_VAL, + WR_CTR_2D_BIG_STEP_VAL_LEN) | + PPC_PLACE(7, WR_CTR_NUM_NO_INC_VREF_COMP, + WR_CTR_NUM_NO_INC_VREF_COMP_LEN)); /* IOM0.DDRPHY_DP16_WR_VREF_CONFIG1_P0_{0,1,2,3,4} = [all] 0 @@ -1987,8 +2125,10 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) [56-62] WR_CTR_VREF_SINGLE_RANGE_MAX = 0x32 // JEDEC table 34 */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_CONFIG1_P0_0, 0, - PPC_SHIFT(0x18, WR_CTR_VREF_RANGE_CROSSOVER) | - PPC_SHIFT(0x32, WR_CTR_VREF_SINGLE_RANGE_MAX)); + PPC_PLACE(0x18, WR_CTR_VREF_RANGE_CROSSOVER, + WR_CTR_VREF_RANGE_CROSSOVER_LEN) | + PPC_PLACE(0x32, WR_CTR_VREF_SINGLE_RANGE_MAX, + WR_CTR_VREF_SINGLE_RANGE_MAX_LEN)); /* IOM0.DDRPHY_DP16_WR_VREF_STATUS0_P0_{0,1,2,3,4} = [all] 0 @@ -2027,29 +2167,45 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) [58-63] WR_VREF_VALUE_DRAM{1,3} = ATTR_MSS_VPD_MT_VREF_DRAM_WR & 0x3f */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR0_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR0_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + 
WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR1_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR1_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR2_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR2_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR3_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR3_P0_0, 0, - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2) | - PPC_SHIFT(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3)); + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], + WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); } } @@ -2064,7 +2220,8 @@ static void reset_drift_limits(int mcs_i, int mca_i) */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DRIFT_LIMITS_P0_0, ~PPC_BITMASK(48, 49), - PPC_SHIFT(1, DD2_BLUE_EXTEND_RANGE)); + PPC_PLACE(1, DD2_BLUE_EXTEND_RANGE, + DD2_BLUE_EXTEND_RANGE_LEN)); } } @@ -2100,7 +2257,7 @@ static void dqsclk_offset(int mcs_i, int mca_i) [49-55] DQS_OFFSET = 0x08 // Config provided by S. 
Wyatt 9/13 */ dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQSCLK_OFFSET_P0_0, 0, - PPC_SHIFT(0x08, DQS_OFFSET)); + PPC_PLACE(0x08, DQS_OFFSET, DQS_OFFSET_LEN)); } } diff --git a/src/soc/ibm/power9/istep_13_scom.h b/src/soc/ibm/power9/istep_13_scom.h index 2f09f6b8c28..cdb7e729c6d 100644 --- a/src/soc/ibm/power9/istep_13_scom.h +++ b/src/soc/ibm/power9/istep_13_scom.h @@ -57,51 +57,87 @@ #define MBACALFIR_RCD_CAL_PARITY_ERROR 14 #define MBA_DSM0Q 0x0701090A -#define MBA_DSM0Q_CFG_RODT_START_DLY 5 -#define MBA_DSM0Q_CFG_RODT_END_DLY 11 -#define MBA_DSM0Q_CFG_WODT_START_DLY 17 -#define MBA_DSM0Q_CFG_WODT_END_DLY 23 -#define MBA_DSM0Q_CFG_WRDONE_DLY 29 -#define MBA_DSM0Q_CFG_WRDATA_DLY 35 -#define MBA_DSM0Q_CFG_RDTAG_DLY 41 +#define MBA_DSM0Q_CFG_RODT_START_DLY 0 +#define MBA_DSM0Q_CFG_RODT_START_DLY_LEN 6 +#define MBA_DSM0Q_CFG_RODT_END_DLY 6 +#define MBA_DSM0Q_CFG_RODT_END_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WODT_START_DLY 12 +#define MBA_DSM0Q_CFG_WODT_START_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WODT_END_DLY 18 +#define MBA_DSM0Q_CFG_WODT_END_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WRDONE_DLY 24 +#define MBA_DSM0Q_CFG_WRDONE_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WRDATA_DLY 30 +#define MBA_DSM0Q_CFG_WRDATA_DLY_LEN 6 +#define MBA_DSM0Q_CFG_RDTAG_DLY 36 +#define MBA_DSM0Q_CFG_RDTAG_DLY_LEN 6 #define MBA_TMR0Q 0x0701090B -#define MBA_TMR0Q_RRDM_DLY 3 -#define MBA_TMR0Q_RRSMSR_DLY 7 -#define MBA_TMR0Q_RRSMDR_DLY 11 -#define MBA_TMR0Q_RROP_DLY 15 -#define MBA_TMR0Q_WWDM_DLY 19 -#define MBA_TMR0Q_WWSMSR_DLY 23 -#define MBA_TMR0Q_WWSMDR_DLY 27 -#define MBA_TMR0Q_WWOP_DLY 31 -#define MBA_TMR0Q_RWDM_DLY 36 -#define MBA_TMR0Q_RWSMSR_DLY 41 -#define MBA_TMR0Q_RWSMDR_DLY 46 -#define MBA_TMR0Q_WRDM_DLY 50 -#define MBA_TMR0Q_WRSMSR_DLY 56 -#define MBA_TMR0Q_WRSMDR_DLY 62 +#define MBA_TMR0Q_RRDM_DLY 0 +#define MBA_TMR0Q_RRDM_DLY_LEN 4 +#define MBA_TMR0Q_RRSMSR_DLY 4 +#define MBA_TMR0Q_RRSMSR_DLY_LEN 4 +#define MBA_TMR0Q_RRSMDR_DLY 8 +#define MBA_TMR0Q_RRSMDR_DLY_LEN 4 +#define MBA_TMR0Q_RROP_DLY 12 +#define MBA_TMR0Q_RROP_DLY_LEN 4 +#define MBA_TMR0Q_WWDM_DLY 16 +#define MBA_TMR0Q_WWDM_DLY_LEN 4 +#define MBA_TMR0Q_WWSMSR_DLY 20 +#define MBA_TMR0Q_WWSMSR_DLY_LEN 4 +#define MBA_TMR0Q_WWSMDR_DLY 24 +#define MBA_TMR0Q_WWSMDR_DLY_LEN 4 +#define MBA_TMR0Q_WWOP_DLY 28 +#define MBA_TMR0Q_WWOP_DLY_LEN 4 +#define MBA_TMR0Q_RWDM_DLY 32 +#define MBA_TMR0Q_RWDM_DLY_LEN 5 +#define MBA_TMR0Q_RWSMSR_DLY 37 +#define MBA_TMR0Q_RWSMSR_DLY_LEN 5 +#define MBA_TMR0Q_RWSMDR_DLY 42 +#define MBA_TMR0Q_RWSMDR_DLY_LEN 5 +#define MBA_TMR0Q_WRDM_DLY 47 +#define MBA_TMR0Q_WRDM_DLY_LEN 4 +#define MBA_TMR0Q_WRSMSR_DLY 51 +#define MBA_TMR0Q_WRSMSR_DLY_LEN 6 +#define MBA_TMR0Q_WRSMDR_DLY 57 +#define MBA_TMR0Q_WRSMDR_DLY_LEN 6 #define MBA_TMR1Q 0x0701090C -#define MBA_TMR1Q_RRSBG_DLY 3 -#define MBA_TMR1Q_WRSBG_DLY 9 -#define MBA_TMR1Q_CFG_TFAW 15 -#define MBA_TMR1Q_CFG_TRCD 20 -#define MBA_TMR1Q_CFG_TRP 25 -#define MBA_TMR1Q_CFG_TRAS 31 -#define MBA_TMR1Q_CFG_WR2PRE 47 -#define MBA_TMR1Q_CFG_RD2PRE 51 -#define MBA_TMR1Q_TRRD 55 -#define MBA_TMR1Q_TRRD_SBG 59 -#define MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY 63 +#define MBA_TMR1Q_RRSBG_DLY 0 +#define MBA_TMR1Q_RRSBG_DLY_LEN 4 +#define MBA_TMR1Q_WRSBG_DLY 4 +#define MBA_TMR1Q_WRSBG_DLY_LEN 6 +#define MBA_TMR1Q_CFG_TFAW 10 +#define MBA_TMR1Q_CFG_TFAW_LEN 6 +#define MBA_TMR1Q_CFG_TRCD 16 +#define MBA_TMR1Q_CFG_TRCD_LEN 5 +#define MBA_TMR1Q_CFG_TRP 21 +#define MBA_TMR1Q_CFG_TRP_LEN 5 +#define MBA_TMR1Q_CFG_TRAS 26 +#define MBA_TMR1Q_CFG_TRAS_LEN 6 +#define MBA_TMR1Q_CFG_WR2PRE 41 +#define MBA_TMR1Q_CFG_WR2PRE_LEN 7 +#define 
MBA_TMR1Q_CFG_RD2PRE 48 +#define MBA_TMR1Q_CFG_RD2PRE_LEN 4 +#define MBA_TMR1Q_TRRD 52 +#define MBA_TMR1Q_TRRD_LEN 4 +#define MBA_TMR1Q_TRRD_SBG 56 +#define MBA_TMR1Q_TRRD_SBG_LEN 4 +#define MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY 60 +#define MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY_LEN 4 #define MBA_WRQ0Q 0x0701090D #define MBA_WRQ0Q_CFG_WRQ_FIFO_MODE 5 +#define MBA_WRQ0Q_CFG_WRQ_FIFO_MODE_LEN 1 #define MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE 6 -#define MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING 58 +#define MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING 55 +#define MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING_LEN 4 #define MBA_RRQ0Q 0x0701090E #define MBA_RRQ0Q_CFG_RRQ_FIFO_MODE 6 -#define MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING 60 +#define MBA_RRQ0Q_CFG_RRQ_FIFO_MODE_LEN 1 +#define MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING 57 +#define MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING_LEN 4 #define MBA_CAL0Q 0x0701090F #define MBA_CAL0Q_RESET_RECOVER 57 @@ -111,63 +147,89 @@ #define MBA_FARB0Q 0x07010913 #define MBA_FARB0Q_CFG_2N_ADDR 17 #define MBA_FARB0Q_CFG_PARITY_AFTER_CMD 38 -#define MBA_FARB0Q_CFG_RCD_PROTECTION_TIME 53 +#define MBA_FARB0Q_CFG_RCD_PROTECTION_TIME 48 +#define MBA_FARB0Q_CFG_RCD_PROTECTION_TIME_LEN 6 #define MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY 54 #define MBA_FARB0Q_CFG_OE_ALWAYS_ON 55 #define MBA_FARB0Q_CFG_PORT_FAIL_DISABLE 57 -#define MBA_FARB0Q_CFG_OPT_RD_SIZE 63 +#define MBA_FARB0Q_CFG_OPT_RD_SIZE 61 +#define MBA_FARB0Q_CFG_OPT_RD_SIZE_LEN 3 #define MBA_FARB1Q 0x07010914 -#define MBA_FARB1Q_CFG_SLOT0_S0_CID 2 -#define MBA_FARB1Q_CFG_SLOT0_S1_CID 5 -#define MBA_FARB1Q_CFG_SLOT0_S2_CID 8 -#define MBA_FARB1Q_CFG_SLOT0_S3_CID 11 -#define MBA_FARB1Q_CFG_SLOT0_S4_CID 14 -#define MBA_FARB1Q_CFG_SLOT0_S5_CID 17 -#define MBA_FARB1Q_CFG_SLOT0_S6_CID 20 -#define MBA_FARB1Q_CFG_SLOT0_S7_CID 23 -#define MBA_FARB1Q_CFG_SLOT1_S0_CID 26 -#define MBA_FARB1Q_CFG_SLOT1_S1_CID 29 -#define MBA_FARB1Q_CFG_SLOT1_S2_CID 32 -#define MBA_FARB1Q_CFG_SLOT1_S3_CID 35 -#define MBA_FARB1Q_CFG_SLOT1_S4_CID 38 -#define MBA_FARB1Q_CFG_SLOT1_S5_CID 41 -#define MBA_FARB1Q_CFG_SLOT1_S6_CID 44 -#define MBA_FARB1Q_CFG_SLOT1_S7_CID 47 +#define MBA_FARB1Q_CFG_SLOT0_S0_CID 0 +#define MBA_FARB1Q_CFG_SLOT0_S1_CID 3 +#define MBA_FARB1Q_CFG_SLOT0_S2_CID 6 +#define MBA_FARB1Q_CFG_SLOT0_S3_CID 9 +#define MBA_FARB1Q_CFG_SLOT0_S4_CID 12 +#define MBA_FARB1Q_CFG_SLOT0_S5_CID 15 +#define MBA_FARB1Q_CFG_SLOT0_S6_CID 18 +#define MBA_FARB1Q_CFG_SLOT0_S7_CID 21 +#define MBA_FARB1Q_CFG_SLOT1_S0_CID 24 +#define MBA_FARB1Q_CFG_SLOT1_S1_CID 27 +#define MBA_FARB1Q_CFG_SLOT1_S2_CID 30 +#define MBA_FARB1Q_CFG_SLOT1_S3_CID 33 +#define MBA_FARB1Q_CFG_SLOT1_S4_CID 36 +#define MBA_FARB1Q_CFG_SLOT1_S5_CID 39 +#define MBA_FARB1Q_CFG_SLOT1_S6_CID 42 +#define MBA_FARB1Q_CFG_SLOT1_S7_CID 45 #define MBA_FARB2Q 0x07010915 -#define MBA_FARB2Q_CFG_RANK0_RD_ODT 3 -#define MBA_FARB2Q_CFG_RANK1_RD_ODT 7 -#define MBA_FARB2Q_CFG_RANK2_RD_ODT 11 -#define MBA_FARB2Q_CFG_RANK3_RD_ODT 15 -#define MBA_FARB2Q_CFG_RANK4_RD_ODT 19 -#define MBA_FARB2Q_CFG_RANK5_RD_ODT 23 -#define MBA_FARB2Q_CFG_RANK6_RD_ODT 27 -#define MBA_FARB2Q_CFG_RANK7_RD_ODT 31 -#define MBA_FARB2Q_CFG_RANK0_WR_ODT 35 -#define MBA_FARB2Q_CFG_RANK1_WR_ODT 39 -#define MBA_FARB2Q_CFG_RANK2_WR_ODT 43 -#define MBA_FARB2Q_CFG_RANK3_WR_ODT 47 -#define MBA_FARB2Q_CFG_RANK4_WR_ODT 51 -#define MBA_FARB2Q_CFG_RANK5_WR_ODT 55 -#define MBA_FARB2Q_CFG_RANK6_WR_ODT 59 -#define MBA_FARB2Q_CFG_RANK7_WR_ODT 63 +#define MBA_FARB2Q_CFG_RANK0_RD_ODT 0 +#define MBA_FARB2Q_CFG_RANK0_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK1_RD_ODT 4 +#define 
MBA_FARB2Q_CFG_RANK1_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK2_RD_ODT 8 +#define MBA_FARB2Q_CFG_RANK2_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK3_RD_ODT 12 +#define MBA_FARB2Q_CFG_RANK3_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK4_RD_ODT 16 +#define MBA_FARB2Q_CFG_RANK4_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK5_RD_ODT 20 +#define MBA_FARB2Q_CFG_RANK5_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK6_RD_ODT 24 +#define MBA_FARB2Q_CFG_RANK6_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK7_RD_ODT 28 +#define MBA_FARB2Q_CFG_RANK7_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK0_WR_ODT 32 +#define MBA_FARB2Q_CFG_RANK0_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK1_WR_ODT 36 +#define MBA_FARB2Q_CFG_RANK1_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK2_WR_ODT 40 +#define MBA_FARB2Q_CFG_RANK2_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK3_WR_ODT 44 +#define MBA_FARB2Q_CFG_RANK3_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK4_WR_ODT 48 +#define MBA_FARB2Q_CFG_RANK4_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK5_WR_ODT 52 +#define MBA_FARB2Q_CFG_RANK5_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK6_WR_ODT 56 +#define MBA_FARB2Q_CFG_RANK6_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK7_WR_ODT 60 +#define MBA_FARB2Q_CFG_RANK7_WR_ODT_LEN 4 #define MBA_FARB3Q 0x07010916 -#define MBA_FARB3Q_CFG_NM_N_PER_SLOT 14 -#define MBA_FARB3Q_CFG_NM_N_PER_PORT 30 -#define MBA_FARB3Q_CFG_NM_M 44 +#define MBA_FARB3Q_CFG_NM_N_PER_SLOT 0 +#define MBA_FARB3Q_CFG_NM_N_PER_SLOT_LEN 15 +#define MBA_FARB3Q_CFG_NM_N_PER_PORT 15 +#define MBA_FARB3Q_CFG_NM_N_PER_PORT_LEN 16 +#define MBA_FARB3Q_CFG_NM_M 31 +#define MBA_FARB3Q_CFG_NM_M_LEN 14 #define MBA_FARB3Q_CFG_NM_RAS_WEIGHT 47 -#define MBA_FARB3Q_CFG_NM_CAS_WEIGHT 50 +#define MBA_FARB3Q_CFG_NM_CAS_WEIGHT 48 +#define MBA_FARB3Q_CFG_NM_CAS_WEIGHT_LEN 3 #define MBA_FARB3Q_CFG_NM_CHANGE_AFTER_SYNC 53 #define MBA_FARB4Q 0x07010917 -#define MBA_FARB4Q_EMERGENCY_N 41 -#define MBA_FARB4Q_EMERGENCY_M 55 +#define MBA_FARB4Q_EMERGENCY_N 27 +#define MBA_FARB4Q_EMERGENCY_N_LEN 15 +#define MBA_FARB4Q_EMERGENCY_M 42 +#define MBA_FARB4Q_EMERGENCY_M_LEN 14 #define MBA_FARB5Q 0x07010918 -#define MBA_FARB5Q_CFG_DDR_DPHY_NCLK 1 -#define MBA_FARB5Q_CFG_DDR_DPHY_PCLK 3 +#define MBA_FARB5Q_CFG_DDR_DPHY_NCLK 0 +#define MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN 2 +#define MBA_FARB5Q_CFG_DDR_DPHY_PCLK 2 +#define MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN 2 #define MBA_FARB5Q_CFG_DDR_RESETN 4 #define MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL 5 #define MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE 6 @@ -175,30 +237,45 @@ #define MBAREF0Q 0x07010932 #define MBAREF0Q_CFG_REFRESH_ENABLE 0 -#define MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD 7 -#define MBAREF0Q_CFG_REFRESH_INTERVAL 18 -#define MBAREF0Q_CFG_TRFC 39 -#define MBAREF0Q_CFG_REFR_TSV_STACK 49 -#define MBAREF0Q_CFG_REFR_CHECK_INTERVAL 60 +#define MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD 5 +#define MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD_LEN 3 +#define MBAREF0Q_CFG_REFRESH_INTERVAL 8 +#define MBAREF0Q_CFG_REFRESH_INTERVAL_LEN 11 +#define MBAREF0Q_CFG_TRFC 30 +#define MBAREF0Q_CFG_TRFC_LEN 10 +#define MBAREF0Q_CFG_REFR_TSV_STACK 40 +#define MBAREF0Q_CFG_REFR_TSV_STACK_LEN 10 +#define MBAREF0Q_CFG_REFR_CHECK_INTERVAL 50 +#define MBAREF0Q_CFG_REFR_CHECK_INTERVAL_LEN 11 #define MBARPC0Q 0x07010934 #define MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE 2 #define MBARPC0Q_CFG_MIN_MAX_DOMAINS 5 -#define MBARPC0Q_CFG_PUP_AVAIL 10 -#define MBARPC0Q_CFG_PDN_PUP 15 -#define MBARPC0Q_CFG_PUP_PDN 20 +#define MBARPC0Q_CFG_PUP_AVAIL 6 +#define MBARPC0Q_CFG_PUP_AVAIL_LEN 5 +#define MBARPC0Q_CFG_PDN_PUP 11 +#define MBARPC0Q_CFG_PDN_PUP_LEN 5 +#define 
MBARPC0Q_CFG_PUP_PDN 16 +#define MBARPC0Q_CFG_PUP_PDN_LEN 5 #define MBARPC0Q_RESERVED_21 21 #define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_ENABLE 22 -#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME 32 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME 23 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME_LEN 10 #define MBASTR0Q 0x07010935 -#define MBASTR0Q_CFG_STR_ENABLE 0 -#define MBASTR0Q_CFG_ENTER_STR_TIME 11 -#define MBASTR0Q_CFG_TCKESR 16 -#define MBASTR0Q_CFG_TCKSRE 21 -#define MBASTR0Q_CFG_TCKSRX 26 -#define MBASTR0Q_CFG_TXSDLL 37 -#define MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL 56 +#define MBASTR0Q_CFG_STR_ENABLE 0 +#define MBASTR0Q_CFG_ENTER_STR_TIME 2 +#define MBASTR0Q_CFG_ENTER_STR_TIME_LEN 10 +#define MBASTR0Q_CFG_TCKESR 12 +#define MBASTR0Q_CFG_TCKESR_LEN 5 +#define MBASTR0Q_CFG_TCKSRE 17 +#define MBASTR0Q_CFG_TCKSRE_LEN 5 +#define MBASTR0Q_CFG_TCKSRX 22 +#define MBASTR0Q_CFG_TCKSRX_LEN 5 +#define MBASTR0Q_CFG_TXSDLL 27 +#define MBASTR0Q_CFG_TXSDLL_LEN 11 +#define MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL 46 +#define MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL_LEN 11 #define ECC_FIR_MASK 0x07010A03 #define ECC_FIR_ACTION0 0x07010A06 @@ -218,15 +295,19 @@ #define RECR 0x07010A0A #define MBSECCQ_DISABLE_MEMORY_ECC_CHECK_CORRECT 0 #define MBSECCQ_DISABLE_MEMORY_ECC_CORRECT 1 -#define MBSECCQ_READ_POINTER_DELAY 8 -#define MBSECCQ_VAL_TO_DATA_DELAY 18 -#define MBSECCQ_DELAY_VALID_1X 19 -#define MBSECCQ_NEST_VAL_TO_DATA_DELAY 21 -#define MBSECCQ_DELAY_NONBYPASS 22 -#define MBSECCQ_ENABLE_UE_NOISE_WINDOW 26 +#define MBSECCQ_READ_POINTER_DELAY 6 +#define MBSECCQ_READ_POINTER_DELAY_LEN 3 +#define MBSECCQ_VAL_TO_DATA_DELAY 16 +#define MBSECCQ_VAL_TO_DATA_DELAY_LEN 3 +#define MBSECCQ_DELAY_VALID_1X 19 +#define MBSECCQ_NEST_VAL_TO_DATA_DELAY 20 +#define MBSECCQ_NEST_VAL_TO_DATA_DELAY_LEN 2 +#define MBSECCQ_DELAY_NONBYPASS 22 +#define MBSECCQ_ENABLE_UE_NOISE_WINDOW 26 #define MBSECCQ_ENABLE_TCE_CORRECTION 27 #define MBSECCQ_USE_ADDRESS_HASH 29 -#define MBSECCQ_DATA_INVERSION 31 +#define MBSECCQ_DATA_INVERSION 30 +#define MBSECCQ_DATA_INVERSION_LEN 2 #define MBSECCQ_RESERVED_40 40 #define DBGR 0x07010A0B @@ -236,7 +317,8 @@ #define FWMS0 0x07010A18 #define AACR 0x07010A29 -#define AACR_ADDRESS 9 +#define AACR_ADDRESS 1 +#define AACR_ADDRESS_LEN 9 #define AACR_AUTOINC 10 #define AACR_ECCGEN 11 @@ -271,17 +353,24 @@ #define CCS_INST_ARR0_00 0x07012315 #define CCS_INST_ARR0_00_CCS_DDR_ACTN 20 -#define CCS_INST_ARR0_00_CCS_DDR_CKE 27 -#define CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 33 -#define CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 37 -#define CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE 59 +#define CCS_INST_ARR0_00_CCS_DDR_CKE 24 +#define CCS_INST_ARR0_00_CCS_DDR_CKE_LEN 4 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 32 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN 2 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 36 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN 2 +#define CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE 56 +#define CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN 4 #define CCS_INST_ARR1_00 0x07012335 -#define CCS_INST_ARR1_00_IDLES 15 -#define CCS_INST_ARR1_00_DDR_CAL_RANK 56 -#define CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE 57 +#define CCS_INST_ARR1_00_IDLES 0 +#define CCS_INST_ARR1_00_IDLES_LEN 16 +#define CCS_INST_ARR1_00_DDR_CAL_RANK 53 +#define CCS_INST_ARR1_00_DDR_CAL_RANK_LEN 4 +#define CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE 57 #define CCS_INST_ARR1_00_CCS_END 58 -#define CCS_INST_ARR1_00_GOTO_CMD 63 +#define CCS_INST_ARR1_00_GOTO_CMD 59 +#define CCS_INST_ARR1_00_GOTO_CMD_LEN 5 #define MBSEC0Q 0x07012355 #define MBSEC1Q 0x07012356 @@ -297,21 +386,28 @@ 
#define MCBSTATQ 0x07012366 #define WATCFG0AQ 0x07012380 -#define WATCFG0AQ_CFG_WAT_EVENT_SEL 47 +#define WATCFG0AQ_CFG_WAT_EVENT_SEL 0 +#define WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN 48 #define WATCFG0BQ 0x07012381 -#define WATCFG0BQ_CFG_WAT_MSKA 43 -#define WATCFG0BQ_CFG_WAT_CNTL 60 +#define WATCFG0BQ_CFG_WAT_MSKA 0 +#define WATCFG0BQ_CFG_WAT_MSKA_LEN 44 +#define WATCFG0BQ_CFG_WAT_CNTL 44 +#define WATCFG0BQ_CFG_WAT_CNTL_LEN 17 #define WATCFG0DQ 0x07012383 -#define WATCFG0DQ_CFG_WAT_PATA 43 +#define WATCFG0DQ_CFG_WAT_PATA 0 +#define WATCFG0DQ_CFG_WAT_PATA_LEN 44 #define WATCFG3AQ 0x0701238F -#define WATCFG3AQ_CFG_WAT_EVENT_SEL 47 +#define WATCFG3AQ_CFG_WAT_EVENT_SEL 0 +#define WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN 48 #define WATCFG3BQ 0x07012390 -#define WATCFG3BQ_CFG_WAT_MSKA 43 -#define WATCFG3BQ_CFG_WAT_CNTL 60 +#define WATCFG3BQ_CFG_WAT_MSKA 0 +#define WATCFG3BQ_CFG_WAT_MSKA_LEN 44 +#define WATCFG3BQ_CFG_WAT_CNTL 44 +#define WATCFG3BQ_CFG_WAT_CNTL_LEN 17 #define CCS_CNTLQ 0x070123A5 #define CCS_CNTLQ_CCS_START 0 @@ -324,10 +420,12 @@ #define CCS_MODEQ 0x070123A7 #define CCS_MODEQ_CCS_STOP_ON_ERR 0 #define CCS_MODEQ_CCS_UE_DISABLE 1 -#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT 23 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT 8 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN 16 #define CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD 24 -#define CCS_MODEQ_COPY_CKE_TO_SPARE_CKE 26 -#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT 31 +#define CCS_MODEQ_COPY_CKE_TO_SPARE_CKE 26 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT 30 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN 2 #define MCBMR0Q 0x070123A8 @@ -354,33 +452,43 @@ #define MCBCFGQ 0x070123E0 #define MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE 36 -#define MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE 58 +#define MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE 57 +#define MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN 2 #define DBGCFG0Q 0x070123E8 -#define DBGCFG0Q_CFG_DBG_ENABLE 0 -#define DBGCFG0Q_CFG_DBG_PICK_MCBIST01 33 +#define DBGCFG0Q_CFG_DBG_ENABLE 0 +#define DBGCFG0Q_CFG_DBG_PICK_MCBIST01 23 +#define DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN 11 #define DBGCFG1Q 0x070123E9 #define DBGCFG1Q_CFG_WAT_ENABLE 0 #define DBGCFG2Q 0x070123EA -#define DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL 19 -#define DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL 39 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL 0 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN 20 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL 20 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN 20 #define DBGCFG3Q 0x070123EB -#define DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL 22 -#define DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL 25 -#define DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE 40 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL 20 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN 3 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL 23 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN 3 +#define DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE 37 +#define DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN 4 #define MCSLOW_SYNC_CONFIG 0x07030000 #define MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS 4 #define MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED 7 #define MCSLOW_OPCG_ALIGN 0x07030001 -#define MCSLOW_OPCG_ALIGN_INOP_ALIGN 3 +#define MCSLOW_OPCG_ALIGN_INOP_ALIGN 0 +#define MCSLOW_OPCG_ALIGN_INOP_ALIGN_LEN 4 #define MCSLOW_OPCG_ALIGN_INOP_WAIT 19 -#define MCSLOW_OPCG_ALIGN_SCAN_RATIO 51 -#define MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES 63 +#define MCSLOW_OPCG_ALIGN_SCAN_RATIO 47 +#define MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN 5 +#define MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES 52 +#define MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES_LEN 12 #define MCSLOW_OPCG_REG0 0x07030002 #define MCSLOW_OPCG_RUNN_MODE 0 @@ -391,7 +499,8 
@@ #define MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY 56 #define MCSLOW_CLK_REGION 0x07030006 -#define MCSLOW_CLK_REGION_CLOCK_CMD 1 +#define MCSLOW_CLK_REGION_CLOCK_CMD 0 +#define MCSLOW_CLK_REGION_CLOCK_CMD_LEN 2 #define MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10 14 #define MCSLOW_CLK_REGION_SEL_THOLD_SL 48 #define MCSLOW_CLK_REGION_SEL_THOLD_NSL 49 @@ -457,7 +566,8 @@ #define DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 0x800000090701103F #define DDRPHY_DP16_DRIFT_LIMITS_P0_0 0x8000000A0701103F -#define DD2_BLUE_EXTEND_RANGE 49 +#define DD2_BLUE_EXTEND_RANGE 48 +#define DD2_BLUE_EXTEND_RANGE_LEN 2 #define DDRPHY_DP16_RD_LVL_STATUS0_P0_0 0x8000000E0701103F #define DDRPHY_DP16_RD_LVL_STATUS2_P0_0 0x800000100701103F @@ -474,39 +584,53 @@ #define DDRPHY_DP16_RD_STATUS0_P0_0 0x800000140701103F -#define DDRPHY_DP16_RD_VREF_DAC_0_P0_0 0x800000160701103F -#define DDRPHY_DP16_RD_VREF_DAC_1_P0_0 0x8000001F0701103F -#define DDRPHY_DP16_RD_VREF_DAC_2_P0_0 0x800000C00701103F -#define DDRPHY_DP16_RD_VREF_DAC_3_P0_0 0x800000C10701103F -#define DDRPHY_DP16_RD_VREF_DAC_4_P0_0 0x800000C20701103F -#define DDRPHY_DP16_RD_VREF_DAC_5_P0_0 0x800000C30701103F -#define DDRPHY_DP16_RD_VREF_DAC_6_P0_0 0x800000C40701103F -#define DDRPHY_DP16_RD_VREF_DAC_7_P0_0 0x800000C50701103F -#define BIT0_VREF_DAC 55 -#define BIT1_VREF_DAC 63 +#define DDRPHY_DP16_RD_VREF_DAC_0_P0_0 0x800000160701103F +#define DDRPHY_DP16_RD_VREF_DAC_1_P0_0 0x8000001F0701103F +#define DDRPHY_DP16_RD_VREF_DAC_2_P0_0 0x800000C00701103F +#define DDRPHY_DP16_RD_VREF_DAC_3_P0_0 0x800000C10701103F +#define DDRPHY_DP16_RD_VREF_DAC_4_P0_0 0x800000C20701103F +#define DDRPHY_DP16_RD_VREF_DAC_5_P0_0 0x800000C30701103F +#define DDRPHY_DP16_RD_VREF_DAC_6_P0_0 0x800000C40701103F +#define DDRPHY_DP16_RD_VREF_DAC_7_P0_0 0x800000C50701103F +#define BIT0_VREF_DAC 49 +#define BIT0_VREF_DAC_LEN 7 +#define BIT1_VREF_DAC 57 +#define BIT1_VREF_DAC_LEN 7 #define DDRPHY_DP16_WR_ERROR0_P0_0 0x8000001B0701103F #define DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0 0x800000200701103F -#define NIB_0_DQSEL_CAP 49 -#define NIB_0_DQSEL_RES 55 -#define NIB_1_DQSEL_CAP 57 -#define NIB_1_DQSEL_RES 63 +#define NIB_0_DQSEL_CAP 48 +#define NIB_0_DQSEL_CAP_LEN 2 +#define NIB_0_DQSEL_RES 53 +#define NIB_0_DQSEL_RES_LEN 3 +#define NIB_1_DQSEL_CAP 56 +#define NIB_1_DQSEL_CAP_LEN 2 +#define NIB_1_DQSEL_RES 61 +#define NIB_1_DQSEL_RES_LEN 3 #define DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0 0x800000210701103F -#define NIB_2_DQSEL_CAP 49 -#define NIB_2_DQSEL_RES 55 -#define NIB_3_DQSEL_CAP 57 -#define NIB_3_DQSEL_RES 63 +#define NIB_2_DQSEL_CAP 48 +#define NIB_2_DQSEL_CAP_LEN 2 +#define NIB_2_DQSEL_RES 53 +#define NIB_2_DQSEL_RES_LEN 3 +#define NIB_3_DQSEL_CAP 56 +#define NIB_3_DQSEL_CAP_LEN 2 +#define NIB_3_DQSEL_RES 61 +#define NIB_3_DQSEL_RES_LEN 3 #define DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0 0x800000220701103F -#define S0ACENSLICENDRV_DC 50 -#define S0ACENSLICEPDRV_DC 53 +#define S0ACENSLICENDRV_DC 48 +#define S0ACENSLICENDRV_DC_LEN 3 +#define S0ACENSLICEPDRV_DC 51 +#define S0ACENSLICEPDRV_DC_LEN 3 #define S0ACENSLICEPTERM_DC 56 #define DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0 0x800000230701103F -#define S1ACENSLICENDRV_DC 50 -#define S1ACENSLICEPDRV_DC 53 +#define S1ACENSLICENDRV_DC 48 +#define S1ACENSLICENDRV_DC_LEN 3 +#define S1ACENSLICEPDRV_DC 51 +#define S1ACENSLICEPDRV_DC_LEN 3 #define S1ACENSLICEPTERM_DC 56 #define DDRPHY_DP16_DLL_CNTL0_P0_0 0x800000240701103F @@ -515,9 +639,12 @@ #define DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0 0x8000002A0701103F #define DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0 0x8000002B0701103F -#define 
RXREG_VREG_COMPCON_DC 50 -#define RXREG_VREG_DRVCON_DC 55 -#define RXREG_VREG_REF_SEL_DC 58 +#define RXREG_VREG_COMPCON_DC 48 +#define RXREG_VREG_COMPCON_DC_LEN 3 +#define RXREG_VREG_DRVCON_DC 53 +#define RXREG_VREG_DRVCON_DC_LEN 3 +#define RXREG_VREG_REF_SEL_DC 56 +#define RXREG_VREG_REF_SEL_DC_LEN 3 #define DDRPHY_DP16_DLL_VREG_COARSE0_P0_0 0x8000002C0701103F #define DDRPHY_DP16_DLL_VREG_COARSE1_P0_0 0x8000002D0701103F @@ -526,7 +653,8 @@ #define DDRPHY_DP16_WR_VREF_STATUS1_P0_0 0x8000002F0701103F #define DDRPHY_DP16_DQSCLK_OFFSET_P0_0 0x800000370701103F -#define DQS_OFFSET 55 +#define DQS_OFFSET 49 +#define DQS_OFFSET_LEN 7 #define DDRPHY_DP16_WR_DELAY_VALUE_0_RP0_REG_P0_0 0x800000380701103F @@ -535,36 +663,45 @@ #define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR2_P0_0 0x8000025E0701103F #define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR3_P0_0 0x8000035E0701103F #define WR_VREF_RANGE_DRAM0 49 -#define WR_VREF_VALUE_DRAM0 55 +#define WR_VREF_VALUE_DRAM0 50 +#define WR_VREF_VALUE_DRAM0_LEN 6 #define WR_VREF_RANGE_DRAM1 57 -#define WR_VREF_VALUE_DRAM1 63 +#define WR_VREF_VALUE_DRAM1 58 +#define WR_VREF_VALUE_DRAM1_LEN 6 #define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR0_P0_0 0x8000005F0701103F #define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR1_P0_0 0x8000015F0701103F #define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR2_P0_0 0x8000025F0701103F #define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR3_P0_0 0x8000035F0701103F #define WR_VREF_RANGE_DRAM2 49 -#define WR_VREF_VALUE_DRAM2 55 +#define WR_VREF_VALUE_DRAM2 50 +#define WR_VREF_VALUE_DRAM2_LEN 6 #define WR_VREF_RANGE_DRAM3 57 -#define WR_VREF_VALUE_DRAM3 63 +#define WR_VREF_VALUE_DRAM3 58 +#define WR_VREF_VALUE_DRAM3_LEN 6 #define DDRPHY_DP16_WR_VREF_CONFIG0_P0_0 0x8000006C0701103F #define WR_CTR_1D_MODE_SWITCH 48 #define WR_CTR_RUN_FULL_1D 49 #define WR_CTR_2D_SMALL_STEP_VAL 52 -#define WR_CTR_2D_BIG_STEP_VAL 56 -#define WR_CTR_NUM_BITS_TO_SKIP 59 -#define WR_CTR_NUM_NO_INC_VREF_COMP 62 +#define WR_CTR_2D_BIG_STEP_VAL 53 +#define WR_CTR_2D_BIG_STEP_VAL_LEN 4 +#define WR_CTR_NUM_BITS_TO_SKIP 57 +#define WR_CTR_NUM_BITS_TO_SKIP_LEN 3 +#define WR_CTR_NUM_NO_INC_VREF_COMP 60 +#define WR_CTR_NUM_NO_INC_VREF_COMP_LEN 3 #define DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0 0x800000730701103F #define BB_LOCK0 48 #define BB_LOCK1 56 #define DDRPHY_DP16_WRCLK_PR_P0_0 0x800000740701103F -#define TSYS_WRCLK 55 +#define TSYS_WRCLK 49 +#define TSYS_WRCLK_LEN 7 -#define DDRPHY_DP16_IO_TX_CONFIG0_P0_0 0x800000750701103F -#define DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH 51 +#define DDRPHY_DP16_IO_TX_CONFIG0_P0_0 0x800000750701103F +#define DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH 48 +#define DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH_LEN 4 #define DDRPHY_DP16_RD_VREF_CAL_EN_P0_0 0x800000760701103F @@ -573,8 +710,10 @@ #define S1INSDLYTAP 62 #define DDRPHY_DP16_IO_TX_FET_SLICE_P0_0 0x800000780701103F -#define EN_SLICE_N_WR 55 -#define EN_SLICE_P_WR 63 +#define EN_SLICE_N_WR 49 +#define EN_SLICE_N_WR_LEN 7 +#define EN_SLICE_P_WR 57 +#define EN_SLICE_P_WR_LEN 7 #define DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0 0x8000007A0701103F @@ -588,8 +727,10 @@ #define DDRPHY_DP16_WR_VREF_CONFIG1_P0_0 0x800000EC0701103F #define WR_CTR_VREF_RANGE_SELECT 48 -#define WR_CTR_VREF_RANGE_CROSSOVER 55 -#define WR_CTR_VREF_SINGLE_RANGE_MAX 62 +#define WR_CTR_VREF_RANGE_CROSSOVER 49 +#define WR_CTR_VREF_RANGE_CROSSOVER_LEN 7 +#define WR_CTR_VREF_SINGLE_RANGE_MAX 56 +#define WR_CTR_VREF_SINGLE_RANGE_MAX_LEN 7 /* Yes, MASK1 is supposed to be before MASK0. 
*/ #define DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0 0x800000FA0701103F @@ -620,8 +761,10 @@ #define DDRPHY_ADR_DELAY1_P0_ADR3 0x80004C050701103F #define DDRPHY_ADR_DELAY2_P0_ADR3 0x80004C060701103F #define DDRPHY_ADR_DELAY3_P0_ADR3 0x80004C070701103F -#define ADR_DELAY_EVEN 55 -#define ADR_DELAY_ODD 63 +#define ADR_DELAY_EVEN 49 +#define ADR_DELAY_EVEN_LEN 7 +#define ADR_DELAY_ODD 57 +#define ADR_DELAY_ODD_LEN 7 #define DDRPHY_ADR_DELAY1_P0_ADR1 0x800044050701103F #define DDRPHY_ADR_DELAY3_P0_ADR1 0x800044070701103F @@ -643,8 +786,9 @@ #define SLICE_SEL6 60 #define SLICE_SEL7 62 -#define DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0 0x800080310701103F -#define DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH 62 +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0 0x800080310701103F +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH 59 +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH_LEN 4 #define ADR_SYSCLK_CNTRL_PR_P0_ADR32S0 0x800080320701103F @@ -682,10 +826,13 @@ #define DDR4_VLEVEL_BANK_GROUP 62 #define DDRPHY_PC_CONFIG1_P0 0x8000C00D0701103F -#define WRITE_LATENCY_OFFSET 51 -#define READ_LATENCY_OFFSET 55 -#define MEMORY_TYPE 61 -#define DDR4_LATENCY_SW 62 +#define WRITE_LATENCY_OFFSET 48 +#define WRITE_LATENCY_OFFSET_LEN 4 +#define READ_LATENCY_OFFSET 52 +#define READ_LATENCY_OFFSET_LEN 4 +#define MEMORY_TYPE 59 +#define MEMORY_TYPE_LEN 3 +#define DDR4_LATENCY_SW 62 #define DDRPHY_PC_RESETS_P0 0x8000C00E0701103F #define SYSCLK_RESET 49 @@ -704,11 +851,14 @@ #define ENA_RANK_PAIR_MSB 60 #define DDRPHY_PC_INIT_CAL_CONFIG1_P0 0x8000C0170701103F -#define REFRESH_COUNT 51 -#define REFRESH_CONTROL 53 +#define REFRESH_COUNT 48 +#define REFRESH_COUNT_LEN 4 +#define REFRESH_CONTROL 52 +#define REFRESH_CONTROL_LEN 2 #define REFRESH_ALL_RANKS 54 #define CMD_SNOOP_DIS 55 -#define REFRESH_INTERVAL 63 +#define REFRESH_INTERVAL 57 +#define REFRESH_INTERVAL_LEN 7 #define DDRPHY_PC_INIT_CAL_ERROR_P0 0x8000C0180701103F @@ -723,94 +873,129 @@ #define DDRPHY_SEQ_ODT_WR_CONFIG0_P0 0x8000C40A0701103F #define DDRPHY_SEQ_ODT_RD_CONFIG1_P0 0x8000C40F0701103F -#define ODT_RD_VALUES0 51 -#define ODT_RD_VALUES1 59 - -#define DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 0x8000C4120701103F -#define TMOD_CYCLES 51 -#define TRCD_CYCLES 55 -#define TRP_CYCLES 59 -#define TRFC_CYCLES 63 - -#define DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 0x8000C4130701103F -#define TZQINIT_CYCLES 51 -#define TZQCS_CYCLES 55 -#define TWLDQSEN_CYCLES 59 -#define TWRMRD_CYCLES 63 - -#define DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 0x8000C4140701103F -#define TODTLON_OFF_CYCLES 51 +#define ODT_RD_VALUES0 48 +#define ODT_RD_VALUES0_LEN 4 +#define ODT_RD_VALUES1 56 +#define ODT_RD_VALUES1_LEN 4 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 0x8000C4120701103F +#define TMOD_CYCLES 48 +#define TMOD_CYCLES_LEN 4 +#define TRCD_CYCLES 52 +#define TRCD_CYCLES_LEN 4 +#define TRP_CYCLES 56 +#define TRP_CYCLES_LEN 4 +#define TRFC_CYCLES 60 +#define TRFC_CYCLES_LEN 4 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 0x8000C4130701103F +#define TZQINIT_CYCLES 48 +#define TZQINIT_CYCLES_LEN 4 +#define TZQCS_CYCLES 52 +#define TZQCS_CYCLES_LEN 4 +#define TWLDQSEN_CYCLES 56 +#define TWLDQSEN_CYCLES_LEN 4 +#define TWRMRD_CYCLES 60 +#define TWRMRD_CYCLES_LEN 4 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 0x8000C4140701103F +#define TODTLON_OFF_CYCLES 48 +#define TODTLON_OFF_CYCLES_LEN 4 #define DDRPHY_SEQ_RD_WR_DATA0_P0 0x8000C4000701103F #define DDRPHY_SEQ_RD_WR_DATA1_P0 0x8000C4010701103F -#define RD_RW_DATA_REG0 63 -#define RD_RW_DATA_REG1 63 +#define RD_RW_DATA_REG0 48 +#define RD_RW_DATA_REG0_LEN 16 +#define 
RD_RW_DATA_REG1 48 +#define RD_RW_DATA_REG1_LEN 16 #define DDRPHY_SEQ_ERROR_STATUS0_P0 0x8000C4080701103F #define DDRPHY_SEQ_ODT_WR_CONFIG0_P0 0x8000C40A0701103F -#define ODT_WR_VALUES0 51 -#define ODT_WR_VALUES1 59 +#define ODT_WR_VALUES0 48 +#define ODT_WR_VALUES0_LEN 4 +#define ODT_WR_VALUES1 56 +#define ODT_WR_VALUES1_LEN 4 #define DDRPHY_SEQ_ODT_WR_CONFIG1_P0 0x8000C40B0701103F -#define ODT_WR_VALUES2 51 -#define ODT_WR_VALUES3 59 +#define ODT_WR_VALUES2 48 +#define ODT_WR_VALUES2_LEN 4 +#define ODT_WR_VALUES3 56 +#define ODT_WR_VALUES3_LEN 4 #define DDRPHY_SEQ_ODT_RD_CONFIG0_P0 0x8000C40E0701103F -#define ODT_RD_VALUES0 51 -#define ODT_RD_VALUES1 59 +#define ODT_RD_VALUES0 48 +#define ODT_RD_VALUES0_LEN 4 +#define ODT_RD_VALUES1 56 +#define ODT_RD_VALUES1_LEN 4 #define DDRPHY_SEQ_ODT_RD_CONFIG1_P0 0x8000C40F0701103F -#define ODT_RD_VALUES2 51 -#define ODT_RD_VALUES3 59 +#define ODT_RD_VALUES2 48 +#define ODT_RD_VALUES2_LEN 4 +#define ODT_RD_VALUES3 56 +#define ODT_RD_VALUES3_LEN 4 #define DDRPHY_RC_CONFIG0_P0 0x8000C8000701103F -#define GLOBAL_PHY_OFFSET 51 +#define GLOBAL_PHY_OFFSET 48 +#define GLOBAL_PHY_OFFSET_LEN 4 #define PERFORM_RDCLK_ALIGN 62 #define DDRPHY_RC_CONFIG1_P0 0x8000C8010701103F #define DDRPHY_RC_CONFIG2_P0 0x8000C8020701103F -#define CONSEC_PASS 52 +#define CONSEC_PASS 48 +#define CONSEC_PASS_LEN 5 #define DDRPHY_RC_ERROR_STATUS0_P0 0x8000C8050701103F #define DDRPHY_RC_CONFIG3_P0 0x8000C8070701103F -#define COARSE_CAL_STEP_SIZE 54 +#define COARSE_CAL_STEP_SIZE 51 +#define COARSE_CAL_STEP_SIZE_LEN 4 #define DDRPHY_RC_RDVREF_CONFIG0_P0 0x8000C8090701103F #define DDRPHY_RC_RDVREF_CONFIG1_P0 0x8000C80A0701103F -#define CMD_PRECEDE_TIME 55 -#define MPR_LOCATION 59 +#define CMD_PRECEDE_TIME 48 +#define CMD_PRECEDE_TIME_LEN 8 +#define MPR_LOCATION 56 +#define MPR_LOCATION_LEN 4 #define CALIBRATION_ENABLE 60 #define SKIP_RDCENTERING 61 #define DDRPHY_WC_CONFIG0_P0 0x8000CC000701103F -#define TWLO_TWLOE 55 +#define TWLO_TWLOE 48 +#define TWLO_TWLOE_LEN 8 #define WL_ONE_DQS_PULSE 56 -#define FW_WR_RD 62 +#define FW_WR_RD 57 +#define FW_WR_RD_LEN 6 #define CUSTOM_INIT_WRITE 63 #define DDRPHY_WC_CONFIG1_P0 0x8000CC010701103F -#define BIG_STEP 51 +#define BIG_STEP 48 +#define BIG_STEP_LEN 4 #define SMALL_STEP 54 -#define WR_PRE_DLY 60 +#define WR_PRE_DLY 55 +#define WR_PRE_DLY_LEN 6 #define DDRPHY_WC_CONFIG2_P0 0x8000CC020701103F -#define NUM_VALID_SAMPLES 51 -#define FW_RD_WR 57 -#define IPW_WR_WR 61 +#define NUM_VALID_SAMPLES 48 +#define NUM_VALID_SAMPLES_LEN 4 +#define FW_RD_WR 52 +#define FW_RD_WR_LEN 6 +#define IPW_WR_WR 58 +#define IPW_WR_WR_LEN 4 #define DDRPHY_WC_ERROR_STATUS0_P0 0x8000CC030701103F #define DDRPHY_WC_CONFIG3_P0 0x8000CC050701103F -#define MRS_CMD_DQ_OFF 60 +#define MRS_CMD_DQ_OFF 55 +#define MRS_CMD_DQ_OFF_LEN 6 -#define DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 0x8000CC060701103F +#define DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 0x8000CC060701103F #define WL_ENABLE_RTT_SWAP 48 -#define WR_CTR_ENABLE_RTT_SWAP 49 -#define WR_CTR_VREF_COUNTER_RESET_VAL 59 +#define WR_CTR_ENABLE_RTT_SWAP 49 +#define WR_CTR_VREF_COUNTER_RESET_VAL 50 +#define WR_CTR_VREF_COUNTER_RESET_VAL_LEN 10 #define DDRPHY_APB_CONFIG0_P0 0x8000D0000701103F #define RESET_ERR_RPT 49 diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index 6ccc797655c..47236d94d91 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -48,7 +48,8 @@ static void fir_unmask(int mcs_i) (val & PPC_BITMASK(36, 41)) >> 41); mca_and_or(id, mca_i, MBA_FARB0Q, ~PPC_BITMASK(48, 
53), - PPC_SHIFT(val, MBA_FARB0Q_CFG_RCD_PROTECTION_TIME)); + PPC_PLACE(val, MBA_FARB0Q_CFG_RCD_PROTECTION_TIME, + MBA_FARB0Q_CFG_RCD_PROTECTION_TIME_LEN)); /* * Due to hardware defect with DD2.0 certain errors are not handled @@ -151,9 +152,11 @@ static void set_fifo_mode(int mcs_i, int fifo) continue; mca_and_or(id, mca_i, MBA_RRQ0Q, ~PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE), - PPC_SHIFT(fifo, MBA_RRQ0Q_CFG_RRQ_FIFO_MODE)); + PPC_PLACE(fifo, MBA_RRQ0Q_CFG_RRQ_FIFO_MODE, + MBA_RRQ0Q_CFG_RRQ_FIFO_MODE_LEN)); mca_and_or(id, mca_i, MBA_WRQ0Q, ~PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE), - PPC_SHIFT(fifo, MBA_WRQ0Q_CFG_WRQ_FIFO_MODE)); + PPC_PLACE(fifo, MBA_WRQ0Q_CFG_WRQ_FIFO_MODE, + MBA_WRQ0Q_CFG_WRQ_FIFO_MODE_LEN)); } } @@ -181,7 +184,8 @@ static void load_maint_pattern(int mcs_i, const uint64_t pat[16]) * [11] AACR_ECCGEN = 1 */ mca_write(id, mca_i, AACR, - PPC_SHIFT(0x1F0, AACR_ADDRESS) | PPC_BIT(AACR_AUTOINC) | + PPC_PLACE(0x1F0, AACR_ADDRESS, AACR_ADDRESS_LEN) | + PPC_BIT(AACR_AUTOINC) | PPC_BIT(AACR_ECCGEN)); for (i = 0; i < 16; i++) { @@ -319,7 +323,8 @@ static void init_mcbist(int mcs_i) * [63] MCBCFGQ_CFG_ENABLE_HOST_ATTN = see above */ write_scom_for_chiplet(id, MCBCFGQ, - PPC_SHIFT(0b10, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE)); + PPC_PLACE(0x2, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE, + MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN)); /* * This sets up memory parameters, mostly gaps between commands. For as fast diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index f7059cc78d4..26f7dc2abe1 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -23,7 +23,8 @@ static void init_pecs(const uint8_t *iovalid_enable) PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION = 60, PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN = 30, - PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT = 42, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT = 40, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT_LEN = 3, PEC_PBCQHWCFG_REG_PE_DISABLE_OOO_MODE = 0x16, PEC_PBCQHWCFG_REG_PE_DISABLE_WR_SCOPE_GROUP = 42, PEC_PBCQHWCFG_REG_PE_CHANNEL_STREAMING_EN = 33, @@ -187,7 +188,8 @@ static void init_pecs(const uint8_t *iovalid_enable) */ val = 0; val |= PPC_BIT(PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN); - val |= PPC_SHIFT(7, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT); + val |= PPC_PLACE(7, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT_LEN); write_scom_for_chiplet(PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); } } diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index cc364634762..430f1f08feb 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -17,10 +17,14 @@ #define M_PATH_1_OSC_NOT_VALID 1 #define M_PATH_0_STEP_ALIGN_DISABLE 2 -#define M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET 25 -#define M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET 11 -#define M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET 7 -#define M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET 15 +#define M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET 24 +#define M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN 2 +#define M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET 8 +#define M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN 4 +#define M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET 5 +#define M_PATH_SYNC_CREATE_SPS_SELECT_LEN 3 +#define M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET 13 +#define M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN 3 #define BUS_DELAY_63 PPC_BITMASK(52, 63) #define BUS_DELAY_47 PPC_BITMASK(36, 47) @@ -162,13 +166,19 @@ static void calculate_m_path(void) ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | 
PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | - PPC_SHIFT(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET) | - PPC_SHIFT(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET) | - PPC_SHIFT(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET) | - PPC_SHIFT(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET)), + PPC_PLACE(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + PPC_PLACE(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET, + M_PATH_SYNC_CREATE_SPS_SELECT_LEN) | + PPC_PLACE(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET, + M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN)), PPC_BIT(M_PATH_1_OSC_NOT_VALID) | - PPC_SHIFT(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET) | - PPC_SHIFT(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET) | + PPC_PLACE(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | dual_edge_disable); } else { scom_and_or(PERV_TOD_M_PATH_CTRL_REG, From 214a40119cc416f29cdf07e37187f4b19aa8573a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 9 Nov 2021 19:23:14 +0100 Subject: [PATCH 104/213] soc/power9/homer.c: do not enable external interrupts Apparently this isn't a requirement for dead man loop. Signed-off-by: Krystian Hebel Change-Id: I8f90addfe69b280a486da1027c2be9d5e0e45406 --- src/soc/ibm/power9/homer.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 1aa0761b290..58614e7b62b 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -384,13 +384,8 @@ static void build_self_restore(struct homer_st *homer, (read_spr(SPR_LPCR) & ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE)) | (SPR_LPCR_HVICE | SPR_LPCR_HVEE | SPR_LPCR_HDEE); - /* - * Timing facilities may be lost. During their restoration Large Decrementer - * in LPCR may be initially turned off, which may result in a spurious - * Decrementer Exception. Disable External Interrupts on self-restore, they - * will be re-enabled later by coreboot. - */ - const uint64_t msr = read_msr() & ~PPC_BIT(48); + + const uint64_t msr = read_msr(); /* Clear en_attn for HID */ const uint64_t hid = read_spr(SPR_HID) & ~PPC_BIT(3); @@ -1085,14 +1080,11 @@ static void istep_16_1(int this_core) configure_xive(this_core); - printk(BIOS_ERR, "XIVE configured, enabling External Interrupt\n"); - write_msr(read_msr() | PPC_BIT(48)); - /* * This will request SBE to wake us up after we enter STOP 15. Hopefully * we will come back to the place where we were before. 
*/ - printk(BIOS_ERR, "Entering dead man loop\n"); + printk(BIOS_ERR, "XIVE configured, entering dead man loop\n"); psu_command(DEADMAN_LOOP_START, time); block_wakeup_int(this_core, 1); From 480a18cdc9a2cb591ee615d6161e152b520f5ce1 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 13 Dec 2021 00:48:24 +0200 Subject: [PATCH 105/213] soc/power9/: implement FSI access Change-Id: If646d290ebb599f5f937c97bfea8e73227a33ae3 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/fsi.c | 403 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/fsi.h | 59 +++++ src/soc/ibm/power9/romstage.c | 8 + 4 files changed, 471 insertions(+) create mode 100644 src/soc/ibm/power9/fsi.c create mode 100644 src/soc/ibm/power9/fsi.h diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 6bcda75e801..04dbd51ea98 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -29,6 +29,7 @@ romstage-y += i2c.c romstage-y += ccs.c romstage-y += mcbist.c romstage-y += timer.c +romstage-y += fsi.c ramstage-y += chip.c ramstage-y += homer.c ramstage-y += rom_media.c diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c new file mode 100644 index 00000000000..6fe42633bb4 --- /dev/null +++ b/src/soc/ibm/power9/fsi.c @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "fsi.h" + +#include +#include +#include +#include +#include +#include + +/* + * Some of the code relies on the fact that we're only interested in the current + * CPU (its MFSI) and the other CPU on port #1. Nothing is actually connected to + * any other ports, so using chip==0 for MFSI of CPU and chip==1 (port #1) and + * not being able to work with port #0 is OK. Getting rid of this requires + * passing both chip and port around (possibly encoded into a single variable), + * which is unnecessary otherwise. 
+ */ + +enum { + MAX_SLAVE_PORTS = 8, + + FSI2OPB_OFFSET_0 = 0x00020000, // SCOM address for FSI interactions + + MFSI_CONTROL_REG = 0x003400, // MFSI Control Register + + OPB_REG_CMD = 0x0000, // Command Register + OPB_REG_STAT = 0x0001, // Status Register + OPB_REG_RES = 0x0004, // Reset Register + + /* FSI Control Registers */ + FSI_MMODE_000 = 0x000, + FSI_MDLYR_004 = 0x004, + FSI_MLEVP0_018 = 0x018, + FSI_MSENP0_018 = 0x018, + FSI_MCENP0_020 = 0x020, + FSI_MSIEP0_030 = 0x030, + FSI_MAEB_070 = 0x070, // MREFP0 + FSI_MRESP0_0D0 = 0x0D0, + FSI_MESRB0_1D0 = 0x1D0, + FSI_MECTRL_2E0 = 0x2E0, + + /* FSI2PIB Engine (SCOM) */ + FSI2PIB_ENGINE = 0x001000, + FSI2PIB_RESET = FSI2PIB_ENGINE | 0x18, // see CFAM 1006 + FSI2PIB_COMPMASK = FSI2PIB_ENGINE | 0x30, // see CFAM 100C + FSI2PIB_TRUEMASK = FSI2PIB_ENGINE | 0x34, // see CFAM 100D + + /* MFSI Ports (512KB for each of 8 slaves) */ + MFSI_PORT_0 = 0x080000, + + /* FSI Slave Register */ + SLAVE_REGS = 0x000800, + SMODE_00 = SLAVE_REGS | 0x00, + SLRES_34 = SLAVE_REGS | 0x34, + + /* Bitmasks for OPB status register */ + OPB_STAT_ANYERR = 0x8000000000000000, // 0 is Any error + OPB_STAT_ERR_OPB = 0x7FEC000000000000, // 1:10,12:13 are OPB errors + OPB_STAT_ERRACK = 0x0010000000000000, // 11 is OPB errAck + OPB_STAT_READ_VALID = 0x0002000000000000, // 14 is the Valid Read bit + OPB_STAT_BUSY = 0x0001000000000000, // 15 is the Busy bit + OPB_STAT_ERR_CMFSI = 0x0000FC0000000000, // 16:21 are cMFSI errors + OPB_STAT_ERR_MFSI = 0x000000FC00000000, // 24:29 are MFSI errors + + OPB_STAT_NON_MFSI_ERR = (OPB_STAT_ERR_OPB | + OPB_STAT_ERRACK | + OPB_STAT_ANYERR), + OPB_STAT_ERR_ANY = (OPB_STAT_NON_MFSI_ERR | + OPB_STAT_ERR_CMFSI | + OPB_STAT_ERR_MFSI), +}; + +static void reset_pib2opb(void) +{ + write_scom(FSI2OPB_OFFSET_0 | OPB_REG_RES, 0x8000000000000000); + write_scom(FSI2OPB_OFFSET_0 | OPB_REG_STAT, 0x8000000000000000); +} + +static void cleanup_port_maeb_error(uint8_t port) +{ + /* See comment at the top of the file */ + const uint8_t master_chip = 0; + const uint8_t slave_chip = port; + + uint32_t compmask; + uint32_t truemask; + + /* + * Reset the bridge to clear up the residual errors + * 0=Bridge: General reset + */ + write_fsi(master_chip, MFSI_CONTROL_REG | FSI_MESRB0_1D0, 0x80000000); + + /* + * Perform error reset on Centaur FSI slave: write 0x4000000 to addr=834 + * + * Hostboot does this unconditionally, even though not all Power9 models + * have Centaur chips. Kept here just in case. 
+ */ + write_fsi(slave_chip, SLRES_34, 0x4000000); + + /* Need to save/restore the true/comp masks or the FSP will get annoyed */ + compmask = read_fsi(slave_chip, FSI2PIB_COMPMASK); + truemask = read_fsi(slave_chip, FSI2PIB_TRUEMASK); + + /* Then, write arbitrary data to 1018 (putcfam 1006) to reset any + * pending FSI2PIB errors */ + write_fsi(slave_chip, FSI2PIB_RESET, 0xFFFFFFFF); + + /* Restore the true/comp masks */ + write_fsi(slave_chip, FSI2PIB_COMPMASK, compmask); + write_fsi(slave_chip, FSI2PIB_TRUEMASK, truemask); +} + +static void init_fsi_port(uint8_t port) +{ + /* See comment at the top of the file */ + const uint8_t master_chip = 0; + const uint8_t slave_chip = port; + + uint8_t port_bit = (0x80 >> port); + + /* Write the port enable (enables clocks for FSI link) */ + write_fsi(master_chip, MFSI_CONTROL_REG | FSI_MSENP0_018, (uint32_t)port_bit << 24); + + /* Hostboot reads FSI_MESRB0_1D0 and does nothing to it, skipped here + * with the assumption that it has no effect */ + + /* + * Send the BREAK command to all slaves on this port (target slave0) + * part of FSI definition, write magic string into address zero. + */ + write_fsi(slave_chip, 0x00, 0xC0DE0000); + + if (read_fsi(master_chip, MFSI_CONTROL_REG | FSI_MAEB_070) != 0) { + /* Alternative is to pretend this slave doesn't exist */ + die("Detected MAEB error on FSI port #%d.\n", port); + } + + /* + * Setup the FSI slave to enable HW recovery, lbus ratio + * 2= Enable HW error recovery (bit 2) + * 6:7= Slave ID: 3 (default) + * 8:11= Echo delay: 0xF (default) + * 12:15= Send delay cycles: 0xF + * 20:23= Local bus ratio: 0x1 + */ + write_fsi(slave_chip, SMODE_00, 0x23FF0100); + + /* Wait for a little bit to be sure everything is done */ + udelay(1000); // 1ms + + /* + * Reset the port to clear up any previous error state (using idec reg + * as arbitrary address for lookups). Note, initial cfam reset should + * have cleaned up everything but this makes sure we're in a consistent + * state. + */ + cleanup_port_maeb_error(port); +} + +static void basic_master_init(void) +{ + const uint8_t chip = 0; + const uint32_t ctrl_reg = MFSI_CONTROL_REG; + + uint64_t tmp; + + /* Cleanup any initial error states */ + reset_pib2opb(); + + /* Ensure we don't have any errors before we even start */ + tmp = read_scom(FSI2OPB_OFFSET_0 | OPB_REG_STAT); + if (tmp & OPB_STAT_NON_MFSI_ERR) + die("Unclearable errors on MFSI initialization: 0x%016llx\n", tmp); + + /* + * Setup clock ratios and some error checking + * 1= Enable hardware error recovery + * 3= Enable parity checking + * 4:13= FSI clock ratio 0 is 1:1 + * 14:23= FSI clock ratio 1 is 4:1 + */ + write_fsi(chip, ctrl_reg | FSI_MMODE_000, 0x50040400); + + /* + * Setup error control reg to do nothing + * 16= Enable OPB_errAck [=1] + * 18= Freeze FSI port on FSI/OPB bridge error [=0] + */ + write_fsi(chip, ctrl_reg | FSI_MECTRL_2E0, 0x00008000); + + /* + * Note that this actually resets 4 ports twice rather than 8 ports + * once: OR makes 0x01XX equivalent to 0x00XX due to 0xD being + * 0b00001101 and ORing 0b00000001 to it changes nothing. Hostboot does + * it this way... 
+ */ + for (uint8_t port = 0; port < MAX_SLAVE_PORTS; port++) { + /* + * 0= Port: General reset + * 1= Port: Error reset + * 2= General reset to all bridges + * 3= General reset to all port controllers + * 4= Reset all FSI Master control registers + * 5= Reset parity error source latch + */ + write_fsi(chip, ctrl_reg | FSI_MRESP0_0D0 | (port * 4), 0xFC000000); + } + + /* Wait a little bit to be sure the reset is done */ + udelay(1000); // 1ms delay + + /* + * Setup error control reg for regular use + * (somehow this is the same as "to do nothing", a bug in Hostboot?) + * 16= Enable OPB_errAck [=1] + * 18= Freeze FSI port on FSI/OPB bridge error [=0] + */ + write_fsi(chip, ctrl_reg | FSI_MECTRL_2E0, 0x00008000); + + /* + * Set MMODE reg to enable HW recovery, parity checking, setup clock + * ratio + * 1= Enable hardware error recovery + * 3= Enable parity checking + * 4:13= FSI clock ratio 0 is 1:1 + * 14:23= FSI clock ratio 1 is 4:1 + */ + tmp = 0x50040400; + /* + * Setup timeout so that: + * code(10ms) > masterproc (0.9ms) > remote fsi master (0.8ms) + */ + tmp |= 0x00000010; // 26:27= Timeout (b01) = 0.9ms + write_fsi(chip, ctrl_reg | FSI_MMODE_000, tmp); +} + +static void basic_slave_init(void) +{ + const uint8_t chip = 0; + const uint32_t ctrl_reg = MFSI_CONTROL_REG; + + uint64_t tmp; + + /* Clear FSI Slave Interrupt on ports 0-7 */ + write_fsi(chip, ctrl_reg | FSI_MSIEP0_030, 0x00000000); + + /* + * Set the delay rates: + * 0:3,8:11= Echo delay cycles is 15 + * 4:7,12:15= Send delay cycles is 15 + */ + write_fsi(chip, ctrl_reg | FSI_MDLYR_004, 0xFFFF0000); + + /* Enable the ports */ + write_fsi(chip, ctrl_reg | FSI_MSENP0_018, 0xFF000000); + + udelay(1000); // 1ms + + /* Clear the port enable */ + write_fsi(chip, ctrl_reg | FSI_MCENP0_020, 0xFF000000); + + /* + * Reset all bridges and ports (again?). + * Line above is from Hostboot. Actually this seems to reset only port + * 0 and with a bit different mask (0xFC000000 above). 
+ */ + write_fsi(chip, ctrl_reg | FSI_MRESP0_0D0, 0xF0000000); + + /* Wait a little bit to be sure reset is done */ + udelay(1000); // 1ms + + /* Note: not enabling IPOLL because hotplug is not supported */ + + /* Turn off Legacy mode */ + tmp = read_fsi(chip, ctrl_reg | FSI_MMODE_000); + tmp &= ~0x00000040; // bit 25: clock/4 mode + write_fsi(chip, ctrl_reg | FSI_MMODE_000, tmp); +} + +void fsi_init(void) +{ + uint8_t chips; + + basic_master_init(); + basic_slave_init(); + + chips = fsi_get_present_chips(); + if (chips & 0x2) + init_fsi_port(/*port=*/1); +} + +uint8_t fsi_get_present_chips(void) +{ + const uint8_t chip = 0; + const uint32_t ctrl_reg = MFSI_CONTROL_REG; + + uint8_t chips; + uint8_t present_slaves; + + present_slaves = (read_fsi(chip, ctrl_reg | FSI_MLEVP0_018) >> 24); + + /* First CPU is always there (it executes this code) */ + chips = 0x01; + /* Status of the second CPU (connected to port #1) */ + chips |= ((present_slaves & 0x40) >> 5); + + return chips; +} + +/* Polls OPB dying on error or timeout */ +static inline uint64_t poll_opb(uint8_t chip) +{ + enum { + MAX_WAIT_LOOPS = 1000, + TIMEOUT_STEP_US = 10, + }; + + const uint64_t stat_addr = FSI2OPB_OFFSET_0 | OPB_REG_STAT; + + int i; + uint64_t tmp; + + uint64_t err_mask; + + /* MFSI are irrelevant for access to the chip we're running on, only + * OPB bits are of interest */ + err_mask = OPB_STAT_NON_MFSI_ERR; + if (chip == 1) { + /* Second CPU is routed through MFSI of the first CPU */ + err_mask |= OPB_STAT_ERR_MFSI; + } + + /* Timeout after 10ms, check every 10us, supposedly there is hardware + * timeout after 1ms */ + tmp = read_scom(stat_addr); + for (i = 0; (tmp & OPB_STAT_BUSY) && !(tmp & err_mask) && i < MAX_WAIT_LOOPS; i++) { + udelay(TIMEOUT_STEP_US); + tmp = read_scom(stat_addr); + } + + if (tmp & err_mask) + die("Detected an error while polling OPB for chip #%d: 0x%016llx\n", chip, tmp); + + if (i == MAX_WAIT_LOOPS) { + die("Timed out while polling OPB for chip #%d, last response: 0x%016llx\n", + chip, tmp); + } + + return tmp; +} + +uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_t size) +{ + enum { + WRITE_NOT_READ = PPC_BIT(0), + SIZE_1B = PPC_PLACE(0, 1, 2), + SIZE_4B = PPC_PLACE(3, 1, 2), + }; + + uint64_t cmd; + uint64_t response; + + assert(size == 1 || size == 4); + + /* See comment at the top of the file */ + if (chip != 0) { + const uint8_t port = chip; + addr |= MFSI_PORT_0 * (port + 1); + } + + /* Make sure there are no other ops running before we start. The + * function will die on error, so not handling return value. */ + (void)poll_opb(chip); + + /* + * Register is mentioned in the docs, but contains mostly reserved + * fields. This is what can be decoded from code: + * [0] WRITE_NOT_READ = 1 for write, 0 for read + * [1-2] size = 3 // 0b00 - 1B; 0b01 - 2B; 0b11 - 4B + * [3-31] FSI address = addr // FSI spec says address is 23 bits + * [32-63] data to write = data // don't care for read + */ + cmd = (size == 4 ? SIZE_4B : SIZE_1B) | PPC_PLACE(addr, 3, 29) | data; + if (!is_read) + cmd |= WRITE_NOT_READ; + + write_scom(FSI2OPB_OFFSET_0 | OPB_REG_CMD, cmd); + + /* Poll for complete and get the data back. 
*/ + response = poll_opb(chip); + + /* A write operation is done if poll_opb hasn't died */ + if (!is_read) + return 0; + + if (!(response & OPB_STAT_READ_VALID)) + die("FSI read has failed.\n"); + return (response & 0xFFFFFFFF); +} diff --git a/src/soc/ibm/power9/fsi.h b/src/soc/ibm/power9/fsi.h new file mode 100644 index 00000000000..2d31cf9c164 --- /dev/null +++ b/src/soc/ibm/power9/fsi.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_FSI_H +#define __SOC_IBM_POWER9_FSI_H + +#include +#include + +void fsi_init(void); + +/* Returns mask of available CPU chips (either 0x01 or 0x03) */ +uint8_t fsi_get_present_chips(void); + +/* This isn't meant to be used directly, see below for interface of this unit */ +uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_t size); + +/* FSI-functions operate on byte addresses */ + +static inline uint32_t read_fsi(uint8_t chip, uint32_t addr) +{ + return fsi_op(chip, addr, /*data=*/0, /*is_read=*/true, /*size=*/4); +} + +static inline void write_fsi(uint8_t chip, uint32_t addr, uint32_t data) +{ + (void)fsi_op(chip, addr, data, /*is_read=*/false, /*size=*/4); +} + +/* CFAM-functions are FSI-functions that operate on 4-byte word addresses */ + +static inline uint32_t cfam_addr_to_fsi(uint32_t cfam) +{ + /* + * Such masks allow overlapping of two components after address + * translation (real engine mask is probably 0xF100), but let's be in + * sync with Hostboot on this to play it safe. + */ + const uint32_t CFAM_ADDRESS_MASK = 0x1FF; + const uint32_t CFAM_ENGINE_OFFSET_MASK = 0xFE00; + + /* + * Address needs to be multiplied by 4 because CFAM register addresses + * are word offsets but FSI addresses are byte offsets. Address + * modification needs to preserve the engine's offset in the top byte. 
+ */ + return ((cfam & CFAM_ADDRESS_MASK) * 4) | (cfam & CFAM_ENGINE_OFFSET_MASK); +} + +static inline uint32_t read_cfam(uint8_t chip, uint32_t addr) +{ + return read_fsi(chip, cfam_addr_to_fsi(addr)); +} + +static inline void write_cfam(uint8_t chip, uint32_t addr, uint32_t data) +{ + write_fsi(chip, cfam_addr_to_fsi(addr), data); +} + +#endif /* __SOC_IBM_POWER9_FSI_H */ diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index d9dada9aa19..a45cdd2e56b 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -13,6 +13,7 @@ #include #include +#include "fsi.h" #include "pci.h" /* DIMM SPD addresses */ @@ -335,6 +336,8 @@ static void prepare_dimm_data(void) void main(void) { + uint8_t chips; + uint8_t phb_active_mask = 0; uint8_t iovalid_enable[MAX_PEC_PER_PROC] = { 0 }; @@ -354,6 +357,11 @@ void main(void) */ (void)ipmi_init_and_start_bmc_wdt(CONFIG_BMC_BT_BASE, 120, TIMEOUT_HARD_RESET); + printk(BIOS_EMERG, "Initializing FSI...\n"); + fsi_init(); + chips = fsi_get_present_chips(); + printk(BIOS_EMERG, "Initialized FSI (chips mask: 0x%02X)\n", chips); + istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); istep_10_13(); From f01b25c62acb50058a51df268a74cd62358e4d30 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 23 Dec 2021 01:53:54 +0200 Subject: [PATCH 106/213] soc/power9/: support FSI I2C Change-Id: I8f0f3e392c588e45f11d2a3cbd33b72570980d48 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/fsi.c | 2 + src/soc/ibm/power9/fsi.h | 19 ++++ src/soc/ibm/power9/i2c.c | 165 +++++++++++++++++++++++++++----- 4 files changed, 163 insertions(+), 24 deletions(-) diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 04dbd51ea98..3865685c366 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -45,6 +45,7 @@ ramstage-y += xive.c ramstage-y += int_vectors.S ramstage-y += i2c.c ramstage-y += occ.c +ramstage-y += fsi.c MB_DIR = src/mainboard/$(MAINBOARDDIR) ONECPU_DTB = 1-cpu.dtb diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c index 6fe42633bb4..2d651f37105 100644 --- a/src/soc/ibm/power9/fsi.c +++ b/src/soc/ibm/power9/fsi.c @@ -291,6 +291,8 @@ void fsi_init(void) chips = fsi_get_present_chips(); if (chips & 0x2) init_fsi_port(/*port=*/1); + + fsi_i2c_init(chips); } uint8_t fsi_get_present_chips(void) diff --git a/src/soc/ibm/power9/fsi.h b/src/soc/ibm/power9/fsi.h index 2d31cf9c164..ae3f302e3dd 100644 --- a/src/soc/ibm/power9/fsi.h +++ b/src/soc/ibm/power9/fsi.h @@ -6,11 +6,16 @@ #include #include +/* Base FSI address for registers of a FSI I2C master */ +#define I2C_FSI_MASTER_BASE_ADDR 0x01800 + void fsi_init(void); /* Returns mask of available CPU chips (either 0x01 or 0x03) */ uint8_t fsi_get_present_chips(void); +void fsi_i2c_init(uint8_t chips); + /* This isn't meant to be used directly, see below for interface of this unit */ uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_t size); @@ -56,4 +61,18 @@ static inline void write_cfam(uint8_t chip, uint32_t addr, uint32_t data) write_fsi(chip, cfam_addr_to_fsi(addr), data); } +/* Operations on FSI I2C registers */ + +static inline void write_fsi_i2c(uint8_t chip, uint8_t reg, uint32_t data, uint8_t size) +{ + uint32_t addr = I2C_FSI_MASTER_BASE_ADDR + reg * 4; + fsi_op(chip, addr, data, /*is_read=*/false, size); +} + +static inline uint32_t read_fsi_i2c(uint8_t chip, uint8_t reg, uint8_t size) +{ + uint32_t addr = 
I2C_FSI_MASTER_BASE_ADDR + reg * 4;
+	return fsi_op(chip, addr, /*data=*/0, /*is_read=*/true, size);
+}
+
 #endif /* __SOC_IBM_POWER9_FSI_H */
diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c
index ebbd8f63875..e0b7c8a7c8e 100644
--- a/src/soc/ibm/power9/i2c.c
+++ b/src/soc/ibm/power9/i2c.c
@@ -8,11 +8,17 @@
 #include 
 #include 
 
-#define FIFO_REG(bus)		(0xA0004 | ((bus) << 12))
-#define CMD_REG(bus)		(0xA0005 | ((bus) << 12))
-#define MODE_REG(bus)		(0xA0006 | ((bus) << 12))
-#define STATUS_REG(bus)		(0xA000B | ((bus) << 12))
-#define RES_ERR_REG(bus)	(0xA000C | ((bus) << 12))
+#include "fsi.h"
+
+/* Base SCOM address for registers of the host I2C master */
+#define I2C_HOST_MASTER_BASE_ADDR 0xA0004
+
+#define FIFO_REG 0
+#define CMD_REG 1
+#define MODE_REG 2
+#define STATUS_REG 7
+#define RESET_REG 7
+#define RES_ERR_REG 8
 
 // CMD register
 #define LEN_PLACE(x)	PPC_PLACE((x), 16, 16)
@@ -28,13 +34,18 @@
 #define CMD_COMPLETE	0x0100000000000000
 #define FIFO_COUNT_FLD	0x0000000F00000000
 #define BUSY		0x0000030000000000
+#define SCL		0x0000080000000000
+#define SDA		0x0000040000000000
 #define UNRECOVERABLE	0xFC80000000000000
 
-#define CLEAR_ERR	0x8000000000000000
-
 #define I2C_MAX_FIFO_CAPACITY 8
 
 #define SPD_I2C_BUS 3
 
+enum i2c_type {
+	HOST_I2C,	// I2C via XSCOM (first CPU)
+	FSI_I2C,	// I2C via FSI (second CPU)
+};
+
 /* return -1 if SMBus errors otherwise return 0 */
 static int get_spd(u8 *spd, u8 addr)
 {
@@ -82,17 +93,79 @@ void get_spd_smbus(struct spd_block *blk)
 	blk->len = SPD_PAGE_LEN_DDR4;
 }
 
+/* The four functions below take a 64-bit address and data as for SCOM and do
+ * the translation for FSI, which is 32-bit (as is the actual I2C interface).
+ * For FSI I2C they also interpret the address as a register number. */
+
+static void write_i2c(enum i2c_type type, uint64_t addr, uint64_t data)
+{
+	if (type == HOST_I2C)
+		write_scom(addr, data);
+	else
+		write_fsi_i2c(/*chip=*/1, addr, data >> 32, /*size=*/4);
+}
+
+static uint64_t read_i2c(enum i2c_type type, uint64_t addr)
+{
+	if (type == HOST_I2C)
+		return read_scom(addr);
+	else
+		return (uint64_t)read_fsi_i2c(/*chip=*/1, addr, /*size=*/4) << 32;
+}
+
+static void write_i2c_byte(enum i2c_type type, uint64_t addr, uint8_t data)
+{
+	if (type == HOST_I2C)
+		write_scom(addr, (uint64_t)data << 56);
+	else
+		write_fsi_i2c(/*chip=*/1, addr, (uint32_t)data << 24, /*size=*/1);
+}
+
+static uint8_t read_i2c_byte(enum i2c_type type, uint64_t addr)
+{
+	if (type == HOST_I2C)
+		return read_scom(addr) >> 56;
+	else
+		return read_fsi_i2c(/*chip=*/1, addr, /*size=*/1) >> 24;
+}
+
+/*
+ * There are 4 buses/engines per CPU, but the function accepts bus [0-7] in
+ * order to allow specifying a bus of the second CPU while still following
+ * coreboot's prototype for this function. [0-3] are buses of the first CPU
+ * and [4-7] map to buses [0-3] of the second one. However, it looks like only
+ * one bus is available through FSI, because its number is never set.
+ */
 int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment,
 			  int seg_count)
 {
+	enum { BUSES_PER_CPU = 4 };
+
 	int i;
 	uint64_t r;
 
-	if (bus > 3) {
+	enum i2c_type type = HOST_I2C;
+	if (bus >= BUSES_PER_CPU) {
+		bus -= BUSES_PER_CPU;
+		type = FSI_I2C;
+	}
+
+	if (bus >= BUSES_PER_CPU) {
 		printk(BIOS_ERR, "I2C bus out of range (%d)\n", bus);
 		return -1;
 	}
 
+	/* There seems to be only one engine on FSI I2C */
+	uint32_t base = (type == FSI_I2C ? 
0 : I2C_HOST_MASTER_BASE_ADDR | (bus << 12)); + /* Addition is fine, because there will be no carry in bus number bits */ + uint32_t fifo_reg = base + FIFO_REG; + uint32_t cmd_reg = base + CMD_REG; + uint32_t mode_reg = base + MODE_REG; + uint32_t status_reg = base + STATUS_REG; + uint32_t res_err_reg = base + RES_ERR_REG; + + uint64_t clear_err = (type == HOST_I2C ? PPC_BIT(0) : 0); + /* * Divisor fields in this register are poorly documented: * @@ -125,7 +198,7 @@ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, bit_rate_div = 0x0048; } - write_scom(RES_ERR_REG(bus), CLEAR_ERR); + write_i2c(type, res_err_reg, clear_err); for (i = 0; i < seg_count; i++) { unsigned int len; @@ -142,15 +215,17 @@ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, read_cont = (!stop && !read_not_write) ? READ_CONT : 0; port = segment[i].slave & 0x80 ? 1 : 0; - write_scom(MODE_REG(bus), - PPC_PLACE(bit_rate_div, 0, 16) | PPC_PLACE(port, 16, 6)); - write_scom(RES_ERR_REG(bus), CLEAR_ERR); - write_scom(CMD_REG(bus), START | stop | WITH_ADDR | read_not_write | read_cont | - ADDR_PLACE(segment[i].slave) | - LEN_PLACE(segment[i].len)); + write_i2c(type, mode_reg, + PPC_PLACE(bit_rate_div, 0, 16) | PPC_PLACE(port, 16, 6)); + + write_i2c(type, res_err_reg, clear_err); + write_i2c(type, cmd_reg, + START | stop | WITH_ADDR | read_not_write | read_cont | + ADDR_PLACE(segment[i].slave) | + LEN_PLACE(segment[i].len)); for (len = 0; len < segment[i].len; len++) { - r = read_scom(STATUS_REG(bus)); + r = read_i2c(type, status_reg); if (read_not_write) { /* Read */ @@ -160,11 +235,10 @@ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); return -1; } - r = read_scom(STATUS_REG(bus)); + r = read_i2c(type, status_reg); } - r = read_scom(FIFO_REG(bus)); - segment[i].buf[len] = r >> 56; + segment[i].buf[len] = read_i2c_byte(type, fifo_reg); } else { @@ -174,23 +248,66 @@ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); return -1; } - r = read_scom(STATUS_REG(bus)); + r = read_i2c(type, status_reg); } - write_scom(FIFO_REG(bus), (uint64_t) segment[i].buf[len] << 56); + write_i2c_byte(type, fifo_reg, segment[i].buf[len]); } } - r = read_scom(STATUS_REG(bus)); + r = read_i2c(type, status_reg); while ((r & CMD_COMPLETE) == 0) { if (r & UNRECOVERABLE) { - printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); + printk(BIOS_INFO, "I2C transfer failed to complete (0x%16.16llx)\n", r); return -1; } - r = read_scom(STATUS_REG(bus)); + r = read_i2c(type, status_reg); } } return 0; } + +/* Defined in fsi.h */ +void fsi_i2c_init(uint8_t chips) +{ + uint64_t status; + + /* Nothing to do if second CPU isn't present */ + if (!(chips & 0x02)) + return; + + /* + * Sometimes I2C status looks like 0x_____8__ (i.e., SCL is set, but + * not SDA), which indicates I2C hardware is in a messed up state that + * it won't leave on its own. Sending an additional STOP *before* reset + * addresses this and doesn't hurt when I2C isn't broken. 
+ */ + write_i2c(FSI_I2C, CMD_REG, STOP); + + /* Reset I2C */ + write_i2c(FSI_I2C, RESET_REG, 0); + + /* Wait for SCL */ + status = read_i2c(FSI_I2C, STATUS_REG); + while ((status & SCL) == 0) { + if (status & UNRECOVERABLE) + die("Unrecoverable I2C error while waiting for SCL: 0x%016llx\n", + status); + status = read_i2c(FSI_I2C, STATUS_REG); + } + + /* Send STOP command */ + write_i2c(FSI_I2C, CMD_REG, STOP); + + status = read_i2c(FSI_I2C, STATUS_REG); + while ((status & CMD_COMPLETE) == 0) { + if (status & UNRECOVERABLE) + die("Unrecoverable I2C error on STOP: 0x%016llx\n", status); + status = read_i2c(FSI_I2C, STATUS_REG); + } + + if ((status & (SCL | SDA | BUSY)) != (SCL | SDA)) + die("Invalid I2C state after initialization: 0x%016llx\n", status); +} From 71bcf112fdc99c5ac00db5d081c39e8907b585d0 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 14 Dec 2021 01:09:36 +0200 Subject: [PATCH 107/213] soc/power9/mvpd.c: support getting MVPD of second CPU These are just internal code changes without exposing this functionality through the API. Change-Id: I34b020e201693bcd643fbfa36d7964855e4ef123 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/mvpd.c | 61 ++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 86c969b4c10..a56c266d6ef 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -62,9 +62,9 @@ struct pt_record { /* Reads from a single EEPROM chip, which is deduced from offset. Returns zero on success. */ -static int read_eeprom_chip(uint32_t offset, void *data, uint16_t len) +static int read_eeprom_chip(uint8_t cpu, uint32_t offset, void *data, uint16_t len) { - const unsigned int bus = 1; + const unsigned int bus = 1 + 4 * cpu; // four I2C buses per CPU uint16_t addr = 0xA0; uint16_t slave = 0; uint16_t actual_offset = 0; @@ -97,21 +97,21 @@ static int read_eeprom_chip(uint32_t offset, void *data, uint16_t len) /* Reads from EEPROM handling accesses across chip boundaries (64 KiB). Returns zero on success. 
*/ -static int read_eeprom(uint32_t offset, void *data, uint32_t len) +static int read_eeprom(uint8_t cpu, uint32_t offset, void *data, uint32_t len) { uint16_t len1 = 0; uint16_t len2 = 0; assert(len != 0); if (offset / EEPROM_CHIP_SIZE == (offset + len - 1) / EEPROM_CHIP_SIZE) - return read_eeprom_chip(offset, data, len); + return read_eeprom_chip(cpu, offset, data, len); len1 = EEPROM_CHIP_SIZE - offset; len2 = len - len1; - if (read_eeprom_chip(offset, data, len1)) + if (read_eeprom_chip(cpu, offset, data, len1)) return 1; - if (read_eeprom_chip(EEPROM_CHIP_SIZE, (uint8_t *)data + len1, len2)) + if (read_eeprom_chip(cpu, EEPROM_CHIP_SIZE, (uint8_t *)data + len1, len2)) return 1; return 0; @@ -119,7 +119,7 @@ static int read_eeprom(uint32_t offset, void *data, uint32_t len) /* Finds and extracts i-th keyword (`index` specifies which one) from a record in EEPROM that starts at specified offset */ -static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, +static bool eeprom_extract_kwd(uint8_t cpu, uint64_t offset, uint8_t index, const char *record_name, const char *kwd_name, uint8_t *buf, size_t *size) { @@ -131,7 +131,7 @@ static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, if (strlen(kwd_name) != VPD_KWD_NAME_LEN) die("Keyword name has wrong length: %s!\n", kwd_name); - if (read_eeprom(offset, &record_size, sizeof(record_size))) + if (read_eeprom(cpu, offset, &record_size, sizeof(record_size))) die("Failed to read record size from EEPROM\n"); offset += VPD_RECORD_SIZE_LEN; @@ -140,7 +140,7 @@ static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, /* Skip mandatory "RT" and one byte of keyword size (always 4) */ offset += VPD_KWD_NAME_LEN + 1; - if (read_eeprom(offset, name, sizeof(name))) + if (read_eeprom(cpu, offset, name, sizeof(name))) die("Failed to read record name from EEPROM\n"); if (memcmp(name, record_name, VPD_RECORD_NAME_LEN)) @@ -153,7 +153,7 @@ static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, uint8_t name_buf[VPD_KWD_NAME_LEN]; uint16_t kwd_size = 0; - if (read_eeprom(offset, name_buf, sizeof(name_buf))) + if (read_eeprom(cpu, offset, name_buf, sizeof(name_buf))) die("Failed to read keyword name from EEPROM\n"); /* This is always the last keyword */ @@ -164,13 +164,13 @@ static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, if (name_buf[0] == '#') { /* This is a large (two-byte size) keyword */ - if (read_eeprom(offset, &kwd_size, sizeof(kwd_size))) + if (read_eeprom(cpu, offset, &kwd_size, sizeof(kwd_size))) die("Failed to read large keyword size from EEPROM\n"); kwd_size = le16toh(kwd_size); offset += 2; } else { uint8_t small_size; - if (read_eeprom(offset, &small_size, sizeof(small_size))) + if (read_eeprom(cpu, offset, &small_size, sizeof(small_size))) die("Failed to read small keyword size from EEPROM\n"); kwd_size = small_size; offset += 1; @@ -182,7 +182,7 @@ static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, (unsigned long long)*size, (unsigned long long)kwd_size); } - if (read_eeprom(offset, buf, kwd_size)) + if (read_eeprom(cpu, offset, buf, kwd_size)) die("Failed to read keyword body from EEPROM\n"); *size = kwd_size; @@ -197,11 +197,13 @@ static bool eeprom_extract_kwd(uint64_t offset, uint8_t index, /* Builds MVPD partition for a single processor (64 KiB per chip) or returns an already built one */ -static const uint8_t *mvpd_get(void) +static const uint8_t *mvpd_get(uint8_t cpu) { - enum { SECTION_SIZE = 64 * KiB }; + /* Actual size of MVPD is a bit less than 42 KiB while maximum is 64 + * KiB, 
save some memory */ + enum { MVPD_SIZE = 42 * KiB }; - static uint8_t mvpd_buf[SECTION_SIZE]; + static uint8_t mvpd_bufs[2][MVPD_SIZE]; static const char *const mvpd_records[] = { "CRP0", "CP00", "VINI", @@ -210,7 +212,9 @@ static const uint8_t *mvpd_get(void) "VRML", "VWML", "VER0", "MER0", "VMSC", }; - struct mvpd_toc_entry *toc = (void *)&mvpd_buf[0]; + uint8_t *mvpd_buf = mvpd_bufs[cpu]; + + struct mvpd_toc_entry *toc = (void *)mvpd_buf; uint16_t mvpd_offset = MVPD_TOC_SIZE; uint8_t pt_buf[256]; @@ -222,11 +226,14 @@ static const uint8_t *mvpd_get(void) /* Skip the ECC data + "large resource" byte (0x84) in the VHDR */ uint64_t offset = 12; + if (cpu >= 2) + die("Unsupported CPU number for MVPD query: %d.\n", cpu); + /* Partition is already constructed (filled one can't be empty) */ if (mvpd_buf[0] != '\0') return mvpd_buf; - if (!eeprom_extract_kwd(offset, 0, "VHDR", "PT", pt_buf, &pt_size)) + if (!eeprom_extract_kwd(cpu, offset, 0, "VHDR", "PT", pt_buf, &pt_size)) die("Failed to find PT keyword of VHDR record in EEPROM.\n"); if (memcmp(pt_record->record_name, "VTOC", VPD_RECORD_NAME_LEN)) @@ -245,7 +252,7 @@ static const uint8_t *mvpd_get(void) uint8_t entry_count; pt_size = sizeof(pt_buf); - if (!eeprom_extract_kwd(offset, i, "VTOC", "PT", pt_buf, &pt_size)) { + if (!eeprom_extract_kwd(cpu, offset, i, "VTOC", "PT", pt_buf, &pt_size)) { if (i == 0) die("Failed to find any PT keyword of VTOC record in EEPROM\n"); break; @@ -268,7 +275,7 @@ static const uint8_t *mvpd_get(void) if (k == ARRAY_SIZE(mvpd_records)) continue; - if (mvpd_offset + record_size > SECTION_SIZE) { + if (mvpd_offset + record_size > MVPD_SIZE) { die("MVPD section doesn't have space for %.4s record of " "size %d\n", record_name, record_size); } @@ -280,7 +287,8 @@ static const uint8_t *mvpd_get(void) toc->reserved[0] = 0x5A; toc->reserved[1] = 0x5A; - if (read_eeprom(record_offset, mvpd_buf + mvpd_offset, record_size)) + if (read_eeprom(cpu, record_offset, mvpd_buf + mvpd_offset, + record_size)) die("Failed to read %.4s record from EEPROM\n", record_name); ++toc; @@ -371,11 +379,10 @@ static struct ring_hdr *find_ring(uint8_t chiplet_id, uint8_t even_odd, return NULL; } -static const uint8_t *mvpd_get_keyword(const char *record_name, - const char *kwd_name, - size_t *kwd_size) +static const uint8_t *mvpd_get_keyword(uint8_t cpu, const char *record_name, + const char *kwd_name, size_t *kwd_size) { - const uint8_t *mvpd = mvpd_get(); + const uint8_t *mvpd = mvpd_get(cpu); struct mvpd_toc_entry *mvpd_toc = (void *)mvpd; struct mvpd_toc_entry *toc_entry = NULL; @@ -403,7 +410,7 @@ bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, size_t kwd_size = 0; bool copied_data = false; - kwd = mvpd_get_keyword(record_name, kwd_name, &kwd_size); + kwd = mvpd_get_keyword(/*cpu=*/0, record_name, kwd_name, &kwd_size); if (kwd == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); @@ -459,7 +466,7 @@ bool mvpd_extract_ring(const char *record_name, const char *kwd_name, struct ring_hdr *ring = NULL; uint32_t ring_size = 0; - rings = mvpd_get_keyword(record_name, kwd_name, &rings_size); + rings = mvpd_get_keyword(/*cpu=*/0, record_name, kwd_name, &rings_size); if (rings == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); From 2ca4a413c81fb7922bacb40f046a0cf99cfedcc4 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 14 Dec 2021 01:16:19 +0200 Subject: [PATCH 108/213] soc/power9: require chip number on MVPD accesses Chip number is hard-coded to be zero for now, this 
should change when we start constructing HOMER for the second CPU. Change-Id: I90f2f26581eebad423fbb8a42cc452c2b13e1faa Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 11 ++++++----- src/soc/ibm/power9/homer.c | 3 ++- src/soc/ibm/power9/mvpd.c | 22 +++++++++++++--------- src/soc/ibm/power9/powerbus.c | 3 ++- src/soc/ibm/power9/pstates.c | 9 ++++++--- src/soc/ibm/power9/tor.c | 4 +++- 6 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index d69e2d41d37..ea80e02cb15 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -50,16 +50,17 @@ void mvpd_device_unmount(void); const struct region_device *mvpd_device_ro(void); /* Reads #V of one of LRP records (mind that there is only one buffer) */ -const struct voltage_kwd *mvpd_get_voltage_data(int lrp); +const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp); /* Finds a specific keyword in MVPD partition and extracts it. *size is updated * to reflect needed or used space in the buffer. */ -bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, - uint8_t *buf, uint32_t *size); +bool mvpd_extract_keyword(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t *buf, uint32_t *size); /* Finds a specific ring in MVPD partition and extracts it */ -bool mvpd_extract_ring(const char *record_name, const char *kwd_name, - uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, +bool mvpd_extract_ring(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t chiplet_id, + uint8_t even_odd, uint16_t ring_id, uint8_t *buf, uint32_t buf_size); #endif /* CPU_PPC64_MVPD_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 58614e7b62b..695bbad1971 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2257,7 +2257,8 @@ const struct voltage_bucket_data * get_voltage_data(void) uint8_t i = 0; /* Using LRP0 because frequencies are the same in all LRP records */ - voltage = mvpd_get_voltage_data(0); + /* TODO: don't hard-code chip if values are not the same among them */ + voltage = mvpd_get_voltage_data(/*chip=*/0, /*lrp=*/0); for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { bucket = &voltage->buckets[i]; diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index a56c266d6ef..421865a8ffb 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -403,14 +403,14 @@ static const uint8_t *mvpd_get_keyword(uint8_t cpu, const char *record_name, return kwd; } -bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, - uint8_t *buf, uint32_t *size) +bool mvpd_extract_keyword(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t *buf, uint32_t *size) { const uint8_t *kwd = NULL; size_t kwd_size = 0; bool copied_data = false; - kwd = mvpd_get_keyword(/*cpu=*/0, record_name, kwd_name, &kwd_size); + kwd = mvpd_get_keyword(chip, record_name, kwd_name, &kwd_size); if (kwd == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); @@ -425,8 +425,9 @@ bool mvpd_extract_keyword(const char *record_name, const char *kwd_name, return copied_data; } -const struct voltage_kwd *mvpd_get_voltage_data(int lrp) +const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp) { + static int inited_chip = -1; static int inited_lrp = -1; static uint8_t buf[sizeof(struct voltage_kwd)]; @@ -435,12 +436,13 @@ const struct voltage_kwd *mvpd_get_voltage_data(int lrp) struct voltage_kwd *voltage = 
(void *)buf; assert(lrp >= 0 && lrp < 6); - if (inited_lrp == lrp) + if (inited_chip == chip && inited_lrp == lrp) return voltage; + inited_chip = -1; inited_lrp = -1; - if (!mvpd_extract_keyword(record_name, "#V", buf, &buf_size)) { + if (!mvpd_extract_keyword(chip, record_name, "#V", buf, &buf_size)) { printk(BIOS_ERR, "Failed to read LRP0 record from MVPD\n"); return NULL; } @@ -451,13 +453,15 @@ const struct voltage_kwd *mvpd_get_voltage_data(int lrp) return NULL; } + inited_chip = chip; inited_lrp = lrp; return voltage; } /* Finds a specific ring in MVPD partition and extracts it */ -bool mvpd_extract_ring(const char *record_name, const char *kwd_name, - uint8_t chiplet_id, uint8_t even_odd, uint16_t ring_id, +bool mvpd_extract_ring(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t chiplet_id, + uint8_t even_odd, uint16_t ring_id, uint8_t *buf, uint32_t buf_size) { const uint8_t *rings = NULL; @@ -466,7 +470,7 @@ bool mvpd_extract_ring(const char *record_name, const char *kwd_name, struct ring_hdr *ring = NULL; uint32_t ring_size = 0; - rings = mvpd_get_keyword(/*cpu=*/0, record_name, kwd_name, &rings_size); + rings = mvpd_get_keyword(chip, record_name, kwd_name, &rings_size); if (rings == NULL) die("Failed to find %s keyword in %s!\n", kwd_name, record_name); diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c index 2e011300be5..55905294c89 100644 --- a/src/soc/ibm/power9/powerbus.c +++ b/src/soc/ibm/power9/powerbus.c @@ -41,7 +41,8 @@ static bool read_voltage_data(struct powerbus_cfg *cfg) uint32_t freq_floor = 0; /* Using LRP0 because frequencies are the same in all LRP records */ - voltage = mvpd_get_voltage_data(0); + /* TODO: don't hard-code chip if values are not the same among them */ + voltage = mvpd_get_voltage_data(/*chip=*/0, /*lrp=*/0); for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { const struct voltage_bucket_data *bucket = &voltage->buckets[i]; diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c index 2a21ae53d12..8bfbe938f23 100644 --- a/src/soc/ibm/power9/pstates.c +++ b/src/soc/ibm/power9/pstates.c @@ -711,7 +711,8 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) record[3] = '0' + quad; size = sizeof(buf); - if (!mvpd_extract_keyword(record, "#V", buf, &size)) { + /* TODO: don't hard-code chip if values are not the same among them */ + if (!mvpd_extract_keyword(/*chip=*/0, record, "#V", buf, &size)) { die("Failed to read %s record from MVPD", record); } @@ -873,7 +874,8 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) * first parses/writes, then tests if bucket ID even match. */ size = sizeof(buf); - if (!mvpd_extract_keyword("CRP0", "#W", buf, &size)) { + /* TODO: don't hard-code chip if values are not the same among them */ + if (!mvpd_extract_keyword(/*chip=*/0, "CRP0", "#W", buf, &size)) { die("Failed to read %s record from MVPD", "CRP0"); } @@ -995,7 +997,8 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) * in struct definition. 
*/ size = sizeof(buf); - if (!mvpd_extract_keyword("CRP0", "IQ", buf, &size)) { + /* TODO: don't hard-code chip if values are not the same among them */ + if (!mvpd_extract_keyword(/*chip=*/0, "CRP0", "IQ", buf, &size)) { die("Failed to read %s record from MVPD", "CRP0"); } assert(size >= sizeof(IddqTable)); diff --git a/src/soc/ibm/power9/tor.c b/src/soc/ibm/power9/tor.c index a6386d46df1..c5623d4c202 100644 --- a/src/soc/ibm/power9/tor.c +++ b/src/soc/ibm/power9/tor.c @@ -639,7 +639,9 @@ static void tor_fetch_and_insert_vpd_ring(struct tor_hdr *ring_section, uint8_t instance_id = 0; struct ring_hdr *ring = NULL; - success = mvpd_extract_ring("CP00", query->kwd_name, chiplet_id, even_odd, + /* TODO: don't hard-code chip if values are not the same among them */ + success = mvpd_extract_ring(/*chip=*/0, "CP00", query->kwd_name, + chiplet_id, even_odd, query->ring_id, buf1, MAX_RING_BUF_SIZE); if (!success) { *ring_status = RING_NOT_FOUND; From 7961eedce54a334d4e0e136a97d2fea246225354 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 2 Feb 2022 14:57:28 +0100 Subject: [PATCH 109/213] soc/power9/mvpd.c: cache MVPD in memory between stages Signed-off-by: Krystian Hebel Change-Id: I263932bbdf246ed7fb62cd742cc97bcc653bac06 --- src/include/symbols.h | 1 + src/mainboard/raptor-cs/talos-2/memlayout.ld | 4 +++- src/soc/ibm/power9/mvpd.c | 11 ++++------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/include/symbols.h b/src/include/symbols.h index a03af08463f..e2623b09f71 100644 --- a/src/include/symbols.h +++ b/src/include/symbols.h @@ -77,6 +77,7 @@ DECLARE_OPTIONAL_REGION(opensbi) DECLARE_OPTIONAL_REGION(bl31) DECLARE_REGION(transfer_buffer) DECLARE_OPTIONAL_REGION(watchdog_tombstone) +DECLARE_REGION(mvpd_cache) /* Returns true when pre-RAM symbols are known to the linker. * (Does not necessarily mean that the memory is accessible.) */ diff --git a/src/mainboard/raptor-cs/talos-2/memlayout.ld b/src/mainboard/raptor-cs/talos-2/memlayout.ld index c9e7c9502e9..7eb2cbe67c1 100644 --- a/src/mainboard/raptor-cs/talos-2/memlayout.ld +++ b/src/mainboard/raptor-cs/talos-2/memlayout.ld @@ -66,12 +66,14 @@ SECTIONS ROMSTAGE( 0xF8240000, 256K) + REGION(mvpd_cache, 0xF8280000, 128K, 8) + /* * bootblock_crt0.S assumes this is the last part of L3, leaving for * interrupt vectors at least 0.5M because of cache associativity. If * more CBFS_CACHE is needed, split this into pre-/postram caches. 
*/ - CBFS_CACHE( 0xF8280000, 7M) + CBFS_CACHE( 0xF8380000, 6M) RAMSTAGE( 0xF9000000, 2M) } diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 421865a8ffb..3e0cb2a40f9 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "tor.h" @@ -199,11 +200,7 @@ static bool eeprom_extract_kwd(uint8_t cpu, uint64_t offset, uint8_t index, already built one */ static const uint8_t *mvpd_get(uint8_t cpu) { - /* Actual size of MVPD is a bit less than 42 KiB while maximum is 64 - * KiB, save some memory */ - enum { MVPD_SIZE = 42 * KiB }; - - static uint8_t mvpd_bufs[2][MVPD_SIZE]; + enum { MAX_MVPD_SIZE = 64 * KiB }; static const char *const mvpd_records[] = { "CRP0", "CP00", "VINI", @@ -212,7 +209,7 @@ static const uint8_t *mvpd_get(uint8_t cpu) "VRML", "VWML", "VER0", "MER0", "VMSC", }; - uint8_t *mvpd_buf = mvpd_bufs[cpu]; + uint8_t *mvpd_buf = &_mvpd_cache[cpu * MAX_MVPD_SIZE]; struct mvpd_toc_entry *toc = (void *)mvpd_buf; uint16_t mvpd_offset = MVPD_TOC_SIZE; @@ -275,7 +272,7 @@ static const uint8_t *mvpd_get(uint8_t cpu) if (k == ARRAY_SIZE(mvpd_records)) continue; - if (mvpd_offset + record_size > MVPD_SIZE) { + if (mvpd_offset + record_size > MAX_MVPD_SIZE) { die("MVPD section doesn't have space for %.4s record of " "size %d\n", record_name, record_size); } From 755737908d592e44e7fd087c66de3f50a949f518 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 19 May 2022 17:07:00 +0300 Subject: [PATCH 110/213] soc/power9/istep_8_1.c: implement istep 8.1 Configures essential registers of slave SBEs via FSI. Change-Id: I53c62da915c0e8f88f59a9275b2eda260f8ca113 Signed-off-by: Sergii Dmytruk --- src/arch/ppc64/include/arch/byteorder.h | 9 + src/include/cpu/power/istep_8.h | 10 + src/include/cpu/power/mvpd.h | 4 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/homer.h | 1 + src/soc/ibm/power9/istep_8_1.c | 320 ++++++++++++++++++++++++ src/soc/ibm/power9/mvpd.c | 33 +++ src/soc/ibm/power9/romstage.c | 3 + 8 files changed, 381 insertions(+) create mode 100644 src/include/cpu/power/istep_8.h create mode 100644 src/soc/ibm/power9/istep_8_1.c diff --git a/src/arch/ppc64/include/arch/byteorder.h b/src/arch/ppc64/include/arch/byteorder.h index a32336a44e7..8b8c0865b3c 100644 --- a/src/arch/ppc64/include/arch/byteorder.h +++ b/src/arch/ppc64/include/arch/byteorder.h @@ -12,6 +12,15 @@ #include +/* + * Assigns part of a 64-bit value: lhs[pos:pos + len] = rhs + */ +#define PPC_INSERT(lhs, rhs, pos, len) do { \ + uint64_t __placed = PPC_PLACE(rhs, pos, len); \ + uint64_t __mask = PPC_BITMASK(pos, (pos) + (len) - 1); \ + (lhs) = ((lhs) & ~__mask) | __placed; \ + } while (0) + /* * The pos parameter specifies MSB/leftmost bit. 
Passing compile-time constants * (literals or expressions) for parameters allows for the following diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h new file mode 100644 index 00000000000..9b3ef98930a --- /dev/null +++ b/src/include/cpu/power/istep_8.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP8_H +#define CPU_PPC64_ISTEP8_H + +#include + +void istep_8_1(uint8_t chips); + +#endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index ea80e02cb15..68b1533dd0a 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -52,6 +52,10 @@ const struct region_device *mvpd_device_ro(void); /* Reads #V of one of LRP records (mind that there is only one buffer) */ const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp); +/* Builds bitmask of functional cores based on Partial Good vector stored in PG + * keyword of CP00 record */ +uint64_t mvpd_get_available_cores(uint8_t chip); + /* Finds a specific keyword in MVPD partition and extracts it. *size is updated * to reflect needed or used space in the buffer. */ bool mvpd_extract_keyword(uint8_t chip, const char *record_name, diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 3865685c366..7921926a218 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -9,6 +9,7 @@ romstage-y += romstage.c romstage-y += mvpd.c romstage-y += vpd.c romstage-y += powerbus.c +romstage-y += istep_8_1.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 3d6c03bdf4f..a0303cc28a2 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -55,6 +55,7 @@ #define CORE_SCOM_RESTORE_SIZE_PER_CORE \ (SCOM_RESTORE_ENTRY_SIZE * CORE_SCOM_RESTORE_REGS_PER_CORE) +#define MAX_CHIPS 2 #define MAX_CORES_PER_CHIP 24 #define MAX_CORES_PER_EX 2 #define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) diff --git a/src/soc/ibm/power9/istep_8_1.c b/src/soc/ibm/power9/istep_8_1.c new file mode 100644 index 00000000000..cf87d3b297e --- /dev/null +++ b/src/soc/ibm/power9/istep_8_1.c @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include + +#include "fsi.h" +#include "homer.h" +#include "scratch.h" + +/* + * 8.1 host_slave_sbe_config + * + * Need to run this from master processor to all slave processors for + * Secureboot hole (need to ensure that SP didn't leave compromised P8 Slave). 
+ * + * - Update SBE config data area with any configs/parameters required by SBE + * (see step 0 for more details) + * - This includes the nest (and memory frequency if in synchronous mode) + * - Configuration flags (MPIPL, etc) + */ + +/* + * Legend for constant names: + * - *_FSI is a CFAM address (in 4 byte words, that's how it is in Hostboot) + * - *_FSI_BYTE is an FSI address + */ + +/* Used to read SBE Boot Side from processor */ +const uint64_t SBE_BOOT_SELECT_MASK = 0x0000400000000000; + +static void compute_chip_gards(uint8_t chip, + uint8_t *eq_gard, uint32_t *ec_gard) +{ + const uint64_t cores = mvpd_get_available_cores(chip); + + *eq_gard = 0xFF; + *ec_gard = 0xFFFFFFFF; + + for (int quad = 0; quad < MAX_QUADS_PER_CHIP; quad++) { + if (IS_EQ_FUNCTIONAL(quad, cores)) + *eq_gard &= ~(0x80 >> quad); + } + + for (int core = 0; core < MAX_CORES_PER_CHIP; core++) { + if (IS_EC_FUNCTIONAL(core, cores)) + *ec_gard &= ~(0x80000000 >> core); + } + + /* Shift the first meaningful bit to LSB position */ + *eq_gard >>= 2; + *ec_gard >>= 8; +} + +static void setup_sbe_config(uint8_t chip) +{ + /* These aren't defined in scratch.h because they are used only here and + * this allows for much shorter names. */ + enum { + /* SCRATCH_REGISTER_1 */ + EQ_GARD_START = 0, + EQ_GARD_LEN = 6, + EC_GARD_START = 8, + EC_GARD_LEN = 24, + + /* SCRATCH_REGISTER_2 */ + I2C_BUS_DIV_REF_START = 0, + I2C_BUS_DIV_REF_LEN = 16, + OPTICS_CONFIG_MODE_OBUS0 = 16, + OPTICS_CONFIG_MODE_OBUS1 = 17, + OPTICS_CONFIG_MODE_OBUS2 = 18, + OPTICS_CONFIG_MODE_OBUS3 = 19, + MC_PLL_BUCKET_START = 21, + MC_PLL_BUCKET_LEN = 3, + OB0_PLL_BUCKET_START = 24, + OB0_PLL_BUCKET_LEN = 2, + OB1_PLL_BUCKET_START = 26, + OB1_PLL_BUCKET_LEN = 2, + OB2_PLL_BUCKET_START = 28, + OB2_PLL_BUCKET_LEN = 2, + OB3_PLL_BUCKET_START = 30, + OB3_PLL_BUCKET_LEN = 2, + + /* SCRATCH_REGISTER_3 */ + BOOT_FLAGS_START = 0, + BOOT_FLAGS_LEN = 32, + RISK_LEVEL_START = 28, + RISK_LEVEL_LEN = 4, + + /* SCRATCH_REGISTER_4 */ + BOOT_FREQ_MULT_START = 0, + BOOT_FREQ_MULT_LEN = 16, + CP_FILTER_BYPASS = 16, + SS_FILTER_BYPASS = 17, + IO_FILTER_BYPASS = 18, + DPLL_BYPASS = 19, + NEST_MEM_X_O_PCI_BYPASS = 20, + OBUS_RATIO_VALUE = 21, + NEST_PLL_BUCKET_START = 29, + NEST_PLL_BUCKET_LEN = 3, + + /* SCRATCH_REGISTER_5 */ + PLL_MUX_START = 12, + PLL_MUX_LEN = 20, + CC_IPL = 0, + INIT_ALL_CORES = 1, + RISK_LEVEL_BIT_DEPRECATED = 2, + DISABLE_HBBL_VECTORS = 3, + MC_SYNC_MODE = 4, + SLOW_PCI_REF_CLOCK = 5, + + /* SCRATCH_REGISTER_6 */ + SMF_CONFIG = 16, + PROC_EFF_FABRIC_GROUP_ID_START = 17, + PROC_EFF_FABRIC_GROUP_ID_LEN = 3, + PROC_EFF_FABRIC_CHIP_ID_START = 20, + PROC_EFF_FABRIC_CHIP_ID_LEN = 3, + PUMP_CHIP_IS_GROUP = 23, + SLAVE_CHIP_SBE = 24, + PROC_FABRIC_GROUP_ID_START = 26, + PROC_FABRIC_GROUP_ID_LEN = 3, + PROC_FABRIC_CHIP_ID_START = 29, + PROC_FABRIC_CHIP_ID_LEN = 3, + PROC_MEM_TO_USE_START = 1, + PROC_MEM_TO_USE_LEN = 6, + }; + + uint64_t scratch; + + uint32_t boot_flags; + uint8_t risk_level; + + uint8_t eq_gard; + uint32_t ec_gard; + + compute_chip_gards(chip, &eq_gard, &ec_gard); + + /* SCRATCH_REGISTER_1 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI), 31); + + /* ATTR_EQ_GARD (computed at runtime) */ + PPC_INSERT(scratch, eq_gard, EQ_GARD_START, EQ_GARD_LEN); + /* ATTR_EC_GARD (computed at runtime)*/ + PPC_INSERT(scratch, ec_gard, EC_GARD_START, EC_GARD_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI, scratch >> 32); + + /* SCRATCH_REGISTER_2 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 1), 31); + + /* 
ATTR_I2C_BUS_DIV_REF (default, talos.xml) */
+	PPC_INSERT(scratch, 0x0003, I2C_BUS_DIV_REF_START, I2C_BUS_DIV_REF_LEN);
+	/* ATTR_MC_PLL_BUCKET (seems not to be relevant for Nimbus) */
+	PPC_INSERT(scratch, 0x00, MC_PLL_BUCKET_START, MC_PLL_BUCKET_LEN);
+
+	/*
+	 * TODO: if OBUS support is needed:
+	 *
+	 * - set OPTICS_CONFIG_MODE_OBUS[0-3] bits
+	 * - set OB[0-3]_PLL_BUCKET bits
+	 * see getObusPllBucket() in Hostboot for values of ATTR_OB*_PLL_BUCKET
+	 */
+
+	write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 1, scratch >> 32);
+
+	/* SCRATCH_REGISTER_3 */
+
+	scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 2), 31);
+
+	boot_flags = (read_scom(MBOX_SCRATCH_REG1 + 2) >> 32);
+	risk_level = (get_dd() < 0x23 ? 0 : 4);
+
+	/* Note that the two fields overlap (boot flags include risk level), so
+	   the order in which they are set is important. */
+
+	/* ATTR_BOOT_FLAGS (computed) */
+	PPC_INSERT(scratch, boot_flags, BOOT_FLAGS_START, BOOT_FLAGS_LEN);
+	/* ATTR_RISK_LEVEL (computed) */
+	PPC_INSERT(scratch, risk_level, RISK_LEVEL_START, RISK_LEVEL_LEN);
+
+	write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 2, scratch >> 32);
+
+	/* SCRATCH_REGISTER_4 */
+
+	scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 3), 31);
+
+	/* ATTR_BOOT_FREQ_MULT (talos.xml) */
+	PPC_INSERT(scratch, 96, BOOT_FREQ_MULT_START, BOOT_FREQ_MULT_LEN);
+	/* ATTR_NEST_PLL_BUCKET (1-based index of the nest frequency (1866 MHz)
+	   in the ATTR_NEST_PLL_FREQ_LIST array, see setFrequencyAttributes()
+	   in Hostboot) */
+	PPC_INSERT(scratch, 2, NEST_PLL_BUCKET_START, NEST_PLL_BUCKET_LEN);
+
+	/* ATTR_CP_FILTER_BYPASS (default, talos.xml) */
+	PPC_INSERT(scratch, 0, CP_FILTER_BYPASS, 1);
+	/* ATTR_SS_FILTER_BYPASS (default, talos.xml) */
+	PPC_INSERT(scratch, 0, SS_FILTER_BYPASS, 1);
+	/* ATTR_IO_FILTER_BYPASS (default, talos.xml) */
+	PPC_INSERT(scratch, 0, IO_FILTER_BYPASS, 1);
+	/* ATTR_DPLL_BYPASS (default, talos.xml) */
+	PPC_INSERT(scratch, 0, DPLL_BYPASS, 1);
+	/* ATTR_NEST_MEM_X_O_PCI_BYPASS (default, talos.xml) */
+	PPC_INSERT(scratch, 0, NEST_MEM_X_O_PCI_BYPASS, 1);
+
+	/* ATTR_OBUS_RATIO_VALUE (empty default in talos.xml) */
+	PPC_INSERT(scratch, 0, OBUS_RATIO_VALUE, 1);
+
+	write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 3, scratch >> 32);
+
+	/* SCRATCH_REGISTER_5 */
+
+	scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 4), 31);
+
+	/* ATTR_SYSTEM_IPL_PHASE (default, talos.xml) == HB_IPL, not CACHE_CONTAINED
+	   XXX: but we're not Hostboot and we're in romstage, so set this bit to 1? 
*/ + PPC_INSERT(scratch, 0, CC_IPL, 1); + /* ATTR_SYS_FORCE_ALL_CORES (talos.xml) */ + PPC_INSERT(scratch, 0, INIT_ALL_CORES, 1); + /* Risk level flag is deprecated here, moved to SCRATCH_REG_3 */ + PPC_INSERT(scratch, 0, RISK_LEVEL_BIT_DEPRECATED, 1); + /* ATTR_DISABLE_HBBL_VECTORS (default, talos.xml) */ + PPC_INSERT(scratch, 0, DISABLE_HBBL_VECTORS, 1); + /* Hostboot reads it from SBE, but we assume it's 0 in p9n_mca_scom() */ + PPC_INSERT(scratch, 0, MC_SYNC_MODE, 1); + /* ATTR_DD1_SLOW_PCI_REF_CLOCK (we're not DD1, but Hostboot sets this bit) */ + PPC_INSERT(scratch, 1, SLOW_PCI_REF_CLOCK, 1); + + /* ATTR_CLOCK_PLL_MUX (talos.xml) */ + PPC_INSERT(scratch, 0x80030, PLL_MUX_START, PLL_MUX_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 4, scratch >> 32); + + /* SCRATCH_REGISTER_6 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 5), 31); + + /* ATTR_PROC_SBE_MASTER_CHIP is always zero here */ + PPC_INSERT(scratch, 1, SLAVE_CHIP_SBE, 1); + /* ATTR_SMF_CONFIG */ + PPC_INSERT(scratch, 0, SMF_CONFIG, 1); + /* ATTR_PROC_FABRIC_PUMP_MODE (talos.xml) */ + PPC_INSERT(scratch, 1, PUMP_CHIP_IS_GROUP, 1); + + /* ATTR_PROC_FABRIC_GROUP_ID */ + PPC_INSERT(scratch, chip, PROC_FABRIC_GROUP_ID_START, PROC_FABRIC_GROUP_ID_LEN); + /* ATTR_PROC_FABRIC_CHIP_ID */ + PPC_INSERT(scratch, 0, PROC_FABRIC_CHIP_ID_START, PROC_FABRIC_CHIP_ID_LEN); + + /* ATTR_PROC_EFF_FABRIC_GROUP_ID */ + PPC_INSERT(scratch, chip, PROC_EFF_FABRIC_GROUP_ID_START, PROC_EFF_FABRIC_GROUP_ID_LEN); + /* ATTR_PROC_EFF_FABRIC_CHIP_ID */ + PPC_INSERT(scratch, 0, PROC_EFF_FABRIC_CHIP_ID_START, PROC_EFF_FABRIC_CHIP_ID_LEN); + + /* Not documented what this is */ + scratch |= PPC_BIT(0); + + /* ATTR_PROC_MEM_TO_USE (talos.xml; each CPU uses its own memory) */ + PPC_INSERT(scratch, chip << 3, PROC_MEM_TO_USE_START, PROC_MEM_TO_USE_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 5, scratch >> 32); + + /* SCRATCH_REGISTER_7 is left as is (it's related to DRTM payload) */ + + /* SCRATCH_REGISTER_8 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 7), 31); + + /* Indicate validity of SCRATCH_REGISTER_[1-6] */ + scratch |= PPC_BITMASK(0, 5); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 7, scratch >> 32); +} + +static int get_master_sbe_boot_seeprom(void) +{ + enum { PERV_SB_CS_SCOM = 0x00050008 }; + return (read_scom(PERV_SB_CS_SCOM) & SBE_BOOT_SELECT_MASK) ? 
1 : 0; +} + +static void set_sbe_boot_seeprom(uint8_t chip, int seeprom_side) +{ + enum { PERV_SB_CS_FSI_BYTE = 0x00002820 }; + + const uint32_t sbe_boot_select_mask = SBE_BOOT_SELECT_MASK >> 32; + + uint32_t sb_cs = read_fsi(chip, PERV_SB_CS_FSI_BYTE); + + if (seeprom_side == 0) + sb_cs &= ~sbe_boot_select_mask; + else + sb_cs |= sbe_boot_select_mask; + + write_fsi(chip, PERV_SB_CS_FSI_BYTE, sb_cs); +} + +void istep_8_1(uint8_t chips) +{ + int boot_seeprom_side; + + printk(BIOS_EMERG, "starting istep 8.1\n"); + report_istep(8, 1); + + boot_seeprom_side = get_master_sbe_boot_seeprom(); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + setup_sbe_config(chip); + set_sbe_boot_seeprom(chip, boot_seeprom_side); + } + } + + printk(BIOS_EMERG, "ending istep 8.1\n"); +} diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 3e0cb2a40f9..ad04e5b721a 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include +#include "homer.h" #include "tor.h" #define MVPD_TOC_ENTRIES 32 @@ -455,6 +457,37 @@ const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp) return voltage; } +uint64_t mvpd_get_available_cores(uint8_t chip) +{ + enum { + VPD_CP00_PG_HDR_LENGTH = 1, + VPD_CP00_PG_DATA_LENGTH = 128, + VPD_CP00_PG_DATA_ENTRIES = VPD_CP00_PG_DATA_LENGTH / 2, + + ALL_ON_PG_MASK = 0xFFFF, + EC_AG_MASK = 0xE1FF, + }; + + uint64_t cores = 0; + + uint8_t raw_pg_data[VPD_CP00_PG_HDR_LENGTH + VPD_CP00_PG_DATA_LENGTH]; + uint16_t pg_data[VPD_CP00_PG_DATA_ENTRIES]; + uint32_t size = sizeof(raw_pg_data); + + if (!mvpd_extract_keyword(chip, "CP00", "PG", raw_pg_data, &size)) + die("Failed to read CPU%d/MVPD/CP00/PG", chip); + + memcpy(pg_data, raw_pg_data + VPD_CP00_PG_HDR_LENGTH, sizeof(pg_data)); + + for (int core = 0; core < MAX_CORES_PER_CHIP; core++) { + chiplet_id_t core_chiplet = EC00_CHIPLET_ID + core; + if ((pg_data[core_chiplet] & ALL_ON_PG_MASK) == EC_AG_MASK) + cores |= PPC_BIT(core); + } + + return cores; +} + /* Finds a specific ring in MVPD partition and extracts it */ bool mvpd_extract_ring(uint8_t chip, const char *record_name, const char *kwd_name, uint8_t chiplet_id, diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index a45cdd2e56b..6b8c3bded0e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -362,6 +363,8 @@ void main(void) chips = fsi_get_present_chips(); printk(BIOS_EMERG, "Initialized FSI (chips mask: 0x%02X)\n", chips); + istep_8_1(chips); + istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); istep_10_13(); From 2b54520d3f91f6d8dcf8463ee7bf2773a7747d48 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 19 May 2022 17:20:55 +0300 Subject: [PATCH 111/213] soc/power9/istep_8_2.c: implement istep 8.2 Second configuration step for slave SBEs. 
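
As a review aid, here is a small standalone sketch (illustration only, not
part of the change) of the bit-numbering convention that PPC_BIT/PPC_PLACE/
PPC_INSERT are assumed to follow in the isteps 8.x code: bit 0 is the most
significant bit of a 64-bit word, a field is named by its leftmost bit and its
length, and PPC_INSERT() assigns lhs[pos .. pos + len - 1]. The BIT64/
BITMASK64/PLACE64/INSERT64 helpers below are re-derived for this sketch and
are not the real macros, which live in src/arch/ppc64/include/arch/byteorder.h.

    /* Illustration only: MSB-first bit numbering as assumed by isteps 8.x. */
    #include <assert.h>
    #include <stdint.h>

    #define BIT64(pos)            (UINT64_C(1) << (63 - (pos)))
    /* Bits start..end inclusive, both counted from the MSB. */
    #define BITMASK64(start, end) \
    	((~UINT64_C(0) >> (start)) & (~UINT64_C(0) << (63 - (end))))
    #define PLACE64(val, pos, len) \
    	(((uint64_t)(val) << (64 - (pos) - (len))) & \
    	 BITMASK64((pos), (pos) + (len) - 1))
    /* lhs[pos .. pos + len - 1] = rhs, all other bits preserved. */
    #define INSERT64(lhs, rhs, pos, len) \
    	((lhs) = ((lhs) & ~BITMASK64((pos), (pos) + (len) - 1)) | \
    	 PLACE64((rhs), (pos), (len)))

    int main(void)
    {
    	uint64_t scratch = 0;

    	/* Same shape as the ATTR_I2C_BUS_DIV_REF and ATTR_NEST_PLL_BUCKET
    	   inserts in setup_sbe_config(). */
    	INSERT64(scratch, 0x0003, 0, 16);
    	INSERT64(scratch, 2, 29, 3);
    	assert(scratch == 0x0003000200000000);

    	/* 32-bit CFAM values are moved into the upper half of a 64-bit word
    	   first (the PPC_SHIFT(x, 31) pattern) and the upper half is written
    	   back afterwards. */
    	uint32_t cfam = 0x12345678;
    	uint64_t tmp = (uint64_t)cfam << 32;
    	INSERT64(tmp, 1, 24, 1);	/* a SLAVE_CHIP_SBE-style flag bit */
    	cfam = tmp >> 32;
    	assert(cfam == 0x123456F8);

    	return 0;
    }
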
Change-Id: I0f0cbd52fa0a4c48185bd8267ec4e1ea587cf9f4 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_8.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_8_2.c | 49 +++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 52 insertions(+) create mode 100644 src/soc/ibm/power9/istep_8_2.c diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index 9b3ef98930a..fe94fc3296d 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -6,5 +6,6 @@ #include void istep_8_1(uint8_t chips); +void istep_8_2(uint8_t chips); #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 7921926a218..163ca7219f2 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -10,6 +10,7 @@ romstage-y += mvpd.c romstage-y += vpd.c romstage-y += powerbus.c romstage-y += istep_8_1.c +romstage-y += istep_8_2.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_8_2.c b/src/soc/ibm/power9/istep_8_2.c new file mode 100644 index 00000000000..b677a6c24ae --- /dev/null +++ b/src/soc/ibm/power9/istep_8_2.c @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "fsi.h" +#include "homer.h" + +/* + * 8.2 host_setup_sbe + * + * - Done for all boots - some settings will change based on system type and IPL + * type + * - Set the GP bits to default state + * - Needs to take into account to not change values set up in + * p9_set_clock_term.C procedure + */ + +static void set_fsi_gp_shadow(uint8_t chip) +{ + enum { + PERV_PERV_CTRL0_COPY_FSI = 0x0000291A, + PERV_PERV_CTRL0_TP_OTP_SCOM_FUSED_CORE_MODE = 23, + }; + + uint32_t ctrl0_copy = read_cfam(chip, PERV_PERV_CTRL0_COPY_FSI); + + /* ATTR_FUSED_CORE_MODE (seems to be zero by default) */ + PPC_INSERT(ctrl0_copy, 0, PERV_PERV_CTRL0_TP_OTP_SCOM_FUSED_CORE_MODE, 1); + + write_cfam(chip, PERV_PERV_CTRL0_COPY_FSI, ctrl0_copy); +} + +void istep_8_2(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 8.2\n"); + report_istep(8, 2); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + set_fsi_gp_shadow(chip); + } + + printk(BIOS_EMERG, "ending istep 8.2\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 6b8c3bded0e..8e6d1e603ea 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -364,6 +364,7 @@ void main(void) printk(BIOS_EMERG, "Initialized FSI (chips mask: 0x%02X)\n", chips); istep_8_1(chips); + istep_8_2(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From b5f1b053a706c1d28ffd9d5509745c051698f0df Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 19 May 2022 17:25:10 +0300 Subject: [PATCH 112/213] soc/power9/istep_8_3.c: implement istep 8.3 Starts SBE on slave chips. 
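
For context on the polling in start_cbs() below: the loop re-reads the CBS
control/status register until the internal state vector reports idle
(CBS_IDLE_VALUE) or the retry budget runs out. The standalone sketch that
follows is illustration only; poll_for_value() and fake_cbs_read() are
invented names, and the delay is stubbed out where the real loop calls
udelay(). It shows the same bounded-poll structure, comparing the full 16-bit
state vector.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in for read_cfam(chip, PERV_CBS_CS_FSI); ctx lets the example
       simulate the hardware. */
    typedef uint32_t (*read_fn)(void *ctx);

    static bool poll_for_value(read_fn read, void *ctx, uint32_t mask,
                               uint32_t wanted, unsigned int max_loops)
    {
    	for (unsigned int i = 0; i < max_loops; i++) {
    		if ((read(ctx) & mask) == wanted)
    			return true;
    		/* real code: udelay(P9_CBS_IDLE_HW_US_DELAY); */
    	}
    	return false;
    }

    /* Fake CBS state vector: "busy" for a few reads, then idle (0x0002). */
    static uint32_t fake_cbs_read(void *ctx)
    {
    	unsigned int *reads_left = ctx;

    	if (*reads_left > 0) {
    		(*reads_left)--;
    		return 0x0080;
    	}
    	return 0x0002;
    }

    int main(void)
    {
    	unsigned int reads_left = 3;
    	bool idle = poll_for_value(fake_cbs_read, &reads_left,
    	                           0xFFFF, 0x0002, 20);

    	printf("CBS reached idle: %s\n", idle ? "yes" : "no");
    	return idle ? 0 : 1;
    }

Masking with 0xFFFF corresponds to the 16-bit PERV_CBS_CS_INTERNAL_STATE_VECTOR
field that the real loop compares against CBS_IDLE_VALUE.
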
Change-Id: I8080817430faba5c838a206ec703e907939dc50c Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_8.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_8_3.c | 146 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 149 insertions(+) create mode 100644 src/soc/ibm/power9/istep_8_3.c diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index fe94fc3296d..d5bd5086d23 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -7,5 +7,6 @@ void istep_8_1(uint8_t chips); void istep_8_2(uint8_t chips); +void istep_8_3(uint8_t chips); #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 163ca7219f2..044c161b486 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -11,6 +11,7 @@ romstage-y += vpd.c romstage-y += powerbus.c romstage-y += istep_8_1.c romstage-y += istep_8_2.c +romstage-y += istep_8_3.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_8_3.c b/src/soc/ibm/power9/istep_8_3.c new file mode 100644 index 00000000000..dee26a43b3a --- /dev/null +++ b/src/soc/ibm/power9/istep_8_3.c @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +#include "fsi.h" +#include "homer.h" + +/* + * 8.3 host_cbs_start + * + * - Set a bit to start the SBE engine on slave chips. Located in FSI GP region. + * - This same bit performs the scan0 flush of pervasive. + */ + +static void send_fifo_reset(uint8_t chip) +{ + enum { SBE_FIFO_DNFIFO_RESET = 0x00002450 }; + + /* Perform a write to the DNFIFO Reset to cleanup the FIFO */ + write_fsi(chip, SBE_FIFO_DNFIFO_RESET, 0xDEAD); +} + +static void start_cbs(uint8_t chip) +{ + enum { + PERV_SB_MSG_FSI = 0x00002809, + + PERV_CBS_CS_FSI = 0x00002801, + PERV_CBS_CS_START_BOOT_SEQUENCER = 0, + PERV_CBS_CS_OPTION_SKIP_SCAN0_CLOCKSTART = 2, + PERV_CBS_CS_OPTION_PREVENT_SBE_START = 3, + + PERV_SB_CS_FSI = 0x00002808, + PERV_SB_CS_START_RESTART_VECTOR0 = 12, + PERV_SB_CS_START_RESTART_VECTOR1 = 13, + + PERV_CBS_ENVSTAT_FSI = 0x00002804, + PERV_CBS_ENVSTAT_C4_VDN_GPOOD = 2, + + /* Observed Number of times CBS read for CBS_INTERNAL_STATE_VECTOR */ + P9_CFAM_CBS_POLL_COUNT = 20, + /* + * unit is micro seconds [min : 64k x (1/100MHz) = 64k x 10(-8) = 640 us + * max : 64k x (1/50MHz) = 128k x 10(-8) = 1280 us] + */ + P9_CBS_IDLE_HW_US_DELAY = 640, + + CBS_IDLE_VALUE = 0x0002, + + PERV_FSB_FSB_DOWNFIFO_RESET_FSI = 0x00002414, + FIFO_RESET = 0x80000000, + + PERV_FSI2PIB_STATUS_FSI = 0x00001007, + PERV_FSI2PIB_STATUS_VDD_NEST_OBSERVE = 16, + }; + + int poll_count; + uint64_t tmp; + + /* Clear Selfboot message register before every boot */ + write_cfam(chip, PERV_SB_MSG_FSI, 0); + + /* Configure Prevent SBE start option */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_CBS_CS_FSI), 31); + tmp |= PPC_BIT(PERV_CBS_CS_OPTION_PREVENT_SBE_START); + write_cfam(chip, PERV_CBS_CS_FSI, tmp >> 32); + + /* Setup hreset to 0 */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_SB_CS_FSI), 31); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR1); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + + /* Check for VDN_PGOOD */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_CBS_ENVSTAT_FSI), 31); + if (!(tmp & PPC_BIT(PERV_CBS_ENVSTAT_C4_VDN_GPOOD))) + die("CBS startup: VDN_PGOOD is OFF, can't proceed\n"); + + /* Reset 
CFAM Boot Sequencer (CBS) to flush value */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_CBS_CS_FSI), 31); + tmp &= ~PPC_BIT(PERV_CBS_CS_START_BOOT_SEQUENCER); + tmp &= ~PPC_BIT(PERV_CBS_CS_OPTION_SKIP_SCAN0_CLOCKSTART); + write_cfam(chip, PERV_CBS_CS_FSI, tmp >> 32); + + /* Trigger CFAM Boot Sequencer (CBS) to start (no read, we know register's contents) */ + tmp |= PPC_BIT(PERV_CBS_CS_START_BOOT_SEQUENCER); + write_cfam(chip, PERV_CBS_CS_FSI, tmp >> 32); + + for (poll_count = 0; poll_count < P9_CFAM_CBS_POLL_COUNT; poll_count++) { + /* + * PERV_CBS_CS_INTERNAL_STATE_VECTOR_START = 16 + * PERV_CBS_CS_INTERNAL_STATE_VECTOR_LEN = 16 + */ + uint16_t cbs_state = (read_cfam(chip, PERV_CBS_CS_FSI) & 0xFF); + if (cbs_state == CBS_IDLE_VALUE) + break; + + udelay(P9_CBS_IDLE_HW_US_DELAY); + } + + if (poll_count == P9_CFAM_CBS_POLL_COUNT) + die("CBS startup: CBS has not reached idle state!\n"); + + /* Reset FIFO (ATTR_START_CBS_FIFO_RESET_SKIP is set only for some specific test) */ + write_cfam(chip, PERV_FSB_FSB_DOWNFIFO_RESET_FSI, FIFO_RESET); + + /* Setup up hreset (clear -> set -> clear again) */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_SB_CS_FSI), 31); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + tmp |= PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + + /* Check for VDD status */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_FSI2PIB_STATUS_FSI), 31); + if (!(tmp & PPC_BIT(PERV_FSI2PIB_STATUS_VDD_NEST_OBSERVE))) + die("CBS startup: VDD is OFF!\n"); +} + +void istep_8_3(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 8.3\n"); + report_istep(8, 3); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + /* + * Before starting the CBS (and thus the SBE) on slave + * procs make sure the SBE FIFO is clean by doing a full + * reset of the FIFO + */ + send_fifo_reset(chip); + start_cbs(chip); + } + } + + printk(BIOS_EMERG, "ending istep 8.3\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 8e6d1e603ea..41d04aa17d0 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -365,6 +365,7 @@ void main(void) istep_8_1(chips); istep_8_2(chips); + istep_8_3(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From b148ec813a2a776e144718da5d628d72fea939d0 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 19 May 2022 17:29:31 +0300 Subject: [PATCH 113/213] soc/power9/istep_8_4.c: implement istep 8.4 Waits for slave SBEs to start by periodically checking SBE status register. 
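For reference, the 32-bit messaging register polled here packs several status fields, with bit 0 being the most significant bit (the field layout is taken from the comment in the new file). A minimal decoding sketch, assuming only that layout; the struct and helper name are illustrative and not part of the patch:

#include <stdbool.h>
#include <stdint.h>

/* Illustrative only: decode of PERV_SB_MSG_FSI as documented in istep_8_4.c */
struct sbe_msg {
	bool ctrl_loop_initialized;  /* bit 0 */
	bool async_ffdc;             /* bit 1 */
	uint8_t prev_state;          /* bits 4-7 */
	uint8_t curr_state;          /* bits 8-11, SBE_STATE_RUNTIME (0x4) once booted */
	uint8_t major_istep;         /* bits 12-19 */
	uint8_t minor_istep;         /* bits 20-25 */
};

static struct sbe_msg decode_sbe_msg(uint32_t reg)
{
	struct sbe_msg msg = {
		.ctrl_loop_initialized = (reg >> 31) & 0x1,
		.async_ffdc            = (reg >> 30) & 0x1,
		.prev_state            = (reg >> 24) & 0xF,
		.curr_state            = (reg >> 20) & 0xF,
		.major_istep           = (reg >> 12) & 0xFF,
		.minor_istep           = (reg >> 6)  & 0x3F,
	};
	return msg;
}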
Change-Id: I3d94d6dededfc3020945f4d4055f009ab50657db Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_8.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_8_4.c | 113 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 116 insertions(+) create mode 100644 src/soc/ibm/power9/istep_8_4.c diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index d5bd5086d23..39427419213 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -8,5 +8,6 @@ void istep_8_1(uint8_t chips); void istep_8_2(uint8_t chips); void istep_8_3(uint8_t chips); +void istep_8_4(uint8_t chips); #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 044c161b486..421e4f2eb33 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -12,6 +12,7 @@ romstage-y += powerbus.c romstage-y += istep_8_1.c romstage-y += istep_8_2.c romstage-y += istep_8_3.c +romstage-y += istep_8_4.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_8_4.c b/src/soc/ibm/power9/istep_8_4.c new file mode 100644 index 00000000000..979cd091eec --- /dev/null +++ b/src/soc/ibm/power9/istep_8_4.c @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "fsi.h" +#include "homer.h" + +/* + * 8.4 proc_check_slave_sbe_seeprom_complete : Check Slave SBE Complete + * + * - Check to make sure that the slave SBE engines have completed their IPL + * - FW will poll for up to 1 second to see if the "done" signature is in + * the status reg (not tied to istep number) + * - If "done" signature is not found then FW must extract FFDC from the SBE + */ + +static bool sbe_run_extract_msg_reg(uint8_t chip) +{ + enum { + PERV_SB_MSG_FSI = 0x00002809, + + /* SBE is in its operational (runtime) state */ + SBE_STATE_RUNTIME = 0x4, + + /* + * Much higher frequency of polling buys us about 333ms here. + * Can also wait with second precision at first (4 seconds) as SBE boots in + * 4.7s every time. + */ + SBE_RETRY_TIMEOUT_HW_MS = 60 * 1000, + SBE_RETRY_NUM_LOOPS = 60 * 100, // 100 times per second + }; + + /* Each sbe gets 60s to respond with the fact that it's booted and at + * runtime (stable state). 
*/ + uint64_t SBE_WAIT_SLEEP_MS = (SBE_RETRY_TIMEOUT_HW_MS / SBE_RETRY_NUM_LOOPS); + + /* + * Layout of the register: + * [0] = SBE control loop initialized + * [1] = async FFDC present on SBE + * [2-3] = reserved + * [4-7] = previous SBE state + * [8-11] = current SBE state + * [12-19] = last major istep executed by the SBE + * [20-25] = last minor istep executed by the SBE + * [26-31] = reserved + */ + uint32_t msg_reg; + + for (uint64_t i = 0; i < SBE_RETRY_NUM_LOOPS; i++) { + uint8_t curr_state; + + msg_reg = read_cfam(chip, PERV_SB_MSG_FSI); + + curr_state = (msg_reg >> 20) & 0xF; + if (curr_state == SBE_STATE_RUNTIME) + return true; + + /* Check async FFDC bit (indicates SBE is failing to boot) */ + if (msg_reg & (1 << 30)) + break; + + if ((i * SBE_WAIT_SLEEP_MS) % 1000 == 0) + printk(BIOS_EMERG, "SBE for chip #%d is booting...\n", chip); + + /* Hostboot resets watchdog before sleeping, we might want to + do it too or just increase timer after experimenting */ + mdelay(SBE_WAIT_SLEEP_MS); + } + + /* We reach this line only if something is wrong with SBE */ + + printk(BIOS_ERR, "Message register: 0x%08x\n", msg_reg); + + if (msg_reg & (1 << 30)) + printk(BIOS_ERR, "SBE reports an error.\n"); + else + printk(BIOS_ERR, "SBE takes too long to boot.\n"); + + printk(BIOS_ERR, "SBE for chip #%d failed to boot!\n", chip); + + /* If SBE did boot (started its control loop) and then failed, can read + some debug information from it (p9_extract_sbe_rc() in Hostboot) */ + + /* + * Might want to restart SBE here if boot failure is something that can + * happen under normal circumstances. Hostboot gives current SBE side + * two tries, switches sides and gives up if it also fails twice. + */ + + return false; +} + +void istep_8_4(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 8.4\n"); + report_istep(8, 4); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + if (!sbe_run_extract_msg_reg(chip)) + die("SBE for chip #%d did not boot properly.\n", chip); + } + } + + printk(BIOS_EMERG, "ending istep 8.4\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 41d04aa17d0..b0e0e4fed5e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -366,6 +366,7 @@ void main(void) istep_8_1(chips); istep_8_2(chips); istep_8_3(chips); + istep_8_4(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From 779b3b1164bb6a87c78577956ca3517a0f7dae9a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 2 Jan 2022 18:24:11 +0200 Subject: [PATCH 114/213] soc/power9/sbeio.c: implement SBE SCOM operations They can be used to read and write SCOM of the second CPU after its SBE has started which happens in istep 8.4. 
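Both helpers take the chip index and a 64-bit SCOM address and either complete the FIFO transaction or die on a FIFO timeout. A minimal usage sketch, assuming only the prototypes added in sbeio.h; the address below is a placeholder, not taken from this patch:

#include <stdint.h>
#include "sbeio.h"

static void sbe_scom_example(void)
{
	const uint8_t slave_chip = 1;     /* second CPU */
	const uint64_t addr = 0x05011C00; /* placeholder SCOM address */

	uint64_t val = read_sbe_scom(slave_chip, addr);
	write_sbe_scom(slave_chip, addr, val | 0x1);
}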
Change-Id: If8d8089dbf94cc53c6ac04ee7e4b9bb81b85ea14 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/sbeio.c | 234 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/sbeio.h | 11 ++ 3 files changed, 246 insertions(+) create mode 100644 src/soc/ibm/power9/sbeio.c create mode 100644 src/soc/ibm/power9/sbeio.h diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 421e4f2eb33..74d8493f19c 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -34,6 +34,7 @@ romstage-y += ccs.c romstage-y += mcbist.c romstage-y += timer.c romstage-y += fsi.c +romstage-y += sbeio.c ramstage-y += chip.c ramstage-y += homer.c ramstage-y += rom_media.c diff --git a/src/soc/ibm/power9/sbeio.c b/src/soc/ibm/power9/sbeio.c new file mode 100644 index 00000000000..2c049ce68db --- /dev/null +++ b/src/soc/ibm/power9/sbeio.c @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "sbeio.h" + +#include +#include +#include +#include + +#include "fsi.h" + +enum fifo_regs { + SBE_FIFO_UPFIFO_DATA_IN = 0x00002400, + SBE_FIFO_UPFIFO_STATUS = 0x00002404, + SBE_FIFO_UPFIFO_SIG_EOT = 0x00002408, + SBE_FIFO_UPFIFO_REQ_RESET = 0x0000240C, + SBE_FIFO_DNFIFO_DATA_OUT = 0x00002440, + SBE_FIFO_DNFIFO_STATUS = 0x00002444, + SBE_FIFO_DNFIFO_RESET = 0x00002450, + SBE_FIFO_DNFIFO_ACK_EOT = 0x00002454, + SBE_FIFO_DNFIFO_MAX_TSFR = 0x00002458, +}; + +enum { + SBE_FIFO_CLASS_SCOM_ACCESS = 0xA2, + SBE_FIFO_CMD_GET_SCOM = 0x01, + SBE_FIFO_CMD_PUT_SCOM = 0x02, + + FSB_FIFO_SIG_EOT = 0x80000000, + MAX_FIFO_TIMEOUT_US = 2 * 1000 * 1000, // Hostboot waits up to 90s! + + FIFO_STATUS_MAGIC = 0xC0DE, +}; + +struct get_scom_request_t { + uint32_t word_count; // size in uint32_t (4) + uint16_t reserved; // 0 + uint8_t cmd_class; // SBE_FIFO_CLASS_SCOM_ACCESS + uint8_t cmd; // SBE_FIFO_CMD_GET_SCOM + uint64_t addr; +} __attribute__((packed)); + +struct put_scom_request_t { + uint32_t word_count; // size in uint32_t (6) + uint16_t reserved; // 0 + uint8_t cmd_class; // SBE_FIFO_CLASS_SCOM_ACCESS + uint8_t cmd; // SBE_FIFO_CMD_PUT_SCOM + uint64_t addr; + uint64_t data; +} __attribute__((packed)); + +/* This structure is part of every response */ +struct status_hdr_t { + uint16_t magic; // FIFO_STATUS_MAGIC + uint8_t cmd_class; + uint8_t cmd; + uint16_t primary_status; + uint16_t secondary_status; +} __attribute__((packed)); + +static void fifo_push(uint8_t chip, uint32_t addr, uint32_t data) +{ + enum { UPFIFO_STATUS_FIFO_FULL = 0x00200000 }; + + uint64_t elapsed_time_us = 0; + + while (true) { + uint32_t status = read_fsi(chip, SBE_FIFO_UPFIFO_STATUS); + if (!(status & UPFIFO_STATUS_FIFO_FULL)) + break; + + if (elapsed_time_us >= MAX_FIFO_TIMEOUT_US) + die("Timeout waiting for upstream SBE FIFO to be not full"); + + udelay(10); + elapsed_time_us += 10; + } + + write_fsi(chip, addr, data); +} + +static void write_request(uint8_t chip, const void *request, uint32_t word_count) +{ + const uint32_t *words = request; + + /* + * Ensure Downstream Max Transfer Counter is 0 since we have no need for + * it and non-0 can cause protocol issues. 
+ */ + write_fsi(chip, SBE_FIFO_DNFIFO_MAX_TSFR, 0x0); + + for (uint32_t i = 0; i < word_count; i++) + fifo_push(chip, SBE_FIFO_UPFIFO_DATA_IN, words[i]); + + /* Notify SBE that last word has been sent */ + fifo_push(chip, SBE_FIFO_UPFIFO_SIG_EOT, FSB_FIFO_SIG_EOT); +} + +/* Returns true when there is no more data to be read */ +static bool fifo_pop(uint8_t chip, uint32_t *data) +{ + enum { + DNFIFO_STATUS_DEQUEUED_EOT_FLAG = 0x00800000, + DNFIFO_STATUS_FIFO_EMPTY = 0x00100000, + }; + + uint64_t elapsed_time_us = 0; + + while (true) { + uint32_t status = read_fsi(chip, SBE_FIFO_DNFIFO_STATUS); + + /* If we're done receiving response */ + if (status & DNFIFO_STATUS_DEQUEUED_EOT_FLAG) + return false; + + /* If there is more data */ + if (!(status & DNFIFO_STATUS_FIFO_EMPTY)) + break; + + if (elapsed_time_us >= MAX_FIFO_TIMEOUT_US) { + printk(BIOS_INFO, "Last downstream SBE status: 0x%08x\n", status); + die("Timeout waiting for downstream SBE FIFO to be not empty\n"); + } + + udelay(10); + elapsed_time_us += 10; + } + + *data = read_fsi(chip, SBE_FIFO_DNFIFO_DATA_OUT); + return true; +} + +static void read_response(uint8_t chip, void *response, uint32_t word_count) +{ + enum { + MSG_BUFFER_SIZE = 2048, + + STATUS_SIZE_WORDS = sizeof(struct status_hdr_t) / sizeof(uint32_t), + + SBE_PRI_OPERATION_SUCCESSFUL = 0x00, + SBE_SEC_OPERATION_SUCCESSFUL = 0x00, + }; + + /* Large enough to receive FFDC */ + static uint32_t buffer[MSG_BUFFER_SIZE]; + + uint32_t idx; + uint32_t offset_idx; + uint32_t status_idx; + struct status_hdr_t *status_hdr; + + uint32_t *words = response; + + /* + * Message Schema: + * |Return Data (optional)| Status Header | FFDC (optional) + * |Offset to Status Header (starting from EOT) | EOT | + */ + + for (idx = 0; idx < MSG_BUFFER_SIZE; ++idx) { + if (!fifo_pop(chip, &buffer[idx])) + break; + + if (idx < word_count) + words[idx] = buffer[idx]; + } + + if (idx == MSG_BUFFER_SIZE) + die("SBE IO response exceeded maximum allowed size\n"); + + /* Notify SBE that EOT has been received */ + write_fsi(chip, SBE_FIFO_DNFIFO_ACK_EOT, FSB_FIFO_SIG_EOT); + + /* + * Final index for a minimum complete message (No return data and no FFDC): + * Word Length of status header + Length of Offset (1) + Length of EOT (1) + */ + if (idx < STATUS_SIZE_WORDS + 2) { + printk(BIOS_INFO, "Response length in words: 0x%08x\n", idx); + die("SBE IO response is too short\n"); + } + + /* + * |offset to header| EOT marker | current insert pos | <- idx + * The offset is how far to move back from from the EOT position to + * to get the index of the Status Header. 
+ */ + offset_idx = idx - 2; + + /* Validate the offset to the status header */ + if (buffer[offset_idx] - 1 > offset_idx) + die("SBE response offset is too large\n"); + else if (buffer[offset_idx] < STATUS_SIZE_WORDS + 1) + die("SBE response offset is too small\n"); + + status_idx = offset_idx - (buffer[offset_idx] - 1); + status_hdr = (struct status_hdr_t *)&buffer[status_idx]; + + /* Check status for success */ + if (status_hdr->magic != FIFO_STATUS_MAGIC || + status_hdr->primary_status != SBE_PRI_OPERATION_SUCCESSFUL || + status_hdr->secondary_status != SBE_SEC_OPERATION_SUCCESSFUL) + die("Invalid status in SBE IO response\n"); +} + +void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data) +{ + struct put_scom_request_t request = { + .word_count = sizeof(request) / sizeof(uint32_t), + .cmd_class = SBE_FIFO_CLASS_SCOM_ACCESS, + .cmd = SBE_FIFO_CMD_PUT_SCOM, + .addr = addr, + .data = data, + }; + + write_request(chip, &request, request.word_count); + read_response(chip, NULL, 0); +} + +uint64_t read_sbe_scom(uint8_t chip, uint64_t addr) +{ + uint64_t data; + struct get_scom_request_t request = { + .word_count = sizeof(request) / sizeof(uint32_t), + .cmd_class = SBE_FIFO_CLASS_SCOM_ACCESS, + .cmd = SBE_FIFO_CMD_GET_SCOM, + .addr = addr, + }; + + write_request(chip, &request, request.word_count); + read_response(chip, &data, sizeof(data) / sizeof(uint32_t)); + + return data; +} diff --git a/src/soc/ibm/power9/sbeio.h b/src/soc/ibm/power9/sbeio.h new file mode 100644 index 00000000000..b2b3ae2ae01 --- /dev/null +++ b/src/soc/ibm/power9/sbeio.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_SBEIO_H +#define __SOC_IBM_POWER9_SBEIO_H + +#include + +void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_sbe_scom(uint8_t chip, uint64_t addr); + +#endif /* __SOC_IBM_POWER9_SBEIO_H */ From 092454e5b6d99bd0eb715163cad097f7abb60e4a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 2 Jan 2022 19:25:23 +0200 Subject: [PATCH 115/213] soc/power9/istep_8_9.c: scominits of XBus chiplet Change-Id: I63660faff3bcdcb74d9f99692df6196527351e1e Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_18.h | 3 + src/include/cpu/power/istep_8.h | 6 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_18_11.c | 1 - src/soc/ibm/power9/istep_8_9.c | 388 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 6 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 src/soc/ibm/power9/istep_8_9.c diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h index 3e9c154f75b..a09faba5db5 100644 --- a/src/include/cpu/power/istep_18.h +++ b/src/include/cpu/power/istep_18.h @@ -3,6 +3,9 @@ #ifndef CPU_PPC64_ISTEP18_H #define CPU_PPC64_ISTEP18_H +/* Frequency of XBus for Nimbus */ +#define FREQ_X_MHZ 2000 + void istep_18_11(void); void istep_18_12(void); diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index 39427419213..1079d76455b 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -9,5 +9,11 @@ void istep_8_1(uint8_t chips); void istep_8_2(uint8_t chips); void istep_8_3(uint8_t chips); void istep_8_4(uint8_t chips); +void istep_8_9(uint8_t chips); + +/* These functions access SCOM of the second CPU using SBE IO, thus they can be + * used only in isteps that come after 8.4 */ +void put_scom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t get_scom(uint8_t chip, uint64_t addr); #endif /* CPU_PPC64_ISTEP8_H */ diff 
--git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 74d8493f19c..7e6af1c57ff 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -13,6 +13,7 @@ romstage-y += istep_8_1.c romstage-y += istep_8_2.c romstage-y += istep_8_3.c romstage-y += istep_8_4.c +romstage-y += istep_8_9.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index 430f1f08feb..2a3282da939 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -7,7 +7,6 @@ #include #define MDMT_TOD_GRID_CYCLE_STAGING_DELAY 6 -#define FREQ_X_MHZ 2000 #define TOD_GRID_PS 400 #define PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC 21 diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c new file mode 100644 index 00000000000..bd8c8896c31 --- /dev/null +++ b/src/soc/ibm/power9/istep_8_9.c @@ -0,0 +1,388 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include + +#include "homer.h" +#include "sbeio.h" + +/* + * This code omits initialization of OBus which isn't present. It also assumes + * there is only one XBus (X1). Both of these statements are true for Nimbus + * Sforza. + * + * For consistency with Hostboot some read values are unused, written + * unmodified or ANDed with 0, this simplifies verification that the code + * operates correctly by comparing against Hostboot logs. + */ + +/* Updates address that targets XBus chiplet to use specified XBus link number. + * Does nothing to non-XBus addresses. */ +static uint64_t xbus_addr(uint8_t xbus, uint64_t addr) +{ + enum { + XBUS_COUNT = 0x3, // number of XBus links + XB_IOX_0_RING_ID = 0x3, // IOX_0 + XB_PBIOX_0_RING_ID = 0x6, // PBIOX_0 + }; + + uint8_t ring = (addr >> 10) & 0xF; + uint8_t chiplet = (addr >> 24) & 0x3F; + + if (chiplet != XB_CHIPLET_ID) + return addr; + + if (ring >= XB_IOX_0_RING_ID && ring < XB_IOX_0_RING_ID + XBUS_COUNT) + ring = XB_IOX_0_RING_ID + xbus; + else if (ring >= XB_PBIOX_0_RING_ID && ring < XB_PBIOX_0_RING_ID + XBUS_COUNT) + ring = XB_PBIOX_0_RING_ID + xbus; + + addr &= ~PPC_BITMASK(50, 53); + addr |= PPC_PLACE(ring, 50, 4); + + return addr; +} + +void put_scom(uint8_t chip, uint64_t addr, uint64_t data) +{ + addr = xbus_addr(/*xbus=*/1, addr); + + if (chip == 0) + write_scom(addr, data); + else + write_sbe_scom(chip, addr, data); +} + +uint64_t get_scom(uint8_t chip, uint64_t addr) +{ + addr = xbus_addr(/*xbus=*/1, addr); + + if (chip == 0) + return read_scom(addr); + else + return read_sbe_scom(chip, addr); +} + +static void p9_fbc_no_hp_scom(bool is_xbus_active, uint8_t chip) +{ + enum { + /* Power Bus PB West Mode Configuration Register */ + PB_WEST_MODE = 0x501180A, + /* Power Bus PB CENT Mode Register */ + PB_CENT_MODE = 0x5011C0A, + /* Power Bus PB CENT GP command RATE DP0 Register */ + PB_CENT_GP_CMD_RATE_DP0 = 0x5011C26, + /* Power Bus PB CENT GP command RATE DP1 Register */ + PB_CENT_GP_CMD_RATE_DP1 = 0x5011C27, + /* Power Bus PB CENT RGP command RATE DP0 Register */ + PB_CENT_RGP_CMD_RATE_DP0 = 0x5011C28, + /* Power Bus PB CENT RGP command RATE DP1 Register */ + PB_CENT_RGP_CMD_RATE_DP1 = 0x5011C29, + /* Power Bus PB CENT SP command RATE DP0 Register */ + PB_CENT_SP_CMD_RATE_DP0 = 0x5011C2A, + /* Power Bus PB CENT SP command RATE DP1 Register */ + PB_CENT_SP_CMD_RATE_DP1 = 0x5011C2B, + /* Power Bus PB East Mode Configuration Register */ + PB_EAST_MODE = 0x501200A, + + 
PB_CFG_CHIP_IS_SYSTEM = 4, + + PB_CFG_SP_HW_MARK = 16, + PB_CFG_SP_HW_MARK_LEN = 7, + + PB_CFG_GP_HW_MARK = 23, + PB_CFG_GP_HW_MARK_LEN = 7, + + PB_CFG_LCL_HW_MARK = 30, + PB_CFG_LCL_HW_MARK_LEN = 6, + }; + + /* + * ATTR_PROC_FABRIC_X_LINKS_CNFG + * Number of active XBus links: 1 for two CPUs, 0 for one CPU. + */ + const int num_x_links_cfg = (is_xbus_active ? 1 : 0); + + uint64_t pb_west_mode, pb_cent_mode, pb_east_mode; + uint64_t pb_cent_rgp_cmd_rate_dp0, pb_cent_rgp_cmd_rate_dp1; + uint64_t pb_cent_sp_cmd_rate_dp0, pb_cent_sp_cmd_rate_dp1; + + pb_west_mode = get_scom(chip, PB_WEST_MODE); + PPC_INSERT(pb_west_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); + PPC_INSERT(pb_west_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); + PPC_INSERT(pb_west_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); + PPC_INSERT(pb_west_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); + put_scom(chip, PB_WEST_MODE, pb_west_mode); + + pb_cent_mode = get_scom(chip, PB_CENT_MODE); + PPC_INSERT(pb_cent_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); + PPC_INSERT(pb_cent_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); + PPC_INSERT(pb_cent_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); + PPC_INSERT(pb_cent_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); + put_scom(chip, PB_CENT_MODE, pb_cent_mode); + + put_scom(chip, PB_CENT_GP_CMD_RATE_DP0, get_scom(chip, PB_CENT_GP_CMD_RATE_DP0) & 0); + put_scom(chip, PB_CENT_GP_CMD_RATE_DP1, get_scom(chip, PB_CENT_GP_CMD_RATE_DP1) & 0); + + (void)get_scom(chip, PB_CENT_RGP_CMD_RATE_DP0); + pb_cent_rgp_cmd_rate_dp0 = (num_x_links_cfg == 0 ? 0 : 0x030406080A0C1218); + put_scom(chip, PB_CENT_RGP_CMD_RATE_DP0, pb_cent_rgp_cmd_rate_dp0); + + (void)get_scom(chip, PB_CENT_RGP_CMD_RATE_DP1); + pb_cent_rgp_cmd_rate_dp1 = (num_x_links_cfg == 0 ? 0 : 0x040508080A0C1218); + put_scom(chip, PB_CENT_RGP_CMD_RATE_DP1, pb_cent_rgp_cmd_rate_dp1); + + pb_cent_sp_cmd_rate_dp0 = get_scom(chip, PB_CENT_SP_CMD_RATE_DP0); + pb_cent_sp_cmd_rate_dp0 = (num_x_links_cfg == 0 ? 0 : 0x030406080A0C1218); + put_scom(chip, PB_CENT_SP_CMD_RATE_DP0, pb_cent_sp_cmd_rate_dp0); + + pb_cent_sp_cmd_rate_dp1 = get_scom(chip, PB_CENT_SP_CMD_RATE_DP1); + pb_cent_sp_cmd_rate_dp1 = (num_x_links_cfg == 0 ? 
0 : 0x030406080A0C1218); + put_scom(chip, PB_CENT_SP_CMD_RATE_DP1, pb_cent_sp_cmd_rate_dp1); + + pb_east_mode = get_scom(chip, PB_EAST_MODE); + PPC_INSERT(pb_east_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); + PPC_INSERT(pb_east_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); + PPC_INSERT(pb_east_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); + PPC_INSERT(pb_east_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); + put_scom(chip, PB_EAST_MODE, pb_east_mode); +} + +static void p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) +{ + enum { + /* Processor bus Electrical Framer/Parser 01 configuration register */ + PB_FP01_CFG = 0x501340A, + /* Power Bus Electrical Framer/Parser 23 Configuration Register */ + PB_FP23_CFG = 0x501340B, + /* Power Bus Electrical Framer/Parser 45 Configuration Register */ + PB_FP45_CFG = 0x501340C, + /* Power Bus Electrical Link Data Buffer 01 Configuration Register */ + PB_ELINK_DATA_01_CFG_REG = 0x5013410, + /* Power Bus Electrical Link Data Buffer 23 Configuration Register */ + PB_ELINK_DATA_23_CFG_REG = 0x5013411, + /* Power Bus Electrical Link Data Buffer 45 Configuration Register */ + PB_ELINK_DATA_45_CFG_REG = 0x5013412, + /* Power Bus Electrical Miscellaneous Configuration Register */ + PB_MISC_CFG = 0x5013423, + /* Power Bus Electrical Link Trace Configuration Register */ + PB_TRACE_CFG = 0x5013424, + + FP0_FMR_DISABLE = 20, + FP0_PRS_DISABLE = 25, + FP1_FMR_DISABLE = 52, + FP1_PRS_DISABLE = 57, + + FP2_FMR_DISABLE = 20, + FP2_PRS_DISABLE = 25, + FP3_FMR_DISABLE = 52, + FP3_PRS_DISABLE = 57, + + FP4_FMR_DISABLE = 20, + FP4_PRS_DISABLE = 25, + FP5_FMR_DISABLE = 52, + FP5_PRS_DISABLE = 57, + + IOE01_IS_LOGICAL_PAIR = 0, + IOE23_IS_LOGICAL_PAIR = 1, + IOE45_IS_LOGICAL_PAIR = 2, + }; + + /* + * According to schematics we only support one XBus with + * ATTR_PROC_FABRIC_X_ATTACHED_CHIP_CNFG = { false, true, false } + * Meaning that X1 is present and X0 and X2 aren't. 
+ */ + + const uint64_t pb_freq_mhz = powerbus_cfg()->fabric_freq; + + const uint64_t dd2_lo_limit_d = (FREQ_X_MHZ * 10); + const uint64_t dd2_lo_limit_n = pb_freq_mhz * 82; + + uint64_t pb_fp01_cfg, pb_fp23_cfg, pb_fp45_cfg; + uint64_t pb_elink_data_23_cfg_reg; + uint64_t pb_misc_cfg, pb_trace_cfg; + + pb_fp01_cfg = get_scom(chip, PB_FP01_CFG); + pb_fp01_cfg |= PPC_BIT(FP0_FMR_DISABLE); + pb_fp01_cfg |= PPC_BIT(FP0_PRS_DISABLE); + pb_fp01_cfg |= PPC_BIT(FP1_FMR_DISABLE); + pb_fp01_cfg |= PPC_BIT(FP1_PRS_DISABLE); + put_scom(chip, PB_FP01_CFG, pb_fp01_cfg); + + pb_fp23_cfg = get_scom(chip, PB_FP23_CFG); + + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP2_FMR_DISABLE, 1); + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP2_PRS_DISABLE, 1); + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP3_FMR_DISABLE, 1); + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP3_PRS_DISABLE, 1); + + if (is_xbus_active) { + PPC_INSERT(pb_fp23_cfg, 0x01, 22, 2); + PPC_INSERT(pb_fp23_cfg, 0x20, 12, 8); + PPC_INSERT(pb_fp23_cfg, 0x15 - (dd2_lo_limit_n / dd2_lo_limit_d), 4, 8); + PPC_INSERT(pb_fp23_cfg, 0x20, 44, 8); + PPC_INSERT(pb_fp23_cfg, 0x15 - (dd2_lo_limit_n / dd2_lo_limit_d), 36, 8); + } + + put_scom(chip, PB_FP23_CFG, pb_fp23_cfg); + + pb_fp45_cfg = get_scom(chip, PB_FP45_CFG); + pb_fp45_cfg |= PPC_BIT(FP4_FMR_DISABLE); + pb_fp45_cfg |= PPC_BIT(FP4_PRS_DISABLE); + pb_fp45_cfg |= PPC_BIT(FP5_FMR_DISABLE); + pb_fp45_cfg |= PPC_BIT(FP5_PRS_DISABLE); + put_scom(chip, PB_FP45_CFG, pb_fp45_cfg); + + put_scom(chip, PB_ELINK_DATA_01_CFG_REG, get_scom(chip, PB_ELINK_DATA_01_CFG_REG)); + + pb_elink_data_23_cfg_reg = get_scom(chip, PB_ELINK_DATA_23_CFG_REG); + if (is_xbus_active) { + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x1F, 24, 5); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x40, 1, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x40, 33, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 9, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 41, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 17, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 49, 7); + } + put_scom(chip, PB_ELINK_DATA_23_CFG_REG, pb_elink_data_23_cfg_reg); + + put_scom(chip, PB_ELINK_DATA_45_CFG_REG, get_scom(chip, PB_ELINK_DATA_45_CFG_REG)); + + pb_misc_cfg = get_scom(chip, PB_MISC_CFG); + PPC_INSERT(pb_misc_cfg, 0x00, IOE01_IS_LOGICAL_PAIR, 1); + PPC_INSERT(pb_misc_cfg, is_xbus_active, IOE23_IS_LOGICAL_PAIR, 1); + PPC_INSERT(pb_misc_cfg, 0x00, IOE45_IS_LOGICAL_PAIR, 1); + put_scom(chip, PB_MISC_CFG, pb_misc_cfg); + + pb_trace_cfg = get_scom(chip, PB_TRACE_CFG); + if (is_xbus_active) { + PPC_INSERT(pb_trace_cfg, 0x4, 16, 4); + PPC_INSERT(pb_trace_cfg, 0x4, 24, 4); + PPC_INSERT(pb_trace_cfg, 0x1, 20, 4); + PPC_INSERT(pb_trace_cfg, 0x1, 28, 4); + } + put_scom(chip, PB_TRACE_CFG, pb_trace_cfg); +} + +static void p9_fbc_ioe_dl_scom(uint8_t chip) +{ + enum { + /* ELL Configuration Register */ + IOEL_CONFIG = 0x601180A, + /* ELL Replay Threshold Register */ + IOEL_REPLAY_THRESHOLD = 0x6011818, + /* ELL SL ECC Threshold Register */ + IOEL_SL_ECC_THRESHOLD = 0x6011819, + + LL1_CONFIG_LINK_PAIR = 0, + LL1_CONFIG_CRC_LANE_ID = 2, + LL1_CONFIG_SL_UE_CRC_ERR = 4, + }; + + /* ATTR_LINK_TRAIN == fapi2::ENUM_ATTR_LINK_TRAIN_BOTH (from logs) */ + + uint64_t ioel_config, ioel_replay_threshold, ioel_sl_ecc_threshold; + + ioel_config = get_scom(chip, IOEL_CONFIG); + ioel_config |= PPC_BIT(LL1_CONFIG_LINK_PAIR); + ioel_config |= PPC_BIT(LL1_CONFIG_CRC_LANE_ID); + ioel_config |= PPC_BIT(LL1_CONFIG_SL_UE_CRC_ERR); + PPC_INSERT(ioel_config, 0xF, 11, 5); + PPC_INSERT(ioel_config, 0xF, 28, 4); + 
put_scom(chip, IOEL_CONFIG, ioel_config); + + ioel_replay_threshold = get_scom(chip, IOEL_REPLAY_THRESHOLD); + PPC_INSERT(ioel_replay_threshold, 0x7, 8, 3); + PPC_INSERT(ioel_replay_threshold, 0xF, 4, 4); + PPC_INSERT(ioel_replay_threshold, 0x6, 0, 4); + put_scom(chip, IOEL_REPLAY_THRESHOLD, ioel_replay_threshold); + + ioel_sl_ecc_threshold = get_scom(chip, IOEL_SL_ECC_THRESHOLD); + PPC_INSERT(ioel_sl_ecc_threshold, 0x7, 8, 3); + PPC_INSERT(ioel_sl_ecc_threshold, 0xF, 4, 4); + PPC_INSERT(ioel_sl_ecc_threshold, 0x7, 0, 4); + put_scom(chip, IOEL_SL_ECC_THRESHOLD, ioel_sl_ecc_threshold); +} + +static void chiplet_fabric_scominit(bool is_xbus_active, uint8_t chip) +{ + enum { + PU_PB_CENT_SM0_FIR_REG = 0x05011C00, + PU_PB_CENT_SM0_FIR_MASK_REG_SPARE_13 = 13, + + PU_PB_IOE_FIR_ACTION0_REG = 0x05013406, + FBC_IOE_TL_FIR_ACTION0 = 0x0000000000000000, + + PU_PB_IOE_FIR_ACTION1_REG = 0x05013407, + FBC_IOE_TL_FIR_ACTION1 = 0x0049000000000000, + + PU_PB_IOE_FIR_MASK_REG = 0x05013403, + FBC_IOE_TL_FIR_MASK = 0xFF24F0303FFFF11, + FBC_IOE_TL_FIR_MASK_X0_NF = 0x00C00C0C00000880, + FBC_IOE_TL_FIR_MASK_X2_NF = 0x000300C0C0000220, + + XBUS_LL0_IOEL_FIR_ACTION0_REG = 0x06011806, + FBC_IOE_DL_FIR_ACTION0 = 0x0000000000000000, + + XBUS_LL0_IOEL_FIR_ACTION1_REG = 0x06011807, + FBC_IOE_DL_FIR_ACTION1 = 0x0303C00000001FFC, + + XBUS_LL0_IOEL_FIR_MASK_REG = 0x06011803, + FBC_IOE_DL_FIR_MASK = 0xFCFC3FFFFFFFE003, + }; + + bool init_firs; + uint64_t fbc_cent_fir; + + /* Apply FBC non-hotplug initfile */ + p9_fbc_no_hp_scom(is_xbus_active, chip); + + /* Setup IOE (XBUS FBC IO) TL SCOMs */ + p9_fbc_ioe_tl_scom(is_xbus_active, chip); + + /* TL/DL FIRs are configured by us only if not already setup by SBE */ + fbc_cent_fir = get_scom(chip, PU_PB_CENT_SM0_FIR_REG); + init_firs = !(fbc_cent_fir & PPC_BIT(PU_PB_CENT_SM0_FIR_MASK_REG_SPARE_13)); + + if (init_firs) { + uint64_t fir_mask; + + put_scom(chip, PU_PB_IOE_FIR_ACTION0_REG, FBC_IOE_TL_FIR_ACTION0); + put_scom(chip, PU_PB_IOE_FIR_ACTION1_REG, FBC_IOE_TL_FIR_ACTION1); + + fir_mask = FBC_IOE_TL_FIR_MASK + | FBC_IOE_TL_FIR_MASK_X0_NF + | FBC_IOE_TL_FIR_MASK_X2_NF; + put_scom(chip, PU_PB_IOE_FIR_MASK_REG, fir_mask); + } + + /* Setup IOE (XBUS FBC IO) DL SCOMs */ + p9_fbc_ioe_dl_scom(chip); + + if (init_firs) { + put_scom(chip, XBUS_LL0_IOEL_FIR_ACTION0_REG, FBC_IOE_DL_FIR_ACTION0); + put_scom(chip, XBUS_LL0_IOEL_FIR_ACTION1_REG, FBC_IOE_DL_FIR_ACTION1); + put_scom(chip, XBUS_LL0_IOEL_FIR_MASK_REG, FBC_IOE_DL_FIR_MASK); + } +} + +void istep_8_9(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 8.9\n"); + report_istep(8,9); + + /* Not skipping master chip and initializing it even if we don't have a second chip */ + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + chiplet_fabric_scominit(/*is_xbus_active=*/chips == 0x03, chip); + } + + printk(BIOS_EMERG, "ending istep 8.9\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index b0e0e4fed5e..f4c357b6931 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -367,6 +367,7 @@ void main(void) istep_8_2(chips); istep_8_3(chips); istep_8_4(chips); + istep_8_9(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From 1e4b662f182b8852482556f43f721835a950cadf Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 2 Jan 2022 19:30:09 +0200 Subject: [PATCH 116/213] soc/power9/istep_8_10.c: scominits for XBus Change-Id: I8ad545aa99496875f0b880113a3bf88dc687e6ce Signed-off-by: Sergii Dmytruk --- 
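The per-lane PHY registers written by this initfile are reached by adding a link-group offset of 0x2000000000 and a per-lane stride of 0x100000000 to a base SCOM address. A small sketch of that computation, assuming only the constants visible in the new file (the helper name is illustrative, not part of the patch):

#include <stdint.h>

/* Illustrative only: address composition used by the per-lane loops in istep_8_10.c */
static uint64_t xbus_lane_addr(uint64_t base, uint8_t group, uint8_t lane)
{
	const uint64_t group_offset = 0x2000000000ULL; /* per XBus link group */
	const uint64_t lane_stride  = 0x100000000ULL;  /* per lane */

	return base + (uint64_t)group * group_offset + (uint64_t)lane * lane_stride;
}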
src/include/cpu/power/istep_8.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_8_10.c | 395 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 398 insertions(+) create mode 100644 src/soc/ibm/power9/istep_8_10.c diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index 1079d76455b..4063e3b5fb8 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -10,6 +10,7 @@ void istep_8_2(uint8_t chips); void istep_8_3(uint8_t chips); void istep_8_4(uint8_t chips); void istep_8_9(uint8_t chips); +void istep_8_10(uint8_t chips); /* These functions access SCOM of the second CPU using SBE IO, thus they can be * used only in isteps that come after 8.4 */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 7e6af1c57ff..2f50e0b8b00 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -14,6 +14,7 @@ romstage-y += istep_8_2.c romstage-y += istep_8_3.c romstage-y += istep_8_4.c romstage-y += istep_8_9.c +romstage-y += istep_8_10.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_8_10.c b/src/soc/ibm/power9/istep_8_10.c new file mode 100644 index 00000000000..c32e5e7cb47 --- /dev/null +++ b/src/soc/ibm/power9/istep_8_10.c @@ -0,0 +1,395 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +static inline void and_scom(uint8_t chip, uint64_t addr, uint64_t mask) +{ + put_scom(chip, addr, get_scom(chip, addr) & mask); +} + +static inline void or_scom(uint8_t chip, uint64_t addr, uint64_t mask) +{ + put_scom(chip, addr, get_scom(chip, addr) | mask); +} + +static inline void and_or_scom(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) +{ + uint64_t data = get_scom(chip, addr); + data &= and; + data |= or; + put_scom(chip, addr, data); +} + +static void xbus_scom(uint8_t chip, uint8_t group) +{ + /* ATTR_IO_XBUS_CHAN_EQ is 0 by default and Hostboot logs seem to confirm this*/ + + /* ATTR_IO_XBUS_MASTER_MODE */ + const bool xbus_master_mode = (chip == 0); + /* + * Offset for group. + * + * Note that several registers are initialized for both groups and don't + * use the offset. Some other writes are group-specific and don't need + * it either. 
+ */ + const uint64_t offset = group * 0x2000000000; + + int i; + + /* *_RX_DATA_DAC_SPARE_MODE_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = 0x8000000006010C3F + offset + 0x100000000 * i; + // 53 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_5_OFF + // 54 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_6_OFF + // 55 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_7_OFF + and_scom(chip, addr, ~PPC_BITMASK(53, 55)); + } + + /* *_RX_DAC_CNTL1_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = 0x8000080006010C3F + offset + 0x100000000 * i; + // 54 - *_RX_DAC_REGS_RX_DAC_REGS_RX_LANE_ANA_PDWN_{OFF,ON} + if (i < 17) + and_scom(chip, addr, ~PPC_BIT(54)); + else + or_scom(chip, addr, PPC_BIT(54)); + } + + /* *_RX_DAC_CNTL5_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = 0x8000280006010C3F + offset + 0x100000000 * i; + and_scom(chip, addr, + ~(PPC_BITMASK(48, 51) | PPC_BITMASK(52, 56) | PPC_BITMASK(57, 61))); + } + + /* *_RX_DAC_CNTL6_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = 0x8000300006010C3F + offset + 0x100000000 * i; + and_or_scom(chip, addr, + ~(PPC_BITMASK(53, 56) | PPC_BITMASK(48, 52)), + PPC_PLACE(0x7, 53, 4) | PPC_PLACE(0x0C, 48, 5)); + } + + /* *_RX_DAC_CNTL9_E_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = 0x8000C00006010C3F + offset + 0x100000000 * i; + and_scom(chip, addr, ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 60))); + } + + /* *_RX_BIT_MODE1_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = 0x8002200006010C3F + offset + 0x100000000 * i; + // 48 - *_RX_BIT_REGS_RX_LANE_DIG_PDWN_{OFF,ON} + if (i < 17) + and_scom(chip, addr, ~PPC_BIT(48)); + else + or_scom(chip, addr, PPC_BIT(48)); + } + + /* *_RX_BIT_MODE1_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = 0x8002C00006010C3F + offset + 0x100000000 * i; + const uint16_t data[17] = { + 0x1000, 0xF03E, 0x07BC, 0x07C7, 0x03EF, 0x1F0F, 0x1800, 0x9C00, + 0x1000, 0x9C00, 0x1800, 0x1F0F, 0x03EF, 0x07C7, 0x07BC, 0xF03E, + 0x1000 + }; + and_or_scom(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); + } + + /* *_RX_BIT_MODE2_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = 0x8002C80006010C3F + offset + 0x100000000 * i; + const uint8_t data[17] = { + 0x42, 0x3E, 0x00, 0x60, 0x40, 0x40, 0x03, 0x03, + 0x42, 0x03, 0x03, 0x40, 0x40, 0x60, 0x00, 0x3E, + 0x42 + }; + and_or_scom(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); + } + + /* *_TX_MODE1_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = 0x8004040006010C3F + offset + 0x100000000 * i; + and_scom(chip, addr, ~PPC_BIT(48)); + } + + /* *_TX_MODE2_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = 0x80040C0006010C3F + offset + 0x100000000 * i; + or_scom(chip, addr, PPC_BIT(62)); + } + + /* *_TX_BIT_MODE1_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = 0x80043C0006010C3F + offset + 0x100000000 * i; + const uint16_t data[17] = { + 0x000, 0x000, 0x01E, 0x01F, 0x00F, 0x07C, 0xC63, 0xE73, + 0x000, 0xE73, 0xC63, 0x07C, 0x00F, 0x01F, 0x01E, 0x000, + 0x000, + }; + and_or_scom(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); + } + + /* *_TX_BIT_MODE2_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = 0x8004440006010C3F + offset + 0x100000000 * i; + const uint8_t data[17] = { + 0x01, 0x7C, 0x7B, 0x0C, 0x5E, 0x10, 0x0C, 0x4E, + 0x01, 0x4E, 0x0C, 0x10, 0x5E, 0x0C, 0x7B, 0x7C, + 0x01, + }; + and_or_scom(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); + } + + // P9A_XBUS_0_RX[01]_RX_SPARE_MODE_PG + // 49 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PG_SPARE_MODE_1_ON + 
or_scom(chip, 0x8008000006010C3F + offset, PPC_BIT(49)); + + // P9A_XBUS_0_RX[01]_RX_ID1_PG + and_or_scom(chip, 0x8008080006010C3F + offset, + ~PPC_BITMASK(48, 53), + PPC_PLACE((group == 0 ? 0x00 : 0x01), 48, 6)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE1_EO_PG + // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_CLKDIST_PDWN_OFF + and_scom(chip, 0x8008100006010C3F + offset, ~PPC_BIT(48)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE5_EO_PG + // 51-53 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RECAL_INTERVAL_TIMEOUT_SEL_TAP5 + // 54-55 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RECAL_STATUS_RPT_TIMEOUT_SEL_TAP1 + and_or_scom(chip, 0x8008300006010C3F + offset, + ~(PPC_BITMASK(51, 53) | PPC_BITMASK(54, 55)), + PPC_PLACE(0x5, 51, 3) | PPC_PLACE(0x1, 54, 2)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE7_EO_PG + and_or_scom(chip, 0x8008400006010C3F + offset, + ~PPC_BITMASK(60, 63), + PPC_PLACE(0xA, 60, 4)); + + // P9A_XBUS_0_RX0_RX_CTL_MODE23_EO_PG (same address for both groups) + // 55 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PEAK_TUNE_OFF + // 56 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_LTE_EN_ON + // 59 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFEHISPD_EN_ON + // 60 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFE12_EN_ON + if (group == 0) { + and_or_scom(chip, 0x8008C00006010C3F, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(55, 60)), + PPC_PLACE(0x1, 48, 2) | PPC_BIT(56) | PPC_PLACE(0x3, 57, 2) | + PPC_BIT(59) | PPC_BIT(60)); + } else { + and_or_scom(chip, 0x8008C00006010C3F, + ~PPC_BITMASK(48, 49), PPC_PLACE(0x1, 48, 2)); + } + + // P9A_XBUS_0_RX0_RX_CTL_MODE23_EO_PG + // 55 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PEAK_TUNE_OFF + // 56 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_LTE_EN_ON + // 57 - 0b11 + // 59 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFEHISPD_EN_ON + // 60 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFE12_EN_ON + if (group == 1) { + and_or_scom(chip, 0x8008C02006010C3F, + ~PPC_BITMASK(55, 60), + PPC_BIT(56) | PPC_PLACE(0x3, 57, 2) | PPC_BIT(59) | PPC_BIT(60)); + } + + // P9A_XBUS_0_RX0_RX_CTL_MODE29_EO_PG (identical for both groups) + and_or_scom(chip, 0x8008D00006010C3F + offset, + ~(PPC_BITMASK(48, 55) | PPC_BITMASK(56, 63)), + PPC_PLACE(0x66, 48, 8) | PPC_PLACE(0x44, 56, 8)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE27_EO_PG + // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_RC_ENABLE_CTLE_1ST_LATCH_OFFSET_CAL_ON + or_scom(chip, 0x8009700006010C3F + offset, PPC_BIT(48)); + + // P9A_XBUS_0_RX[01]_RX_ID2_PG + and_or_scom(chip, 0x8009800006010C3F + offset, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(0x00, 49, 7) | PPC_PLACE(0x10, 57, 7)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE1_E_PG + // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_MASTER_MODE_MASTER + // 57 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_FENCE_FENCED + // 58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PDWN_LITE_DISABLE_ON + or_scom(chip, 0x8009900006010C3F + offset, + (xbus_master_mode ? 
PPC_BIT(48) : 0) | PPC_BIT(57) | PPC_BIT(58)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE2_E_PG + and_or_scom(chip, 0x8009980006010C3F + offset, + ~PPC_BITMASK(48, 52), PPC_PLACE(0x01, 48, 5)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE3_E_PG + and_or_scom(chip, 0x8009A00006010C3F + offset, + ~PPC_BITMASK(48, 51), PPC_PLACE(0xB, 48, 4)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE5_E_PG + and_or_scom(chip, 0x8009B00006010C3F + offset, + ~PPC_BITMASK(52, 55), PPC_PLACE(0x1, 52, 4)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE6_E_PG + and_or_scom(chip, 0x8009B80006010C3F + offset, + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 61)), + PPC_PLACE(0x11, 48, 7) | PPC_PLACE(0x11, 55, 7)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE8_E_PG + // 55-58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RPR_ERR_CNTR1_DURATION_TAP5 + and_or_scom(chip, 0x8009C80006010C3F + offset, + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 58) | PPC_BITMASK(61, 63)), + PPC_PLACE(0xF, 48, 7) | PPC_PLACE(0x5, 55, 4) | PPC_PLACE(0x5, 61, 3)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE9_E_PG + // 55-58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RPR_ERR_CNTR2_DURATION_TAP5 + and_or_scom(chip, 0x8009D00006010C3F + offset, + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 58)), + PPC_PLACE(0x3F, 48, 7) | PPC_PLACE(0x5, 55, 4)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE11_E_PG + and_scom(chip, 0x8009E00006010C3F + offset, ~PPC_BITMASK(48, 63)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE12_E_PG + and_or_scom(chip, 0x8009E80006010C3F + offset, + ~PPC_BITMASK(48, 55), PPC_PLACE(0x7F, 48, 8)); + + // P9A_XBUS_0_RX[01]_RX_GLBSM_SPARE_MODE_PG + // 50 - IOF1_RX_RX0_RXCTL_GLBSM_REGS_RX_PG_GLBSM_SPARE_MODE_2_ON + // 56 - IOF1_RX_RX0_RXCTL_GLBSM_REGS_RX_DESKEW_BUMP_AFTER_AFTER + or_scom(chip, 0x800A800006010C3F + offset, PPC_BIT(50) | PPC_BIT(56)); + + // P9A_XBUS_0_RX[01]_RX_GLBSM_CNTL3_EO_PG + and_or_scom(chip, 0x800AE80006010C3F + offset, + ~PPC_BITMASK(56, 57), PPC_PLACE(0x2, 56, 2)); + + // P9A_XBUS_0_RX[01]_RX_GLBSM_MODE1_EO_PG + and_or_scom(chip, 0x800AF80006010C3F + offset, + ~(PPC_BITMASK(48, 51) | PPC_BITMASK(52, 55)), + PPC_PLACE(0xC, 48, 4) | PPC_PLACE(0xC, 52, 4)); + + // P9A_XBUS_0_RX[01]_RX_DATASM_SPARE_MODE_PG + // 60 - IOF1_RX_RX0_RXCTL_DATASM_DATASM_REGS_RX_CTL_DATASM_CLKDIST_PDWN_OFF + and_scom(chip, 0x800B800006010C3F + offset, ~PPC_BIT(60)); + + // P9A_XBUS_0_TX[01]_TX_SPARE_MODE_PG + and_scom(chip, 0x800C040006010C3F + offset, ~PPC_BITMASK(56, 57)); + + // P9A_XBUS_0_TX[01]_TX_ID1_PG + and_or_scom(chip, 0x800C0C0006010C3F + offset, + ~PPC_BITMASK(48, 53), + PPC_PLACE((group == 0 ? 
0x00 : 0x01), 48, 6)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE1_EO_PG + // 48 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_CLKDIST_PDWN_OFF + // 59 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_PDWN_LITE_DISABLE_ON + and_or_scom(chip, 0x800C140006010C3F + offset, + ~(PPC_BIT(48) | PPC_BITMASK(53, 57) | PPC_BIT(59)), + PPC_PLACE(0x01, 53, 5) | PPC_BIT(59)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE2_EO_PG + and_or_scom(chip, 0x800C1C0006010C3F + offset, + ~PPC_BITMASK(56, 62), PPC_PLACE(0x11, 56, 7)); + + // P9A_XBUS_0_TX[01]_TX_CTL_CNTLG1_EO_PG + // 48-49 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DRV_CLK_PATTERN_GCRMSG_DRV_0S + and_scom(chip, 0x800C240006010C3F + offset, ~PPC_BITMASK(48, 49)); + + // P9A_XBUS_0_TX[01]_TX_ID2_PG + and_or_scom(chip, 0x800C840006010C3F + offset, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(0x0, 49, 7) | PPC_PLACE(0x10, 57, 7)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE1_E_PG + // 55-57 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DYN_RECAL_INTERVAL_TIMEOUT_SEL_TAP5 + // 58-59 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DYN_RECAL_STATUS_RPT_TIMEOUT_SEL_TAP1 + and_or_scom(chip, 0x800C8C0006010C3F + offset, + ~(PPC_BITMASK(55, 57) | PPC_BITMASK(58, 59)), + PPC_PLACE(0x5, 55, 3) | PPC_PLACE(0x1, 58, 2)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE2_E_PG + and_scom(chip, 0x800CEC0006010C3F + offset, ~PPC_BITMASK(48, 63)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE3_E_PG + and_or_scom(chip, 0x800CF40006010C3F + offset, + ~PPC_BITMASK(48, 55), PPC_PLACE(0x7F, 48, 8)); + + // P9A_XBUS_0_TX[01]_TX_CTLSM_MODE1_EO_PG + // 59 - IOF1_TX_WRAP_TX0_TXCTL_TX_CTL_SM_REGS_TX_FFE_BOOST_EN_ON + or_scom(chip, 0x800D2C0006010C3F + offset, PPC_BIT(59)); + + // P9A_XBUS_0_TX_IMPCAL_P_4X_PB (identical for both groups) + and_or_scom(chip, 0x800F1C0006010C3F, + ~PPC_BITMASK(48, 54), PPC_PLACE(0x0E, 48, 5)); +} + +static void set_msb_swap(uint8_t chip, int group) +{ + enum { + TX_CTL_MODE1_EO_PG = 0x800C140006010C3F, + EDIP_TX_MSBSWAP = 58, + }; + + const uint64_t offset = group * 0x2000000000; + + /* ATTR_EI_BUS_TX_MSBSWAP seems to be 0x80 which is GROUP_0_SWAP */ + if (group == 0) + or_scom(chip, TX_CTL_MODE1_EO_PG + offset, PPC_BIT(EDIP_TX_MSBSWAP)); + else + and_scom(chip, TX_CTL_MODE1_EO_PG + offset, ~PPC_BIT(EDIP_TX_MSBSWAP)); +} + +static void xbus_scominit(int group) +{ + enum { + PU_PB_CENT_SM0_PB_CENT_FIR_REG = 0x05011C00, + + XBUS_PHY_FIR_ACTION0 = 0x0000000000000000ULL, + XBUS_FIR_ACTION0_REG = 0x06010C06, + XBUS_PHY_FIR_ACTION1 = 0x2068680000000000ULL, + XBUS_FIR_ACTION1_REG = 0x06010C07, + XBUS_PHY_FIR_MASK = 0xDF9797FFFFFFC000ULL, + XBUS_FIR_MASK_REG = 0x06010C03, + + EDIP_RX_IORESET = 0x8009F80006010C3F, + EDIP_TX_IORESET = 0x800C9C0006010C3F, + }; + + const uint64_t offset = group * 0x2000000000; + + /* Assert IO reset to power-up bus endpoint logic */ + or_scom(0, EDIP_RX_IORESET + offset, PPC_BIT(52)); + or_scom(1, EDIP_RX_IORESET + offset, PPC_BIT(52)); + udelay(50); + or_scom(0, EDIP_TX_IORESET + offset, PPC_BIT(48)); + or_scom(1, EDIP_TX_IORESET + offset, PPC_BIT(48)); + udelay(50); + + set_msb_swap(/*chip=*/0, group); + set_msb_swap(/*chip=*/1, group); + + xbus_scom(/*chip=*/0, group); + xbus_scom(/*chip=*/1, group); + + /* PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 */ + if (!(get_scom(/*chip=*/0, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & PPC_BIT(13))) { + put_scom(/*chip=*/0, XBUS_FIR_ACTION0_REG, XBUS_PHY_FIR_ACTION0); + put_scom(/*chip=*/0, XBUS_FIR_ACTION1_REG, XBUS_PHY_FIR_ACTION1); + put_scom(/*chip=*/0, XBUS_FIR_MASK_REG, XBUS_PHY_FIR_MASK); + } +} + 
+void istep_8_10(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 8.10\n"); + report_istep(8,10); + + if (chips != 0x01) { + xbus_scominit(/*group=*/0); + xbus_scominit(/*group=*/1); + } + + printk(BIOS_EMERG, "ending istep 8.10\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index f4c357b6931..65e9398c19d 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -368,6 +368,7 @@ void main(void) istep_8_3(chips); istep_8_4(chips); istep_8_9(chips); + istep_8_10(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From eb5e856d7359f6b86facb55f6bea3a5f97c61f04 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 2 Jan 2022 19:31:57 +0200 Subject: [PATCH 117/213] soc/power9/istep_8_11.c: enable RI/DI for XBus Change-Id: I8e1f18080ecf11caea23890a826969394eaa658d Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_8.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_8_11.c | 41 +++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 44 insertions(+) create mode 100644 src/soc/ibm/power9/istep_8_11.c diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index 4063e3b5fb8..92b2e252f95 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -11,6 +11,7 @@ void istep_8_3(uint8_t chips); void istep_8_4(uint8_t chips); void istep_8_9(uint8_t chips); void istep_8_10(uint8_t chips); +void istep_8_11(uint8_t chips); /* These functions access SCOM of the second CPU using SBE IO, thus they can be * used only in isteps that come after 8.4 */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 2f50e0b8b00..76f6fa9357a 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -15,6 +15,7 @@ romstage-y += istep_8_3.c romstage-y += istep_8_4.c romstage-y += istep_8_9.c romstage-y += istep_8_10.c +romstage-y += istep_8_11.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_8_11.c b/src/soc/ibm/power9/istep_8_11.c new file mode 100644 index 00000000000..dd6456b2f3b --- /dev/null +++ b/src/soc/ibm/power9/istep_8_11.c @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include + +#include "homer.h" + +static void xbus_enable_ridi(uint8_t chip) +{ + enum { + PERV_NET_CTRL0 = 0x060F0040, + PERV_NET_CTRL0_WOR = 0x060F0042, + }; + + /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ + if (get_scom(chip, PERV_NET_CTRL0) & PPC_BIT(0)) { + /* Enable Recievers, Drivers DI1 & DI2 */ + uint64_t val = 0; + val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 + val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 + val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 + put_scom(chip, PERV_NET_CTRL0_WOR, val); + } +} + +void istep_8_11(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 8.11\n"); + report_istep(8,11); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + xbus_enable_ridi(chip); + } + + printk(BIOS_EMERG, "ending istep 8.11\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 65e9398c19d..f02ce0f33c7 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -369,6 +369,7 @@ void main(void) istep_8_4(chips); istep_8_9(chips); istep_8_10(chips); + istep_8_11(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From 
847c578451ba4790f3a30c1d0566063cd1098100 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 8 Jan 2022 18:21:32 +0200 Subject: [PATCH 118/213] soc/power9/xbus.c: extract XBus helpers to a unit Same kind of SCOM accesses are used since SBE of the second CPU has started and until it can be accessed via XSCOM. There are also a couple of XBus-related constants. Change-Id: Ieadbd29c15a435a59d24fd490c93cda6c1945bf7 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_8.h | 5 --- src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_8_10.c | 24 ++--------- src/soc/ibm/power9/istep_8_11.c | 1 + src/soc/ibm/power9/istep_8_9.c | 49 +-------------------- src/soc/ibm/power9/xbus.c | 55 ++++++++++++++++++++++++ src/soc/ibm/power9/xbus.h | 75 +++++++++++++++++++++++++++++++++ 7 files changed, 137 insertions(+), 73 deletions(-) create mode 100644 src/soc/ibm/power9/xbus.c create mode 100644 src/soc/ibm/power9/xbus.h diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h index 92b2e252f95..4eaef81a9f8 100644 --- a/src/include/cpu/power/istep_8.h +++ b/src/include/cpu/power/istep_8.h @@ -13,9 +13,4 @@ void istep_8_9(uint8_t chips); void istep_8_10(uint8_t chips); void istep_8_11(uint8_t chips); -/* These functions access SCOM of the second CPU using SBE IO, thus they can be - * used only in isteps that come after 8.4 */ -void put_scom(uint8_t chip, uint64_t addr, uint64_t data); -uint64_t get_scom(uint8_t chip, uint64_t addr); - #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 76f6fa9357a..3b8c52113df 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -38,6 +38,7 @@ romstage-y += mcbist.c romstage-y += timer.c romstage-y += fsi.c romstage-y += sbeio.c +romstage-y += xbus.c ramstage-y += chip.c ramstage-y += homer.c ramstage-y += rom_media.c diff --git a/src/soc/ibm/power9/istep_8_10.c b/src/soc/ibm/power9/istep_8_10.c index c32e5e7cb47..263e7a00d7c 100644 --- a/src/soc/ibm/power9/istep_8_10.c +++ b/src/soc/ibm/power9/istep_8_10.c @@ -6,23 +6,7 @@ #include #include -static inline void and_scom(uint8_t chip, uint64_t addr, uint64_t mask) -{ - put_scom(chip, addr, get_scom(chip, addr) & mask); -} - -static inline void or_scom(uint8_t chip, uint64_t addr, uint64_t mask) -{ - put_scom(chip, addr, get_scom(chip, addr) | mask); -} - -static inline void and_or_scom(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) -{ - uint64_t data = get_scom(chip, addr); - data &= and; - data |= or; - put_scom(chip, addr, data); -} +#include "xbus.h" static void xbus_scom(uint8_t chip, uint8_t group) { @@ -37,7 +21,7 @@ static void xbus_scom(uint8_t chip, uint8_t group) * use the offset. Some other writes are group-specific and don't need * it either. 
*/ - const uint64_t offset = group * 0x2000000000; + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; int i; @@ -332,7 +316,7 @@ static void set_msb_swap(uint8_t chip, int group) EDIP_TX_MSBSWAP = 58, }; - const uint64_t offset = group * 0x2000000000; + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; /* ATTR_EI_BUS_TX_MSBSWAP seems to be 0x80 which is GROUP_0_SWAP */ if (group == 0) @@ -357,7 +341,7 @@ static void xbus_scominit(int group) EDIP_TX_IORESET = 0x800C9C0006010C3F, }; - const uint64_t offset = group * 0x2000000000; + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; /* Assert IO reset to power-up bus endpoint logic */ or_scom(0, EDIP_RX_IORESET + offset, PPC_BIT(52)); diff --git a/src/soc/ibm/power9/istep_8_11.c b/src/soc/ibm/power9/istep_8_11.c index dd6456b2f3b..d7c5081bc71 100644 --- a/src/soc/ibm/power9/istep_8_11.c +++ b/src/soc/ibm/power9/istep_8_11.c @@ -6,6 +6,7 @@ #include #include "homer.h" +#include "xbus.h" static void xbus_enable_ridi(uint8_t chip) { diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c index bd8c8896c31..d919e345f10 100644 --- a/src/soc/ibm/power9/istep_8_9.c +++ b/src/soc/ibm/power9/istep_8_9.c @@ -9,7 +9,7 @@ #include #include "homer.h" -#include "sbeio.h" +#include "xbus.h" /* * This code omits initialization of OBus which isn't present. It also assumes @@ -21,53 +21,6 @@ * operates correctly by comparing against Hostboot logs. */ -/* Updates address that targets XBus chiplet to use specified XBus link number. - * Does nothing to non-XBus addresses. */ -static uint64_t xbus_addr(uint8_t xbus, uint64_t addr) -{ - enum { - XBUS_COUNT = 0x3, // number of XBus links - XB_IOX_0_RING_ID = 0x3, // IOX_0 - XB_PBIOX_0_RING_ID = 0x6, // PBIOX_0 - }; - - uint8_t ring = (addr >> 10) & 0xF; - uint8_t chiplet = (addr >> 24) & 0x3F; - - if (chiplet != XB_CHIPLET_ID) - return addr; - - if (ring >= XB_IOX_0_RING_ID && ring < XB_IOX_0_RING_ID + XBUS_COUNT) - ring = XB_IOX_0_RING_ID + xbus; - else if (ring >= XB_PBIOX_0_RING_ID && ring < XB_PBIOX_0_RING_ID + XBUS_COUNT) - ring = XB_PBIOX_0_RING_ID + xbus; - - addr &= ~PPC_BITMASK(50, 53); - addr |= PPC_PLACE(ring, 50, 4); - - return addr; -} - -void put_scom(uint8_t chip, uint64_t addr, uint64_t data) -{ - addr = xbus_addr(/*xbus=*/1, addr); - - if (chip == 0) - write_scom(addr, data); - else - write_sbe_scom(chip, addr, data); -} - -uint64_t get_scom(uint8_t chip, uint64_t addr) -{ - addr = xbus_addr(/*xbus=*/1, addr); - - if (chip == 0) - return read_scom(addr); - else - return read_sbe_scom(chip, addr); -} - static void p9_fbc_no_hp_scom(bool is_xbus_active, uint8_t chip) { enum { diff --git a/src/soc/ibm/power9/xbus.c b/src/soc/ibm/power9/xbus.c new file mode 100644 index 00000000000..22aa182a3aa --- /dev/null +++ b/src/soc/ibm/power9/xbus.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "xbus.h" + +#include +#include + +#include "sbeio.h" + +/* Updates address that targets XBus chiplet to use specified XBus link number. + * Does nothing to non-XBus addresses. 
*/ +static uint64_t xbus_addr(uint8_t xbus, uint64_t addr) +{ + enum { + XBUS_COUNT = 0x3, // number of XBus links + XB_IOX_0_RING_ID = 0x3, // IOX_0 + XB_PBIOX_0_RING_ID = 0x6, // PBIOX_0 + }; + + uint8_t ring = (addr >> 10) & 0xF; + uint8_t chiplet = (addr >> 24) & 0x3F; + + if (chiplet != XB_CHIPLET_ID) + return addr; + + if (ring >= XB_IOX_0_RING_ID && ring < XB_IOX_0_RING_ID + XBUS_COUNT) + ring = XB_IOX_0_RING_ID + xbus; + else if (ring >= XB_PBIOX_0_RING_ID && ring < XB_PBIOX_0_RING_ID + XBUS_COUNT) + ring = XB_PBIOX_0_RING_ID + xbus; + + addr &= ~PPC_BITMASK(50, 53); + addr |= PPC_PLACE(ring, 50, 4); + + return addr; +} + +void put_scom(uint8_t chip, uint64_t addr, uint64_t data) +{ + addr = xbus_addr(/*xbus=*/1, addr); + + if (chip == 0) + write_scom(addr, data); + else + write_sbe_scom(chip, addr, data); +} + +uint64_t get_scom(uint8_t chip, uint64_t addr) +{ + addr = xbus_addr(/*xbus=*/1, addr); + + if (chip == 0) + return read_scom(addr); + else + return read_sbe_scom(chip, addr); +} diff --git a/src/soc/ibm/power9/xbus.h b/src/soc/ibm/power9/xbus.h new file mode 100644 index 00000000000..6455b95fcfa --- /dev/null +++ b/src/soc/ibm/power9/xbus.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_XBUS_H +#define __SOC_IBM_POWER9_XBUS_H + +#include + +/* + * Define DEBUG_XBUS before including this header to get debug prints from this + * unit + */ + +#define XBUS_LANE_COUNT 17 + +#define XBUS_LINK_GROUP_OFFSET 0x2000000000 + +/* + * The API below is meant to be used after SBE for the second CPU is up (so + * after istep 8.4), but prior to XSCOM working for it, which covers range of + * isteps that initialize XBus and SMP. + * + * The functions use XSCOM for the first CPU and SBE IO for the second one. When + * SCOM address targets XBus chiplet, ring part of the address is updated to + * XBus link #1 if necessary (addresses in code use link #0, which also matches + * Hostboot logs). + * + * No need to use this interface once Powerbus is activated (after istep 10.1) + * and XSCOM can access SCOMs on both CPUs. 
+ */ + +void put_scom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t get_scom(uint8_t chip, uint64_t addr); + +#ifdef DEBUG_XBUS +#include + +#define put_scom(c, x, y) \ +({ \ + uint8_t __cw = c; \ + uint64_t __xw = x; \ + uint64_t __yw = y; \ + printk(BIOS_EMERG, "PUTSCOM %d %016llX %016llX\n", __cw, __xw, __yw); \ + put_scom(__cw, __xw, __yw); \ +}) + +#define get_scom(c, x) \ +({ \ + uint8_t __cr = c; \ + uint64_t __xr = x; \ + uint64_t __yr = get_scom(__cr, __xr); \ + printk(BIOS_EMERG, "GETSCOM %d %016llX %016llX\n", __cr, __xr, __yr); \ + __yr; \ +}) + +#endif + +static inline void and_scom(uint8_t chip, uint64_t addr, uint64_t mask) +{ + put_scom(chip, addr, get_scom(chip, addr) & mask); +} + +static inline void or_scom(uint8_t chip, uint64_t addr, uint64_t mask) +{ + put_scom(chip, addr, get_scom(chip, addr) | mask); +} + +static inline void and_or_scom(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) +{ + uint64_t data = get_scom(chip, addr); + data &= and; + data |= or; + put_scom(chip, addr, data); +} + +#endif /* __SOC_IBM_POWER9_XBUS_H */ From 5f35121e4e53800081b578016f8b8bc82fb76e32 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 8 Jan 2022 18:27:32 +0200 Subject: [PATCH 119/213] soc/power9/istep_9_2.c: XBus calibration Change-Id: I373bd7b931be73a58634db3390d0739b1d881789 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_9.h | 10 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_9_2.c | 312 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 3 + 4 files changed, 326 insertions(+) create mode 100644 src/include/cpu/power/istep_9.h create mode 100644 src/soc/ibm/power9/istep_9_2.c diff --git a/src/include/cpu/power/istep_9.h b/src/include/cpu/power/istep_9.h new file mode 100644 index 00000000000..fe4773a7498 --- /dev/null +++ b/src/include/cpu/power/istep_9.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP9_H +#define CPU_PPC64_ISTEP9_H + +#include + +void istep_9_2(uint8_t chips); + +#endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 3b8c52113df..9e57b253d0b 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -16,6 +16,7 @@ romstage-y += istep_8_4.c romstage-y += istep_8_9.c romstage-y += istep_8_10.c romstage-y += istep_8_11.c +romstage-y += istep_9_2.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_9_2.c b/src/soc/ibm/power9/istep_9_2.c new file mode 100644 index 00000000000..e22a827d50e --- /dev/null +++ b/src/soc/ibm/power9/istep_9_2.c @@ -0,0 +1,312 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#include "xbus.h" + +struct edip_data_t { + uint32_t en_margin_pu; + uint32_t en_margin_pd; + uint32_t en_main; + uint32_t sel_pre; +}; + +static void compute_margin_data(uint32_t margin, struct edip_data_t *d) +{ + /* ATTR_IO_XBUS_TX_FFE_PRECURSOR = 6 (default, talos.xml) */ + const uint8_t ffe_pre_coef = 6; + + /* Need to convert the 8R value to a 4R equivalent */ + + const uint32_t val = margin >> 1; + const uint32_t en_pre = 18; + + uint32_t val_4r = val - en_pre; + + d->en_margin_pu = 32; + d->en_margin_pd = 32; + d->en_main = 0; + d->sel_pre = 0; + + if (val_4r < 64) { + if (val_4r % 4 != 0) { + d->en_main = 2; + val_4r -= d->en_main; + } + d->en_margin_pd = val_4r / 2; + d->en_margin_pu = val_4r - d->en_margin_pd; + } + + d->en_main += 
val_4r - d->en_margin_pu - d->en_margin_pd; + d->en_main = MIN(d->en_main, 50); + d->sel_pre = (val * ffe_pre_coef) / 128; + d->sel_pre = MIN(d->sel_pre, en_pre); +} + +/* Converts a 4R decimal value to a 1R thermometer code */ +static uint32_t convert_4r(uint32_t val_4r) +{ + /* + * 1. Add 2 for averaging since we will truncate the last 2 bits. + * 2. Divide by 4 to bring back to a 1r value. + * 3. Convert the decimal number to number of bits set by shifting a 0x1 + * over by the amount and subtracting 1. + */ + return (0x1 << ((val_4r + 2) / 4)) - 1; +} + +static uint32_t convert_4r_with_2r(uint32_t val_4r, uint8_t width) +{ + /* Add 1 for rounding, then shift the 4r bit off. We now have a 2r equivalent */ + const uint32_t val_2r = (val_4r + 1) >> 1; + + /* If the LSB of the 2r equivalent is on, then we need to set the 2r bit (MSB) */ + const uint32_t on_2r = val_2r & 0x1; + + /* Shift the 2r equivalent to a 1r value and convert to a thermometer code */ + const uint32_t val_1r = (1 << (val_2r >> 0x1)) - 1; + + /* Combine 1r equivalent thermometer code + the 2r MSB value */ + return (on_2r << (width - 1)) | val_1r; +} + +static void config_run_bus_group_mode(uint8_t chip, int group) +{ + enum { + P9A_XBUS_TX_IMPCAL_PVAL_PB = 0x800F140006010C3F, + P9A_XBUS_TX_IMPCAL_NVAL_PB = 0x800F0C0006010C3F, + }; + + /* ATTR_IO_XBUS_TX_MARGIN_RATIO = 0 (default) */ + const uint8_t margin_ratio = 0; + + const uint8_t PRE_WIDTH = 5; + /* 4R Total = (1R * 4) + (2R * 2); */ + const uint32_t PRE_4R_TOTAL = ( 4 * 4) + ( 1 * 2); + + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* Same registers are read for both groups */ + uint32_t pval = (get_scom(chip, P9A_XBUS_TX_IMPCAL_PVAL_PB) >> 7) & 0x1FF; + uint32_t nval = (get_scom(chip, P9A_XBUS_TX_IMPCAL_NVAL_PB) >> 7) & 0x1FF; + + uint32_t sel_margin_pu; + uint32_t sel_margin_pd; + + struct edip_data_t p; + struct edip_data_t n; + + uint64_t val; + + compute_margin_data(pval, &p); + compute_margin_data(nval, &n); + + sel_margin_pu = (pval * margin_ratio) / 256; + sel_margin_pu = MIN(sel_margin_pu, MIN(p.en_margin_pu, n.en_margin_pu)); + + sel_margin_pd = (nval * margin_ratio) / 256; + sel_margin_pd = MIN(sel_margin_pd, + MIN(p.en_margin_pd, MIN(n.en_margin_pd, sel_margin_pu))); + + val = get_scom(chip, 0x800D340006010C3F + offset); + + /* EDIP_TX_PSEG_PRE_EN (pre bank pseg enable) */ + PPC_INSERT(val, convert_4r_with_2r(PRE_4R_TOTAL, PRE_WIDTH), 51, 5); + /* EDIP_TX_PSEG_PRE_SEL (pre bank pseg mode selection) */ + PPC_INSERT(val, convert_4r_with_2r(p.sel_pre, PRE_WIDTH), 56, 5); + + put_scom(chip, 0x800D340006010C3F + offset, val); + val = get_scom(chip, 0x800D3C0006010C3F + offset); + + /* EDIP_TX_NSEG_PRE_EN (pre bank nseg enable) */ + PPC_INSERT(val, convert_4r_with_2r(PRE_4R_TOTAL, PRE_WIDTH), 51, 5); + /* EDIP_TX_NSEG_PRE_SEL (pre bank nseg mode selection) */ + PPC_INSERT(val, convert_4r_with_2r(n.sel_pre, PRE_WIDTH), 56, 5); + + put_scom(chip, 0x800D3C0006010C3F + offset, val); + val = get_scom(chip, 0x800D440006010C3F + offset); + + /* EDIP_TX_PSEG_MARGINPD_EN (margin pull-down bank pseg enable) */ + PPC_INSERT(val, convert_4r(p.en_margin_pd), 56, 8); + /* EDIP_TX_PSEG_MARGINPU_EN (margin pull-up bank pseg enable) */ + PPC_INSERT(val, convert_4r(p.en_margin_pu), 48, 8); + + put_scom(chip, 0x800D440006010C3F + offset, val); + val = get_scom(chip, 0x800D4C0006010C3F + offset); + + /* EDIP_TX_NSEG_MARGINPD_EN (margin pull-down bank nseg enable) */ + PPC_INSERT(val, convert_4r(n.en_margin_pd), 56, 8); + /* EDIP_TX_NSEG_MARGINPU_EN (margin 
pull-up bank nseg enable) */ + PPC_INSERT(val, convert_4r(n.en_margin_pu), 48, 8); + + put_scom(chip, 0x800D4C0006010C3F + offset, val); + val = get_scom(chip, 0x800D540006010C3F + offset); + + /* EDIP_TX_MARGINPD_SEL (margin pull-down bank mode selection) */ + PPC_INSERT(val, convert_4r(sel_margin_pd), 56, 8); + /* EDIP_TX_MARGINPU_SEL (margin pull-up bank mode selection) */ + PPC_INSERT(val, convert_4r(sel_margin_pu), 48, 8); + + put_scom(chip, 0x800D540006010C3F + offset, val); + + /* EDIP_TX_PSEG_MAIN_EN (main bank pseg enable) */ + val = get_scom(chip, 0x800D5C0006010C3F + offset); + PPC_INSERT(val, convert_4r_with_2r(p.en_main, 13), 51, 13); + put_scom(chip, 0x800D5C0006010C3F + offset, val); + + /* EDIP_TX_NSEG_MAIN_EN (main bank nseg enable) */ + val = get_scom(chip, 0x800D640006010C3F + offset); + PPC_INSERT(val, convert_4r_with_2r(n.en_main, 13), 51, 13); + put_scom(chip, 0x800D640006010C3F + offset, val); +} + +static void config_run_bus_mode(uint8_t chip) +{ + enum { + P9A_XBUS_TX_IMPCAL_PB = 0x800F040006010C3F, + EDIP_TX_ZCAL_DONE = 50, + EDIP_TX_ZCAL_ERROR = 51, + }; + + long time; + + /* Set EDIP_TX_ZCAL_REQ to start Tx Impedance Calibration */ + or_scom(chip, P9A_XBUS_TX_IMPCAL_PB, PPC_BIT(49)); + mdelay(20); + + time = wait_us(200 * 10, get_scom(chip, P9A_XBUS_TX_IMPCAL_PB) & + (PPC_BIT(EDIP_TX_ZCAL_DONE) | PPC_BIT(EDIP_TX_ZCAL_ERROR))); + if (!time) + die("Timed out waiting for I/O EDI+ Xbus Tx Z Calibration\n"); + + if (get_scom(chip, P9A_XBUS_TX_IMPCAL_PB) & PPC_BIT(EDIP_TX_ZCAL_ERROR)) + die("I/O EDI+ Xbus Tx Z Calibration failed\n"); + + config_run_bus_group_mode(chip, /*group=*/0); + config_run_bus_group_mode(chip, /*group=*/1); +} + +static void rx_dc_calibration_start(uint8_t chip, int group) +{ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* Must set lane invalid bit to 0 to run rx dccal, this enables us to + * run dccal on the specified lane. These bits are normally set by + * wiretest although we are not running that now. 
*/ + for (int i = 0; i < XBUS_LANE_COUNT; i++) { + uint64_t lane_offset = PPC_PLACE(i, 27, 5); + /* EDIP_RX_LANE_INVALID */ + and_scom(chip, 0x8002400006010C3F | offset | lane_offset, ~PPC_BIT(50)); + } + + /* Start Cleanup Pll */ + + /* + * EDIP_RX_CTL_CNTL4_E_PG + * 50 - EDIP_RX_WT_PLL_REFCLKSEL (0 - io clock, 1 - bist) + * 51 - EDIP_RX_PLL_REFCLKSEL_SCOM_EN (0 - pll controls selects refclk, + * 1 - and gcr register does it) + */ + or_scom(chip, 0x8009F80006010C3F + offset, PPC_BIT(50) | PPC_BIT(51)); + udelay(150); + + /* + * EDIP_RX_CTL_CNTL4_E_PG + * 48 - EDIP_RX_WT_CU_PLL_PGOOD (0 - places rx pll in reset, + * 1 - sets pgood on rx pll for locking) + */ + or_scom(chip, 0x8009F80006010C3F + offset, PPC_BIT(48)); + udelay(5); + + /* + * EDIP_RX_DC_CALIBRATE_DONE + * (when this bit is read as a 1, the dc calibration steps have been completed) + */ + and_scom(chip, 0x800A380006010C3F + offset, ~PPC_BIT(53)); + + /* + * EDIP_RX_START_DC_CALIBRATE + * (when this register is written to a 1 the training state machine will run the dc + * calibrate substeps defined in eye optimizations) + */ + or_scom(chip, 0x8009F00006010C3F + offset, PPC_BIT(53)); +} + +static void rx_dc_calibration_poll(uint8_t chip, int group) +{ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + long time; + + /* + * EDIP_RX_DC_CALIBRATE_DONE + * (when this bit is read as a 1, the dc calibration steps have been completed) + */ + time = wait_ms(200 * 10, get_scom(chip, 0x800A380006010C3F + offset) & PPC_BIT(53)); + if (!time) + die("Timed out waiting for Rx Dc Calibration\n"); + + /* + * EDIP_RX_START_DC_CALIBRATE + * (when this register is written to a 1 the training state machine will run the dc + * calibrate substeps defined in eye optimizations) + */ + and_scom(chip, 0x8009F00006010C3F + offset, ~PPC_BIT(53)); + + /* + * EDIP_RX_CTL_CNTL4_E_PG + * 48 - EDIP_RX_WT_CU_PLL_PGOOD (0 - places rx pll in reset, + * 1 - sets pgood on rx pll for locking) + * 50 - EDIP_RX_WT_PLL_REFCLKSEL (0 - io clock, 1 - bist) + * 51 - EDIP_RX_PLL_REFCLKSEL_SCOM_EN (0 - pll controls selects refclk, + * 1 - and gcr register does it) + */ + and_scom(chip, 0x8009F80006010C3F + offset, ~(PPC_BIT(48) | PPC_BIT(50) | PPC_BIT(51))); + udelay(111); + + /* Restore the invalid bits, Wiretest will modify these as training is run */ + for (int i = 0; i < XBUS_LANE_COUNT; i++) { + uint64_t lane_offset = PPC_PLACE(i, 27, 5); + /* EDIP_RX_LANE_INVALID */ + or_scom(chip, 0x8002400006010C3F | offset | lane_offset, PPC_BIT(50)); + } +} + +static void config_bus_mode(void) +{ + /* Initiate Dc calibration in parallel */ + rx_dc_calibration_start(/*chip=*/0, /*group=*/0); + rx_dc_calibration_start(/*chip=*/1, /*group=*/0); + rx_dc_calibration_start(/*chip=*/0, /*group=*/1); + rx_dc_calibration_start(/*chip=*/1, /*group=*/1); + + /* HB does this delay inside rx_dc_calibration_poll(), but doing it + * once instead of four times should be enough */ + mdelay(100); + + /* Then wait for each combination of chip and group */ + rx_dc_calibration_poll(/*chip=*/0, /*group=*/0); + rx_dc_calibration_poll(/*chip=*/1, /*group=*/0); + rx_dc_calibration_poll(/*chip=*/0, /*group=*/1); + rx_dc_calibration_poll(/*chip=*/1, /*group=*/1); +} + +void istep_9_2(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 9.2\n"); + report_istep(9,2); + + if (chips != 0x01) { + config_run_bus_mode(/*chip=*/0); + config_run_bus_mode(/*chip=*/1); + + config_bus_mode(); + } + + printk(BIOS_EMERG, "ending istep 9.2\n"); +} diff --git a/src/soc/ibm/power9/romstage.c 
b/src/soc/ibm/power9/romstage.c index f02ce0f33c7..8e08652f22f 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -371,6 +372,8 @@ void main(void) istep_8_10(chips); istep_8_11(chips); + istep_9_2(chips); + istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); istep_10_13(); From 7f79db50cca54ca05b6eef0de23792186df75efd Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 8 Jan 2022 18:31:30 +0200 Subject: [PATCH 120/213] soc/power9/istep_9_4.c: XBus link training Change-Id: I0f72b3d356ccef6d5a8c3cd8332e2d2c77126586 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_9.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_9_4.c | 114 ++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 117 insertions(+) create mode 100644 src/soc/ibm/power9/istep_9_4.c diff --git a/src/include/cpu/power/istep_9.h b/src/include/cpu/power/istep_9.h index fe4773a7498..39b94e80624 100644 --- a/src/include/cpu/power/istep_9.h +++ b/src/include/cpu/power/istep_9.h @@ -6,5 +6,6 @@ #include void istep_9_2(uint8_t chips); +void istep_9_4(uint8_t chips); #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 9e57b253d0b..bdabbdf823f 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -17,6 +17,7 @@ romstage-y += istep_8_9.c romstage-y += istep_8_10.c romstage-y += istep_8_11.c romstage-y += istep_9_2.c +romstage-y += istep_9_4.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_9_4.c b/src/soc/ibm/power9/istep_9_4.c new file mode 100644 index 00000000000..23e9209f31b --- /dev/null +++ b/src/soc/ibm/power9/istep_9_4.c @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#include "xbus.h" + +static void tx_serializer_sync_power_on(uint8_t master_chip, uint8_t slave_chip, int group) +{ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* + * EDIP_TX_CLK_UNLOAD_CLK_DISABLE + * (set to 0 to clock off sync logic on the clock slice and save power; + * it should not be necessary to use the sync logic on the clock slice + * since it has no fifo but control is available just in case) + */ + and_scom(master_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(50)); + and_scom(slave_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(50)); + + /* + * EDIP_TX_CLK_RUN_COUNT + * (set to 1 to enable the tx clock slice serializer; this should be + * enabled at all times but control is available just in case) + */ + and_scom(master_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(51)); + and_scom(slave_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(51)); + + /* EDIP_TX_CLK_RUN_COUNT (see above) */ + or_scom(master_chip, 0x800C1C0006010C3F + offset, PPC_BIT(51)); + or_scom(slave_chip, 0x800C1C0006010C3F + offset, PPC_BIT(51)); + + /* EDIP_TX_CLK_UNLOAD_CLK_DISABLE (see above) */ + or_scom(master_chip, 0x800C1C0006010C3F + offset, PPC_BIT(50)); + or_scom(slave_chip, 0x800C1C0006010C3F + offset, PPC_BIT(50)); + + for (int i = 0; i < XBUS_LANE_COUNT; ++i) { + uint64_t lane_offset = PPC_PLACE(i, 27, 5); + /* + * EDIP_TX_UNLOAD_CLK_DISABLE + * (set to 0 to enable sync of tx custom serializer via tx_fifo_init register, + * set to 1 to clock off sync logic and save power) + */ + and_scom(master_chip, 0x80040C0006010C3F | offset | lane_offset, 
~PPC_BIT(56)); + and_scom(slave_chip, 0x80040C0006010C3F | offset | lane_offset, ~PPC_BIT(56)); + } +} + +static void xbus_linktrain(uint8_t master_chip, uint8_t slave_chip, int group) +{ + enum { + /* I/O EDI+ Training Substeps */ + NONE = 0x00000000, + WIRETEST = 0x00000001, + DESKEW = 0x00000002, + EYEOPT = 0x00000004, + REPAIR = 0x00000008, + FUNCTIONAL = 0x00000010, + WDERF = 0x0000001F, // all of the above + }; + + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + uint64_t tmp; + + /* Hostboot collects bad lane information here, we don't */ + + /* + * Clock Serializer Init + * Isn't strictly necessary but does line up the clock serializer + * counter with the data slices. + */ + tx_serializer_sync_power_on(master_chip, slave_chip, group); + + /* Start Slave/Master Target Link Training */ + + /* + * EDIP_RX_START_WDERF_ALIAS (alias for rx_start_* bits) + * Slave training must start first. + */ + and_or_scom(slave_chip, 0x8009F00006010C3F + offset, + ~PPC_BITMASK(48, 52), PPC_PLACE(WDERF, 48, 5)); + and_or_scom(master_chip, 0x8009F00006010C3F + offset, + ~PPC_BITMASK(48, 52), PPC_PLACE(WDERF, 48, 5)); + + /* + * 48-52 EDIP_RX_WDERF_DONE_ALIAS (alias for rx_*_done bits) + * 56-60 EDIP_RX_WDERF_FAILED_ALIAS (alias for rx_*_failed bits) + */ + wait_ms(100 * 1, + (tmp = get_scom(master_chip, 0x800A380006010C3F + offset), + (((tmp >> 11) & 0x1F) == WDERF || ((tmp >> 3) & 0x1F) != 0))); + if (((tmp >> 3) & 0x1F) != 0) + die("I/O EDI+ Xbus link training failed.\n"); + if (((tmp >> 11) & 0x1F) != WDERF) + die("I/O EDI+ Xbus link training timeout.\n"); + +} + +void istep_9_4(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 9.4\n"); + report_istep(9,4); + + if (chips != 0x01) { + xbus_linktrain(/*master_chip=*/0, /*slave_chip=*/1, /*group=*/0); + xbus_linktrain(/*master_chip=*/0, /*slave_chip=*/1, /*group=*/1); + } + + printk(BIOS_EMERG, "ending istep 9.4\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 8e08652f22f..1668d083c2b 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -373,6 +373,7 @@ void main(void) istep_8_11(chips); istep_9_2(chips); + istep_9_4(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From 4c2f2f8aaeec169a9ad067562d28c4bd4b201537 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 8 Jan 2022 18:34:29 +0200 Subject: [PATCH 121/213] soc/power9/istep_9_6.c: SMP link layer Change-Id: Ic3e1eadf03f54cf6e8ed82641dfd2cd668b1237a Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_9.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_9_6.c | 38 +++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 41 insertions(+) create mode 100644 src/soc/ibm/power9/istep_9_6.c diff --git a/src/include/cpu/power/istep_9.h b/src/include/cpu/power/istep_9.h index 39b94e80624..dce51563c63 100644 --- a/src/include/cpu/power/istep_9.h +++ b/src/include/cpu/power/istep_9.h @@ -7,5 +7,6 @@ void istep_9_2(uint8_t chips); void istep_9_4(uint8_t chips); +void istep_9_6(uint8_t chips); #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index bdabbdf823f..7b2dd7dc482 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -18,6 +18,7 @@ romstage-y += istep_8_10.c romstage-y += istep_8_11.c romstage-y += istep_9_2.c romstage-y += istep_9_4.c +romstage-y += istep_9_6.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c 
diff --git a/src/soc/ibm/power9/istep_9_6.c b/src/soc/ibm/power9/istep_9_6.c new file mode 100644 index 00000000000..b21f0037749 --- /dev/null +++ b/src/soc/ibm/power9/istep_9_6.c @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include + +#include "xbus.h" + +static void smp_link_layer(uint8_t chip) +{ + enum { + /* dl_control_addr */ + XBUS_LL1_IOEL_CONTROL = 0x0000000006011C0B, + + XBUS_LL0_IOEL_CONTROL_LINK0_STARTUP = 1, + XBUS_LL0_IOEL_CONTROL_LINK1_STARTUP = 33, + }; + + /* Hostboot uses PUTSCOMMASK operation of SBE IO. Assuming that it's + * equivalent to a RMW sequence. */ + or_scom(chip, XBUS_LL1_IOEL_CONTROL, + PPC_BIT(XBUS_LL0_IOEL_CONTROL_LINK0_STARTUP) | + PPC_BIT(XBUS_LL0_IOEL_CONTROL_LINK1_STARTUP)); +} + +void istep_9_6(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 9.6\n"); + report_istep(9,6); + + if (chips != 0x01) { + smp_link_layer(/*chip=*/0); + smp_link_layer(/*chip=*/1); + } + + printk(BIOS_EMERG, "ending istep 9.6\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 1668d083c2b..70ec0a60f47 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -374,6 +374,7 @@ void main(void) istep_9_2(chips); istep_9_4(chips); + istep_9_6(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From f500f6cb827ab9e6abbeac900e7b1fea09b3bdac Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 8 Jan 2022 18:38:09 +0200 Subject: [PATCH 122/213] soc/power9/istep_9_7.c: XBus link training validation Change-Id: Idbb17e19ec3f36e7f7e64e7d90a27f8ea9e0fe95 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_9.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_9_7.c | 99 +++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 102 insertions(+) create mode 100644 src/soc/ibm/power9/istep_9_7.c diff --git a/src/include/cpu/power/istep_9.h b/src/include/cpu/power/istep_9.h index dce51563c63..ef5c3484072 100644 --- a/src/include/cpu/power/istep_9.h +++ b/src/include/cpu/power/istep_9.h @@ -8,5 +8,6 @@ void istep_9_2(uint8_t chips); void istep_9_4(uint8_t chips); void istep_9_6(uint8_t chips); +void istep_9_7(uint8_t chips); #endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 7b2dd7dc482..6dd2b2971fb 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -19,6 +19,7 @@ romstage-y += istep_8_11.c romstage-y += istep_9_2.c romstage-y += istep_9_4.c romstage-y += istep_9_6.c +romstage-y += istep_9_7.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_9_7.c b/src/soc/ibm/power9/istep_9_7.c new file mode 100644 index 00000000000..af068e7fc50 --- /dev/null +++ b/src/soc/ibm/power9/istep_9_7.c @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#include "xbus.h" + +static void p9_fab_iovalid_link_validate(uint8_t chip) +{ + enum { + XBUS_LL1_IOEL_FIR_REG = 0x06011C00, + DL_FIR_LINK0_TRAINED_BIT = 0, + DL_FIR_LINK1_TRAINED_BIT = 1, + }; + + int i; + + for (i = 0; i < 100; ++i) { + /* Only OBus seems to be retrained, so this XBus-only code is + * much simpler compared to corresponding code in Hostboot */ + + uint64_t dl_fir_reg = get_scom(chip, XBUS_LL1_IOEL_FIR_REG); + + bool dl_trained = (dl_fir_reg & PPC_BIT(DL_FIR_LINK0_TRAINED_BIT)) + && (dl_fir_reg & 
PPC_BIT(DL_FIR_LINK1_TRAINED_BIT)); + if (dl_trained) + break; + + mdelay(1); + } + + if (i == 100) + die("XBus link DL training failed\n"); +} + +static void p9_fab_iovalid(uint8_t chip) +{ + enum { + PERV_XB_CPLT_CONF1_OR = 0x06000019, + PERV_CPLT_CONF1_IOVALID_6D = 6, + + PU_PB_CENT_SM0_PB_CENT_FIR_REG = 0x05011C00, + PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 = 13, + + PU_PB_CENT_SM1_EXTFIR_ACTION0_REG = 0x05011C34, + PU_PB_CENT_SM1_EXTFIR_ACTION1_REG = 0x05011C35, + + PU_PB_CENT_SM1_EXTFIR_MASK_REG_AND = 0x05011C32, + }; + + uint64_t fbc_cent_fir_data; + + p9_fab_iovalid_link_validate(chip); + + /* Clear RAS FIR mask for link if not already set up by SBE */ + fbc_cent_fir_data = get_scom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG); + if (!(fbc_cent_fir_data & PPC_BIT(PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13))) { + and_scom(chip, PU_PB_CENT_SM1_EXTFIR_ACTION0_REG, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + and_scom(chip, PU_PB_CENT_SM1_EXTFIR_ACTION1_REG, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + put_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_AND, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + } + + /* + * Use AND/OR mask registers to atomically update link specific fields + * in iovalid control register. + */ + put_scom(chip, PERV_XB_CPLT_CONF1_OR, + PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D) | + PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D + 1)); +} + +void istep_9_7(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 9.7\n"); + report_istep(9,7); + + if (chips != 0x01) { + /* + * Add delay for DD1.1+ procedure to compensate for lack of lane + * lock polls. + * + * HB does this inside p9_fab_iovalid(), which doubles the + * delay, which is probably unnecessary. + */ + mdelay(100); + + p9_fab_iovalid(/*chip=*/0); + p9_fab_iovalid(/*chip=*/1); + } + + printk(BIOS_EMERG, "ending istep 9.7\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 70ec0a60f47..02bffd82e67 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -375,6 +375,7 @@ void main(void) istep_9_2(chips); istep_9_4(chips); istep_9_6(chips); + istep_9_7(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); From 00f8b62955b2973af559ab65292225b247fefa87 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 00:16:26 +0200 Subject: [PATCH 123/213] src/cpu/power9: SCOM access to other CPUs This generalizes implementation to accept CPU/chip number, implements "rscom" API for accesses that specify chip and reimplements "scom" functions using "rscom" variants with hard-coded chip number 0. 
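
For illustration only (REG below is a placeholder, not a real SCOM register):
existing chip-0 callers keep working unchanged, while new code can address the
second CPU explicitly:

    uint64_t val = read_rscom(/*chip=*/1, REG); /* routed via chip 1 MMIO group */
    write_rscom(/*chip=*/1, REG, val | PPC_BIT(0));
    write_scom(REG, 0);                         /* same as write_rscom(0, REG, 0) */
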
Change-Id: I6f877dda0565f50ae9e55d1f11368b0f16c348d6 Signed-off-by: Sergii Dmytruk --- src/arch/ppc64/include/arch/io.h | 1 + src/cpu/power9/scom.c | 32 ++++---- src/include/cpu/power/scom.h | 129 +++++++++++++++++++++++-------- 3 files changed, 114 insertions(+), 48 deletions(-) diff --git a/src/arch/ppc64/include/arch/io.h b/src/arch/ppc64/include/arch/io.h index bc122f12018..69efc4c1ada 100644 --- a/src/arch/ppc64/include/arch/io.h +++ b/src/arch/ppc64/include/arch/io.h @@ -14,6 +14,7 @@ #define LPC_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_IO_SPACE) #define FLASH_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + FLASH_IO_SPACE) #define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000 +#define MMIO_GROUP_SIZE 0x0000200000000000 /* Enforce In-order Execution of I/O */ static inline void eieio(void) diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c index 3e973e52e3b..4ec762cf5cd 100644 --- a/src/cpu/power9/scom.c +++ b/src/cpu/power9/scom.c @@ -18,7 +18,7 @@ #define XSCOM_LOG_REG 0x00090012 #define XSCOM_ERR_REG 0x00090013 -static void reset_scom_engine(void) +static void reset_scom_engine(uint8_t chip) { /* * With cross-CPU SCOM accesses, first register should be cleared on the @@ -26,14 +26,14 @@ static void reset_scom_engine(void) * necessary to do the remote writes in assembly directly to skip checking * HMER and possibly end in a loop. */ - write_scom_direct(XSCOM_RCVED_STAT_REG, 0); - write_scom_direct(XSCOM_LOG_REG, 0); - write_scom_direct(XSCOM_ERR_REG, 0); + write_scom_direct(0, XSCOM_RCVED_STAT_REG, 0); + write_scom_direct(chip, XSCOM_LOG_REG, 0); + write_scom_direct(chip, XSCOM_ERR_REG, 0); clear_hmer(); eieio(); } -uint64_t read_scom_direct(uint64_t reg_address) +uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address) { uint64_t val; uint64_t hmer = 0; @@ -51,14 +51,14 @@ uint64_t read_scom_direct(uint64_t reg_address) asm volatile( "ldcix %0, %1, %2" : "=r"(val) : - "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR), + "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE), "r"(reg_address << 3)); eieio(); hmer = read_hmer(); } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); if (hmer & SPR_HMER_XSCOM_STATUS) { - reset_scom_engine(); + reset_scom_engine(chip); /* * All F's are returned in case of error, but code polls for a set bit * after changes that can make such error appear (e.g. clock settings). 
@@ -69,7 +69,7 @@ uint64_t read_scom_direct(uint64_t reg_address) return val; } -void write_scom_direct(uint64_t reg_address, uint64_t data) +void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) { uint64_t hmer = 0; do { @@ -81,17 +81,17 @@ void write_scom_direct(uint64_t reg_address, uint64_t data) asm volatile( "stdcix %0, %1, %2":: "r"(data), - "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR), + "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE ), "r"(reg_address << 3)); eieio(); hmer = read_hmer(); } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); if (hmer & SPR_HMER_XSCOM_STATUS) - reset_scom_engine(); + reset_scom_engine(chip); } -void write_scom_indirect(uint64_t reg_address, uint64_t value) +void write_scom_indirect(uint8_t chip, uint64_t reg_address, uint64_t value) { uint64_t addr; uint64_t data; @@ -99,10 +99,10 @@ void write_scom_indirect(uint64_t reg_address, uint64_t value) data = reg_address & XSCOM_ADDR_IND_ADDR; data |= value & XSCOM_ADDR_IND_DATA; - write_scom_direct(addr, data); + write_scom_direct(chip, addr, data); for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { - data = read_scom_direct(addr); + data = read_scom_direct(chip, addr); if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { return; } else if (data & XSCOM_DATA_IND_COMPLETE) { @@ -113,17 +113,17 @@ void write_scom_indirect(uint64_t reg_address, uint64_t value) } } -uint64_t read_scom_indirect(uint64_t reg_address) +uint64_t read_scom_indirect(uint8_t chip, uint64_t reg_address) { uint64_t addr; uint64_t data; addr = reg_address & 0x7FFFFFFF; data = XSCOM_DATA_IND_READ | (reg_address & XSCOM_ADDR_IND_ADDR); - write_scom_direct(addr, data); + write_scom_direct(chip, addr, data); for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { - data = read_scom_direct(addr); + data = read_scom_direct(chip, addr); if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { break; } else if (data & XSCOM_DATA_IND_COMPLETE) { diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index b55a03c2368..802568dee5b 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -91,53 +91,123 @@ static const chiplet_id_t mcs_to_nest[] = [MC23_CHIPLET_ID] = N1_CHIPLET_ID, }; -uint64_t read_scom_direct(uint64_t reg_address); -void write_scom_direct(uint64_t reg_address, uint64_t data); +/* These are implementation functions that do all the work. The interface + * functions in sections below are calling these. 
*/ -uint64_t read_scom_indirect(uint64_t reg_address); -void write_scom_indirect(uint64_t reg_address, uint64_t data); +uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address); +void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data); -static inline void write_scom(uint64_t addr, uint64_t data) +uint64_t read_scom_indirect(uint8_t chip, uint64_t reg_address); +void write_scom_indirect(uint8_t chip, uint64_t reg_address, uint64_t data); + +/* "rscom" are generic ("r" is for remote) XSCOM functions, other functions are + * equivalent to rscom calls for chip #0 */ + +static inline void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) { if (addr & XSCOM_ADDR_IND_FLAG) - write_scom_indirect(addr, data); + write_scom_indirect(chip, addr, data); else - write_scom_direct(addr, data); + write_scom_direct(chip, addr, data); } -static inline uint64_t read_scom(uint64_t addr) +static inline uint64_t read_rscom(uint8_t chip, uint64_t addr) { if (addr & XSCOM_ADDR_IND_FLAG) - return read_scom_indirect(addr); + return read_scom_indirect(chip, addr); else - return read_scom_direct(addr); + return read_scom_direct(chip, addr); +} + +static inline void rscom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) +{ + uint64_t data = read_rscom(chip, addr); + write_rscom(chip, addr, (data & and) | or); +} + +static inline void rscom_and(uint8_t chip, int64_t addr, uint64_t and) +{ + rscom_and_or(chip, addr, and, 0); +} + +static inline void rscom_or(uint8_t chip, uint64_t addr, uint64_t or) +{ + rscom_and_or(chip, addr, ~0, or); +} + +static inline void write_rscom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t data) +{ + addr &= ~PPC_BITMASK(34,39); + addr |= ((chiplet & 0x3F) << 24); + write_rscom(chip, addr, data); +} + +static inline uint64_t read_rscom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr) +{ + addr &= ~PPC_BITMASK(34,39); + addr |= ((chiplet & 0x3F) << 24); + return read_rscom(chip, addr); +} + +static inline void rscom_and_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t and, uint64_t or) +{ + uint64_t data = read_rscom_for_chiplet(chip, chiplet, addr); + write_rscom_for_chiplet(chip, chiplet, addr, (data & and) | or); +} + +static inline void rscom_and_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr, uint64_t and) +{ + rscom_and_or_for_chiplet(chip, chiplet, addr, and, 0); +} + +static inline void rscom_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr, uint64_t or) +{ + rscom_and_or_for_chiplet(chip, chiplet, addr, ~0, or); +} + +/* "scom" are functions with chip number being fixed at 0 */ + +static inline void write_scom(uint64_t addr, uint64_t data) +{ + return write_rscom(0, addr, data); +} + +static inline uint64_t read_scom(uint64_t addr) +{ + return read_rscom(0, addr); } #if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) #include -#define write_scom(x, y) \ -({ \ - uint64_t __xw = x; \ - uint64_t __yw = y; \ - printk(BIOS_SPEW, "SCOM W %16.16llX %16.16llX\n", __xw, __yw); \ - write_scom((__xw), (__yw)); \ +#define write_scom(x, y) write_rscom(0, x, y) +#define read_scom(x) read_rscom(0, x) + +#define write_rscom(c, x, y) \ +({ \ + uint8_t __cw = (c); \ + uint64_t __xw = (x); \ + uint64_t __yw = (y); \ + printk(BIOS_SPEW, "SCOM W P%d %016llX %016llX\n", __cw, __xw, __yw); \ + write_rscom(__cw, __xw, __yw); \ }) -#define read_scom(x) \ -({ \ - uint64_t __xr = x; \ - uint64_t __yr = read_scom(__xr); \ - printk(BIOS_SPEW, "SCOM R %16.16llX 
%16.16llX\n", __xr, __yr); \ - __yr; \ +#define read_rscom(c, x) \ +({ \ + uint8_t __cr = (c); \ + uint64_t __xr = (x); \ + uint64_t __yr = read_rscom(__cr, __xr); \ + printk(BIOS_SPEW, "SCOM R P%d %016llX %016llX\n", __cr, __xr, __yr); \ + __yr; \ }) #endif static inline void scom_and_or(uint64_t addr, uint64_t and, uint64_t or) { - uint64_t data = read_scom(addr); - write_scom(addr, (data & and) | or); + rscom_and_or(0, addr, and, or); } static inline void scom_and(uint64_t addr, uint64_t and) @@ -152,23 +222,18 @@ static inline void scom_or(uint64_t addr, uint64_t or) static inline void write_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t data) { - addr &= ~PPC_BITMASK(34, 39); - addr |= ((chiplet & 0x3F) << 24); - write_scom(addr, data); + write_rscom_for_chiplet(0, chiplet, addr, data); } static inline uint64_t read_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr) { - addr &= ~PPC_BITMASK(34, 39); - addr |= ((chiplet & 0x3F) << 24); - return read_scom(addr); + return read_rscom_for_chiplet(0, chiplet, addr); } static inline void scom_and_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t and, uint64_t or) { - uint64_t data = read_scom_for_chiplet(chiplet, addr); - write_scom_for_chiplet(chiplet, addr, (data & and) | or); + rscom_and_or_for_chiplet(0, chiplet, addr, and, or); } static inline void scom_and_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t and) From b4d0bdc98af0f3429ed20515c1418f57c77ce70e Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 00:19:15 +0200 Subject: [PATCH 124/213] soc/power9/fsi.c: generalize reset_pib2opb() This makes it work with arbitrary CPU and also exposes the function to other units. Change-Id: I2c98fc376ebf6761ace7e6874e6b1d50f06374b1 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/fsi.c | 8 ++++---- src/soc/ibm/power9/fsi.h | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c index 2d651f37105..89c50ba86c9 100644 --- a/src/soc/ibm/power9/fsi.c +++ b/src/soc/ibm/power9/fsi.c @@ -72,10 +72,10 @@ enum { OPB_STAT_ERR_MFSI), }; -static void reset_pib2opb(void) +void fsi_reset_pib2opb(uint8_t chip) { - write_scom(FSI2OPB_OFFSET_0 | OPB_REG_RES, 0x8000000000000000); - write_scom(FSI2OPB_OFFSET_0 | OPB_REG_STAT, 0x8000000000000000); + write_rscom(chip, FSI2OPB_OFFSET_0 | OPB_REG_RES, 0x8000000000000000); + write_rscom(chip, FSI2OPB_OFFSET_0 | OPB_REG_STAT, 0x8000000000000000); } static void cleanup_port_maeb_error(uint8_t port) @@ -169,7 +169,7 @@ static void basic_master_init(void) uint64_t tmp; /* Cleanup any initial error states */ - reset_pib2opb(); + fsi_reset_pib2opb(chip); /* Ensure we don't have any errors before we even start */ tmp = read_scom(FSI2OPB_OFFSET_0 | OPB_REG_STAT); diff --git a/src/soc/ibm/power9/fsi.h b/src/soc/ibm/power9/fsi.h index ae3f302e3dd..7c9cf26b0c2 100644 --- a/src/soc/ibm/power9/fsi.h +++ b/src/soc/ibm/power9/fsi.h @@ -16,6 +16,8 @@ uint8_t fsi_get_present_chips(void); void fsi_i2c_init(uint8_t chips); +void fsi_reset_pib2opb(uint8_t chip); + /* This isn't meant to be used directly, see below for interface of this unit */ uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_t size); From a00348b3a47192cdaabbc2df207c62230a211c07 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 00:23:49 +0200 Subject: [PATCH 125/213] soc/power9/istep_10_1.c: configure SMP and enable XSCOM Change-Id: I8b48503bc995b9d187f2e5edc011d1be6731af3d Signed-off-by: Sergii 
Dmytruk --- src/include/cpu/power/istep_10.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_10_1.c | 778 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 781 insertions(+) create mode 100644 src/soc/ibm/power9/istep_10_1.c diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h index c79d0edd0d2..cc52c506012 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -5,6 +5,7 @@ #include +void istep_10_1(uint8_t chips); void istep_10_10(uint8_t *phb_active_mask, uint8_t *iovalid_enable); void istep_10_12(void); void istep_10_13(void); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 6dd2b2971fb..2798396c68a 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -20,6 +20,7 @@ romstage-y += istep_9_2.c romstage-y += istep_9_4.c romstage-y += istep_9_6.c romstage-y += istep_9_7.c +romstage-y += istep_10_1.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c new file mode 100644 index 00000000000..6f9d73c047d --- /dev/null +++ b/src/soc/ibm/power9/istep_10_1.c @@ -0,0 +1,778 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#include "fsi.h" +#include "homer.h" +#include "xbus.h" + +enum build_smp_adu_action { + SWITCH_AB = 1, + SWITCH_CD = 2, + QUIESCE = 4, + RESET_SWITCH = 8 +}; + +enum adu_op { + PB_DIS_OPER, // pbop.disable_all + PMISC_OPER, // pmisc switch + PRE_SWITCH_CD, // do not issue PB command, pre-set for switch CD operation + PRE_SWITCH_AB, // do not issue PB command, pre-set for switch AB operation + POST_SWITCH // do not issue PB command, clear switch CD/AB flags +}; + +enum sbe_memory_access_flags { + SBE_MEM_ACCESS_FLAGS_TARGET_PROC = 0x00000001, + SBE_MEM_ACCESS_FLAGS_PB_DIS_MODE = 0x00000400, + SBE_MEM_ACCESS_FLAGS_SWITCH_MODE = 0x00000800, + SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_CD_MODE = 0x00002000, + SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_AB_MODE = 0x00004000, + SBE_MEM_ACCESS_FLAGS_POST_SWITCH_MODE = 0x00008000, +}; + +enum { + PU_ALTD_ADDR_REG = 0x00090000, + + PU_SND_MODE_REG = 0x00090021, + PU_SND_MODE_REG_PB_STOP = 22, + PU_SND_MODE_REG_ENABLE_PB_SWITCH_AB = 30, + PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD = 31, + + PU_ALTD_CMD_REG = 0x00090001, + PU_ALTD_CMD_REG_FBC_START_OP = 2, + PU_ALTD_CMD_REG_FBC_CLEAR_STATUS = 3, + PU_ALTD_CMD_REG_FBC_RESET_FSM = 4, + PU_ALTD_CMD_REG_FBC_AXTYPE = 6, + PU_ALTD_CMD_REG_FBC_LOCKED = 11, + PU_ALTD_CMD_REG_FBC_SCOPE = 16, + PU_ALTD_CMD_REG_FBC_SCOPE_LEN = 3, + PU_ALTD_CMD_REG_FBC_DROP_PRIORITY = 20, + PU_ALTD_CMD_REG_FBC_OVERWRITE_PBINIT = 22, + PU_ALTD_CMD_REG_FBC_WITH_TM_QUIESCE = 24, + PU_ALTD_CMD_REG_FBC_TTYPE = 25, + PU_ALTD_CMD_REG_FBC_TTYPE_LEN = 7, + PU_ALTD_CMD_REG_FBC_TSIZE = 32, + PU_ALTD_CMD_REG_FBC_TSIZE_LEN = 8, + + ALTD_CMD_TTYPE_PB_OPER = 0x3F, + ALTD_CMD_TTYPE_PMISC_OPER = 0x31, + ALTD_CMD_PMISC_TSIZE_1 = 2, // PMISC SWITCH + ALTD_CMD_SCOPE_SYSTEM = 5, + ALTD_CMD_PB_DIS_OPERATION_TSIZE = 8, + + PU_ALTD_STATUS_REG = 0x00090003, + PU_ALTD_STATUS_REG_FBC_ALTD_BUSY = 0, + PU_ALTD_STATUS_REG_FBC_WAIT_CMD_ARBIT = 1, + PU_ALTD_STATUS_REG_FBC_ADDR_DONE = 2, + PU_ALTD_STATUS_REG_FBC_DATA_DONE = 3, + PU_ALTD_STATUS_REG_FBC_WAIT_RESP = 4, + PU_ALTD_STATUS_REG_FBC_OVERRUN_ERROR = 5, + PU_ALTD_STATUS_REG_FBC_AUTOINC_ERROR = 6, + PU_ALTD_STATUS_REG_FBC_COMMAND_ERROR = 7, + PU_ALTD_STATUS_REG_FBC_ADDRESS_ERROR 
= 8, + PU_ALTD_STATUS_REG_FBC_PBINIT_MISSING = 18, + PU_ALTD_STATUS_REG_FBC_ECC_CE = 48, + PU_ALTD_STATUS_REG_FBC_ECC_UE = 49, + PU_ALTD_STATUS_REG_FBC_ECC_SUE = 50, + + PU_ALTD_OPTION_REG = 0x00090002, + PU_ALTD_OPTION_REG_FBC_WITH_PRE_QUIESCE = 23, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT = 28, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT_LEN = 20, + PU_ALTD_OPTION_REG_FBC_WITH_POST_INIT = 51, + PU_ALTD_OPTION_REG_FBC_ALTD_HW397129 = 52, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT = 54, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT_LEN = 10, + + PU_ALTD_DATA_REG = 0x00090004, + + PU_PB_CENT_SM0_PB_CENT_MODE = 0x05011C0A, + + P9_BUILD_SMP_NUM_SHADOWS = 3, + + PU_PB_WEST_SM0_PB_WEST_HP_MODE_CURR = 0x0501180C, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_CURR = 0x05011C0C, + PU_PB_EAST_HP_MODE_CURR = 0x0501200C, + + PU_PB_WEST_SM0_PB_WEST_HP_MODE_NEXT = 0x0501180B, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_NEXT = 0x05011C0B, + PU_PB_EAST_HP_MODE_NEXT = 0x0501200B, + + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_CURR = 0x05011810, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_CURR = 0x05011C10, + PU_PB_EAST_HPX_MODE_CURR = 0x05012010, + + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_NEXT = 0x0501180F, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_NEXT = 0x05011C0F, + PU_PB_EAST_HPX_MODE_NEXT = 0x0501200F, + + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_CURR = 0x0501180E, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_CURR = 0x05011C0E, + PU_PB_EAST_HPA_MODE_CURR = 0x0501200E, + + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_NEXT = 0x0501180D, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_NEXT = 0x05011C0D, + PU_PB_EAST_HPA_MODE_NEXT = 0x0501200D, +}; + +/* HP (HotPlug Mode Register) */ +static const uint64_t PB_HP_MODE_CURR_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HP_MODE_CURR, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_CURR, + PU_PB_EAST_HP_MODE_CURR +}; +static const uint64_t PB_HP_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HP_MODE_NEXT, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_NEXT, + PU_PB_EAST_HP_MODE_NEXT +}; + +/* HPX (Hotplug Mode Register Extension) */ +static const uint64_t PB_HPX_MODE_CURR_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_CURR, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_CURR, + PU_PB_EAST_HPX_MODE_CURR +}; +static const uint64_t PB_HPX_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_NEXT, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_NEXT, + PU_PB_EAST_HPX_MODE_NEXT +}; + +/* HPA */ +static const uint64_t PB_HPA_MODE_CURR_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_CURR, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_CURR, + PU_PB_EAST_HPA_MODE_CURR +}; +static const uint64_t PB_HPA_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_NEXT, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_NEXT, + PU_PB_EAST_HPA_MODE_NEXT +}; + +/* + * SCOM registers in this function are not documented. SCOM addresses that start with 0x9 + * are form 1 indirect addresses (bit 3 is set in this case) despite + * documentation ("1.2.2 PCB Address Space" section) not mentioning this form. + */ +static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const uint32_t pb_freq_mhz = pb_cfg->fabric_freq; + + /* Frequency of XBus for Nimbus DD2 */ + const uint32_t xbus_freq_mhz = 2000; + + uint64_t val; + uint64_t tmp; + + val = PPC_PLACE(is_xbus_active ? 
0x08 : 0x06, 54, 5) | PPC_PLACE(0x03, 59, 5); + put_scom(chip, 0x90000CB205012011, val); + + tmp = 0; + if (100 * xbus_freq_mhz >= 120 * pb_freq_mhz) + tmp = 0x09; + else if (100 * xbus_freq_mhz >= 100 * pb_freq_mhz) + tmp = 0x0A; + else if (105 * xbus_freq_mhz >= 100 * pb_freq_mhz) + tmp = 0x0B; + else if (125 * xbus_freq_mhz >= 100 * pb_freq_mhz) + tmp = 0x0C; + val = PPC_PLACE(tmp, 54, 5) | PPC_PLACE(3, 59, 5); + put_scom(chip, 0x90000CB305012011, val); + + val = PPC_PLACE(0x10, 51, 5) | PPC_PLACE(2, 58, 2) + | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); + put_scom(chip, 0x90000CDB05011C11, val); + + val = PPC_PLACE(7, 49, 3) | PPC_PLACE(4, 52, 6); + put_scom(chip, 0x90000CF405011C11, val); + + val = PPC_PLACE(0xC, 45, 4) | PPC_PLACE(1, 57, 2); + put_scom(chip, 0x90000D3F05011C11, val); + + val = PPC_PLACE(3, 41, 2) | PPC_PLACE(1, 43, 2) | PPC_PLACE(3, 45, 4) + | PPC_PLACE(0xC0, 49, 8); + put_scom(chip, 0x90000D7805011C11, val); + + val = PPC_PLACE(8, 38, 4) | PPC_PLACE(4, 42, 4) | PPC_PLACE(1, 57, 3) + | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); + put_scom(chip, 0x90000DAA05011C11, val); + + val = PPC_PLACE(4, 36, 3) | PPC_PLACE(0x20, 41, 8) | PPC_BIT(49) | PPC_BIT(51) + | PPC_BIT(52) | PPC_BIT(53) | PPC_BIT(55) | PPC_BIT(56) | PPC_BIT(57) + | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); + put_scom(chip, 0x90000DCC05011C11, val); + + val = PPC_PLACE(1, 41, 3) | PPC_PLACE(1, 44, 3) | PPC_PLACE(2, 47, 3) + | PPC_PLACE(3, 50, 3) | PPC_PLACE(5, 53, 3) | PPC_PLACE(5, 57, 3); + put_scom(chip, 0x90000E0605011C11, val); + + val = PPC_PLACE(0x06, 33, 5) | PPC_PLACE(0x0D, 38, 5) | PPC_PLACE(0x1E, 48, 5) + | PPC_PLACE(0x19, 53, 5) | PPC_BIT(63); + put_scom(chip, 0x90000E4305011C11, val); + + val = PPC_PLACE(0x400, 22, 12) | PPC_PLACE(0x400, 34, 12) + | PPC_PLACE(2, 46, 3) | PPC_PLACE(2, 49, 3) | PPC_PLACE(2, 52, 3) + | PPC_PLACE(2, 55, 3) | PPC_PLACE(2, 58, 3) | PPC_PLACE(2, 61, 3); + put_scom(chip, 0x90000EA205011C11, val); + + /* 44 - set because ATTR_CHIP_EC_FEATURE_HW409019 == 1 */ + val = PPC_PLACE(0x0C, 20, 8) | PPC_BIT(44); + put_scom(chip, 0x90000EC705011C11, val); + + val = PPC_PLACE(0x4, 18, 10) | PPC_PLACE(0x141, 28, 12) | PPC_PLACE(0x21B, 40, 12) + | PPC_PLACE(0x30D, 52, 12); + put_scom(chip, 0x90000EE105011C11, val); + + val = PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(2, 31, 3) + | PPC_PLACE(3, 34, 3) | PPC_PLACE(5, 37, 3) | PPC_PLACE(1, 49, 3) + | PPC_PLACE(1, 52, 3) | PPC_PLACE(2, 55, 3) | PPC_PLACE(3, 58, 3) + | PPC_PLACE(5, 61, 3); + put_scom(chip, 0x90000F0505011C11, val); + + val = PPC_PLACE(0x7, 14, 10) | PPC_PLACE(0x5, 24, 10) | PPC_PLACE(0x5, 34, 10) + | PPC_PLACE(0x4, 44, 10) | PPC_PLACE(0x5, 54, 10); + put_scom(chip, 0x90000F2005011C11, val); + + val = PPC_BIT(20) | PPC_PLACE(3, 32, 2) | PPC_PLACE(7, 34, 3) | PPC_PLACE(3, 37, 2) + | PPC_PLACE(1, 41, 1) | PPC_PLACE(1, 42, 1); + if (pb_cfg->core_ceiling_ratio != FABRIC_CORE_CEILING_RATIO_RATIO_8_8) + val |= PPC_PLACE(3, 24, 2) | PPC_PLACE(3, 44, 2); + tmp = (pb_cfg->core_ceiling_ratio == FABRIC_CORE_CEILING_RATIO_RATIO_8_8 ? 
3 : 2); + val |= PPC_PLACE(tmp, 28, 2); + put_scom(chip, 0x90000F4005011811, val); + put_scom(chip, 0x90000F4005012011, val); + + val = PPC_BIT(12) | PPC_PLACE(4, 13, 4) | PPC_PLACE(4, 17, 4) | PPC_PLACE(4, 21, 4) + | PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(1, 31, 3) + | PPC_PLACE(0xFE, 34, 8) | PPC_PLACE(0xFE, 42, 8) | PPC_PLACE(1, 50, 2) + | PPC_PLACE(2, 54, 3) | PPC_PLACE(2, 57, 2) | PPC_BIT(60) | PPC_BIT(61) + | PPC_BIT(63); + put_scom(chip, 0x90000F4D05011C11, val); + + val = PPC_BIT(35) | PPC_PLACE(1, 36, 2) | PPC_PLACE(2, 39, 2) | PPC_BIT(49) + | PPC_PLACE(1, 51, 2); + + if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_2_8) + tmp = 3; + else if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_4_8) + tmp = 2; + else + tmp = 1; + val |= PPC_PLACE(tmp, 41, 2); + + if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_2_8) + tmp = 0; + else if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_4_8) + tmp = 3; + else + tmp = 2; + val |= PPC_PLACE(tmp, 44, 2); + + put_scom(chip, 0x90000E6105011811, val); + put_scom(chip, 0x90000E6105012011, val); +} + +/* + * SCOM registers in this function are not documented. SCOM addresses that start with 0x9 + * are form 1 indirect addresses (bit 3 is set in this case) despite + * documentation ("1.2.2 PCB Address Space" section) not mentioning this form. + */ +static void p9_fbc_cd_hp23_scom(uint8_t chip, bool is_xbus_active, int seq) +{ + const uint64_t tmp = (seq == 2); + + uint64_t val; + + val = PPC_PLACE(8, 38, 4) | PPC_PLACE(4, 42, 4) | PPC_PLACE(tmp, 50, 1) + | PPC_PLACE(1, 57, 3) | PPC_PLACE((seq == 2 && is_xbus_active) ? 0xF : 0x8, 60, 4); + put_scom(chip, 0x90000DAA05011C11, val); + + val = PPC_BIT(12) | PPC_PLACE(4, 13, 4) | PPC_PLACE(4, 17, 4) | PPC_PLACE(4, 21, 4) + | PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(1, 31, 3) + | PPC_PLACE(0xFE, 34, 8) | PPC_PLACE(0xFE, 42, 8) | PPC_PLACE(1, 50, 2) + | PPC_PLACE(2, 54, 3) | PPC_PLACE(2, 57, 2) | PPC_PLACE(tmp, 59, 1) + | PPC_PLACE(tmp, 60, 1) | PPC_BIT(61) | PPC_BIT(63); + put_scom(chip, 0x90000F4D05011C11, val); +} + +/* Set action which will occur on fabric pmisc switch command */ +static void p9_adu_coherent_utils_set_switch_action(uint8_t chip, enum adu_op adu_op) +{ + uint64_t mask = PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_AB) + | PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD); + + uint64_t data = 0; + if (adu_op == PRE_SWITCH_AB) + data |= PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_AB); + if (adu_op == PRE_SWITCH_CD) + data |= PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD); + + and_or_scom(chip, PU_SND_MODE_REG, ~mask, data); +} + +static void p9_adu_coherent_utils_check_fbc_state(uint8_t chip) +{ + /* PU_PB_CENT_SM0_PB_CENT_MODE_PB_CENT_PBIXXX_INIT */ + if (!(get_scom(chip, PU_PB_CENT_SM0_PB_CENT_MODE) & PPC_BIT(0))) + die("FBC isn't initialized!\n"); + + if (get_scom(chip, PU_SND_MODE_REG) & PPC_BIT(PU_SND_MODE_REG_PB_STOP)) + die("FBC isn't running!\n"); +} + +static void lock_adu(uint8_t chip) +{ + uint64_t data = 0; + + /* Configuring lock manipulation control data buffer to perform lock acquisition */ + data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_LOCKED); + data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_RESET_FSM); + data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_CLEAR_STATUS); + + /* Write ADU command register to attempt lock manipulation */ + put_scom(chip, PU_ALTD_CMD_REG, data); +} + +/* Setup the value for ADU option register to enable quiesce & init around a + * switch operation */ +static void set_quiesce_init(uint8_t chip) +{ + enum { + 
QUIESCE_SWITCH_WAIT_COUNT = 128, + INIT_SWITCH_WAIT_COUNT = 128, + }; + + uint64_t data = 0; + + /* Setup quiesce */ + data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_WITH_PRE_QUIESCE); + PPC_INSERT(data, QUIESCE_SWITCH_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT_LEN); + + /* Setup post-command init */ + data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_WITH_POST_INIT); + PPC_INSERT(data, INIT_SWITCH_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT_LEN); + + /* Setup workaround for HW397129 to re-enable fastpath for DD2 */ + data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_ALTD_HW397129); + + put_scom(chip, PU_ALTD_OPTION_REG, data); +} + +static void p9_adu_coherent_setup_adu(uint8_t chip, enum adu_op adu_op) +{ + uint64_t cmd = 0x0; + uint32_t ttype = 0; + uint32_t tsize = 0; + + /* Write the address. Not sure if operations we support actually need + * this. */ + put_scom(chip, PU_ALTD_ADDR_REG, 0); + + /* This routine assumes the lock is held by the caller, preserve this + * locked state */ + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_LOCKED); + + if (adu_op == PB_DIS_OPER || adu_op == PMISC_OPER) { + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_START_OP); + + PPC_INSERT(cmd, ALTD_CMD_SCOPE_SYSTEM, + PU_ALTD_CMD_REG_FBC_SCOPE, PU_ALTD_CMD_REG_FBC_SCOPE_LEN); + + /* DROP_PRIORITY = HIGH */ + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_DROP_PRIORITY); + /* AXTYPE = Address only */ + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_AXTYPE); + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_WITH_TM_QUIESCE); + + if (adu_op == PB_DIS_OPER) { + ttype = ALTD_CMD_TTYPE_PB_OPER; + tsize = ALTD_CMD_PB_DIS_OPERATION_TSIZE; + } else { + ttype = ALTD_CMD_TTYPE_PMISC_OPER; + tsize = ALTD_CMD_PMISC_TSIZE_1; + + /* Set quiesce and init around a switch operation in option reg */ + set_quiesce_init(chip); + } + } + + PPC_INSERT(cmd, ttype, PU_ALTD_CMD_REG_FBC_TTYPE, PU_ALTD_CMD_REG_FBC_TTYPE_LEN); + PPC_INSERT(cmd, tsize, PU_ALTD_CMD_REG_FBC_TSIZE, PU_ALTD_CMD_REG_FBC_TSIZE_LEN); + + put_scom(chip, PU_ALTD_CMD_REG, cmd); +} + +static void p9_adu_setup(uint8_t chip, enum adu_op adu_op) +{ + /* Don't generate fabric command, just pre-condition ADU for upcoming switch */ + if (adu_op == PRE_SWITCH_AB || adu_op == PRE_SWITCH_CD || adu_op == POST_SWITCH) { + p9_adu_coherent_utils_set_switch_action(chip, adu_op); + return; + } + + /* Ensure fabric is running */ + p9_adu_coherent_utils_check_fbc_state(chip); + + /* + * Acquire ADU lock to guarantee exclusive use of the ADU resources. + * ADU state machine will be reset/cleared by this routine. 
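+	 * The lock bit set here is preserved by p9_adu_coherent_setup_adu() and
+	 * cleared again when p9_adu_access() writes PU_ALTD_CMD_REG back to 0 at
+	 * the end of the operation.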
+ */ + lock_adu(chip); + + /* Setup the ADU registers for operation */ + p9_adu_coherent_setup_adu(chip, adu_op); +} + +static void p9_adu_coherent_status_check(uint8_t chip, bool is_addr_only) +{ + int i; + uint64_t status; + + //Check for a successful status 10 times + for (i = 0; i < 10; i++) { + status = get_scom(chip, PU_ALTD_STATUS_REG); + + if (!(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ALTD_BUSY))) + break; + + /* Delay to allow the write/read/other command to finish */ + udelay(1); // actually need only 100ns, so delaying at the bottom + } + + if (!(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ADDR_DONE))) + die("The address portion of ADU operation is not complete!\n"); + if (!is_addr_only && !(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_DATA_DONE))) + die("The data portion of ADU operation is not complete!\n"); + + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_WAIT_CMD_ARBIT)) + die("ADU is still waiting for command arbitrage!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_WAIT_RESP)) + die("ADU is still waiting for a clean combined response!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_OVERRUN_ERROR)) + die("ADU data overrun!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_AUTOINC_ERROR)) + die("Internal ADU address counter rolled over the 0.5M boundary!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_COMMAND_ERROR)) + die("New ADU command was issued before previous one finished!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ADDRESS_ERROR)) + die("Invalid ADU Address!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_PBINIT_MISSING)) + die("Attempt to start an ADU command without pb_init active!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ECC_CE)) + die("ECC Correctable error from ADU!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ECC_UE)) + die("ECC Uncorrectable error from ADU!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ECC_SUE)) + die("ECC Special Uncorrectable error!\n"); + + if (i == 10) + die("ADU is busy for too long with status: 0x%016llx!\n", status); +} + +static void p9_adu_access(uint8_t chip, enum adu_op adu_op) +{ + const bool is_addr_only = (adu_op == PB_DIS_OPER || adu_op == PMISC_OPER); + + /* Don't generate fabric command */ + if (adu_op == PRE_SWITCH_AB || adu_op == PRE_SWITCH_CD || adu_op == POST_SWITCH) + return; + + if (is_addr_only) { + udelay(10); + } else { + put_scom(chip, PU_ALTD_DATA_REG, 0); + or_scom(chip, PU_ALTD_CMD_REG, PPC_BIT(PU_ALTD_CMD_REG_FBC_START_OP)); + + /* If it's not a cache inhibit operation, we just want to delay + * for a while and then it's done */ + udelay(10); + } + + /* We expect the busy bit to be cleared */ + p9_adu_coherent_status_check(chip, is_addr_only); + + /* If it's the last read/write cleanup the ADU */ + put_scom(chip, PU_ALTD_CMD_REG, 0); +} + +/* We don't write any specific data to ADU, just execute an action on it */ +static void p9_putmemproc(uint8_t chip, uint32_t mem_flags) +{ + enum adu_op adu_op; + + if (mem_flags & SBE_MEM_ACCESS_FLAGS_PB_DIS_MODE) + adu_op = PB_DIS_OPER; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_SWITCH_MODE) + adu_op = PMISC_OPER; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_CD_MODE) + adu_op = PRE_SWITCH_CD; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_AB_MODE) + adu_op = PRE_SWITCH_AB; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_POST_SWITCH_MODE) + adu_op = POST_SWITCH; + else + die("Invalid ADU putmem flags."); + + p9_adu_setup(chip, adu_op); + p9_adu_access(chip, adu_op); +} + +static void p9_build_smp_adu_set_switch_action(uint8_t 
chip, enum build_smp_adu_action action) +{ + uint32_t flags = SBE_MEM_ACCESS_FLAGS_TARGET_PROC; + + if (action == SWITCH_AB) + flags |= SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_AB_MODE; + else if (action == SWITCH_CD) + flags |= SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_CD_MODE; + else + flags |= SBE_MEM_ACCESS_FLAGS_POST_SWITCH_MODE; + + return p9_putmemproc(chip, flags); +} + +static void p9_build_smp_sequence_adu(uint8_t chips, enum build_smp_adu_action action) +{ + uint32_t flags = SBE_MEM_ACCESS_FLAGS_TARGET_PROC; + + switch (action) { + case SWITCH_AB: + case SWITCH_CD: + flags |= SBE_MEM_ACCESS_FLAGS_SWITCH_MODE; + break; + case QUIESCE: + flags |= SBE_MEM_ACCESS_FLAGS_PB_DIS_MODE; + break; + case RESET_SWITCH: + die("RESET_SWITCH is not a valid ADU action to request\n"); + } + + /* + * Condition for hotplug switch operation. All chips which were not + * quiesced prior to switch AB will need to observe the switch. + */ + if (action != QUIESCE) { + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_build_smp_adu_set_switch_action(chip, action); + } + } + + if (action == SWITCH_CD || action == SWITCH_AB) + p9_putmemproc(/*chip=*/0, flags); + if ((action == SWITCH_CD || action == QUIESCE) && (chips & 0x02)) + p9_putmemproc(/*chip=*/1, flags); + + if (action != QUIESCE) { + /* Operation complete, reset switch controls */ + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_build_smp_adu_set_switch_action(chip, RESET_SWITCH); + } + } +} + +static void p9_fbc_ab_hp_scom(uint8_t chip, bool is_xbus_active) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const uint32_t pb_freq_mhz = pb_cfg->fabric_freq; + + /* Frequency of XBus for Nimbus DD2 */ + const uint32_t xbus_freq_mhz = 2000; + + const bool hw407123 = (get_dd() <= 0x20); + + const bool is_fabric_master = (chip == 0); + const uint8_t attached_chip = (is_xbus_active && chip == 0 ? 
1 : 0); + + const uint64_t cmd_rate_4b_r = ((6 * pb_freq_mhz) % xbus_freq_mhz); + + const uint64_t cmd_rate_d = xbus_freq_mhz; + const uint64_t cmd_rate_4b_n = (6 * pb_freq_mhz); + + for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) { + uint64_t val; + uint64_t tmp; + + /* *_HP_MODE_NEXT */ + + val = get_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i]); + + if (!is_fabric_master) { + val &= ~PPC_BIT(0); // PB_COM_PB_CFG_MASTER_CHIP_NEXT_OFF + val &= ~PPC_BIT(1); // PB_COM_PB_CFG_TM_MASTER_NEXT_OFF + } + + val &= ~PPC_BIT(2); // PB_COM_PB_CFG_CHG_RATE_GP_MASTER_NEXT_OFF + + if (is_fabric_master) + val |= PPC_BIT(3); // PB_COM_PB_CFG_CHG_RATE_SP_MASTER_NEXT_ON + else + val &= ~PPC_BIT(3); // PB_COM_PB_CFG_CHG_RATE_SP_MASTER_NEXT_OFF + + val &= ~PPC_BIT(29); // PB_COM_PB_CFG_HOP_MODE_NEXT_OFF + + put_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i], val); + + /* *_HPX_MODE_NEXT */ + + val = get_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i]); + + PPC_INSERT(val, is_xbus_active, 1, 1); // PB_COM_PB_CFG_LINK_X1_EN_NEXT + PPC_INSERT(val, attached_chip, 19, 3); // PB_COM_PB_CFG_LINK_X1_CHIPID_NEXT_ID + + val |= PPC_BIT(49); // PB_COM_PB_CFG_X_INDIRECT_EN_NEXT_ON + val |= PPC_BIT(50); // PB_COM_PB_CFG_X_GATHER_ENABLE_NEXT_ON + + if (cmd_rate_4b_r != 0 && hw407123) + tmp = (cmd_rate_4b_n / cmd_rate_d) + 3; + else if (cmd_rate_4b_r == 0 && hw407123) + tmp = (cmd_rate_4b_n / cmd_rate_d) + 2; + else if (cmd_rate_4b_r != 0) + tmp = (cmd_rate_4b_n / cmd_rate_d); + else + tmp = (cmd_rate_4b_n / cmd_rate_d) - 1; + PPC_INSERT(val, tmp, 56, 8); + + put_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i], val); + } +} + +static uint64_t p9_build_smp_get_hp_ab_shadow(uint8_t chip, const uint64_t shadow_regs[]) +{ + uint64_t last_data = 0; + + for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) { + const uint64_t data = get_scom(chip, shadow_regs[i]); + + /* Check consistency of west/center/east register copies while + * reading them */ + if (i != 0 && data != last_data) + die("Values in shadow registers differ!\n"); + + last_data = data; + } + + return last_data; +} + +static void p9_build_smp_set_hp_ab_shadow(uint8_t chip, const uint64_t shadow_regs[], + uint64_t data) +{ + for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) + put_scom(chip, shadow_regs[i], data); +} + +static void p9_build_smp_copy_hp_ab_next_curr(uint8_t chip) +{ + /* Read NEXT */ + uint64_t hp_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HP_MODE_NEXT_SHADOWS); + uint64_t hpx_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPX_MODE_NEXT_SHADOWS); + uint64_t hpa_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPA_MODE_NEXT_SHADOWS); + + /* Write CURR */ + p9_build_smp_set_hp_ab_shadow(chip, PB_HP_MODE_CURR_SHADOWS, hp_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPX_MODE_CURR_SHADOWS, hpx_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPA_MODE_CURR_SHADOWS, hpa_mode_data); +} + +static void p9_build_smp_copy_hp_ab_curr_next(uint8_t chip) +{ + /* Read CURR */ + uint64_t hp_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HP_MODE_CURR_SHADOWS); + uint64_t hpx_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPX_MODE_CURR_SHADOWS); + uint64_t hpa_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPA_MODE_CURR_SHADOWS); + + /* Write NEXT */ + p9_build_smp_set_hp_ab_shadow(chip, PB_HP_MODE_NEXT_SHADOWS, hp_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPX_MODE_NEXT_SHADOWS, hpx_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPA_MODE_NEXT_SHADOWS, hpa_mode_data); +} + +static void p9_build_smp_set_fbc_ab(uint8_t chips) +{ + const 
bool is_xbus_active = (chips == 0x03); + + /* + * quiesce 'slave' fabrics in preparation for joining + * PHASE1 -> quiesce all chips except the chip which is the new fabric master + * PHASE2 -> quiesce all drawers except the drawer containing the new fabric master + */ + p9_build_smp_sequence_adu(chips, QUIESCE); + + /* Program NEXT register set for all chips via initfile */ + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_fbc_ab_hp_scom(chip, is_xbus_active); + } + + /* Program CURR register set only for chips which were just quiesced */ + if (chips & 0x02) + p9_build_smp_copy_hp_ab_next_curr(/*chip=*/1); + + /* + * Issue switch AB reconfiguration from chip designated as new master + * (which is guaranteed to be a master now) + */ + p9_build_smp_sequence_adu(chips, SWITCH_AB); + + /* Reset NEXT register set (copy CURR->NEXT) for all chips */ + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_build_smp_copy_hp_ab_curr_next(chip); + } +} + +static void p9_build_smp(uint8_t chips) +{ + const bool is_xbus_active = (chips == 0x03); + + /* Apply three CD hotplug sequences to each chip to initialize SCOM + * chains */ + for (int seq = 1; seq <= 3; seq++) { + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + if (seq == 1) + p9_fbc_cd_hp1_scom(chip, is_xbus_active); + else + p9_fbc_cd_hp23_scom(chip, is_xbus_active, seq); + } + + /* Issue switch CD on all chips to force updates to occur */ + p9_build_smp_sequence_adu(chips, SWITCH_CD); + } + + p9_build_smp_set_fbc_ab(chips); +} + +void istep_10_1(uint8_t chips) +{ + printk(BIOS_EMERG, "starting istep 10.1\n"); + report_istep(10,1); + + p9_build_smp(chips); + + if (chips & 0x02) { + /* Sanity check that XSCOM works for the second CPU */ + if (read_rscom(1, 0xF000F) == 0xFFFFFFFFFFFFFFFF) + die("XSCOM doesn't work for the second CPU\n"); + + fsi_reset_pib2opb(/*chip=*/1); + } + + printk(BIOS_EMERG, "ending istep 10.1\n"); +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 02bffd82e67..ca2cc48927a 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -377,6 +377,7 @@ void main(void) istep_9_6(chips); istep_9_7(chips); + istep_10_1(chips); istep_10_10(&phb_active_mask, iovalid_enable); istep_10_12(); istep_10_13(); From 88ec88a54e8e16e61a451cef945f5acf5b4e6b73 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 3 Jul 2022 00:52:39 +0300 Subject: [PATCH 126/213] cpu/power9/scom.c: handle form 1 of indirect SCOM It's write-only and doesn't even return any status unlike form 0. 
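For reviewers, a minimal standalone sketch of how a form 1 write is split
(the IND_FORM1_* constants are just plain-integer shorthands for the new
XSCOM_IND_FORM1_* defines; the register address and payload below are made
up for illustration):

  #include <stdint.h>
  #include <stdio.h>

  /* PPC_BITMASK(32, 63), PPC_BITMASK(20, 31) and PPC_BITMASK(0, 11) */
  #define IND_FORM1_ADDR            0x00000000FFFFFFFFull
  #define IND_FORM1_DATA_IN_ADDR    0x00000FFF00000000ull
  #define IND_FORM1_DATA_FROM_ADDR  0xFFF0000000000000ull

  int main(void)
  {
      /* Hypothetical form 1 register: PPC bits 0 (indirect) and 3 (form 1) set */
      uint64_t reg_address = 0x9000012345678ABCull;
      uint64_t value = 0x00000000DEADBEEFull;

      /* Bits 0-11 of the value must stay free... */
      if (value & IND_FORM1_DATA_FROM_ADDR)
          return 1;

      /* ...because the 12 address bits that do not fit into the SCOM address
         are carried there, while the low 32 bits of the original address
         select the register that is actually written */
      uint64_t data = value | ((reg_address & IND_FORM1_DATA_IN_ADDR) << 20);
      uint64_t addr = reg_address & IND_FORM1_ADDR;

      /* Prints: write_scom_direct(chip, 0x0000000045678abc, 0x12300000deadbeef) */
      printf("write_scom_direct(chip, 0x%016llx, 0x%016llx)\n",
             (unsigned long long)addr, (unsigned long long)data);
      return 0;
  }
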
Change-Id: Ie8242c64fd554cfe00ad0a6a8769ef5364094d90 Signed-off-by: Sergii Dmytruk --- src/cpu/power9/scom.c | 63 +++++++++++++++++++++++++++++------- src/include/cpu/power/scom.h | 28 ++-------------- 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c index 4ec762cf5cd..a723620c3fb 100644 --- a/src/cpu/power9/scom.c +++ b/src/cpu/power9/scom.c @@ -4,6 +4,8 @@ #include // HMER #include +#define XSCOM_ADDR_IND_FLAG PPC_BIT(0) +#define XSCOM_DATA_IND_FORM1 PPC_BIT(3) #define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) #define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) @@ -14,6 +16,10 @@ #define XSCOM_DATA_IND_FORM1_DATA PPC_BITMASK(12, 63) #define XSCOM_IND_MAX_RETRIES 10 +#define XSCOM_IND_FORM1_ADDR PPC_BITMASK(32, 63) +#define XSCOM_IND_FORM1_DATA_FROM_ADDR PPC_BITMASK(0, 11) +#define XSCOM_IND_FORM1_DATA_IN_ADDR PPC_BITMASK(20, 31) + #define XSCOM_RCVED_STAT_REG 0x00090018 #define XSCOM_LOG_REG 0x00090012 #define XSCOM_ERR_REG 0x00090013 @@ -26,14 +32,14 @@ static void reset_scom_engine(uint8_t chip) * necessary to do the remote writes in assembly directly to skip checking * HMER and possibly end in a loop. */ - write_scom_direct(0, XSCOM_RCVED_STAT_REG, 0); - write_scom_direct(chip, XSCOM_LOG_REG, 0); - write_scom_direct(chip, XSCOM_ERR_REG, 0); + write_rscom(0, XSCOM_RCVED_STAT_REG, 0); + write_rscom(chip, XSCOM_LOG_REG, 0); + write_rscom(chip, XSCOM_ERR_REG, 0); clear_hmer(); eieio(); } -uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address) +static uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address) { uint64_t val; uint64_t hmer = 0; @@ -69,7 +75,7 @@ uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address) return val; } -void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) +static void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) { uint64_t hmer = 0; do { @@ -91,7 +97,7 @@ void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) reset_scom_engine(chip); } -void write_scom_indirect(uint8_t chip, uint64_t reg_address, uint64_t value) +static void write_scom_indirect_form0(uint8_t chip, uint64_t reg_address, uint64_t value) { uint64_t addr; uint64_t data; @@ -103,9 +109,10 @@ void write_scom_indirect(uint8_t chip, uint64_t reg_address, uint64_t value) for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { data = read_scom_direct(chip, addr); - if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { + if((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { return; - } else if (data & XSCOM_DATA_IND_COMPLETE) { + } + else if(data & XSCOM_DATA_IND_COMPLETE) { printk(BIOS_EMERG, "SCOM WR error %16.16llx = %16.16llx : %16.16llx\n", reg_address, value, data); } @@ -113,7 +120,21 @@ void write_scom_indirect(uint8_t chip, uint64_t reg_address, uint64_t value) } } -uint64_t read_scom_indirect(uint8_t chip, uint64_t reg_address) +static void write_scom_indirect_form1(uint8_t chip, uint64_t reg_address, uint64_t value) +{ + uint64_t addr; + uint64_t data; + + if (value & XSCOM_IND_FORM1_DATA_FROM_ADDR) + die("Value for form 1 indirect SCOM must have bits 0-11 zeroed!"); + + data = value | ((reg_address & XSCOM_IND_FORM1_DATA_IN_ADDR) << 20); + addr = reg_address & XSCOM_IND_FORM1_ADDR; + + write_scom_direct(chip, addr, data); +} + +static uint64_t read_scom_indirect_form0(uint8_t chip, uint64_t reg_address) { uint64_t addr; uint64_t data; @@ -124,9 +145,9 @@ uint64_t read_scom_indirect(uint8_t chip, uint64_t 
reg_address) for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { data = read_scom_direct(chip, addr); - if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { + if((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { break; - } else if (data & XSCOM_DATA_IND_COMPLETE) { + } else if(data & XSCOM_DATA_IND_COMPLETE) { printk(BIOS_EMERG, "SCOM RD error %16.16llx : %16.16llx\n", reg_address, data); } @@ -135,3 +156,23 @@ uint64_t read_scom_indirect(uint8_t chip, uint64_t reg_address) return data & XSCOM_DATA_IND_DATA; } + +void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) +{ + if (!(addr & XSCOM_ADDR_IND_FLAG)) + write_scom_direct(chip, addr, data); + else if (!(addr & XSCOM_DATA_IND_FORM1)) + write_scom_indirect_form0(chip, addr, data); + else + write_scom_indirect_form1(chip, addr, data); +} + +uint64_t read_rscom(uint8_t chip, uint64_t addr) +{ + if (!(addr & XSCOM_ADDR_IND_FLAG)) + return read_scom_direct(chip, addr); + else if (!(addr & XSCOM_DATA_IND_FORM1)) + return read_scom_indirect_form0(chip, addr); + else + die("Form 1 indirect SCOM does not have a read operation!"); +} diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index 802568dee5b..e6a84110636 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -29,8 +29,6 @@ // F - Sat Offset (6 bits) [58:63] // Higher bits specify indirect address -#define XSCOM_ADDR_IND_FLAG PPC_BIT(0) - #ifndef __ASSEMBLER__ #include #include @@ -91,33 +89,11 @@ static const chiplet_id_t mcs_to_nest[] = [MC23_CHIPLET_ID] = N1_CHIPLET_ID, }; -/* These are implementation functions that do all the work. The interface - * functions in sections below are calling these. */ - -uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address); -void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data); - -uint64_t read_scom_indirect(uint8_t chip, uint64_t reg_address); -void write_scom_indirect(uint8_t chip, uint64_t reg_address, uint64_t data); - /* "rscom" are generic ("r" is for remote) XSCOM functions, other functions are * equivalent to rscom calls for chip #0 */ -static inline void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) -{ - if (addr & XSCOM_ADDR_IND_FLAG) - write_scom_indirect(chip, addr, data); - else - write_scom_direct(chip, addr, data); -} - -static inline uint64_t read_rscom(uint8_t chip, uint64_t addr) -{ - if (addr & XSCOM_ADDR_IND_FLAG) - return read_scom_indirect(chip, addr); - else - return read_scom_direct(chip, addr); -} +void write_rscom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_rscom(uint8_t chip, uint64_t addr); static inline void rscom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) { From 1b8e133d376099c1f04e452fb18cce37189e527a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 19:00:56 +0200 Subject: [PATCH 127/213] soc/power9/istep_10_10.c: update for second CPU * MAX_CHIPS constant * fix header guards in pci.h * CPU-specific PCIe masks * CPU-specific IOP swap * chip-specific SCOM operations * now computed PCIe is returned as an array of structures Change-Id: Ib39b9f97ed8dfb0b77bc0001cd1a951b44e58a32 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_10.h | 4 +- src/soc/ibm/power9/istep_10_10.c | 194 ++++++++++++++++++------------- src/soc/ibm/power9/pci.h | 13 +++ src/soc/ibm/power9/romstage.c | 8 +- 4 files changed, 130 insertions(+), 89 deletions(-) diff --git a/src/include/cpu/power/istep_10.h 
b/src/include/cpu/power/istep_10.h index cc52c506012..1c7f442b625 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -5,8 +5,10 @@ #include +struct pci_info; + void istep_10_1(uint8_t chips); -void istep_10_10(uint8_t *phb_active_mask, uint8_t *iovalid_enable); +void istep_10_10(uint8_t chips, struct pci_info *pci_info); void istep_10_12(void); void istep_10_13(void); diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c index 90894708abf..a2a11749bed 100644 --- a/src/soc/ibm/power9/istep_10_10.c +++ b/src/soc/ibm/power9/istep_10_10.c @@ -9,6 +9,7 @@ #include #include +#include "homer.h" #include "pci.h" #define MAX_LANE_GROUPS_PER_PEC 4 @@ -161,13 +162,30 @@ static const size_t pec_lane_cfg_sizes[] = { }; /* - * PEC_PCIE_LANE_MASK_NON_BIFURCATED in processed talos.xml for the first - * processor chip. Values correspond to lane_width enumeration. + * Rows correspond to PEC_PCIE_LANE_MASK_NON_BIFURCATED values in processed + * talos.xml for each processor chip. Values correspond to lane_width + * enumeration. */ -static uint16_t lane_masks[MAX_PEC_PER_PROC][MAX_LANE_GROUPS_PER_PEC] = { - { LANE_MASK_X16, 0x0, 0x0, 0x0 }, - { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X8_GRP1, 0x0 }, - { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X4_GRP0, LANE_MASK_X4_GRP1 }, +static uint16_t lane_masks[MAX_CHIPS][MAX_PEC_PER_PROC][MAX_LANE_GROUPS_PER_PEC] = { + { + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X8_GRP1, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X4_GRP0, LANE_MASK_X4_GRP1 }, + }, + { + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X8_GRP1, 0x0 }, + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + }, +}; + +/* + * PROC_PCIE_IOP_SWAP from processed talos.xml for each PEC of each processor + * chip + */ +static uint8_t pcie_iop_swap[MAX_CHIPS][MAX_PEC_PER_PROC] = { + { 1, 0, 0 }, + { 1, 0, 4 }, }; static const uint64_t RX_VGA_CTRL3_REGISTER[NUM_PCIE_LANES] = { @@ -222,7 +240,7 @@ static enum lane_width lane_mask_to_width(uint16_t mask) return width; } -static void determine_lane_configs(uint8_t *phb_active_mask, +static void determine_lane_configs(uint8_t chip, uint8_t *phb_active_mask, const struct lane_config_row **pec_cfgs) { uint8_t pec = 0; @@ -242,8 +260,8 @@ static void determine_lane_configs(uint8_t *phb_active_mask, /* Transform effective config to match lane config table format */ for (lane_group = 0; lane_group < MAX_LANE_GROUPS_PER_PEC; ++lane_group) { - config.lane_set[lane_group] = - lane_mask_to_width(lane_masks[pec][lane_group]); + const uint16_t mask = lane_masks[chip][pec][lane_group]; + config.lane_set[lane_group] = lane_mask_to_width(mask); } for (i = 0; i < pec_lane_cfg_sizes[pec]; ++i) { @@ -293,7 +311,7 @@ static uint64_t pec_val(int pec_id, uint8_t in, return out; } -static void phase1(const struct lane_config_row **pec_cfgs, +static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, const uint8_t *iovalid_enable) { enum { @@ -359,7 +377,6 @@ static void phase1(const struct lane_config_row **pec_cfgs, long time; uint8_t i; uint64_t val; - uint8_t proc_pcie_iop_swap; chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; @@ -381,18 +398,15 @@ static void phase1(const struct lane_config_row **pec_cfgs, PEC0_IOP_CONFIG_START_BIT, PEC0_IOP_BIT_COUNT * 2, PEC1_IOP_CONFIG_START_BIT, PEC1_IOP_BIT_COUNT * 2, PEC2_IOP_CONFIG_START_BIT, PEC2_IOP_BIT_COUNT * 2); - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, val); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); /* 
Phase1 init step 2b */ - /* ATTR_PROC_PCIE_IOP_SWAP from processed talos.xml for first proc */ - proc_pcie_iop_swap = (pec == 0); - - val = pec_val(pec, proc_pcie_iop_swap, + val = pec_val(pec, pcie_iop_swap[chip][pec], PEC0_IOP_SWAP_START_BIT, PEC0_IOP_BIT_COUNT, PEC1_IOP_SWAP_START_BIT, PEC1_IOP_BIT_COUNT, PEC2_IOP_SWAP_START_BIT, PEC2_IOP_BIT_COUNT); - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, val); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); /* Phase1 init step 3a */ @@ -408,23 +422,23 @@ static void phase1(const struct lane_config_row **pec_cfgs, val |= PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK1_BIT); } - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, val); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); /* Phase1 init step 3b (enable clock) */ /* ATTR_PROC_PCIE_REFCLOCK_ENABLE, all PECs are enabled. */ - write_scom_for_chiplet(chiplet, PEC_CPLT_CTRL0_OR, - PPC_BIT(PEC_IOP_REFCLOCK_ENABLE_START_BIT)); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CTRL0_OR, + PPC_BIT(PEC_IOP_REFCLOCK_ENABLE_START_BIT)); /* Phase1 init step 4 (PMA reset) */ - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_CLEAR, - PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); udelay(1); /* at least 400ns */ - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_OR, - PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); udelay(1); /* at least 400ns */ - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_CLEAR, - PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); /* * Poll for PRTREADY status on PLLA and PLLB: @@ -433,19 +447,22 @@ static void phase1(const struct lane_config_row **pec_cfgs, * PEC_IOP_HSS_PORT_READY_START_BIT = 58 */ time = wait_us(40, - (read_scom_for_chiplet(chiplet, 0x800005010D010C3F) & PPC_BIT(58)) || - (read_scom_for_chiplet(chiplet, 0x800005410D010C3F) & PPC_BIT(58))); + (read_rscom_for_chiplet(chip, chiplet, 0x800005010D010C3F) & PPC_BIT(58)) || + (read_rscom_for_chiplet(chip, chiplet, 0x800005410D010C3F) & PPC_BIT(58))); if (!time) die("IOP HSS Port Ready status is not set!"); /* Phase1 init step 5 (Set IOP FIR action0) */ - write_scom_for_chiplet(chiplet, PEC_FIR_ACTION0_REG, PCI_IOP_FIR_ACTION0_REG); + write_rscom_for_chiplet(chip, chiplet, PEC_FIR_ACTION0_REG, + PCI_IOP_FIR_ACTION0_REG); /* Phase1 init step 6 (Set IOP FIR action1) */ - write_scom_for_chiplet(chiplet, PEC_FIR_ACTION1_REG, PCI_IOP_FIR_ACTION1_REG); + write_rscom_for_chiplet(chip, chiplet, PEC_FIR_ACTION1_REG, + PCI_IOP_FIR_ACTION1_REG); /* Phase1 init step 7 (Set IOP FIR mask) */ - write_scom_for_chiplet(chiplet, PEC_FIR_MASK_REG, PCI_IOP_FIR_MASK_REG); + write_rscom_for_chiplet(chip, chiplet, PEC_FIR_MASK_REG, + PCI_IOP_FIR_MASK_REG); /* Phase1 init step 8-11 (Config 0 - 3) */ @@ -453,30 +470,32 @@ static void phase1(const struct lane_config_row **pec_cfgs, uint8_t lane; /* RX Config Mode */ - write_scom_for_chiplet(chiplet, PEC_PCS_RX_CONFIG_MODE_REG, - pcs_config_mode[i]); + write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, + pcs_config_mode[i]); /* RX CDR GAIN */ - scom_and_or_for_chiplet(chiplet, PEC_PCS_RX_CDR_GAIN_REG, - ~PPC_BITMASK(56, 63), - pcs_cdr_gain[i]); + rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_CDR_GAIN_REG, + ~PPC_BITMASK(56, 63), + pcs_cdr_gain[i]); for (lane = 0; lane < 
NUM_PCIE_LANES; ++lane) { /* RX INITGAIN */ - scom_and_or_for_chiplet(chiplet, RX_VGA_CTRL3_REGISTER[lane], - ~PPC_BITMASK(48, 52), - PPC_PLACE(pcs_init_gain, 48, 5)); + rscom_and_or_for_chiplet(chip, chiplet, + RX_VGA_CTRL3_REGISTER[lane], + ~PPC_BITMASK(48, 52), + PPC_PLACE(pcs_init_gain, 48, 5)); /* RX PKINIT */ - scom_and_or_for_chiplet(chiplet, RX_LOFF_CNTL_REGISTER[lane], - ~PPC_BITMASK(58, 63), - pcs_pk_init); + rscom_and_or_for_chiplet(chip, chiplet, + RX_LOFF_CNTL_REGISTER[lane], + ~PPC_BITMASK(58, 63), + pcs_pk_init); } /* RX SIGDET LVL */ - scom_and_or_for_chiplet(chiplet, PEC_PCS_RX_SIGDET_CONTROL_REG, - ~PPC_BITMASK(59, 63), - pcs_sigdet_lvl); + rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_SIGDET_CONTROL_REG, + ~PPC_BITMASK(59, 63), + pcs_sigdet_lvl); } /* @@ -488,41 +507,43 @@ static void phase1(const struct lane_config_row **pec_cfgs, * - ATTR_PROC_PCIE_PCS_RX_ROT_EXTEL (59) * - ATTR_PROC_PCIE_PCS_RX_ROT_RST_FW (62) */ - scom_and_for_chiplet(chiplet, PEC_PCS_RX_ROT_CNTL_REG, - ~(PPC_BIT(55) | PPC_BIT(63) | PPC_BIT(59) | PPC_BIT(62))); + rscom_and_for_chiplet(chip, chiplet, PEC_PCS_RX_ROT_CNTL_REG, + ~(PPC_BIT(55) | PPC_BIT(63) | PPC_BIT(59) | PPC_BIT(62))); /* Phase1 init step 13 (RX Config Mode Enable External Config Control) */ - write_scom_for_chiplet(chiplet, PEC_PCS_RX_CONFIG_MODE_REG, 0x8600); + write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, 0x8600); /* Phase1 init step 14 (PCLCK Control Register - PLLA) */ /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLA = 0xF8 */ - scom_and_or_for_chiplet(chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, - ~PPC_BITMASK(56, 63), - 0xF8); + rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, + ~PPC_BITMASK(56, 63), + 0xF8); /* Phase1 init step 15 (PCLCK Control Register - PLLB) */ /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLB = 0xF8 */ - scom_and_or_for_chiplet(chiplet, PEC_PCS_PCLCK_CNTL_PLLB_REG, - ~PPC_BITMASK(56, 63), - 0xF8); + rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLB_REG, + ~PPC_BITMASK(56, 63), + 0xF8); /* Phase1 init step 16 (TX DCLCK Rotator Override) */ /* ATTR_PROC_PCIE_PCS_TX_DCLCK_ROT = 0x0022 */ - write_scom_for_chiplet(chiplet, PEC_PCS_TX_DCLCK_ROTATOR_REG, 0x0022); + write_rscom_for_chiplet(chip, chiplet, PEC_PCS_TX_DCLCK_ROTATOR_REG, 0x0022); /* Phase1 init step 17 (TX PCIe Receiver Detect Control Register 1) */ /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG1 = 0xAA7A */ - write_scom_for_chiplet(chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG, 0xaa7a); + write_rscom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG, + 0xAA7A); /* Phase1 init step 18 (TX PCIe Receiver Detect Control Register 2) */ /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG2 = 0x2000 */ - write_scom_for_chiplet(chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG, 0x2000); + write_rscom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG, + 0x2000); /* Phase1 init step 19 (TX Power Sequence Enable) */ /* ATTR_PROC_PCIE_PCS_TX_POWER_SEQ_ENABLE = 0xFF, but field is 7 bits */ - scom_and_or_for_chiplet(chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, - ~PPC_BITMASK(56, 62), - PPC_PLACE(0x7F, 56, 7)); + rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, + ~PPC_BITMASK(56, 62), + PPC_PLACE(0x7F, 56, 7)); /* Phase1 init step 20 (RX VGA Control Register 1) */ @@ -531,21 +552,21 @@ static void phase1(const struct lane_config_row **pec_cfgs, /* ATTR_CHIP_EC_FEATURE_HW414759 = 0, so not setting PEC_SCOM0X0B_EDMOD */ - write_scom_for_chiplet(chiplet, PEC_PCS_RX_VGA_CONTROL1_REG, val); + 
write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL1_REG, val); /* Phase1 init step 21 (RX VGA Control Register 2) */ /* ATTR_PROC_PCIE_PCS_RX_VGA_CNTL_REG2 = 0 */ - write_scom_for_chiplet(chiplet, PEC_PCS_RX_VGA_CONTROL2_REG, 0); + write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL2_REG, 0); /* Phase1 init step 22 (RX DFE Func Control Register 1) */ /* ATTR_PROC_PCIE_PCS_RX_DFE_FDDC = 1 */ - scom_or_for_chiplet(chiplet, PEC_IOP_RX_DFE_FUNC_REGISTER1, PPC_BIT(50)); + rscom_or_for_chiplet(chip, chiplet, PEC_IOP_RX_DFE_FUNC_REGISTER1, PPC_BIT(50)); /* Phase1 init step 23 (PCS System Control) */ /* ATTR_PROC_PCIE_PCS_SYSTEM_CNTL computed above */ - scom_and_or_for_chiplet(chiplet, PEC_PCS_SYS_CONTROL_REG, - ~PPC_BITMASK(55, 63), - pec_cfgs[pec]->phb_to_pcie_mac); + rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_SYS_CONTROL_REG, + ~PPC_BITMASK(55, 63), + pec_cfgs[pec]->phb_to_pcie_mac); /* * All values in ATTR_PROC_PCIE_PCS_M_CNTL are 0. @@ -554,20 +575,24 @@ static void phase1(const struct lane_config_row **pec_cfgs, */ /* Phase1 init step 24 (PCS M1 Control) */ - scom_and_for_chiplet(chiplet, PEC_PCS_M1_CONTROL_REG, ~PPC_BITMASK(55, 63)); + rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M1_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Phase1 init step 25 (PCS M2 Control) */ - scom_and_for_chiplet(chiplet, PEC_PCS_M2_CONTROL_REG, ~PPC_BITMASK(55, 63)); + rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M2_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Phase1 init step 26 (PCS M3 Control) */ - scom_and_for_chiplet(chiplet, PEC_PCS_M3_CONTROL_REG, ~PPC_BITMASK(55, 63)); + rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M3_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Phase1 init step 27 (PCS M4 Control) */ - scom_and_for_chiplet(chiplet, PEC_PCS_M4_CONTROL_REG, ~PPC_BITMASK(55, 63)); + rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M4_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Delay a minimum of 200ns to allow prior SCOM programming to take effect */ udelay(1); /* Phase1 init step 28 */ - write_scom_for_chiplet(chiplet, PEC_CPLT_CONF1_CLEAR, - PPC_BIT(PEC_IOP_PIPE_RESET_START_BIT)); + write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PIPE_RESET_START_BIT)); /* * Delay a minimum of 300ns for reset to complete. @@ -576,24 +601,25 @@ static void phase1(const struct lane_config_row **pec_cfgs, } } -void istep_10_10(uint8_t *phb_active_mask, uint8_t *iovalid_enable) +void istep_10_10(uint8_t chips, struct pci_info *pci_info) { - const struct lane_config_row *pec_cfgs[MAX_PEC_PER_PROC] = { NULL }; - printk(BIOS_EMERG, "starting istep 10.10\n"); report_istep(10, 10); - determine_lane_configs(phb_active_mask, pec_cfgs); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + const struct lane_config_row *pec_cfgs[MAX_PEC_PER_PROC] = { NULL }; - /* - * Mask of functional PHBs for each PEC, ATTR_PROC_PCIE_IOVALID_ENABLE in Hostboot. - * LSB is the PHB with the highest number for the given PEC. 
- */ - iovalid_enable[0] = pec_cfgs[0]->phb_active >> PEC0_PHB_SHIFT; - iovalid_enable[1] = pec_cfgs[1]->phb_active >> PEC1_PHB_SHIFT; - iovalid_enable[2] = pec_cfgs[2]->phb_active >> PEC2_PHB_SHIFT; + if (!(chips & (1 << chip))) + continue; - phase1(pec_cfgs, iovalid_enable); + determine_lane_configs(chip, &pci_info[chip].phb_active_mask, pec_cfgs); + + pci_info[chip].iovalid_enable[0] = pec_cfgs[0]->phb_active >> PEC0_PHB_SHIFT; + pci_info[chip].iovalid_enable[1] = pec_cfgs[1]->phb_active >> PEC1_PHB_SHIFT; + pci_info[chip].iovalid_enable[2] = pec_cfgs[2]->phb_active >> PEC2_PHB_SHIFT; + + phase1(chip, pec_cfgs, pci_info[chip].iovalid_enable); + } printk(BIOS_EMERG, "ending istep 10.10\n"); } diff --git a/src/soc/ibm/power9/pci.h b/src/soc/ibm/power9/pci.h index 345fbe1505d..6f90ca40130 100644 --- a/src/soc/ibm/power9/pci.h +++ b/src/soc/ibm/power9/pci.h @@ -17,4 +17,17 @@ enum phb_active_mask { PHB5_MASK = 0x04, // PHB5 enabled }; +struct pci_info { + /* Combination of values from phb_active_mask enumeration */ + uint8_t phb_active_mask; + + /* + * Mask of functional PHBs for each PEC, corresponds to + * ATTR_PROC_PCIE_IOVALID_ENABLE in Hostboot. + * + * LSB is the PHB with the highest number for the given PEC. + */ + uint8_t iovalid_enable[MAX_PEC_PER_PROC]; +}; + #endif /* __SOC_IBM_POWER9_PCI_H */ diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index ca2cc48927a..cfcd97fb636 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -16,6 +16,7 @@ #include #include "fsi.h" +#include "homer.h" #include "pci.h" /* DIMM SPD addresses */ @@ -340,8 +341,7 @@ void main(void) { uint8_t chips; - uint8_t phb_active_mask = 0; - uint8_t iovalid_enable[MAX_PEC_PER_PROC] = { 0 }; + struct pci_info pci_info[MAX_CHIPS] = { 0 }; init_timer(); @@ -378,7 +378,7 @@ void main(void) istep_9_7(chips); istep_10_1(chips); - istep_10_10(&phb_active_mask, iovalid_enable); + istep_10_10(chips, pci_info); istep_10_12(); istep_10_13(); @@ -403,7 +403,7 @@ void main(void) istep_14_1(); istep_14_2(); - istep_14_3(phb_active_mask, iovalid_enable); + istep_14_3(pci_info[0].phb_active_mask, pci_info[0].iovalid_enable); report_istep(14, 4); // no-op istep_14_5(); From ef89417080a00c66cee3776f9a21cc18837e32b4 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 19:05:47 +0200 Subject: [PATCH 128/213] soc/power9/istep_10_12.c: update for second CPU Use chip-specific SCOM accesses. 
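The gist of the change below, trimmed to the essentials (nothing new here,
this is the istep_10_12.c hunk in compressed form; `chips` is the bitmask of
present CPUs, bit 0 for the first chip and bit 1 for the second one):

  for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
      if (chips & (1 << chip))
          enable_ridi(chip);  /* every SCOM inside now takes the chip index */
  }
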
Change-Id: Id6c782d6ad1a956855d7e0a8b5faff248fbe3b26 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_10.h | 2 +- src/soc/ibm/power9/istep_10_12.c | 18 +++++++++++------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h index 1c7f442b625..c51ae593107 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -9,7 +9,7 @@ struct pci_info; void istep_10_1(uint8_t chips); void istep_10_10(uint8_t chips, struct pci_info *pci_info); -void istep_10_12(void); +void istep_10_12(uint8_t chips); void istep_10_13(void); #endif /* CPU_PPC64_ISTEP10_H */ diff --git a/src/soc/ibm/power9/istep_10_12.c b/src/soc/ibm/power9/istep_10_12.c index 563996bf85f..295554b5329 100644 --- a/src/soc/ibm/power9/istep_10_12.c +++ b/src/soc/ibm/power9/istep_10_12.c @@ -5,10 +5,11 @@ #include #include +#include "homer.h" #include "pci.h" -/* PCIe only at the moment, should also do other buses */ -static void enable_ridi(void) +/* PCIe only at the moment, Hostboot also updates MC and OBus chiplets too */ +static void enable_ridi(uint8_t chip) { enum { PERV_NET_CTRL0 = 0x000F0040, @@ -21,23 +22,26 @@ static void enable_ridi(void) chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ - if (read_scom_for_chiplet(chiplet, PERV_NET_CTRL0) & PPC_BIT(0)) { - /* Enable Recievers, Drivers DI1 & DI2 */ + if (read_rscom_for_chiplet(chip, chiplet, PERV_NET_CTRL0) & PPC_BIT(0)) { + /* Enable Receivers, Drivers DI1 & DI2 */ uint64_t val = 0; val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 - write_scom_for_chiplet(chiplet, PERV_NET_CTRL0_WOR, val); + write_rscom_for_chiplet(chip, chiplet, PERV_NET_CTRL0_WOR, val); } } } -void istep_10_12(void) +void istep_10_12(uint8_t chips) { printk(BIOS_EMERG, "starting istep 10.12\n"); report_istep(10, 12); - enable_ridi(); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + enable_ridi(chip); + } printk(BIOS_EMERG, "ending istep 10.12\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index cfcd97fb636..a23e2bb613e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -379,7 +379,7 @@ void main(void) istep_10_1(chips); istep_10_10(chips, pci_info); - istep_10_12(); + istep_10_12(chips); istep_10_13(); timestamp_add_now(TS_INITRAM_START); From 801e08c9a27452cd19624ffaba175d95b392de5f Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 18 Jan 2022 19:07:29 +0200 Subject: [PATCH 129/213] soc/power9/istep_14_3.c: update for second CPU Chip-specific SCOM accesses and values computed in istep 10.10. 
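For context, after this change the per-chip PCIe values computed in istep
10.10 reach istep 14.3 through the pci_info array added to pci.h by the
istep_10_10 patch; abridged from the romstage.c hunks, the flow is roughly:

  struct pci_info pci_info[MAX_CHIPS] = { 0 };

  istep_10_10(chips, pci_info);  /* fills phb_active_mask and iovalid_enable
                                    for every chip set in `chips` */
  /* ... other isteps ... */
  istep_14_3(chips, pci_info);   /* consumes the same per-chip values */
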
Change-Id: Ic5b4932cfe3fe960ea5464b0df71603a528ca4ed Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_14.h | 4 +- src/soc/ibm/power9/istep_14_3.c | 121 +++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index d79f38dd804..e7e9804c075 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -5,9 +5,11 @@ #include +struct pci_info; + void istep_14_1(void); void istep_14_2(void); -void istep_14_3(uint8_t phb_active_mask, const uint8_t *iovalid_enable); +void istep_14_3(uint8_t chips, const struct pci_info *pci_info); void istep_14_5(void); #endif /* CPU_PPC64_ISTEP_14_H */ diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index 26f7dc2abe1..ba50844cea2 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -5,6 +5,7 @@ #include #include +#include "homer.h" #include "pci.h" #include "scratch.h" @@ -13,7 +14,7 @@ static uint64_t pec_addr(uint8_t pec, uint64_t addr) return addr + pec * 0x400; } -static void init_pecs(const uint8_t *iovalid_enable) +static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) { enum { P9N2_PEC_ADDREXTMASK_REG = 0x4010C05, @@ -42,7 +43,7 @@ static void init_pecs(const uint8_t *iovalid_enable) bool node_pump_mode = false; uint8_t dd = get_dd(); - scratch_reg6 = read_scom(MBOX_SCRATCH_REG1 + 5); + scratch_reg6 = read_rscom(chip, MBOX_SCRATCH_REG1 + 5); /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); @@ -56,9 +57,10 @@ static void init_pecs(const uint8_t *iovalid_enable) * ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID = 0 * ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID = 0 */ - scom_and_or_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG), - ~PPC_BITMASK(0, 6), - PPC_PLACE(0, 0, 7)); + rscom_and_or_for_chiplet(chip, N2_CHIPLET_ID, + pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG), + ~PPC_BITMASK(0, 6), + PPC_PLACE(0, 0, 7)); /* * Phase2 init step 1 @@ -76,7 +78,8 @@ static void init_pecs(const uint8_t *iovalid_enable) * scope */ - val = read_scom_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG)); + val = read_rscom_for_chiplet(chip, N2_CHIPLET_ID, + pec_addr(pec, PEC_PBCQHWCFG_REG)); /* Set hang poll scale */ val &= ~PPC_BITMASK(0, 3); val |= PPC_PLACE(1, 0, 4); @@ -151,7 +154,8 @@ static void init_pecs(const uint8_t *iovalid_enable) if (pec == 1 || (pec == 2 && iovalid_enable[pec] != 0x4)) val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION); - write_scom_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG), val); + write_rscom_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG), + val); /* * Phase2 init step 2 @@ -165,9 +169,9 @@ static void init_pecs(const uint8_t *iovalid_enable) * Set bits 00:03 = 0b1001 Enable trace, and select * inbound operations with addr information */ - scom_and_or_for_chiplet(N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), - ~PPC_BITMASK(0, 3), - PPC_PLACE(9, 0, 4)); + rscom_and_or_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), + ~PPC_BITMASK(0, 3), + PPC_PLACE(9, 0, 4)); /* * Phase2 init step 4 @@ -190,12 +194,12 @@ static void init_pecs(const uint8_t *iovalid_enable) val |= PPC_BIT(PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN); val |= PPC_PLACE(7, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT_LEN); - 
write_scom_for_chiplet(PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); + write_rscom_for_chiplet(chip, PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); } } /* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ -static void phb_write(uint8_t phb, uint64_t addr, uint64_t data) +static void phb_write(uint8_t chip, uint8_t phb, uint64_t addr, uint64_t data) { chiplet_id_t chiplet; uint8_t sat_id = (addr >> 6) & 0xF; @@ -213,11 +217,11 @@ static void phb_write(uint8_t phb, uint64_t addr, uint64_t data) addr &= ~PPC_BITMASK(54, 57); addr |= PPC_PLACE(sat_id, 54, 4); - write_scom_for_chiplet(chiplet, addr, data); + write_rscom_for_chiplet(chip, chiplet, addr, data); } /* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ -static void phb_nest_write(uint8_t phb, uint64_t addr, uint64_t data) +static void phb_nest_write(uint8_t chip, uint8_t phb, uint64_t addr, uint64_t data) { enum { N2_PCIS0_0_RING_ID = 0x3 }; @@ -240,10 +244,10 @@ static void phb_nest_write(uint8_t phb, uint64_t addr, uint64_t data) addr &= ~PPC_BITMASK(54, 57); addr |= PPC_PLACE(sat_id, 54, 4); - write_scom_for_chiplet(N2_CHIPLET_ID, addr, data); + write_rscom_for_chiplet(chip, N2_CHIPLET_ID, addr, data); } -static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) +static void init_phbs(uint8_t chip, uint8_t phb_active_mask, const uint8_t *iovalid_enable) { enum { PHB_CERR_RPT0_REG = 0x4010C4A, @@ -290,26 +294,30 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) }; /* ATTR_PROC_PCIE_MMIO_BAR0_BASE_ADDR_OFFSET */ - uint64_t mmio_bar0_offsets[MAX_PHB_PER_PROC] = { 0 }; + const uint64_t mmio_bar0_offsets[MAX_PHB_PER_PROC] = { 0 }; /* ATTR_PROC_PCIE_MMIO_BAR1_BASE_ADDR_OFFSET */ - uint64_t mmio_bar1_offsets[MAX_PHB_PER_PROC] = { 0 }; + const uint64_t mmio_bar1_offsets[MAX_PHB_PER_PROC] = { 0 }; /* ATTR_PROC_PCIE_REGISTER_BAR_BASE_ADDR_OFFSET */ - uint64_t register_bar_offsets[MAX_PHB_PER_PROC] = { 0 }; + const uint64_t register_bar_offsets[MAX_PHB_PER_PROC] = { 0 }; /* ATTR_PROC_PCIE_BAR_SIZE */ - uint64_t bar_sizes[3] = { 0 }; + const uint64_t bar_sizes[3] = { 0 }; - /* Determine base address of chip MMIO range */ + /* + * Determine base address of chip MMIO range. + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. 
+ */ uint64_t base_addr_mmio = 0; - base_addr_mmio |= PPC_PLACE(0, 8, 5); // ATTR_PROC_FABRIC_SYSTEM_ID - base_addr_mmio |= PPC_PLACE(0, 15, 4); // ATTR_PROC_EFF_FABRIC_GROUP_ID - base_addr_mmio |= PPC_PLACE(0, 19, 3); // ATTR_PROC_EFF_FABRIC_CHIP_ID - base_addr_mmio |= PPC_PLACE(3, 13, 2); // FABRIC_ADDR_MSEL - // nm = 0b00/01, m = 0b10, mmio = 0b11 + base_addr_mmio |= PPC_PLACE(0, 8, 5); // ATTR_PROC_FABRIC_SYSTEM_ID + base_addr_mmio |= PPC_PLACE(chip, 15, 4); // ATTR_PROC_EFF_FABRIC_GROUP_ID + base_addr_mmio |= PPC_PLACE(0, 19, 3); // ATTR_PROC_EFF_FABRIC_CHIP_ID + base_addr_mmio |= PPC_PLACE(3, 13, 2); // FABRIC_ADDR_MSEL + // nm = 0b00/01, m = 0b10, mmio = 0b11 uint8_t phb = 0; for (phb = 0; phb < MAX_PHB_PER_PROC; ++phb) { /* BAR enable attribute (ATTR_PROC_PCIE_BAR_ENABLE) */ - uint8_t bar_enables[3] = { 0 }; + const uint8_t bar_enables[3] = { 0 }; uint64_t val = 0; uint64_t mmio0_bar = base_addr_mmio; @@ -327,7 +335,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0xFFFFFFFF_FFFFFFFF * Clear any spurious cerr_rpt0 bits (cerr_rpt0) */ - phb_nest_write(phb, PHB_CERR_RPT0_REG, PPC_BITMASK(0, 63)); + phb_nest_write(chip, phb, PHB_CERR_RPT0_REG, PPC_BITMASK(0, 63)); /* * Phase2 init step 12_b (yes, out of order) @@ -335,7 +343,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0xFFFFFFFF_FFFFFFFF * Clear any spurious cerr_rpt1 bits (cerr_rpt1) */ - phb_nest_write(phb, PHB_CERR_RPT1_REG, PPC_BITMASK(0, 63)); + phb_nest_write(chip, phb, PHB_CERR_RPT1_REG, PPC_BITMASK(0, 63)); /* * Phase2 init step 7_c @@ -344,7 +352,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * Clear any spurious FIR * bits (NFIR)NFIR */ - phb_nest_write(phb, PHB_NFIR_REG, 0); + phb_nest_write(chip, phb, PHB_NFIR_REG, 0); /* * Phase2 init step 8 @@ -352,28 +360,28 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0x00000000_00000000 * Clear any spurious WOF bits (NFIRWOF) */ - phb_nest_write(phb, PHB_NFIRWOF_REG, 0); + phb_nest_write(chip, phb, PHB_NFIRWOF_REG, 0); /* * Phase2 init step 9 * NestBase + StackBase + 0x6 * Set the per FIR Bit Action 0 register */ - phb_nest_write(phb, PHB_NFIRACTION0_REG, PCI_NFIR_ACTION0_REG); + phb_nest_write(chip, phb, PHB_NFIRACTION0_REG, PCI_NFIR_ACTION0_REG); /* * Phase2 init step 10 * NestBase + StackBase + 0x7 * Set the per FIR Bit Action 1 register */ - phb_nest_write(phb, PHB_NFIRACTION1_REG, PCI_NFIR_ACTION1_REG); + phb_nest_write(chip, phb, PHB_NFIRACTION1_REG, PCI_NFIR_ACTION1_REG); /* * Phase2 init step 11 * NestBase + StackBase + 0x3 * Set FIR Mask Bits to allow errors (NFIRMask) */ - phb_nest_write(phb, PHB_NFIRMASK_REG, PCI_NFIR_MASK_REG); + phb_nest_write(chip, phb, PHB_NFIRMASK_REG, PCI_NFIR_MASK_REG); /* * Phase2 init step 12 @@ -381,7 +389,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0x00000000_00000000 * Set Data Freeze Type Register for SUE handling (DFREEZE) */ - phb_nest_write(phb, PHB_PE_DFREEZE_REG, 0); + phb_nest_write(chip, phb, PHB_PE_DFREEZE_REG, 0); /* * Phase2 init step 13_a @@ -389,7 +397,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0x00000000_00000000 * Clear any spurious pbaib_cerr_rpt bits */ - phb_write(phb, PHB_PBAIB_CERR_RPT_REG, 0); + phb_write(chip, phb, PHB_PBAIB_CERR_RPT_REG, 0); /* * Phase2 init step 13_b @@ -398,7 +406,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * Clear any spurious FIR * 
bits (PFIR)PFIR */ - phb_write(phb, PHB_PFIR_REG, 0); + phb_write(chip, phb, PHB_PFIR_REG, 0); /* * Phase2 init step 14 @@ -406,28 +414,28 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0x00000000_00000000 * Clear any spurious WOF bits (PFIRWOF) */ - phb_write(phb, PHB_PFIRWOF_REG, 0); + phb_write(chip, phb, PHB_PFIRWOF_REG, 0); /* * Phase2 init step 15 * PCIBase + StackBase + 0x6 * Set the per FIR Bit Action 0 register */ - phb_write(phb, PHB_PFIRACTION0_REG, PCI_PFIR_ACTION0_REG); + phb_write(chip, phb, PHB_PFIRACTION0_REG, PCI_PFIR_ACTION0_REG); /* * Phase2 init step 16 * PCIBase + StackBase + 0x7 * Set the per FIR Bit Action 1 register */ - phb_write(phb, PHB_PFIRACTION1_REG, PCI_PFIR_ACTION1_REG); + phb_write(chip, phb, PHB_PFIRACTION1_REG, PCI_PFIR_ACTION1_REG); /* * Phase2 init step 17 * PCIBase + StackBase + 0x3 * Set FIR Mask Bits to allow errors (PFIRMask) */ - phb_write(phb, PHB_PFIRMASK_REG, PCI_PFIR_MASK_REG); + phb_write(chip, phb, PHB_PFIRMASK_REG, PCI_PFIR_MASK_REG); /* * Phase2 init step 18 @@ -436,14 +444,14 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) */ mmio0_bar += mmio_bar0_offsets[phb]; mmio0_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; - phb_nest_write(phb, PHB_MMIOBAR0_REG, mmio0_bar); + phb_nest_write(chip, phb, PHB_MMIOBAR0_REG, mmio0_bar); /* * Phase2 init step 19 * NestBase + StackBase + 0xF * Set MMIO BASE Address Register Mask 0 (MMIOBAR0_MASK) */ - phb_nest_write(phb, PHB_MMIOBAR0_MASK_REG, bar_sizes[0]); + phb_nest_write(chip, phb, PHB_MMIOBAR0_MASK_REG, bar_sizes[0]); /* * Phase2 init step 20 @@ -453,14 +461,14 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) */ mmio1_bar += mmio_bar1_offsets[phb]; mmio1_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; - phb_nest_write(phb, PHB_MMIOBAR1_REG, mmio1_bar); + phb_nest_write(chip, phb, PHB_MMIOBAR1_REG, mmio1_bar); /* * Phase2 init step 21 * NestBase + StackBase + 0x11 * Set MMIO Base Address Register Mask 1 (MMIOBAR1_MASK) */ - phb_nest_write(phb, PHB_MMIOBAR1_MASK_REG, bar_sizes[1]); + phb_nest_write(chip, phb, PHB_MMIOBAR1_MASK_REG, bar_sizes[1]); /* * Phase2 init step 22 @@ -469,7 +477,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) */ register_bar += register_bar_offsets[phb]; register_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; - phb_nest_write(phb, PHB_PHBBAR_REG, register_bar); + phb_nest_write(chip, phb, PHB_PHBBAR_REG, register_bar); /* * Phase2 init step 23 @@ -486,7 +494,7 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) if (bar_enables[2]) val |= PPC_BIT(2); // PHB_BARE_REG_PE_PHB_BAR_EN, bit 2 for PHB - phb_nest_write(phb, PHB_BARE_REG, val); + phb_nest_write(chip, phb, PHB_BARE_REG, val); /* * Phase2 init step 24 @@ -494,21 +502,26 @@ static void init_phbs(uint8_t phb_active_mask, const uint8_t *iovalid_enable) * 0x00000000_00000000 * Remove ETU/AIB bus from reset (PHBReset) */ - phb_write(phb, PHB_PHBRESET_REG, 0); + phb_write(chip, phb, PHB_PHBRESET_REG, 0); /* Configure ETU FIR (all masked) */ - phb_write(phb, PHB_ACT0_REG, 0); - phb_write(phb, PHB_ACTION1_REG, 0); - phb_write(phb, PHB_MASK_REG, PPC_BITMASK(0, 63)); + phb_write(chip, phb, PHB_ACT0_REG, 0); + phb_write(chip, phb, PHB_ACTION1_REG, 0); + phb_write(chip, phb, PHB_MASK_REG, PPC_BITMASK(0, 63)); } } -void istep_14_3(uint8_t phb_active_mask, const uint8_t *iovalid_enable) +void istep_14_3(uint8_t chips, const struct pci_info *pci_info) { printk(BIOS_EMERG, "starting istep 14.3\n"); report_istep(14, 
3); - init_pecs(iovalid_enable); - init_phbs(phb_active_mask, iovalid_enable); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + init_pecs(chip, pci_info[chip].iovalid_enable); + init_phbs(chip, pci_info[chip].phb_active_mask, pci_info[chip].iovalid_enable); + } printk(BIOS_EMERG, "ending istep 14.3\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index a23e2bb613e..214c1c67944 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -403,7 +403,7 @@ void main(void) istep_14_1(); istep_14_2(); - istep_14_3(pci_info[0].phb_active_mask, pci_info[0].iovalid_enable); + istep_14_3(chips, pci_info); report_istep(14, 4); // no-op istep_14_5(); From 80392850dfd1f10ce29c0707a75741b758177534 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 9 Feb 2022 00:40:07 +0200 Subject: [PATCH 130/213] soc/power9/: extract chip unit limits to proc.h Change-Id: I7b3c315977491dc4990ce0557cda382ec870480d Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 7 +------ src/include/cpu/power/istep_18.h | 3 --- src/include/cpu/power/proc.h | 29 +++++++++++++++++++++++++++++ src/soc/ibm/power9/chip.c | 2 +- src/soc/ibm/power9/homer.c | 1 + src/soc/ibm/power9/homer.h | 13 ++----------- src/soc/ibm/power9/istep_10_1.c | 2 +- src/soc/ibm/power9/istep_10_10.c | 2 +- src/soc/ibm/power9/istep_10_12.c | 2 +- src/soc/ibm/power9/istep_14_3.c | 2 +- src/soc/ibm/power9/istep_18_11.c | 1 + src/soc/ibm/power9/istep_8_1.c | 2 +- src/soc/ibm/power9/istep_8_11.c | 2 +- src/soc/ibm/power9/istep_8_2.c | 2 +- src/soc/ibm/power9/istep_8_3.c | 2 +- src/soc/ibm/power9/istep_8_4.c | 2 +- src/soc/ibm/power9/istep_8_9.c | 3 +-- src/soc/ibm/power9/mvpd.c | 2 +- src/soc/ibm/power9/occ.c | 2 ++ src/soc/ibm/power9/pstates.c | 1 + src/soc/ibm/power9/romstage.c | 2 +- 21 files changed, 50 insertions(+), 34 deletions(-) create mode 100644 src/include/cpu/power/proc.h diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 4953a6bbfcb..40f7a08001a 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -4,14 +4,9 @@ #include #include #include +#include #include -#define MCS_PER_PROC 2 -#define MCA_PER_MCS 2 -#define MCA_PER_PROC (MCA_PER_MCS * MCS_PER_PROC) -#define DIMMS_PER_MCA 2 -#define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) - /* These should be in one of the SPD headers. 
*/ /* * Note: code in 13.3 depends on width/density having values as encoded in SPD diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h index a09faba5db5..3e9c154f75b 100644 --- a/src/include/cpu/power/istep_18.h +++ b/src/include/cpu/power/istep_18.h @@ -3,9 +3,6 @@ #ifndef CPU_PPC64_ISTEP18_H #define CPU_PPC64_ISTEP18_H -/* Frequency of XBus for Nimbus */ -#define FREQ_X_MHZ 2000 - void istep_18_11(void); void istep_18_12(void); diff --git a/src/include/cpu/power/proc.h b/src/include/cpu/power/proc.h new file mode 100644 index 00000000000..f5a45f824e5 --- /dev/null +++ b/src/include/cpu/power/proc.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_PROC_H +#define __SOC_IBM_POWER9_PROC_H + +#include // PPC_BIT(), PPC_BITMASK() + +#define MAX_CHIPS 2 + +#define MAX_CORES_PER_CHIP 24 +#define MAX_CORES_PER_EX 2 +#define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) +#define MAX_CMES_PER_CHIP (MAX_CORES_PER_CHIP / MAX_CORES_PER_EX) + +#define MCS_PER_PROC 2 +#define MCA_PER_MCS 2 +#define MCA_PER_PROC (MCA_PER_MCS * MCS_PER_PROC) +#define DIMMS_PER_MCA 2 +#define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) + +/* cores is a 64-bit map of functional cores of a single chip */ +#define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) +#define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2 * (ex), 2 * (ex) + 1))) +#define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4 * (eq), 4 * (eq) + 3))) + +/* Frequency of XBus for Nimbus */ +#define FREQ_X_MHZ 2000 + +#endif /* __SOC_IBM_POWER9_PROC_H */ diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 9d91ef98ebd..8cdd5ca3c94 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -8,10 +8,10 @@ #include #include #include +#include #include #include // xzalloc -#include "homer.h" #include "istep_13_scom.h" #include "chip.h" diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 695bbad1971..8f93f40d143 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index a0303cc28a2..0682f29d797 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -3,9 +3,10 @@ #ifndef __SOC_IBM_POWER9_HOMER_H #define __SOC_IBM_POWER9_HOMER_H -#include // PPC_BIT(), PPC_BITMASK() #include +#include + /* All fields are big-endian */ #define HOMER_ONE_REGION_SIZE (1 * MiB) @@ -55,12 +56,6 @@ #define CORE_SCOM_RESTORE_SIZE_PER_CORE \ (SCOM_RESTORE_ENTRY_SIZE * CORE_SCOM_RESTORE_REGS_PER_CORE) -#define MAX_CHIPS 2 -#define MAX_CORES_PER_CHIP 24 -#define MAX_CORES_PER_EX 2 -#define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) -#define MAX_CMES_PER_CHIP (MAX_CORES_PER_CHIP / MAX_CORES_PER_EX) - /* Offset from HOMER to OCC Host Data Area */ #define HOMER_OFFSET_TO_OCC_HOST_DATA (768 * KiB) @@ -329,10 +324,6 @@ check_member(homer_st, qpmr, 1 * MiB); check_member(homer_st, cpmr, 2 * MiB); check_member(homer_st, ppmr, 3 * MiB); -#define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) -#define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2*(ex), 2*(ex) + 1))) -#define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4*(eq), 4*(eq) + 3))) - struct voltage_bucket_data; void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores); diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c index 
6f9d73c047d..ee6be37f0a4 100644 --- a/src/soc/ibm/power9/istep_10_1.c +++ b/src/soc/ibm/power9/istep_10_1.c @@ -4,11 +4,11 @@ #include #include +#include #include #include #include "fsi.h" -#include "homer.h" #include "xbus.h" enum build_smp_adu_action { diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c index a2a11749bed..db696048437 100644 --- a/src/soc/ibm/power9/istep_10_10.c +++ b/src/soc/ibm/power9/istep_10_10.c @@ -3,13 +3,13 @@ #include #include +#include #include #include #include #include #include -#include "homer.h" #include "pci.h" #define MAX_LANE_GROUPS_PER_PEC 4 diff --git a/src/soc/ibm/power9/istep_10_12.c b/src/soc/ibm/power9/istep_10_12.c index 295554b5329..84f0f49e2ea 100644 --- a/src/soc/ibm/power9/istep_10_12.c +++ b/src/soc/ibm/power9/istep_10_12.c @@ -3,9 +3,9 @@ #include #include +#include #include -#include "homer.h" #include "pci.h" /* PCIe only at the moment, Hostboot also updates MC and OBus chiplets too */ diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index ba50844cea2..86eef066d9d 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -3,9 +3,9 @@ #include #include +#include #include -#include "homer.h" #include "pci.h" #include "scratch.h" diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index 2a3282da939..e1453e2e80f 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -3,6 +3,7 @@ #include #include +#include #include #include diff --git a/src/soc/ibm/power9/istep_8_1.c b/src/soc/ibm/power9/istep_8_1.c index cf87d3b297e..f0ea338e316 100644 --- a/src/soc/ibm/power9/istep_8_1.c +++ b/src/soc/ibm/power9/istep_8_1.c @@ -6,10 +6,10 @@ #include #include #include +#include #include #include "fsi.h" -#include "homer.h" #include "scratch.h" /* diff --git a/src/soc/ibm/power9/istep_8_11.c b/src/soc/ibm/power9/istep_8_11.c index d7c5081bc71..35e62c58a26 100644 --- a/src/soc/ibm/power9/istep_8_11.c +++ b/src/soc/ibm/power9/istep_8_11.c @@ -3,9 +3,9 @@ #include #include +#include #include -#include "homer.h" #include "xbus.h" static void xbus_enable_ridi(uint8_t chip) diff --git a/src/soc/ibm/power9/istep_8_2.c b/src/soc/ibm/power9/istep_8_2.c index b677a6c24ae..ee7b16d5350 100644 --- a/src/soc/ibm/power9/istep_8_2.c +++ b/src/soc/ibm/power9/istep_8_2.c @@ -5,9 +5,9 @@ #include #include #include +#include #include "fsi.h" -#include "homer.h" /* * 8.2 host_setup_sbe diff --git a/src/soc/ibm/power9/istep_8_3.c b/src/soc/ibm/power9/istep_8_3.c index dee26a43b3a..6836374cf28 100644 --- a/src/soc/ibm/power9/istep_8_3.c +++ b/src/soc/ibm/power9/istep_8_3.c @@ -5,10 +5,10 @@ #include #include #include +#include #include #include "fsi.h" -#include "homer.h" /* * 8.3 host_cbs_start diff --git a/src/soc/ibm/power9/istep_8_4.c b/src/soc/ibm/power9/istep_8_4.c index 979cd091eec..5182153888d 100644 --- a/src/soc/ibm/power9/istep_8_4.c +++ b/src/soc/ibm/power9/istep_8_4.c @@ -4,10 +4,10 @@ #include #include #include +#include #include #include "fsi.h" -#include "homer.h" /* * 8.4 proc_check_slave_sbe_seeprom_complete : Check Slave SBE Complete diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c index d919e345f10..965126a8558 100644 --- a/src/soc/ibm/power9/istep_8_9.c +++ b/src/soc/ibm/power9/istep_8_9.c @@ -3,12 +3,11 @@ #include #include -#include #include +#include #include #include -#include "homer.h" #include "xbus.h" /* diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 
ad04e5b721a..3a1e15e3a27 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -14,7 +15,6 @@ #include #include -#include "homer.h" #include "tor.h" #define MVPD_TOC_ENTRIES 32 diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index 0e65f69bb00..010641a1851 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -10,6 +10,8 @@ #include #include +#include + #include "homer.h" #include "ops.h" diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c index 8bfbe938f23..03ff932fde9 100644 --- a/src/soc/ibm/power9/pstates.c +++ b/src/soc/ibm/power9/pstates.c @@ -4,6 +4,7 @@ #include "wof.h" #include #include +#include #include #include #include // memcpy diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 214c1c67944..594b73d4fa4 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,6 @@ #include #include "fsi.h" -#include "homer.h" #include "pci.h" /* DIMM SPD addresses */ From cd396bb0980ad656bcb98dd1638de874d6decbf7 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 2 Jul 2022 16:28:30 +0300 Subject: [PATCH 131/213] soc/power9/: expose CONFIG_MAX_CPUS in menuconfig and use in code Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/proc.h | 4 ++++ src/mainboard/raptor-cs/talos-2/Kconfig | 4 ++-- src/soc/ibm/power9/fsi.c | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/include/cpu/power/proc.h b/src/include/cpu/power/proc.h index f5a45f824e5..c27c8b0da42 100644 --- a/src/include/cpu/power/proc.h +++ b/src/include/cpu/power/proc.h @@ -5,8 +5,12 @@ #include // PPC_BIT(), PPC_BITMASK() +/* Maximum number of chips supported by the code. 
*/ #define MAX_CHIPS 2 +_Static_assert(CONFIG_MAX_CPUS > 0, "At least one CPUs is needed"); +_Static_assert(CONFIG_MAX_CPUS <= MAX_CHIPS, "Too many CPUs requested"); + #define MAX_CORES_PER_CHIP 24 #define MAX_CORES_PER_EX 2 #define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index ad7ca077226..e719dc86f80 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -56,8 +56,8 @@ config DIMM_SPD_SIZE default 512 config MAX_CPUS - int - default 1 + int "Maximum number of CPUs to use (1-2)" + default 2 config MAINBOARD_VENDOR string diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c index 89c50ba86c9..7bc44612ac1 100644 --- a/src/soc/ibm/power9/fsi.c +++ b/src/soc/ibm/power9/fsi.c @@ -310,7 +310,7 @@ uint8_t fsi_get_present_chips(void) /* Status of the second CPU (connected to port #1) */ chips |= ((present_slaves & 0x40) >> 5); - return chips; + return chips & ((1 << CONFIG_MAX_CPUS) - 1); } /* Polls OPB dying on error or timeout */ From 41bab4296f6da629f70f020594113334a896c351 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Jan 2022 17:29:13 +0200 Subject: [PATCH 132/213] soc/power9/istep_13_6.c: get MCS PG data from MVPD Change-Id: Id5bcd2b74bbf3f74e04c34eb3e92a197871927fd Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/mvpd.h | 4 ++++ src/soc/ibm/power9/istep_13_6.c | 38 ++++++++++++--------------------- src/soc/ibm/power9/mvpd.c | 21 ++++++++++++++++++ 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h index 68b1533dd0a..7da12a673c7 100644 --- a/src/include/cpu/power/mvpd.h +++ b/src/include/cpu/power/mvpd.h @@ -52,6 +52,10 @@ const struct region_device *mvpd_device_ro(void); /* Reads #V of one of LRP records (mind that there is only one buffer) */ const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp); +/* Sets pg[0] and pg[1] to partial good values for MC01_CHIPLET_ID and + * MC23_CHIPLET_ID respectively */ +void mvpd_get_mcs_pg(uint8_t chip, uint16_t *pg); + /* Builds bitmask of functional cores based on Partial Good vector stored in PG * keyword of CP00 record */ uint64_t mvpd_get_available_cores(uint8_t chip); diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c index b2b17982578..a467990aefd 100644 --- a/src/soc/ibm/power9/istep_13_6.c +++ b/src/soc/ibm/power9/istep_13_6.c @@ -1,29 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include "istep_13_scom.h" -/* - * FIXME: ATTR_PG value should come from MEMD partition, but it is empty after - * build. Default value from talos.xml (5 for all chiplets) probably never makes - * sense. Value read from already booted MVPD is 0xE0 (?) for both MCSs. We can - * either add functions to read and parse MVPD or just hardcode the values. So - * far I haven't found the code that writes to MVPD in Hostboot, other than for - * PDI keyword (PG keyword should be used here). - * - * Value below comes from a log of booting Hostboot. It isn't even remotely - * similar to values mentioned above. It touches bits marked as reserved in the - * documentation, so we can't rely on specification to be up to date. - * - * As this describes whether clocks on second MCS should be started or not, this - * definitely will be different when more DIMMs are installed. 
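/*
 * A minimal sketch of the flow that replaces the hardcoded value, assuming
 * the mvpd_get_mcs_pg() helper added in this patch and the PPC_PLACE()/
 * PPC_BITMASK() macros already used by this file; names such as
 * fences_to_drop are illustrative only:
 *
 *	uint16_t pg[MCS_PER_PROC];
 *
 *	mvpd_get_mcs_pg(chip, pg);
 *	// Each PG keyword entry is 16 bits; left-justify it into bits 0-15 of
 *	// the 64-bit SCOM operand (IBM bit numbering, bit 0 is the MSB).
 *	uint64_t mcs_pg = PPC_PLACE(pg[mcs_i], 0, 16);
 *	// Partial-good bits mark working regions, so the fences to clear are
 *	// their complement within the TC_*_FENCE field.
 *	uint64_t fences_to_drop = ~mcs_pg & PPC_BITMASK(3, 14);
 */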
- */ -#define ATTR_PG 0xE1FC000000000000ull - -static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id) +static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id, uint64_t pg) { // Drop partial good fences /* @@ -32,7 +16,7 @@ static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id) [3] TC_VITL_REGION_FENCE = ~ATTR_PG[3] [4-14] TC_REGION{1-3}_FENCE, UNUSED_{8-14}B = ~ATTR_PG[4-14] */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL1_WCLEAR, ~ATTR_PG & PPC_BITMASK(3, 14)); + write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL1_WCLEAR, ~pg & PPC_BITMASK(3, 14)); // Reset abistclk_muxsel and syncclk_muxsel /* @@ -106,7 +90,7 @@ static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); } -static void p9_sbe_common_clock_start_stop(chiplet_id_t id) +static void p9_sbe_common_clock_start_stop(chiplet_id_t id, uint64_t pg) { // Chiplet exit flush /* @@ -144,7 +128,7 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id) PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | - (~ATTR_PG & PPC_BITMASK(4, 13))); + (~pg & PPC_BITMASK(4, 13))); // Poll OPCG done bit to check for completeness /* @@ -169,7 +153,7 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id) TP.TCMC01.MCSLOW.CLOCK_STAT_ARY assert(([4-14] & ATTR_PG[4-14]) == ATTR_PG[4-14]) */ - uint64_t mask = ATTR_PG & PPC_BITMASK(4, 13); + uint64_t mask = pg & PPC_BITMASK(4, 13); if ((read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_SL) & PPC_BITMASK(4, 13)) != mask || (read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_NSL) & PPC_BITMASK(4, 13)) != mask || (read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_ARY) & PPC_BITMASK(4, 13)) != mask) @@ -257,24 +241,30 @@ void istep_13_6(void) { printk(BIOS_EMERG, "starting istep 13.6\n"); int i; + uint16_t pg[MCS_PER_PROC]; report_istep(13, 6); /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + /* TODO: update for second CPU */ + mvpd_get_mcs_pg(/*chip=*/0, pg); + for (i = 0; i < MCS_PER_PROC; i++) { + const uint64_t mcs_pg = PPC_PLACE(pg[i], 0, 16); + /* According to logs, Hostboot does it also for the second MCS */ //~ if (!mem_data.mcs[i].functional) //~ continue; // Call p9_mem_startclocks_cplt_ctrl_action_function for Mc chiplets - p9_mem_startclocks_cplt_ctrl_action_function(mcs_ids[i]); + p9_mem_startclocks_cplt_ctrl_action_function(mcs_ids[i], mcs_pg); // Call module align chiplets for Mc chiplets p9_sbe_common_align_chiplets(mcs_ids[i]); // Call module clock start stop for MC01, MC23 - p9_sbe_common_clock_start_stop(mcs_ids[i]); + p9_sbe_common_clock_start_stop(mcs_ids[i], mcs_pg); // Call p9_mem_startclocks_fence_setup_function for Mc chiplets p9_mem_startclocks_fence_setup_function(mcs_ids[i]); diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index 3a1e15e3a27..ae42ad64bda 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -402,6 +402,27 @@ static const uint8_t *mvpd_get_keyword(uint8_t cpu, const char *record_name, return kwd; } +void mvpd_get_mcs_pg(uint8_t chip, uint16_t *pg) +{ + enum { + VPD_CP00_PG_HDR_LENGTH = 1, + VPD_CP00_PG_DATA_LENGTH = 128, + VPD_CP00_PG_DATA_ENTRIES = VPD_CP00_PG_DATA_LENGTH / 2, + }; + + uint8_t raw_pg_data[VPD_CP00_PG_HDR_LENGTH + VPD_CP00_PG_DATA_LENGTH]; + uint16_t pg_data[VPD_CP00_PG_DATA_ENTRIES]; + uint32_t size = sizeof(raw_pg_data); + + if (!mvpd_extract_keyword(chip, "CP00", "PG", raw_pg_data, 
&size)) + die("Failed to read CPU%d/MVPD/CP00/PG", chip); + + memcpy(pg_data, raw_pg_data + VPD_CP00_PG_HDR_LENGTH, sizeof(pg_data)); + + pg[0] = pg_data[MC01_CHIPLET_ID]; + pg[1] = pg_data[MC23_CHIPLET_ID]; +} + bool mvpd_extract_keyword(uint8_t chip, const char *record_name, const char *kwd_name, uint8_t *buf, uint32_t *size) { From 9362d055e3a1d8a67dfeaa108e9e6da5185c3bd8 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Jan 2022 17:37:32 +0200 Subject: [PATCH 133/213] soc/power9/istep_13_2.c: update for second CPU Change-Id: I52b2ee0e4fdf9f842c5a2df90c96d745d6770b0f Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_2.c | 91 ++++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 40f7a08001a..c6bd5dd0fb8 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -295,7 +295,7 @@ static inline enum ddr4_mr1_rtt_nom vpd_to_rtt_nom(uint8_t vpd) } } -void istep_13_2(void); +void istep_13_2(uint8_t chips); void istep_13_3(void); void istep_13_4(void); void istep_13_6(void); diff --git a/src/soc/ibm/power9/istep_13_2.c b/src/soc/ibm/power9/istep_13_2.c index 294479e83ab..095fcfd3704 100644 --- a/src/soc/ibm/power9/istep_13_2.c +++ b/src/soc/ibm/power9/istep_13_2.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include @@ -18,16 +19,12 @@ * - Disable listen_to_sync for MEM chiplet, whenever MEM is not in sync to * NEST */ -void istep_13_2(void) + +static void mem_pll_reset(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.2\n"); int i; long time_elapsed = 0; - report_istep(13, 2); - - /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ - for (i = 0; i < MCS_PER_PROC; i++) { // Assert endpoint reset /* @@ -35,16 +32,16 @@ void istep_13_2(void) [all] 0 [1] PCB_EP_RESET = 1 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); // Mask PLL unlock error in PCB slave /* TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG [12] (part of) ERROR_MASK = 1 */ - scom_or_for_chiplet(mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, - PPC_BIT(12)); + rscom_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, + PPC_BIT(12)); // Move MC PLL into reset state (3 separate writes, no delays between them) /* @@ -58,12 +55,12 @@ void istep_13_2(void) [all] 0 [3] PLL_TEST_EN = 1 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); // Assert MEM PLDY and DCC bypass /* @@ -72,9 +69,9 @@ void istep_13_2(void) [1] CLK_DCC_BYPASS_EN = 1 [2] CLK_PDLY_BYPASS_EN = 1 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL1_WOR, - 
PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN) | - PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN) | + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); // Drop endpoint reset /* @@ -82,16 +79,16 @@ void istep_13_2(void) [all] 1 [1] PCB_EP_RESET = 0 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); // Disable listen to sync pulse to MC chiplet, when MEM is not in sync to nest /* TP.TCMC01.MCSLOW.SYNC_CONFIG [4] LISTEN_TO_SYNC_PULSE_DIS = 1 */ - scom_or_for_chiplet(mcs_ids[i], MCSLOW_SYNC_CONFIG, - PPC_BIT(MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS)); + rscom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_SYNC_CONFIG, + PPC_BIT(MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS)); // Initialize OPCG_ALIGN register /* @@ -102,7 +99,7 @@ void istep_13_2(void) [47-51] SCAN_RATIO = 0 // 1:1 [52-63] OPCG_WAIT_CYCLES = 0x20 */ - write_scom_for_chiplet(mcs_ids[i], MCSLOW_OPCG_ALIGN, + write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, PPC_PLACE(5, MCSLOW_OPCG_ALIGN_INOP_ALIGN, MCSLOW_OPCG_ALIGN_INOP_ALIGN_LEN) | PPC_PLACE(0x20, MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES, @@ -126,18 +123,18 @@ void istep_13_2(void) TP.TCMC01.MCSLOW.OPCG_REG0 [2] RUN_SCAN0 = 1 */ - write_scom_for_chiplet(mcs_ids[i], MCSLOW_CLK_REGION, - PPC_BIT(MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY)); - write_scom_for_chiplet(mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, - PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10) | - PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY)); - scom_and_for_chiplet(mcs_ids[i], MCSLOW_OPCG_REG0, - ~PPC_BIT(MCSLOW_OPCG_RUNN_MODE)); - scom_or_for_chiplet(mcs_ids[i], MCSLOW_OPCG_REG0, - PPC_BIT(MCSLOW_OPCG_RUN_SCAN0)); + write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, + PPC_BIT(MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY)); + write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10) | + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY)); + rscom_and_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, + ~PPC_BIT(MCSLOW_OPCG_RUNN_MODE)); + rscom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, + PPC_BIT(MCSLOW_OPCG_RUN_SCAN0)); } /* Separate loop so we won't have to wait for timeout twice */ @@ -153,7 +150,8 @@ void istep_13_2(void) delay(16us) */ time_elapsed = wait_us(200 * 16 - time_elapsed, - read_scom_for_chiplet(mcs_ids[i], MCSLOW_CPLT_STAT0) & + read_rscom_for_chiplet(chip, mcs_ids[i], + MCSLOW_CPLT_STAT0) & PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC)); if (!time_elapsed) @@ -166,8 +164,23 @@ void istep_13_2(void) TP.TCMC01.MCSLOW.SCAN_REGION_TYPE [all] 0 */ - write_scom_for_chiplet(mcs_ids[i], MCSLOW_CLK_REGION, 0); - write_scom_for_chiplet(mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, 0); + write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, 0); + write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, 0); + } +} + +void istep_13_2(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.2\n"); + report_istep(13, 2); + + /* Assuming MC doesn't run in sync mode 
with Fabric, otherwise this is no-op */ + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_pll_reset(chip); } printk(BIOS_EMERG, "ending istep 13.2\n"); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 594b73d4fa4..22e88175997 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -388,7 +388,7 @@ void main(void) prepare_dimm_data(); report_istep(13, 1); // no-op - istep_13_2(); + istep_13_2(chips); istep_13_3(); istep_13_4(); report_istep(13, 5); // no-op From 6439cd7a0373f11ee6ded166fa17cd420378dda3 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Jan 2022 17:44:10 +0200 Subject: [PATCH 134/213] soc/power9/istep_13_3.c: update for second CPU Change-Id: Ie2bd4d69be094324c0bd56c4cc4ee7412e138a1c Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_3.c | 37 +++++++++++++++++++++----------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index c6bd5dd0fb8..842ce41d107 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -296,7 +296,7 @@ static inline enum ddr4_mr1_rtt_nom vpd_to_rtt_nom(uint8_t vpd) } void istep_13_2(uint8_t chips); -void istep_13_3(void); +void istep_13_3(uint8_t chips); void istep_13_4(void); void istep_13_6(void); void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index b4d2536d324..92b26bd5f8d 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include @@ -26,14 +27,12 @@ * - Data is stored as a ring image in the SBE that is frequency specific * - 5 different frequencies (1866, 2133, 2400, 2667, EXP) */ -void istep_13_3(void) + +static void mem_pll_initf(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.3\n"); uint64_t ring_id; int mcs_i; - report_istep(13, 3); - /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ switch (mem_data.speed) { @@ -66,7 +65,7 @@ void istep_13_3(void) * making a function from this. */ // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG - if (read_scom(PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) + if (read_rscom(chip, PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) die("MBOX to SBE busy, this should not happen\n"); @@ -86,17 +85,17 @@ void istep_13_3(void) * variable for it, which probably implies wrapping this into a function and * moving it to separate file. */ - write_scom(PSU_HOST_SBE_MBOX0_REG, 0x000001000000D301); + write_rscom(chip, PSU_HOST_SBE_MBOX0_REG, 0x000001000000D301); // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG /* TARGET_TYPE_PERV, chiplet ID = 0x07, ring ID, RING_MODE_SET_PULSE_NSL */ - write_scom(PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | - PPC_PLACE(ring_id, 32, 16) | - PPC_PLACE(mcs_ids[mcs_i], 24, 8)); + write_rscom(chip, PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | + PPC_PLACE(ring_id, 32, 16) | + PPC_PLACE(mcs_ids[mcs_i], 24, 8)); // Ring the host->SBE doorbell // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR - write_scom(PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0)); + write_rscom(chip, PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0)); // Wait for response /* @@ -108,7 +107,8 @@ void istep_13_3(void) * thorough testing we probably should trim it. 
*/ // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG - time = wait_ms(90 * MSECS_PER_SEC, read_scom(PSU_HOST_DOORBELL_REG) & PPC_BIT(0)); + time = wait_ms(90 * MSECS_PER_SEC, + read_rscom(chip, PSU_HOST_DOORBELL_REG) & PPC_BIT(0)); if (!time) die("Timed out while waiting for SBE response\n"); @@ -119,7 +119,20 @@ void istep_13_3(void) // Clear SBE->host doorbell // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND - write_scom(PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0)); + write_rscom(chip, PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0)); + } +} + +void istep_13_3(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.3\n"); + report_istep(13, 3); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_pll_initf(chip); } printk(BIOS_EMERG, "ending istep 13.3\n"); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 22e88175997..10ea9320c72 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -389,7 +389,7 @@ void main(void) report_istep(13, 1); // no-op istep_13_2(chips); - istep_13_3(); + istep_13_3(chips); istep_13_4(); report_istep(13, 5); // no-op istep_13_6(); From 8a0472293fc105d105943c552e4c3ef53143d672 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Jan 2022 18:48:10 +0200 Subject: [PATCH 135/213] soc/power9/istep_13_4.c: update for second CPU Change-Id: I5a5275475c32db6541c702c710f49b6ce3737cbb Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_4.c | 57 ++++++++++++++++++++------------ src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 842ce41d107..e1059a488ca 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -297,7 +297,7 @@ static inline enum ddr4_mr1_rtt_nom vpd_to_rtt_nom(uint8_t vpd) void istep_13_2(uint8_t chips); void istep_13_3(uint8_t chips); -void istep_13_4(void); +void istep_13_4(uint8_t chips); void istep_13_6(void); void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 void istep_13_9(void); diff --git a/src/soc/ibm/power9/istep_13_4.c b/src/soc/ibm/power9/istep_13_4.c index 07602a304fb..0041d09a0ff 100644 --- a/src/soc/ibm/power9/istep_13_4.c +++ b/src/soc/ibm/power9/istep_13_4.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include "istep_13_scom.h" @@ -16,15 +17,11 @@ * - Moved PLL out of bypass (just DDR) * - Performs PLL checking */ -void istep_13_4(void) + +static void mem_pll_setup(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.4\n"); int i; - report_istep(13, 4); - - /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ - for (i = 0; i < MCS_PER_PROC; i++) { // Drop PLDY bypass of Progdelay logic /* @@ -32,8 +29,8 @@ void istep_13_4(void) [all] 1 [2] CLK_PDLY_BYPASS_EN = 0 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); // Drop DCC bypass of DCC logic /* @@ -41,8 +38,8 @@ void istep_13_4(void) [all] 1 [1] CLK_DCC_BYPASS_EN = 0 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN)); // 
ATTR_NEST_MEM_X_O_PCI_BYPASS is set to 0 in talos.xml. // > if (ATTR_NEST_MEM_X_O_PCI_BYPASS == 0) @@ -53,8 +50,8 @@ void istep_13_4(void) [all] 1 [3] PLL_TEST_EN = 0 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); // Drop PLL reset /* @@ -62,8 +59,8 @@ void istep_13_4(void) [all] 1 [4] PLL_RESET = 0 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); /* * TODO: This is how Hosboot does it, maybe it would be better to use @@ -78,7 +75,8 @@ void istep_13_4(void) TP.TPCHIP.NET.PCBSLMC01.PLL_LOCK_REG assert([0] (reserved) == 1) */ - if (!(read_scom_for_chiplet(mcs_ids[i], PCBSLMC01_PLL_LOCK_REG) & PPC_BIT(0))) + if (!(read_rscom_for_chiplet(chip, mcs_ids[i], + PCBSLMC01_PLL_LOCK_REG) & PPC_BIT(0))) die("MCS%d PLL not locked\n", i); // Drop PLL Bypass @@ -87,7 +85,7 @@ void istep_13_4(void) [all] 1 [5] PLL_BYPASS = 0 */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); // Set scan ratio to 4:1 @@ -95,9 +93,10 @@ void istep_13_4(void) TP.TCMC01.MCSLOW.OPCG_ALIGN [47-51] SCAN_RATIO = 3 // 4:1 */ - scom_and_or_for_chiplet(mcs_ids[i], MCSLOW_OPCG_ALIGN, ~PPC_BITMASK(47,51), - PPC_PLACE(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO, - MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN)); + rscom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, + ~PPC_BITMASK(47,51), + PPC_PLACE(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO, + MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN)); // > end if @@ -106,14 +105,30 @@ void istep_13_4(void) TP.TPCHIP.NET.PCBSLMC01.ERROR_REG [all] 1 // Write 1 to clear */ - write_scom_for_chiplet(mcs_ids[i], PCBSLMC01_ERROR_REG, ~0); + write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_ERROR_REG, ~0); // Unmask PLL unlock error in PCB slave /* TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG [12] (part of) ERROR_MASK = 0 */ - scom_and_for_chiplet(mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, ~PPC_BIT(12)); + rscom_and_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, + ~PPC_BIT(12)); + } +} + +void istep_13_4(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.4\n"); + report_istep(13, 4); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_pll_setup(chip); } printk(BIOS_EMERG, "ending istep 13.4\n"); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 10ea9320c72..7aec098cd4e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -390,7 +390,7 @@ void main(void) report_istep(13, 1); // no-op istep_13_2(chips); istep_13_3(chips); - istep_13_4(); + istep_13_4(chips); report_istep(13, 5); // no-op istep_13_6(); report_istep(13, 7); // no-op From 818da41460f2f5d4652ca7d8504f63c824f96d6e Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Jan 2022 18:53:33 +0200 Subject: [PATCH 136/213] soc/power9/istep_13_6.c: update for second CPU Change-Id: I01834226dd559ef39a2d687cdf4334077cc0326f Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_6.c | 177 +++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files 
changed, 98 insertions(+), 83 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index e1059a488ca..6b845bb4a11 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -298,7 +298,7 @@ static inline enum ddr4_mr1_rtt_nom vpd_to_rtt_nom(uint8_t vpd) void istep_13_2(uint8_t chips); void istep_13_3(uint8_t chips); void istep_13_4(uint8_t chips); -void istep_13_6(void); +void istep_13_6(uint8_t chips); void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 void istep_13_9(void); void istep_13_10(void); diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c index a467990aefd..67174367164 100644 --- a/src/soc/ibm/power9/istep_13_6.c +++ b/src/soc/ibm/power9/istep_13_6.c @@ -2,12 +2,24 @@ #include #include +#include #include #include #include "istep_13_scom.h" -static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id, uint64_t pg) +/* + * 13.6 mem_startclocks: Start clocks on MBA/MCAs + * + * a) p9_mem_startclocks.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - Drop fences and tholds on MBA/MCAs to start the functional clocks + */ + +static inline void p9_mem_startclocks_cplt_ctrl_action_function(uint8_t chip, chiplet_id_t id, + uint64_t pg) { // Drop partial good fences /* @@ -16,7 +28,7 @@ static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id, [3] TC_VITL_REGION_FENCE = ~ATTR_PG[3] [4-14] TC_REGION{1-3}_FENCE, UNUSED_{8-14}B = ~ATTR_PG[4-14] */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL1_WCLEAR, ~pg & PPC_BITMASK(3, 14)); + write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL1_WCLEAR, ~pg & PPC_BITMASK(3, 14)); // Reset abistclk_muxsel and syncclk_muxsel /* @@ -25,13 +37,13 @@ static inline void p9_mem_startclocks_cplt_ctrl_action_function(chiplet_id_t id, [0] CTRL_CC_ABSTCLK_MUXSEL_DC = 1 [1] TC_UNIT_SYNCCLK_MUXSEL_DC = 1 */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WCLEAR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC) | - PPC_BIT(MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC)); + write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC) | + PPC_BIT(MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC)); } -static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) +static inline void p9_sbe_common_align_chiplets(uint8_t chip, chiplet_id_t id) { // Exit flush /* @@ -39,8 +51,8 @@ static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) [all] 0 [2] CTRL_CC_FLUSHMODE_INH_DC = 1 */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WOR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); // Enable alignement /* @@ -48,24 +60,24 @@ static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) [all] 0 [3] CTRL_CC_FORCE_ALIGN_DC = 1 */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WOR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); + write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); // Clear chiplet is aligned /* TP.TCMC01.MCSLOW.SYNC_CONFIG [7] CLEAR_CHIPLET_IS_ALIGNED = 1 */ - scom_or_for_chiplet(id, MCSLOW_SYNC_CONFIG, - PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); + rscom_or_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, 
+ PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); // Unset Clear chiplet is aligned /* TP.TCMC01.MCSLOW.SYNC_CONFIG [7] CLEAR_CHIPLET_IS_ALIGNED = 0 */ - scom_and_for_chiplet(id, MCSLOW_SYNC_CONFIG, - ~PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); + rscom_and_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, + ~PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); udelay(100); @@ -76,7 +88,7 @@ static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) if (([9] CC_CTRL_CHIPLET_IS_ALIGNED_DC) == 1) break delay(100us) */ - if (!wait_us(10 * 100, read_scom_for_chiplet(id, MCSLOW_CPLT_STAT0) & + if (!wait_us(10 * 100, read_rscom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) & PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_CHIPLET_IS_ALIGNED_DC))) die("Timeout while waiting for chiplet alignment\n"); @@ -86,11 +98,11 @@ static inline void p9_sbe_common_align_chiplets(chiplet_id_t id) [all] 0 [3] CTRL_CC_FORCE_ALIGN_DC = 1 */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WCLEAR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); + write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); } -static void p9_sbe_common_clock_start_stop(chiplet_id_t id, uint64_t pg) +static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64_t pg) { // Chiplet exit flush /* @@ -98,15 +110,15 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id, uint64_t pg) [all] 0 [2] CTRL_CC_FLUSHMODE_INH_DC = 1 */ - write_scom_for_chiplet(id, MCSLOW_CPLT_CTRL0_WOR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); // Clear Scan region type register /* TP.TCMC01.MCSLOW.SCAN_REGION_TYPE [all] 0 */ - write_scom_for_chiplet(id, MCSLOW_SCAN_REGION_TYPE, 0); + write_rscom_for_chiplet(chip, id, MCSLOW_SCAN_REGION_TYPE, 0); // Setup all Clock Domains and Clock Types /* @@ -121,14 +133,14 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id, uint64_t pg) [49] SEL_THOLD_NSL = 1 [50] SEL_THOLD_ARY = 1 */ - scom_and_or_for_chiplet(id, MCSLOW_CLK_REGION, - ~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)), - PPC_PLACE(1, MCSLOW_CLK_REGION_CLOCK_CMD, - MCSLOW_CLK_REGION_CLOCK_CMD_LEN) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | - (~pg & PPC_BITMASK(4, 13))); + rscom_and_or_for_chiplet(chip, id, MCSLOW_CLK_REGION, + ~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)), + PPC_PLACE(1, MCSLOW_CLK_REGION_CLOCK_CMD, + MCSLOW_CLK_REGION_CLOCK_CMD_LEN) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | + (~pg & PPC_BITMASK(4, 13))); // Poll OPCG done bit to check for completeness /* @@ -137,7 +149,7 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id, uint64_t pg) if (([8] CC_CTRL_OPCG_DONE_DC) == 1) break delay(100us) */ - if (!wait_us(10 * 100, read_scom_for_chiplet(id, MCSLOW_CPLT_STAT0) & + if (!wait_us(10 * 100, read_rscom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) & PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC))) die("Timeout while waiting for OPCG done bit\n"); @@ -153,14 +165,15 @@ static void p9_sbe_common_clock_start_stop(chiplet_id_t id, uint64_t pg) TP.TCMC01.MCSLOW.CLOCK_STAT_ARY assert(([4-14] & ATTR_PG[4-14]) == ATTR_PG[4-14]) */ - uint64_t mask = pg & PPC_BITMASK(4, 13); - if ((read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_SL) & PPC_BITMASK(4, 13)) != 
mask || - (read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_NSL) & PPC_BITMASK(4, 13)) != mask || - (read_scom_for_chiplet(id, MCSLOW_CLOCK_STAT_ARY) & PPC_BITMASK(4, 13)) != mask) + uint64_t mask = PPC_BITMASK(4, 13); + uint64_t expected = pg & mask; + if ((read_rscom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_SL) & mask) != expected || + (read_rscom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_NSL) & mask) != expected || + (read_rscom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_ARY) & mask) != expected) die("Unexpected clock status\n"); } -static inline void p9_mem_startclocks_fence_setup_function(chiplet_id_t id) +static inline void p9_mem_startclocks_fence_setup_function(uint8_t chip, chiplet_id_t id) { /* * Hostboot does it based on pg_vector. It seems to check for Nest IDs to @@ -183,20 +196,20 @@ static inline void p9_mem_startclocks_fence_setup_function(chiplet_id_t id) [all] 1 [18] FENCE_EN = 0 */ - write_scom_for_chiplet(id, PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_FENCE_EN)); + write_rscom_for_chiplet(chip, id, PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_FENCE_EN)); /* }*/ } -static void p9_sbe_common_configure_chiplet_FIR(chiplet_id_t id) +static void p9_sbe_common_configure_chiplet_FIR(uint8_t chip, chiplet_id_t id) { // reset pervasive FIR /* TP.TCMC01.MCSLOW.LOCAL_FIR [all] 0 */ - write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR, 0); + write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR, 0); // configure pervasive FIR action/mask /* @@ -209,46 +222,31 @@ static void p9_sbe_common_configure_chiplet_FIR(chiplet_id_t id) [all] 0 [4-41] 0x3FFFFFFFFF (every bit set) */ - write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR_ACTION0, 0); - write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR_ACTION1, PPC_BITMASK(0, 3)); - write_scom_for_chiplet(id, MCSLOW_LOCAL_FIR_MASK, PPC_BITMASK(4, 41)); + write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION0, 0); + write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION1, PPC_BITMASK(0, 3)); + write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_MASK, PPC_BITMASK(4, 41)); // reset XFIR /* TP.TCMC01.MCSLOW.XFIR [all] 0 */ - write_scom_for_chiplet(id, MCSLOW_XFIR, 0); + write_rscom_for_chiplet(chip, id, MCSLOW_XFIR, 0); // configure XFIR mask /* TP.TCMC01.MCSLOW.FIR_MASK [all] 0 */ - write_scom_for_chiplet(id, MCSLOW_FIR_MASK, 0); + write_rscom_for_chiplet(chip, id, MCSLOW_FIR_MASK, 0); } -/* - * 13.6 mem_startclocks: Start clocks on MBA/MCAs - * - * a) p9_mem_startclocks.C (proc chip) - * - This step is a no-op on cumulus - * - This step is a no-op if memory is running in synchronous mode since the - * MCAs are using the nest PLL, HWP detect and exits - * - Drop fences and tholds on MBA/MCAs to start the functional clocks - */ -void istep_13_6(void) +static void mem_startclocks(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.6\n"); int i; uint16_t pg[MCS_PER_PROC]; - report_istep(13, 6); - - /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ - - /* TODO: update for second CPU */ - mvpd_get_mcs_pg(/*chip=*/0, pg); + mvpd_get_mcs_pg(chip, pg); for (i = 0; i < MCS_PER_PROC; i++) { const uint64_t mcs_pg = PPC_PLACE(pg[i], 0, 16); @@ -258,16 +256,16 @@ void istep_13_6(void) //~ continue; // Call p9_mem_startclocks_cplt_ctrl_action_function for Mc chiplets - p9_mem_startclocks_cplt_ctrl_action_function(mcs_ids[i], mcs_pg); + p9_mem_startclocks_cplt_ctrl_action_function(chip, mcs_ids[i], mcs_pg); // Call module align chiplets for Mc chiplets - p9_sbe_common_align_chiplets(mcs_ids[i]); + p9_sbe_common_align_chiplets(chip, 
mcs_ids[i]); // Call module clock start stop for MC01, MC23 - p9_sbe_common_clock_start_stop(mcs_ids[i], mcs_pg); + p9_sbe_common_clock_start_stop(chip, mcs_ids[i], mcs_pg); // Call p9_mem_startclocks_fence_setup_function for Mc chiplets - p9_mem_startclocks_fence_setup_function(mcs_ids[i]); + p9_mem_startclocks_fence_setup_function(chip, mcs_ids[i]); // Clear flush_inhibit to go in to flush mode /* @@ -275,11 +273,11 @@ void istep_13_6(void) [all] 0 [2] CTRL_CC_FLUSHMODE_INH_DC = 1 */ - write_scom_for_chiplet(mcs_ids[i], MCSLOW_CPLT_CTRL0_WCLEAR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); // Call p9_sbe_common_configure_chiplet_FIR for MC chiplets - p9_sbe_common_configure_chiplet_FIR(mcs_ids[i]); + p9_sbe_common_configure_chiplet_FIR(chip, mcs_ids[i]); // Reset FBC chiplet configuration /* @@ -293,8 +291,9 @@ void istep_13_6(void) * ATTR_FABRIC_GROUP_ID of parent PROC (same for CHIP_ID). Only * SYSTEM_ID is present in talos.xml with full name. */ - scom_and_for_chiplet(mcs_ids[i], MCSLOW_CPLT_CONF0, - ~(PPC_BITMASK(48, 54) | PPC_BITMASK(56, 60))); + rscom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CONF0, + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(56, 60)), + PPC_PLACE(chip, 48, 4)); // Add to Multicast Group /* Avoid setting if register is already set, i.e. [3-5] != 7 */ @@ -306,17 +305,33 @@ void istep_13_6(void) [3-5] MULTICAST1_GROUP: if 7 then set to 2 [16-23] (not described): if [3-5] == 7 then set to 0x1C */ - if ((read_scom_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1) & PPC_BITMASK(3, 5)) - == PPC_BITMASK(3, 5)) - scom_and_or_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1, - ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), - PPC_BITMASK(19, 21)); - - if ((read_scom_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2) & PPC_BITMASK(3, 5)) - == PPC_BITMASK(3, 5)) - scom_and_or_for_chiplet(mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2, - ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), - PPC_BIT(4) | PPC_BITMASK(19, 21)); + if ((read_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1) & + PPC_BITMASK(3, 5)) == PPC_BITMASK(3, 5)) + rscom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1, + ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), + PPC_BITMASK(19, 21)); + + if ((read_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2) & + PPC_BITMASK(3, 5)) == PPC_BITMASK(3, 5)) + rscom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2, + ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), + PPC_BIT(4) | PPC_BITMASK(19, 21)); + } + +} + +void istep_13_6(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.6\n"); + report_istep(13, 6); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_startclocks(chip); } printk(BIOS_EMERG, "ending istep 13.6\n"); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 7aec098cd4e..14619d7e017 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -392,7 +392,7 @@ void main(void) istep_13_3(chips); istep_13_4(chips); report_istep(13, 5); // no-op - istep_13_6(); + istep_13_6(chips); report_istep(13, 7); // no-op istep_13_8(); istep_13_9(); From d560c6d61dc0fd730ce3c507244fa7295c2648c2 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 22 Jan 2022 19:05:15 +0200 Subject: [PATCH 137/213] 
soc/power9/istep_13_8.c: update for second CPU Other isteps are updated because they use the same functions: * `css_*` * `mca_*` * `dp_mca_*` Change-Id: I3a1301dea8224e674e2fa8b5ae0f1f7bd0cc6b9c Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 33 +- src/soc/ibm/power9/ccs.c | 125 +++---- src/soc/ibm/power9/istep_13_10.c | 37 +- src/soc/ibm/power9/istep_13_11.c | 242 +++++++------ src/soc/ibm/power9/istep_13_13.c | 40 ++- src/soc/ibm/power9/istep_13_8.c | 598 ++++++++++++++++--------------- src/soc/ibm/power9/istep_13_9.c | 139 +++---- src/soc/ibm/power9/istep_14_1.c | 49 +-- src/soc/ibm/power9/istep_14_2.c | 7 +- src/soc/ibm/power9/romstage.c | 2 +- 10 files changed, 662 insertions(+), 610 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 6b845bb4a11..5c2a656a573 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -157,9 +157,8 @@ static inline void delay_nck(uint64_t nck) udelay(nck_to_us(nck)); } -/* TODO: discover which MCAs are used on second MCS (0,1,6,7? 0,1,4,5?) */ /* TODO: consider non-RMW variants */ -static inline void mca_and_or(chiplet_id_t mcs, int mca, uint64_t scom, +static inline void mca_and_or(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom, uint64_t and, uint64_t or) { /* @@ -168,35 +167,37 @@ static inline void mca_and_or(chiplet_id_t mcs, int mca, uint64_t scom, */ unsigned mul = (scom & PPC_BIT(0) || (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; - scom_and_or_for_chiplet(mcs, scom + mca * mul, and, or); + rscom_and_or_for_chiplet(chip, mcs, scom + mca * mul, and, or); } -static inline void dp_mca_and_or(chiplet_id_t mcs, int dp, int mca, +static inline void dp_mca_and_or(uint8_t chip, chiplet_id_t mcs, int dp, int mca, uint64_t scom, uint64_t and, uint64_t or) { - mca_and_or(mcs, mca, scom + dp * 0x40000000000, and, or); + mca_and_or(chip, mcs, mca, scom + dp * 0x40000000000, and, or); } -static inline uint64_t mca_read(chiplet_id_t mcs, int mca, uint64_t scom) +static inline uint64_t mca_read(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom) { /* Indirect registers have different stride than the direct ones in * general, except for (only?) direct PHY registers. */ unsigned mul = (scom & PPC_BIT(0) || (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; - return read_scom_for_chiplet(mcs, scom + mca * mul); + return read_rscom_for_chiplet(chip, mcs, scom + mca * mul); } -static inline void mca_write(chiplet_id_t mcs, int mca, uint64_t scom, uint64_t val) +static inline void mca_write(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom, + uint64_t val) { /* Indirect registers have different stride than the direct ones in * general, except for (only?) direct PHY registers. */ unsigned mul = (scom & PPC_BIT(0) || (scom & 0xFFFFF000) == 0x07011000) ? 
0x400 : 0x40; - write_scom_for_chiplet(mcs, scom + mca * mul, val); + write_rscom_for_chiplet(chip, mcs, scom + mca * mul, val); } -static inline uint64_t dp_mca_read(chiplet_id_t mcs, int dp, int mca, uint64_t scom) +static inline uint64_t dp_mca_read(uint8_t chip, chiplet_id_t mcs, int dp, int mca, + uint64_t scom) { - return mca_read(mcs, mca, scom + dp * 0x40000000000); + return mca_read(chip, mcs, mca, scom + dp * 0x40000000000); } enum rank_selection { @@ -224,13 +225,13 @@ enum cal_config { CAL_CUSTOM_WR = PPC_BIT(57) }; -void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, +void ccs_add_instruction(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, uint8_t cke, uint16_t idles); -void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, +void ccs_add_mrs(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, int mirror, uint16_t idles); -void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, +void ccs_phy_hw_step(uint8_t chip, chiplet_id_t id, int mca_i, int rp, enum cal_config conf, uint64_t step_cycles); -void ccs_execute(chiplet_id_t id, int mca_i); +void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i); static inline enum ddr4_mr5_rtt_park vpd_to_rtt_park(uint8_t vpd) { @@ -299,7 +300,7 @@ void istep_13_2(uint8_t chips); void istep_13_3(uint8_t chips); void istep_13_4(uint8_t chips); void istep_13_6(uint8_t chips); -void istep_13_8(void); // TODO: takes epsilon values from 8.6 and MSS data from 7.4 +void istep_13_8(uint8_t chips); // TODO: takes MSS data from 7.4 void istep_13_9(void); void istep_13_10(void); void istep_13_11(void); diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index 0117527556a..dce6f2b4983 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -29,7 +29,7 @@ static unsigned instr; static uint64_t total_cycles; /* TODO: 4R, CID? 
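 * 4R means quad-rank DIMMs and CID the C0-C2 chip ID signals used to
 * address individual dies on 3DS stacks; neither is handled here yet.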
*/ -void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, +void ccs_add_instruction(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, uint8_t cke, uint16_t idles) { /* @@ -64,25 +64,25 @@ void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = csn[0:1] [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = csn[2:3] */ - write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, - mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | - PPC_PLACE(cke, CCS_INST_ARR0_00_CCS_DDR_CKE, - CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | - PPC_PLACE(csn >> 2, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, - CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | - PPC_PLACE(csn, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, - CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); + write_rscom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | + PPC_PLACE(cke, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(csn >> 2, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(csn, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); /* MC01.MCBIST.CCS.CCS_INST_ARR1_n [all] 0 [0-15] CCS_INST_ARR1_00_IDLES = idles [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 */ - write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, - PPC_PLACE(idles, CCS_INST_ARR1_00_IDLES, - CCS_INST_ARR1_00_IDLES_LEN) | - PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, - CCS_INST_ARR1_00_GOTO_CMD_LEN)); + write_rscom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_PLACE(idles, CCS_INST_ARR1_00_IDLES, + CCS_INST_ARR1_00_IDLES_LEN) | + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); /* * For the last instruction in the stream we could decrease it by one (final @@ -99,10 +99,10 @@ void ccs_add_instruction(chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, } /* This isn't useful for anything but calibration steps, do we want it? */ -static void dump_cal_errors(chiplet_id_t id, int mca_i) +static void dump_cal_errors(uint8_t chip, chiplet_id_t id, int mca_i) { /* Stop CCS so it won't mess up with the values */ - write_scom_for_chiplet(id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); #if CONFIG(DEBUG_RAM_SETUP) int dp; @@ -156,7 +156,7 @@ static void dump_cal_errors(chiplet_id_t id, int mca_i) die("CCS execution timeout\n"); } -void ccs_execute(chiplet_id_t id, int mca_i) +void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) { uint64_t poll_timeout; long time; @@ -171,8 +171,8 @@ void ccs_execute(chiplet_id_t id, int mca_i) total_cycles = 8; poll_timeout = nck_to_us((total_cycles * 7 * 4) / 8); - write_scom_for_chiplet(id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); - time = wait_us(1, !(read_scom_for_chiplet(id, CCS_STATQ) & + write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + time = wait_us(1, !(read_rscom_for_chiplet(chip, id, CCS_STATQ) & PPC_BIT(CCS_STATQ_CCS_IP))); /* Is it always as described below (CKE, CSN) or is it a copy of last instr? 
*/ @@ -189,26 +189,26 @@ void ccs_execute(chiplet_id_t id, int mca_i) [all] 0 [58] CCS_INST_ARR1_00_CCS_END = 1 */ - write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, - PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | - PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, - CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, - CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, - CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); - write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, - PPC_BIT(CCS_INST_ARR1_00_CCS_END)); + write_rscom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); + write_rscom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_BIT(CCS_INST_ARR1_00_CCS_END)); /* Select ports MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ // Broadcast mode is not supported, set only one bit at a time [2-5] MCB_CNTLQ_MCBCNTL_PORT_SEL = bitmap with MCA index */ - scom_and_or_for_chiplet(id, MCB_CNTLQ, ~PPC_BITMASK(2, 5), PPC_BIT(2 + mca_i)); + rscom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~PPC_BITMASK(2, 5), PPC_BIT(2 + mca_i)); /* Lets go */ - write_scom_for_chiplet(id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START)); + write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START)); /* With microsecond resolution we are probably wasting a lot of time here. */ delay_nck(total_cycles/8); @@ -218,19 +218,20 @@ void ccs_execute(chiplet_id_t id, int mca_i) delay(10ns) if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ != 0x40..00: report failure // only [1] set, others 0 */ - time = wait_us(poll_timeout, (udelay(1), !(read_scom_for_chiplet(id, CCS_STATQ) & + time = wait_us(poll_timeout, (udelay(1), !(read_rscom_for_chiplet(chip, id, CCS_STATQ) & PPC_BIT(CCS_STATQ_CCS_IP)))); /* This isn't useful for anything but calibration steps, do we want it? */ if (!time) - dump_cal_errors(id, mca_i); + dump_cal_errors(chip, id, mca_i); printk(BIOS_DEBUG, "CCS took %lld us (%lld us timeout), %d instruction(s)\n", time + nck_to_us(total_cycles/8), poll_timeout + nck_to_us(total_cycles/8), instr); - if (read_scom_for_chiplet(id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE)) - die("(%#16.16llx) CCS execution error\n", read_scom_for_chiplet(id, CCS_STATQ)); + if (read_rscom_for_chiplet(chip, id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE)) + die("(%#16.16llx) CCS execution error\n", + read_rscom_for_chiplet(chip, id, CCS_STATQ)); instr = 0; total_cycles = 0; @@ -263,7 +264,7 @@ static const mrs_cmd_t invert = 0xF02BF8; * the order of those operations doesn't matter. */ /* TODO: add support for A17. For now it is blocked in initial SPD parsing. */ -void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, +void ccs_add_mrs(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, int mirror, uint16_t idles) { if (ranks & DIMM0_RANK0) { @@ -275,10 +276,10 @@ void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, * delays but if we can ever confirm that we only need one we can fix this. 
* BRS" */ - ccs_add_instruction(id, mrs, 0x7, 0xF, idles); + ccs_add_instruction(chip, id, mrs, 0x7, 0xF, idles); /* DIMM 0, rank 0, side B - invert A3-A9, A11, A13, A17 (TODO), BA0-1, BG0-1 */ - ccs_add_instruction(id, mrs ^ invert, 0x7, 0xF, idles); + ccs_add_instruction(chip, id, mrs ^ invert, 0x7, 0xF, idles); } if (ranks & DIMM0_RANK1) { @@ -286,18 +287,18 @@ void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, if (mirror) mrs = ddr4_mrs_mirror_pins(mrs); - ccs_add_instruction(id, mrs, 0xB, 0xF, idles); + ccs_add_instruction(chip, id, mrs, 0xB, 0xF, idles); /* DIMM 0, rank 1, side B - MRS is already mirrored, just invert it */ - ccs_add_instruction(id, mrs ^ invert, 0xB, 0xF, idles); + ccs_add_instruction(chip, id, mrs ^ invert, 0xB, 0xF, idles); } if (ranks & DIMM1_RANK0) { /* DIMM 1, rank 0, side A */ - ccs_add_instruction(id, mrs, 0xD, 0xF, idles); + ccs_add_instruction(chip, id, mrs, 0xD, 0xF, idles); /* DIMM 1, rank 0, side B - invert A3-A9, A11, A13, A17 (TODO), BA0-1, BG0-1 */ - ccs_add_instruction(id, mrs ^ invert, 0xD, 0xF, idles); + ccs_add_instruction(chip, id, mrs ^ invert, 0xD, 0xF, idles); } if (ranks & DIMM1_RANK1) { @@ -305,14 +306,14 @@ void ccs_add_mrs(chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, if (mirror) mrs = ddr4_mrs_mirror_pins(mrs); - ccs_add_instruction(id, mrs, 0xE, 0xF, idles); + ccs_add_instruction(chip, id, mrs, 0xE, 0xF, idles); /* DIMM 1, rank 1, side B - MRS is already mirrored, just invert it */ - ccs_add_instruction(id, mrs ^ invert, 0xE, 0xF, idles); + ccs_add_instruction(chip, id, mrs ^ invert, 0xE, 0xF, idles); } } -void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, +void ccs_phy_hw_step(uint8_t chip, chiplet_id_t id, int mca_i, int rp, enum cal_config conf, uint64_t step_cycles) { /* MC01.MCBIST.CCS.CCS_INST_ARR0_n @@ -323,15 +324,15 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = 3 // Not used by the engine for calibration? 
[56-59] CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE = 0xc */ - write_scom_for_chiplet(id, CCS_INST_ARR0_00 + instr, - PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, - CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, - CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, - CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN) | - PPC_PLACE(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE, - CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN)); + write_rscom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN) | + PPC_PLACE(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE, + CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN)); /* MC01.MCBIST.CCS.CCS_INST_ARR1_n [all] 0 @@ -339,12 +340,12 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, [57] CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE = 1 [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 */ - write_scom_for_chiplet(id, CCS_INST_ARR1_00 + instr, - PPC_PLACE(rp, CCS_INST_ARR1_00_DDR_CAL_RANK, - CCS_INST_ARR1_00_DDR_CAL_RANK_LEN) | - PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | - PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, - CCS_INST_ARR1_00_GOTO_CMD_LEN)); + write_rscom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_PLACE(rp, CCS_INST_ARR1_00_DDR_CAL_RANK, + CCS_INST_ARR1_00_DDR_CAL_RANK_LEN) | + PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); total_cycles += step_cycles; instr++; @@ -355,9 +356,9 @@ void ccs_phy_hw_step(chiplet_id_t id, int mca_i, int rp, enum cal_config conf, [58] ABORT_ON_CAL_ERROR = 0 [60+rp] ENA_RANK_PAIR = 1 // So, rp must be [0-3] */ - mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG0_P0, + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG0_P0, ~(PPC_BITMASK(48, 58) | PPC_BITMASK(60, 63)), conf | PPC_BIT(ENA_RANK_PAIR_MSB + rp)); - ccs_execute(id, mca_i); + ccs_execute(chip, id, mca_i); } diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index ff97897ff81..3900ac040bf 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -10,7 +10,7 @@ #define SPD_I2C_BUS 3 -static void draminit_cke_helper(chiplet_id_t id, int mca_i) +static void draminit_cke_helper(uint8_t chip, chiplet_id_t id, int mca_i) { /* * Hostboot stops CCS before sending new programs. I'm not sure it is wise @@ -23,8 +23,8 @@ static void draminit_cke_helper(chiplet_id_t id, int mca_i) delay(10ns) */ - ccs_add_instruction(id, 0, 0xF, 0xF, 400); - ccs_execute(id, mca_i); + ccs_add_instruction(chip, id, 0, 0xF, 0xF, 400); + ccs_execute(chip, id, mca_i); } static void rcd_load(mca_data_t *mca, int d) @@ -279,7 +279,7 @@ static void rcd_load(mca_data_t *mca, int d) * counted in different places, i.e. before and after RCD, and thanks to Encoded * QuadCS 4R DIMMs are visible to the PHY as 2R devices? 
*/ -static void mrs_load(int mcs_i, int mca_i, int d) +static void mrs_load(uint8_t chip, int mcs_i, int mca_i, int d) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -314,13 +314,13 @@ static void mrs_load(int mcs_i, int mca_i, int d) DDR4_MR3_GEARDOWN_1_2_RATE, DDR4_MR3_MPR_NORMAL, 0); - ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); mrs = ddr4_get_mr6(mca->nccd_l, DDR4_MR6_VREFDQ_TRAINING_DISABLE, DDR4_MR6_VREFDQ_TRAINING_RANGE_1, /* Don't care when disabled */ 0); - ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); mrs = ddr4_get_mr5(DDR4_MR5_RD_DBI_DISABLE, DDR4_MR5_WR_DBI_DISABLE, @@ -328,7 +328,7 @@ static void mrs_load(int mcs_i, int mca_i, int d) vpd_to_rtt_park(ATTR_MSS_VPD_MT_DRAM_RTT_PARK[vpd_idx]), DDR4_MR5_ODT_PD_ACTIVADED, DDR4_MR5_CA_PARITY_LAT_DISABLE); - ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); mrs = ddr4_get_mr4(DDR4_MR4_HPPR_DISABLE, DDR4_MR4_WR_PREAMBLE_1, /* ATTR_MSS_VPD_MT_PREAMBLE - always 0 */ @@ -340,7 +340,7 @@ static void mrs_load(int mcs_i, int mca_i, int d) DDR4_MR4_INTERNAL_VREF_MON_DISABLE, DDR4_MR4_TEMP_CONTROLLED_REFR_DISABLE, DDR4_MR4_MAX_PD_MODE_DISABLE); - ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); /* * Regarding RTT_WR: OFF seems to be the safest option, but it is not always @@ -354,7 +354,7 @@ static void mrs_load(int mcs_i, int mca_i, int d) * Do we need to half tREFI as well? */ DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, mem_data.cwl); - ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, mca->dimm[d].width == WIDTH_x8 ? 
DDR4_MR1_TQDS_ENABLE : DDR4_MR1_TQDS_DISABLE, @@ -363,7 +363,7 @@ static void mrs_load(int mcs_i, int mca_i, int d) DDR4_MR1_ODIMP_RZQ_7, /* ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS, always 34 Ohms */ DDR4_MR1_AL_DISABLE, DDR4_MR1_DLL_ENABLE); - ccs_add_mrs(id, mrs, ranks, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); mrs = ddr4_get_mr0(mca->nwr, DDR4_MR0_DLL_RESET_YES, @@ -371,9 +371,9 @@ static void mrs_load(int mcs_i, int mca_i, int d) mca->cl, DDR4_MR0_BURST_TYPE_SEQUENTIAL, DDR4_MR0_BURST_LENGTH_FIXED_8); - ccs_add_mrs(id, mrs, ranks, mirrored, tMOD); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMOD); - ccs_execute(id, mca_i); + ccs_execute(chip, id, mca_i); } /* @@ -401,6 +401,7 @@ void istep_13_10(void) { printk(BIOS_EMERG, "starting istep 13.10\n"); int mcs_i, mca_i, dimm; + uint8_t chip = 0; // TODO: support second CPU report_istep(13, 10); @@ -445,13 +446,13 @@ void istep_13_10(void) [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 1 // 1st RMW (optional, only if changes) [6] MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE = 0 // 1st RMW (optional, only if changes) */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BIT(MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE), PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL)); - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BITMASK(0, 3), + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BITMASK(0, 3), PPC_PLACE(0x1, MBA_FARB5Q_CFG_DDR_DPHY_NCLK, MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN) | PPC_PLACE(0x2, MBA_FARB5Q_CFG_DDR_DPHY_PCLK, MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN)); - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, PPC_BIT(MBA_FARB5Q_CFG_DDR_RESETN)); udelay(500); /* part of 3rd RMW, but delay is unconditional */ @@ -479,7 +480,7 @@ void istep_13_10(void) if (mem_data.mcs[mcs_i].mca[mca_i].functional) break; } - draminit_cke_helper(mcs_ids[mcs_i], mca_i); + draminit_cke_helper(chip, mcs_ids[mcs_i], mca_i); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -496,7 +497,7 @@ void istep_13_10(void) MC01.PORT0.SRQ.MBA_FARB5Q [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 0 */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL), 0); } @@ -512,7 +513,7 @@ void istep_13_10(void) rcd_load(mca, dimm); // bcw_load(); /* LRDIMM only */ - mrs_load(mcs_i, mca_i, dimm); + mrs_load(chip, mcs_i, mca_i, dimm); dump_rcd(SPD_I2C_BUS, mca->dimm[dimm].rcd_i2c_addr); } } diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 546f2a3d215..5923fd38c79 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -7,7 +7,7 @@ #include "istep_13_scom.h" -static void setup_and_execute_zqcal(int mcs_i, int mca_i, int d) +static void setup_and_execute_zqcal(uint8_t chip, int mcs_i, int mca_i, int d) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -40,11 +40,11 @@ static void setup_and_execute_zqcal(int mcs_i, int mca_i, int d) * CCS and we don't have a timer with enough precision to make it worth the * effort. 
*/ - ccs_add_mrs(id, cmd, ranks, mirrored, tZQinit); - ccs_execute(id, mca_i); + ccs_add_mrs(chip, id, cmd, ranks, mirrored, tZQinit); + ccs_execute(chip, id, mca_i); } -static void clear_initial_cal_errors(int mcs_i, int mca_i) +static void clear_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -60,55 +60,55 @@ static void clear_initial_cal_errors(int mcs_i, int mca_i) IOM0.DDRPHY_DP16_WR_VREF_ERROR1_P0_{0-4}, [all] 0 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0, 0, 0); } /* IOM0.DDRPHY_APB_CONFIG0_P0 = [49] RESET_ERR_RPT = 1, then 0 */ - mca_and_or(id, mca_i, DDRPHY_APB_CONFIG0_P0, ~0, PPC_BIT(RESET_ERR_RPT)); - mca_and_or(id, mca_i, DDRPHY_APB_CONFIG0_P0, ~PPC_BIT(RESET_ERR_RPT), 0); + mca_and_or(chip, id, mca_i, DDRPHY_APB_CONFIG0_P0, ~0, PPC_BIT(RESET_ERR_RPT)); + mca_and_or(chip, id, mca_i, DDRPHY_APB_CONFIG0_P0, ~PPC_BIT(RESET_ERR_RPT), 0); /* IOM0.DDRPHY_APB_ERROR_STATUS0_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0, 0, 0); /* IOM0.DDRPHY_RC_ERROR_STATUS0_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0, 0, 0); /* IOM0.DDRPHY_SEQ_ERROR_STATUS0_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0, 0, 0); /* IOM0.DDRPHY_WC_ERROR_STATUS0_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0, 0, 0); /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); /* IOM0.IOM_PHY0_DDRPHY_FIR_REG = [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 = 0 */ - mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), 0); } @@ -127,57 +127,58 @@ static void dump_cal_errors(int mcs_i, int mca_i) for (dp = 0; dp < 5; dp++) { printk(BIOS_ERR, "DP %d\n", dp); printk(BIOS_ERR, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - 
DQ_BIT_DISABLE_RP0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - WR_ERROR0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - RD_STATUS0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS2\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR1\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); } printk(BIOS_ERR, "%#16.16llx - APB_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); + mca_read(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); printk(BIOS_ERR, "%#16.16llx - RC_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); + mca_read(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); printk(BIOS_ERR, "%#16.16llx - SEQ_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); + mca_read(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); printk(BIOS_ERR, "%#16.16llx - WC_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); + mca_read(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); printk(BIOS_ERR, "%#16.16llx - PC_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); + mca_read(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_ERROR\n", - mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); /* 0x8000 on success for first rank, 0x4000 for second */ printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_STATUS\n", - mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); printk(BIOS_ERR, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", - mca_read(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); + mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); printk(BIOS_ERR, "%#16.16llx - MBACALFIRQ\n", - mca_read(id, mca_i, MBACALFIR)); + mca_read(chip, id, mca_i, MBACALFIR)); #endif } /* Based on ATTR_MSS_MRW_RESET_DELAY_BEFORE_CAL, by default do it. 
*/ -static void dp16_reset_delay_values(int mcs_i, int mca_i, enum rank_selection ranks_present) +static void dp16_reset_delay_values(uint8_t chip, int mcs_i, int mca_i, + enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -189,20 +190,24 @@ static void dp16_reset_delay_values(int mcs_i, int mca_i, enum rank_selection ra for (dp = 0; dp < 5; dp++) { /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_{0-4} = 0 */ if (ranks_present & DIMM0_RANK0) - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_0, + 0, 0); /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_{0-4} = 0 */ if (ranks_present & DIMM0_RANK1) - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_0, + 0, 0); /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_{0-4} = 0 */ if (ranks_present & DIMM1_RANK0) - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_0, + 0, 0); /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_{0-4} = 0 */ if (ranks_present & DIMM1_RANK1) - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_0, + 0, 0); } } -static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) +static void dqs_align_turn_on_refresh(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -212,7 +217,7 @@ static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) [60-63] TRFC_CYCLES = 9 // tRFC = 2^9 = 512 memcycles */ /* See note in seq_reset() in 13.8. This may not be necessary. */ - mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, ~PPC_BITMASK(60, 63), + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, ~PPC_BITMASK(60, 63), PPC_PLACE(9, TRFC_CYCLES, TRFC_CYCLES_LEN)); /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 @@ -225,7 +230,7 @@ static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) [55] CMD_SNOOP_DIS = 0 [57-63] REFRESH_INTERVAL = 0x13 // Worst case: 6.08us for 1866 (max tCK). Must be not more than 7.8us */ - mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~(PPC_BITMASK(48, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(0xF, REFRESH_COUNT, REFRESH_COUNT_LEN) | PPC_PLACE(0x3, REFRESH_CONTROL, REFRESH_CONTROL_LEN) | @@ -233,7 +238,7 @@ static void dqs_align_turn_on_refresh(int mcs_i, int mca_i) PPC_PLACE(0x13, REFRESH_INTERVAL, REFRESH_INTERVAL_LEN)); } -static void wr_level_pre(int mcs_i, int mca_i, int rp, +static void wr_level_pre(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -257,7 +262,7 @@ static void wr_level_pre(int mcs_i, int mca_i, int rp, vpd_to_rtt_wr(0), DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, mem_data.cwl); - ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); /* MR1 = // redo the rest of the bits // Write properly encoded RTT_WR value as RTT_NOM @@ -278,7 +283,7 @@ static void wr_level_pre(int mcs_i, int mca_i, int rp, * can subtract those. On the other hand, with microsecond precision for * delays in ccs_execute(), this probably doesn't matter anyway. 
*/ - ccs_add_mrs(id, mrs, rank, mirrored, tMOD); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMOD); /* * This block is done after MRS commands in Hostboot, but we do not call @@ -290,25 +295,29 @@ static void wr_level_pre(int mcs_i, int mca_i, int rp, /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = [48] = 1 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(48)); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, + ~0, PPC_BIT(48)); break; case 1: /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = [57] = 1 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(57)); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, + ~0, PPC_BIT(57)); break; case 2: /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = [50] = 1 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(50)); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, + ~0, PPC_BIT(50)); break; case 3: /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = [59] = 1 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, ~0, PPC_BIT(59)); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, + ~0, PPC_BIT(59)); break; } @@ -348,7 +357,7 @@ static void wr_level_pre(int mcs_i, int mca_i, int rp, * ranks. Can we get away with 0 delay? Is it worth it? Remember that * the same delay is currently used between sides of RCD. */ - ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); } /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ @@ -379,7 +388,7 @@ static uint64_t wr_level_time(mca_data_t *mca) } /* Undo the pre-workaround, basically */ -static void wr_level_post(int mcs_i, int mca_i, int rp, +static void wr_level_post(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -414,7 +423,7 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES1, ODT_RD_VALUES1_LEN); - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = @@ -423,7 +432,7 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, [48-51] ODT_WR_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][0]) [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_RD_VALUES1, ODT_RD_VALUES1_LEN)); #undef F @@ -435,7 +444,7 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, vpd_to_rtt_wr(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, mem_data.cwl); - ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); /* MR1 = // redo the rest of the bits // Write properly encoded RTT_NOM value @@ -453,7 +462,7 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, * Next command for this rank should be REF before Initial Pattern Write, * done by PHY hardware, so use tMOD. */ - ccs_add_mrs(id, mrs, rank, mirrored, tMOD); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMOD); // mss::workarounds::seq::odt_config(); // Not needed on DD2 } @@ -490,7 +499,7 @@ static void wr_level_post(int mcs_i, int mca_i, int rp, * ranks. 
Can we get away with 0 delay? Is it worth it? Remember that * the same delay is currently used between sides of RCD. */ - ccs_add_mrs(id, mrs, rank, mirrored, tMRD); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); } /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ @@ -552,7 +561,7 @@ static uint64_t dqs_align_time(mca_data_t *mca) return 6 * 600 * 4; } -static void rdclk_align_pre(int mcs_i, int mca_i, int rp, +static void rdclk_align_pre(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -567,7 +576,7 @@ static void rdclk_align_pre(int mcs_i, int mca_i, int rp, IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 [52-53] REFRESH_CONTROL = 0 // refresh commands are only sent at start of initial calibration */ - mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); } static uint64_t rdclk_align_time(mca_data_t *mca) @@ -585,7 +594,7 @@ static uint64_t rdclk_align_time(mca_data_t *mca) return 24 * ((1024/coarse_cal_step_size + 4*coarse_cal_step_size) * 4 + 32); } -static void rdclk_align_post(int mcs_i, int mca_i, int rp, +static void rdclk_align_post(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -607,33 +616,33 @@ static void rdclk_align_post(int mcs_i, int mca_i, int rp, // Can't change non-existing quads */ for (dp = 0; dp < 4; dp++) { - val = dp_mca_read(id, dp, mca_i, + val = dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul); val += PPC_BIT(49) | PPC_BIT(53) | PPC_BIT(57) | PPC_BIT(61); val &= PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53) | PPC_BITMASK(56, 57) | PPC_BITMASK(60, 61); /* TODO: this can be done with just one read */ - dp_mca_and_or(id, dp, mca_i, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul, ~(PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53) | PPC_BITMASK(56, 57) | PPC_BITMASK(60, 61)), val); } - val = dp_mca_read(id, 4, mca_i, + val = dp_mca_read(chip, id, 4, mca_i, DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul); val += PPC_BIT(49) | PPC_BIT(53); val &= PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53); - dp_mca_and_or(id, dp, mca_i, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul, ~(PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53)), val); /* Turn on refresh */ - dqs_align_turn_on_refresh(mcs_i, mca_i); + dqs_align_turn_on_refresh(chip, mcs_i, mca_i); } -static void read_ctr_pre(int mcs_i, int mca_i, int rp, +static void read_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -643,14 +652,14 @@ static void read_ctr_pre(int mcs_i, int mca_i, int rp, IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 [52-53] REFRESH_CONTROL = 0 // refresh commands are only sent at start of initial calibration */ - mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); for (dp = 0; dp < 5; dp++) { /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0-4} [62] 1 // part of ATESTSEL_0_4 field */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~0, PPC_BIT(62)); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~0, PPC_BIT(62)); /* * This was a part of main calibration in Hostboot, not pre-workaround, @@ -659,7 +668,7 @@ static void 
read_ctr_pre(int mcs_i, int mca_i, int rp, [all] 0 [48-63] VREF_CAL_EN = 0xffff // We already did this in reset_rd_vref() in 13.8 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, 0, PPC_PLACE(0xFFFF, 48, 16)); } @@ -668,7 +677,7 @@ static void read_ctr_pre(int mcs_i, int mca_i, int rp, [60] CALIBRATION_ENABLE = 1 [61] SKIP_RDCENTERING = 0 */ - mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, + mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, ~PPC_BIT(SKIP_RDCENTERING), PPC_BIT(CALIBRATION_ENABLE)); } @@ -692,7 +701,7 @@ static uint64_t read_ctr_time(mca_data_t *mca) * 24; } -static void read_ctr_post(int mcs_i, int mca_i, int rp, +static void read_ctr_post(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -702,21 +711,21 @@ static void read_ctr_post(int mcs_i, int mca_i, int rp, // workarounds::dp16::rd_dq::fix_delay_values(); /* Turn on refresh */ - dqs_align_turn_on_refresh(mcs_i, mca_i); + dqs_align_turn_on_refresh(chip, mcs_i, mca_i); for (dp = 0; dp < 5; dp++) { /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0-4} [62] 0 // part of ATESTSEL_0_4 field */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~PPC_BIT(62), 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~PPC_BIT(62), 0); } } /* Assume 18 DRAMs per DIMM ((8 data + 1 ECC) * 2), even for x8 */ static uint16_t write_delays[18]; -static void write_ctr_pre(int mcs_i, int mca_i, int rp, +static void write_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -751,14 +760,14 @@ static void write_ctr_pre(int mcs_i, int mca_i, int rp, ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] & 0x3F); /* Step 1 - enter VREFDQ training mode */ - ccs_add_mrs(id, mrs, rank, mirrored, tVREFDQ_E_X); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tVREFDQ_E_X); /* Step 2 - latch VREFDQ value, command exactly the same as step 1 */ - ccs_add_mrs(id, mrs, rank, mirrored, tVREFDQ_E_X); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tVREFDQ_E_X); /* Step 3 - exit VREFDQ training mode */ mrs ^= 1 << 7; // A7 - VREFDQ Training Enable - ccs_add_mrs(id, mrs, rank, mirrored, tVREFDQ_E_X); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tVREFDQ_E_X); /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ //ccs_execute(id, mca_i); @@ -777,7 +786,7 @@ static void write_ctr_pre(int mcs_i, int mca_i, int rp, const uint64_t rp_mul = 0x0000010000000000; const uint64_t val_mul = 0x0000000100000000; /* IOM0.DDRPHY_DP16_WR_DELAY_VALUE__RP_REG_P0_ */ - uint64_t val = dp_mca_read(id, dp, mca_i, + uint64_t val = dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_DELAY_VALUE_0_RP0_REG_P0_0 + rp * rp_mul + val_idx * val_mul); write_delays[dram] = (uint16_t) val; @@ -826,7 +835,7 @@ static uint64_t write_ctr_time(mca_data_t *mca) 2 * (big_step + 1)/(small_step + 1)) * 24; } -static void write_ctr_post(int mcs_i, int mca_i, int rp, +static void write_ctr_post(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -838,7 +847,8 @@ static void write_ctr_post(int mcs_i, int mca_i, int rp, * yet implemented. 
*/ for (dp = 0; dp < 5; dp++) { - bad_bits |= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0); + bad_bits |= dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0); } if (!bad_bits) @@ -874,7 +884,7 @@ static uint64_t coarse_wr_rd_time(mca_data_t *mca) return 40 + 32 + 15 * 512; } -typedef void (phy_workaround_t) (int mcs_i, int mca_i, int rp, +typedef void (phy_workaround_t) (uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present); struct phy_step { @@ -943,30 +953,30 @@ static struct phy_step steps[] = { */ }; -static void dispatch_step(struct phy_step *step, int mcs_i, int mca_i, int rp, +static void dispatch_step(uint8_t chip, struct phy_step *step, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; printk(BIOS_DEBUG, "%s starting\n", step->name); if (step->pre) - step->pre(mcs_i, mca_i, rp, ranks_present); + step->pre(chip, mcs_i, mca_i, rp, ranks_present); - ccs_phy_hw_step(mcs_ids[mcs_i], mca_i, rp, step->cfg, step->time(mca)); + ccs_phy_hw_step(chip, mcs_ids[mcs_i], mca_i, rp, step->cfg, step->time(mca)); if (step->post) - step->post(mcs_i, mca_i, rp, ranks_present); + step->post(chip, mcs_i, mca_i, rp, ranks_present); dump_cal_errors(mcs_i, mca_i); - if (mca_read(mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0) != 0) + if (mca_read(chip, mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0) != 0) die("%s failed, aborting\n", step->name); printk(BIOS_DEBUG, "%s done\n", step->name); } /* Can we modify dump_cal_errors() for this? */ -static int process_initial_cal_errors(int mcs_i, int mca_i) +static int process_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -974,22 +984,24 @@ static int process_initial_cal_errors(int mcs_i, int mca_i) for (dp = 0; dp < 5; dp++) { /* IOM0.DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_n */ - err |= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0); + err |= dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0); /* Both ERROR_MASK registers were set to 0xFFFF in 13.8 */ /* IOM0.DDRPHY_DP16_WR_VREF_ERROR0_P0_n & * ~IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_n */ - err |= (dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0) & - ~dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0)); + err |= (dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0) & + ~dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0)); /* IOM0.DDRPHY_DP16_WR_VREF_ERROR1_P0_n & * ~IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_n */ - err |= (dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0) & - ~dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0)); + err |= (dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0) & + ~dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0)); } /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 */ - err |= mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0); + err |= mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0); if (err) return 1; @@ -1015,7 +1027,7 @@ static int process_initial_cal_errors(int mcs_i, int mca_i) return 0; } -static int can_recover(int mcs_i, int mca_i, int rp) +static int can_recover(uint8_t chip, int mcs_i, int mca_i, int rp) { /* * We can recover from 1 nibble + 1 bit (or less) bad lines. 
Anything more @@ -1050,7 +1062,7 @@ static int can_recover(int mcs_i, int mca_i, int rp) if total_bad_nibbles > 1: DIMM is FUBAR, return error */ const uint64_t rp_mul = 0x0000010000000000; - reg = dp_mca_read(id, dp, mca_i, + reg = dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0 + rp * rp_mul); /* One bad DQS on x8 is already bad 2 nibbles, can't recover from that. */ @@ -1078,7 +1090,7 @@ static int can_recover(int mcs_i, int mca_i, int rp) if total_bad_bits > 1: total_bad_nibbles += 1, total_bad_bits -= 1 if total_bad_nibbles > 1: DIMM is FUBAR, return error? */ - reg = dp_mca_read(id, dp, mca_i, + reg = dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0 + rp * rp_mul); /* Exclude nibbles corresponding to a bad DQS, it won't get worse. */ @@ -1121,7 +1133,7 @@ static int can_recover(int mcs_i, int mca_i, int rp) return 1; } -static void fir_unmask(int mcs_i) +static void fir_unmask(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; int mca_i; @@ -1175,7 +1187,7 @@ static void fir_unmask(int mcs_i) [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 // checkstop (0,0,0) [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 0 // recoverable_error (0,1,0) */ - mca_and_or(id, mca_i, MBACALFIR_ACTION0, + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION0, ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | @@ -1184,7 +1196,7 @@ static void fir_unmask(int mcs_i) PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), 0); - mca_and_or(id, mca_i, MBACALFIR_ACTION1, + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION1, ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | @@ -1197,7 +1209,7 @@ static void fir_unmask(int mcs_i) PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)); - mca_and_or(id, mca_i, MBACALFIR_MASK, + mca_and_or(chip, id, mca_i, MBACALFIR_MASK, ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | @@ -1228,6 +1240,7 @@ void istep_13_11(void) printk(BIOS_EMERG, "starting istep 13.11\n"); int mcs_i, mca_i, dimm, rp; enum rank_selection ranks_present; + uint8_t chip = 0; // TODO: support second CPU report_istep(13, 11); @@ -1251,11 +1264,13 @@ void istep_13_11(void) else ranks_present |= DIMM0_RANK0 << (2 * dimm); - setup_and_execute_zqcal(mcs_i, mca_i, dimm); + setup_and_execute_zqcal(chip, mcs_i, mca_i, dimm); } /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG0_P0 = 0 */ - mca_and_or(mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_CONFIG0_P0, 0, 0); + mca_and_or(chip, mcs_ids[mcs_i], mca_i, + DDRPHY_PC_INIT_CAL_CONFIG0_P0, + 0, 0); /* * > Disable port fails as it doesn't appear the MC handles initial @@ -1264,7 +1279,7 @@ void istep_13_11(void) MC01.PORT0.SRQ.MBA_FARB0Q = [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 1 */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB0Q, ~0, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB0Q, ~0, PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE)); /* @@ -1280,9 +1295,9 @@ void istep_13_11(void) * least know what it is about... 
*/ - clear_initial_cal_errors(mcs_i, mca_i); - dp16_reset_delay_values(mcs_i, mca_i, ranks_present); - dqs_align_turn_on_refresh(mcs_i, mca_i); + clear_initial_cal_errors(chip, mcs_i, mca_i); + dp16_reset_delay_values(chip, mcs_i, mca_i, ranks_present); + dqs_align_turn_on_refresh(chip, mcs_i, mca_i); /* * List of calibration steps for RDIMM, in execution order: @@ -1319,10 +1334,11 @@ void istep_13_11(void) dump_cal_errors(mcs_i, mca_i); for (int i = 0; i < ARRAY_SIZE(steps); i++) - dispatch_step(&steps[i], mcs_i, mca_i, rp, ranks_present); + dispatch_step(chip, &steps[i], mcs_i, mca_i, rp, + ranks_present); - if (process_initial_cal_errors(mcs_i, mca_i) && - !can_recover(mcs_i, mca_i, rp)) { + if (process_initial_cal_errors(chip, mcs_i, mca_i) && + !can_recover(chip, mcs_i, mca_i, rp)) { die("Calibration failed for MCS%d MCA%d DIMM%d\n", mcs_i, mca_i, rp/2); } } @@ -1342,7 +1358,7 @@ void istep_13_11(void) * instruction. */ - fir_unmask(mcs_i); + fir_unmask(chip, mcs_i); } printk(BIOS_EMERG, "ending istep 13.11\n"); diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index cd77fd9f912..c5382ffe0af 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -404,7 +404,7 @@ static void setup_xlate_map(int mcs_i, int mca_i) } -static void enable_pm(int mcs_i, int mca_i) +static void enable_pm(uint8_t chip, int mcs_i, int mca_i) { const int ATTR_MSS_MRW_POWER_CONTROL_REQUESTED = 0; /* @@ -429,11 +429,11 @@ static void enable_pm(int mcs_i, int mca_i) [2] MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE = 1 */ if (ATTR_MSS_MRW_POWER_CONTROL_REQUESTED) - mca_and_or(mcs_ids[mcs_i], mca_i, MBARPC0Q, ~0, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBARPC0Q, ~0, PPC_BIT(MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE)); } -static void apply_mark_store(int mcs_i, int mca_i) +static void apply_mark_store(uint8_t chip, int mcs_i, int mca_i) { /* * FIXME: where do the values written to MVPD come from? 
They are all 0s in @@ -450,12 +450,12 @@ static void apply_mark_store(int mcs_i, int mca_i) [all] 0 [0-22] from ATTR_MSS_MVPD_FWMS */ - mca_and_or(mcs_ids[mcs_i], mca_i, FWMS0 + i, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, FWMS0 + i, 0, ATTR_MSS_MVPD_FWMS[i]); } } -static void fir_unmask(int mcs_i) +static void fir_unmask(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; int mca_i; @@ -499,7 +499,7 @@ static void fir_unmask(int mcs_i) MC01.PORT0.ECC64.SCOM.RECR [26] MBSECCQ_ENABLE_UE_NOISE_WINDOW = 0 */ - mca_and_or(id, mca_i, RECR, ~PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW), 0); + mca_and_or(chip, id, mca_i, RECR, ~PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW), 0); /* MC01.PORT0.ECC64.SCOM.ACTION0 @@ -566,13 +566,13 @@ static void fir_unmask(int mcs_i) [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 // checkstop (0,0,0) [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 // checkstop (0,0,0) */ - mca_and_or(id, mca_i, ECC_FIR_ACTION0, + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION0, ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), 0); - mca_and_or(id, mca_i, ECC_FIR_ACTION1, + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION1, ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), @@ -582,7 +582,7 @@ static void fir_unmask(int mcs_i) PPC_BIT(ECC_FIR_SCOM_PARITY_CLASS_RECOVERABLE) | PPC_BIT(ECC_FIR_WRITE_RMW_CE)); - mca_and_or(id, mca_i, ECC_FIR_MASK, + mca_and_or(chip, id, mca_i, ECC_FIR_MASK, ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), @@ -605,6 +605,7 @@ void istep_13_13(void) { printk(BIOS_EMERG, "starting istep 13.13\n"); int mcs_i, mca_i; + uint8_t chip = 0; // TODO: support second CPU report_istep(13, 13); @@ -628,12 +629,12 @@ void istep_13_13(void) // Not sure where this attr comes from or what is its default value. 
Assume !0 = 1 -> TCE correction enabled [27] MBSECCQ_ENABLE_TCE_CORRECTION = !ATTR_MNFG_FLAGS.MNFG_REPAIRS_DISABLED_ATTR */ - mca_and_or(id, mca_i, RECR, + mca_and_or(chip, id, mca_i, RECR, ~(PPC_BITMASK(6, 8) | PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)), PPC_PLACE(1, MBSECCQ_READ_POINTER_DELAY, MBSECCQ_READ_POINTER_DELAY_LEN) | PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)); - enable_pm(mcs_i, mca_i); + enable_pm(chip, mcs_i, mca_i); /* * This was already done after draminit_cke_helper, search for "Per @@ -644,13 +645,13 @@ void istep_13_13(void) * MC01.PORT0.SRQ.MBA_FARB5Q * [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 0 */ - mca_and_or(id, mca_i, MBA_FARB5Q, + mca_and_or(chip, id, mca_i, MBA_FARB5Q, ~PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL), 0); /* MC01.PORT0.SRQ.MBA_FARB0Q [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 0 */ - mca_and_or(id, mca_i, MBA_FARB0Q, + mca_and_or(chip, id, mca_i, MBA_FARB0Q, ~PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE), 0); /* @@ -665,13 +666,14 @@ void istep_13_13(void) * MC01.PORT0.SRQ.MBA_FARB0Q * [55] MBA_FARB0Q_CFG_OE_ALWAYS_ON = 1 */ - mca_and_or(id, mca_i, MBA_FARB0Q, ~0, + mca_and_or(chip, id, mca_i, MBA_FARB0Q, ~0, PPC_BIT(MBA_FARB0Q_CFG_OE_ALWAYS_ON)); /* MC01.PORT0.SRQ.PC.MBAREF0Q [0] MBAREF0Q_CFG_REFRESH_ENABLE = 1 */ - mca_and_or(id, mca_i, MBAREF0Q, ~0, PPC_BIT(MBAREF0Q_CFG_REFRESH_ENABLE)); + mca_and_or(chip, id, mca_i, MBAREF0Q, + ~0, PPC_BIT(MBAREF0Q_CFG_REFRESH_ENABLE)); /* Enable periodic calibration */ /* @@ -694,7 +696,7 @@ void istep_13_13(void) [52-59] MBA_CAL3Q_CFG_ALL_PERIODIC_LENGTH = 0xff // Or simpler: 0xfffffffffffffff0 */ - mca_and_or(id, mca_i, MBA_CAL3Q, 0, PPC_BITMASK(0, 59)); + mca_and_or(chip, id, mca_i, MBA_CAL3Q, 0, PPC_BITMASK(0, 59)); /* Enable read ECC MC01.PORT0.ECC64.SCOM.RECR // 0x07010A0A @@ -704,15 +706,15 @@ void istep_13_13(void) // Docs don't describe the encoding, code suggests this inverts data, toggles checks [30-31] MBSECCQ_DATA_INVERSION = 3 */ - mca_and_or(id, mca_i, RECR, + mca_and_or(chip, id, mca_i, RECR, ~(PPC_BITMASK(0, 1) | PPC_BITMASK(29, 31)), PPC_BIT(MBSECCQ_USE_ADDRESS_HASH) | PPC_PLACE(3, MBSECCQ_DATA_INVERSION, MBSECCQ_DATA_INVERSION_LEN)); - apply_mark_store(mcs_i, mca_i); + apply_mark_store(chip, mcs_i, mca_i); } - fir_unmask(mcs_i); + fir_unmask(chip, mcs_i); } printk(BIOS_EMERG, "ending istep 13.13\n"); diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index 206346f932e..bdb2922e4c7 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -1,13 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include #include "istep_13_scom.h" -#define ATTR_PG 0xE000000000000000ull +/* + * 13.8 mss_scominit: Perform scom inits to MC and PHY + * + * - HW units included are MCBIST, MCA/PHY (Nimbus) or membuf, L4, MBAs (Cumulus) + * - Does not use initfiles, coded into HWP + * - Uses attributes from previous step + * - Pushes memory extent configuration into the MBA/MCAs + * - Addresses are pulled from attributes, set previously by mss_eff_config + * - MBA/MCAs always start at address 0, address map controlled by + * proc_setup_bars below + */ /* * This function was generated from initfiles. Some of the registers used here @@ -20,7 +31,7 @@ * * Undocumented registers are marked with (?) in the comments. 
*/ -static void p9n_mca_scom(int mcs_i, int mca_i) +static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) { const struct powerbus_cfg *pb_cfg = powerbus_cfg(); @@ -48,8 +59,8 @@ static void p9n_mca_scom(int mcs_i, int mca_i) /* P9N2_MCS_PORT02_MCPERF0 (?) [22-27] = 0x20 // AMO_LIMIT */ - scom_and_or_for_chiplet(nest, 0x05010823 + mca_i * mca_mul, - ~PPC_BITMASK(22, 27), PPC_PLACE(0x20, 22, 6)); + rscom_and_or_for_chiplet(chip, nest, 0x05010823 + mca_i * mca_mul, + ~PPC_BITMASK(22, 27), PPC_PLACE(0x20, 22, 6)); /* P9N2_MCS_PORT02_MCPERF2 (?) [0-2] = 1 // PF_DROP_VALUE0 @@ -81,24 +92,24 @@ static void p9n_mca_scom(int mcs_i, int mca_i) uint64_t en_ref_blk = (log_ranks <= 1 || log_ranks > 8) ? 0 : (n_dimms == 1 && mranks == 4 && log_ranks == 8) ? 0 : 3; - scom_and_or_for_chiplet(nest, 0x05010824 + mca_i * mca_mul, - /* and */ - ~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31) - | PPC_BITMASK(28, 31) | PPC_BITMASK(50, 54) | PPC_BIT(61)), - /* or */ - PPC_PLACE(1, 0, 3) | PPC_PLACE(3, 3, 3) | PPC_PLACE(5, 6, 3) - | PPC_PLACE(7, 9, 3) /* PF_DROP_VALUEs */ - | PPC_PLACE(ref_blk_cfg, 13, 3) | PPC_PLACE(en_ref_blk, 16, 2) - | PPC_PLACE(0x4, 28, 4) | PPC_PLACE(0x1C, 50, 5)); + rscom_and_or_for_chiplet(chip, nest, 0x05010824 + mca_i * mca_mul, + /* and */ + ~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31) + | PPC_BITMASK(28, 31) | PPC_BITMASK(50, 54) | PPC_BIT(61)), + /* or */ + PPC_PLACE(1, 0, 3) | PPC_PLACE(3, 3, 3) | PPC_PLACE(5, 6, 3) + | PPC_PLACE(7, 9, 3) /* PF_DROP_VALUEs */ + | PPC_PLACE(ref_blk_cfg, 13, 3) | PPC_PLACE(en_ref_blk, 16, 2) + | PPC_PLACE(0x4, 28, 4) | PPC_PLACE(0x1C, 50, 5)); /* P9N2_MCS_PORT02_MCAMOC (?) [1] = 0 // FORCE_PF_DROP0 [4-28] = 0x19fffff // WRTO_AMO_COLLISION_RULES [29-31] = 1 // AMO_SIZE_SELECT, 128B_RW_64B_DATA */ - scom_and_or_for_chiplet(nest, 0x05010825 + mca_i * mca_mul, - ~(PPC_BIT(1) | PPC_BITMASK(4, 31)), - PPC_PLACE(0x19FFFFF, 4, 25) | PPC_PLACE(1, 29, 3)); + rscom_and_or_for_chiplet(chip, nest, 0x05010825 + mca_i * mca_mul, + ~(PPC_BIT(1) | PPC_BITMASK(4, 31)), + PPC_PLACE(0x19FFFFF, 4, 25) | PPC_PLACE(1, 29, 3)); /* P9N2_MCS_PORT02_MCEPSQ (?) 
[0-7] = 1 // JITTER_EPSILON @@ -111,13 +122,13 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [40-47] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4 // VECTOR_GROUP_EPSILON */ #define F(X) (((X) + 6) / 4) - scom_and_or_for_chiplet(nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), - PPC_PLACE(1, 0, 8) - | PPC_PLACE(F(pb_cfg->eps_r[0]), 8, 8) - | PPC_PLACE(F(pb_cfg->eps_r[1]), 16, 8) - | PPC_PLACE(F(pb_cfg->eps_r[1]), 24, 8) - | PPC_PLACE(F(pb_cfg->eps_r[2]), 32, 8) - | PPC_PLACE(F(pb_cfg->eps_r[2]), 40, 8)); + rscom_and_or_for_chiplet(chip, nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), + PPC_PLACE(1, 0, 8) + | PPC_PLACE(F(pb_cfg->eps_r[0]), 8, 8) + | PPC_PLACE(F(pb_cfg->eps_r[1]), 16, 8) + | PPC_PLACE(F(pb_cfg->eps_r[1]), 24, 8) + | PPC_PLACE(F(pb_cfg->eps_r[2]), 32, 8) + | PPC_PLACE(F(pb_cfg->eps_r[2]), 40, 8)); #undef F //~ static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; // T0, T1 //~ static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; // T2 @@ -129,9 +140,9 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [14-23] = 51 // BUSY_COUNTER_THRESHOLD1 [24-33] = 64 // BUSY_COUNTER_THRESHOLD2 */ - scom_and_or_for_chiplet(nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), - PPC_BIT(0) | PPC_PLACE(1, 1, 3) | PPC_PLACE(38, 4, 10) - | PPC_PLACE(51, 14, 10) | PPC_PLACE(64, 24, 10)); + rscom_and_or_for_chiplet(chip, nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), + PPC_BIT(0) | PPC_PLACE(1, 1, 3) | PPC_PLACE(38, 4, 10) + | PPC_PLACE(51, 14, 10) | PPC_PLACE(64, 24, 10)); /* P9N2_MCS_PORT02_MCPERF3 (?) [31] = 1 // ENABLE_CL0 @@ -140,9 +151,9 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [44] = 1 // DISABLE_WRTO_IG [45] = 1 // AMO_LIMIT_SEL */ - scom_or_for_chiplet(nest, 0x0501082B + mca_i * mca_mul, - PPC_BIT(31) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(44) - | PPC_BIT(45)); + rscom_or_for_chiplet(chip, nest, 0x0501082B + mca_i * mca_mul, + PPC_BIT(31) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(44) + | PPC_BIT(45)); /* MC01.PORT0.SRQ.MBA_DSM0Q = // These are set per port so all latencies should be calculated from both DIMMs (if present) @@ -162,7 +173,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) /* ATTR_MSS_EFF_DPHY_WLO = 1 from VPD, 3 from dump? */ uint64_t rdtag_dly = mem_data.speed == 2666 ? 9 : mem_data.speed == 2400 ? 8 : 7; - mca_and_or(id, mca_i, MBA_DSM0Q, ~PPC_BITMASK(0, 41), + mca_and_or(chip, id, mca_i, MBA_DSM0Q, ~PPC_BITMASK(0, 41), PPC_PLACE(mca->cl - mem_data.cwl, MBA_DSM0Q_CFG_RODT_START_DLY, MBA_DSM0Q_CFG_RODT_START_DLY_LEN) | PPC_PLACE(mca->cl - mem_data.cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY, @@ -211,7 +222,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) uint64_t var_dly = mem_data.speed == 2666 ? 11 : mem_data.speed == 2400 ? 10 : mem_data.speed == 2133 ? 
9 : 8; - mca_and_or(id, mca_i, MBA_TMR0Q, PPC_BIT(63), + mca_and_or(chip, id, mca_i, MBA_TMR0Q, PPC_BIT(63), PPC_PLACE(var_dly, MBA_TMR0Q_RRDM_DLY, MBA_TMR0Q_RRDM_DLY_LEN) | PPC_PLACE(4, MBA_TMR0Q_RRSMSR_DLY, MBA_TMR0Q_RRSMSR_DLY_LEN) | PPC_PLACE(4, MBA_TMR0Q_RRSMDR_DLY, MBA_TMR0Q_RRSMDR_DLY_LEN) | @@ -250,7 +261,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) MSS_FREQ_EQ_2400: 10 MSS_FREQ_EQ_2666: 11 */ - mca_and_or(id, mca_i, MBA_TMR1Q, 0, + mca_and_or(chip, id, mca_i, MBA_TMR1Q, 0, PPC_PLACE(mca->nccd_l, MBA_TMR1Q_RRSBG_DLY, MBA_TMR1Q_RRSBG_DLY_LEN) | PPC_PLACE(mem_data.cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY, MBA_TMR1Q_WRSBG_DLY_LEN) | @@ -271,7 +282,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [6] MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE = 1 [55-58] MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING = 8 */ - mca_and_or(id, mca_i, MBA_WRQ0Q, + mca_and_or(chip, id, mca_i, MBA_WRQ0Q, ~(PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE) | PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | PPC_BITMASK(55, 58)), @@ -283,7 +294,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [6] MBA_RRQ0Q_CFG_RRQ_FIFO_MODE = 0 // ATTR_MSS_REORDER_QUEUE_SETTING [57-60] MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING = 8 */ - mca_and_or(id, mca_i, MBA_RRQ0Q, + mca_and_or(chip, id, mca_i, MBA_RRQ0Q, ~(PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE) | PPC_BITMASK(57, 60)), PPC_PLACE(8, MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING, MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING_LEN)); @@ -294,7 +305,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [38] MBA_FARB0Q_CFG_PARITY_AFTER_CMD = 1 [61-63] MBA_FARB0Q_CFG_OPT_RD_SIZE = 3 */ - mca_and_or(id, mca_i, MBA_FARB0Q, + mca_and_or(chip, id, mca_i, MBA_FARB0Q, ~(PPC_BIT(MBA_FARB0Q_CFG_2N_ADDR) | PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) | PPC_BITMASK(61, 63)), @@ -343,7 +354,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) if (mranks == 4) cids_4_7 = (cids_4_7 & ~(7ull << 9)) | (4 << 9); - mca_and_or(id, mca_i, MBA_FARB1Q, ~PPC_BITMASK(0, 47), + mca_and_or(chip, id, mca_i, MBA_FARB1Q, ~PPC_BITMASK(0, 47), PPC_PLACE(cids_even, MBA_FARB1Q_CFG_SLOT0_S0_CID, 12) | PPC_PLACE(cids_4_7, MBA_FARB1Q_CFG_SLOT0_S4_CID, 12) | PPC_PLACE(cids_even, MBA_FARB1Q_CFG_SLOT1_S0_CID, 12) | @@ -369,7 +380,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [60-63] MBA_FARB2Q_CFG_RANK7_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][3]) // always 0 */ #define F(X) ((((X) >> 4) & 0xc) | (((X) >> 2) & 0x3)) - mca_and_or(id, mca_i, MBA_FARB2Q, 0, + mca_and_or(chip, id, mca_i, MBA_FARB2Q, 0, PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), MBA_FARB2Q_CFG_RANK0_RD_ODT, MBA_FARB2Q_CFG_RANK0_RD_ODT_LEN) | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), @@ -401,7 +412,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) * issues we can do the same, but for now let's try to avoid floating point * arithmetic. */ - mca_and_or(id, mca_i, MBAREF0Q, ~(PPC_BITMASK(5, 18) | PPC_BITMASK(30, 60)), + mca_and_or(chip, id, mca_i, MBAREF0Q, ~(PPC_BITMASK(5, 18) | PPC_BITMASK(30, 60)), PPC_PLACE(3, MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD, MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD_LEN) | @@ -444,7 +455,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) mem_data.speed == 2400 ? 8 : 9; uint64_t p_up_dn = mem_data.speed == 1866 ? 5 : mem_data.speed == 2666 ? 
7 : 6; - mca_and_or(id, mca_i, MBARPC0Q, ~PPC_BITMASK(6, 21), + mca_and_or(chip, id, mca_i, MBARPC0Q, ~PPC_BITMASK(6, 21), PPC_PLACE(pup_avail, MBARPC0Q_CFG_PUP_AVAIL, MBARPC0Q_CFG_PUP_AVAIL_LEN) | PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PDN_PUP, MBARPC0Q_CFG_PDN_PUP_LEN) | PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PUP_PDN, MBARPC0Q_CFG_PUP_PDN_LEN) | @@ -474,7 +485,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) mem_data.speed == 2400 ? 12 : 14; uint64_t txsdll = mem_data.speed == 1866 ? 597 : mem_data.speed == 2666 ? 939 : 768; - mca_and_or(id, mca_i, MBASTR0Q, ~(PPC_BITMASK(12, 37) | PPC_BITMASK(46, 56)), + mca_and_or(chip, id, mca_i, MBASTR0Q, ~(PPC_BITMASK(12, 37) | PPC_BITMASK(46, 56)), PPC_PLACE(5, MBASTR0Q_CFG_TCKESR, MBASTR0Q_CFG_TCKESR_LEN) | PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRE, MBASTR0Q_CFG_TCKSRE_LEN) | PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRX, MBASTR0Q_CFG_TCKSRX_LEN) | @@ -524,7 +535,7 @@ static void p9n_mca_scom(int mcs_i, int mca_i) mn_freq_ratio < 1215 ? 1 : mn_freq_ratio < 1300 ? 0 : mn_freq_ratio < 1400 ? 1 : 0; - mca_and_or(id, mca_i, RECR, ~(PPC_BITMASK(16, 22) | PPC_BIT(MBSECCQ_RESERVED_40)), + mca_and_or(chip, id, mca_i, RECR, ~(PPC_BITMASK(16, 22) | PPC_BIT(MBSECCQ_RESERVED_40)), PPC_PLACE(val_to_data, MBSECCQ_VAL_TO_DATA_DELAY, MBSECCQ_VAL_TO_DATA_DELAY_LEN) | PPC_PLACE(nest_val_to_data, MBSECCQ_NEST_VAL_TO_DATA_DELAY, @@ -536,15 +547,15 @@ static void p9n_mca_scom(int mcs_i, int mca_i) [9] DBGR_ECC_WAT_ACTION_SELECT = 0 [10-11] DBGR_ECC_WAT_SOURCE = 0 */ - mca_and_or(id, mca_i, DBGR, ~PPC_BITMASK(9, 11), 0); + mca_and_or(chip, id, mca_i, DBGR, ~PPC_BITMASK(9, 11), 0); /* MC01.PORT0.WRITE.WRTCFG = [9] = 1 // MCP_PORT0_WRITE_NEW_WRITE_64B_MODE this is marked as RO const 0 for bits 8-63 in docs! */ - mca_and_or(id, mca_i, WRTCFG, ~0ull, PPC_BIT(9)); + mca_and_or(chip, id, mca_i, WRTCFG, ~0ull, PPC_BIT(9)); } -static void thermal_throttle_scominit(int mcs_i, int mca_i) +static void thermal_throttle_scominit(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; /* Set power control register */ @@ -555,7 +566,7 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) else: 1 [23-32] MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME = 959 */ - mca_and_or(id, mca_i, MBARPC0Q, ~(PPC_BITMASK(3, 5) | PPC_BITMASK(22, 32)), + mca_and_or(chip, id, mca_i, MBARPC0Q, ~(PPC_BITMASK(3, 5) | PPC_BITMASK(22, 32)), PPC_PLACE(959, MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME, MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME_LEN )); @@ -568,7 +579,7 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_OFF: 0 // default [2-11] MBASTR0Q_CFG_ENTER_STR_TIME = 1023 */ - mca_and_or(id, mca_i, MBASTR0Q, ~(PPC_BIT(0) | PPC_BITMASK(2, 11)), + mca_and_or(chip, id, mca_i, MBASTR0Q, ~(PPC_BIT(0) | PPC_BITMASK(2, 11)), PPC_PLACE(1023, MBASTR0Q_CFG_ENTER_STR_TIME, MBASTR0Q_CFG_ENTER_STR_TIME_LEN)); @@ -591,7 +602,7 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) uint64_t nm_n_per_slot = 0x80; uint64_t nm_n_per_port = 0x80; uint64_t m_dram_clocks = 0x200; - mca_and_or(id, mca_i, MBA_FARB3Q, ~(PPC_BITMASK(0, 50) | PPC_BIT(53)), + mca_and_or(chip, id, mca_i, MBA_FARB3Q, ~(PPC_BITMASK(0, 50) | PPC_BIT(53)), PPC_PLACE(nm_n_per_slot, MBA_FARB3Q_CFG_NM_N_PER_SLOT, MBA_FARB3Q_CFG_NM_N_PER_SLOT_LEN) | PPC_PLACE(nm_n_per_port, MBA_FARB3Q_CFG_NM_N_PER_PORT, @@ -607,7 +618,7 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) [42-55] MBA_FARB4Q_EMERGENCY_M = ATTR_MSS_MRW_MEM_M_DRAM_CLOCKS */ uint64_t nm_throttled_n_per_port = 0x20; - 
mca_and_or(id, mca_i, MBA_FARB4Q, ~PPC_BITMASK(27, 55), + mca_and_or(chip, id, mca_i, MBA_FARB4Q, ~PPC_BITMASK(27, 55), PPC_PLACE(nm_throttled_n_per_port, MBA_FARB4Q_EMERGENCY_N, MBA_FARB4Q_EMERGENCY_N_LEN) | PPC_PLACE(m_dram_clocks, MBA_FARB4Q_EMERGENCY_M, @@ -620,7 +631,7 @@ static void thermal_throttle_scominit(int mcs_i, int mca_i) * for functional MCAs, maybe this can be called just for magic, non-functional * ones to save time, but for now do it in a way the Hostboot does it. */ -static void p9n_ddrphy_scom(int mcs_i, int mca_i) +static void p9n_ddrphy_scom(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -643,7 +654,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [63] DLL_CAL_CKTS_ACTIVE = After VREG calibration, some analog circuits are powered down */ /* Same as default value after reset? */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0, ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | @@ -659,7 +670,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [63] DLL_CAL_CKTS_ACTIVE = After VREG calibration, some analog circuits are powered down */ /* Same as default value after reset? */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0, ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | @@ -670,7 +681,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) // set this field to 60h [49-55] TSYS_WRCLK = 0x60 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, ~PPC_BITMASK(49, 55), PPC_PLACE(0x60, TSYS_WRCLK, TSYS_WRCLK_LEN)); @@ -679,7 +690,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [52] DD2_RESET_READ_FIX_DISABLE = 0 // Enable the DD2 function to remove the register reset on read feature // on status registers */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_CONFIG0_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_CONFIG0_P0_0, ~PPC_BITMASK(48, 52), PPC_PLACE(strength, DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH, DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH_LEN)); @@ -691,7 +702,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [61] S0INSDLYTAP = 1 // For proper functional operation, this bit must be 0b [62] S1INSDLYTAP = 1 // For proper functional operation, this bit must be 0b */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DLL_CONFIG1_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DLL_CONFIG1_P0_0, ~(PPC_BITMASK(48, 63)), PPC_BIT(S0INSDLYTAP) | PPC_BIT(S1INSDLYTAP)); @@ -700,7 +711,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [49-55] EN_SLICE_N_WR = 0x7f [57-63] EN_SLICE_P_WR = 0x7f */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x7F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN) | PPC_PLACE(0x7F, EN_SLICE_P_WR, EN_SLICE_P_WR_LEN)); @@ -710,7 +721,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) /* IOM0.DDRPHY_ADR_BIT_ENABLE_P0_ADR{0,1,2,3} = [48-63] = 0xffff */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_BIT_ENABLE_P0_ADR0, + dp_mca_and_or(chip, id, dp, mca_i, 
DDRPHY_ADR_BIT_ENABLE_P0_ADR0, ~PPC_BITMASK(48, 63), PPC_PLACE(0xFFFF, 48, 16)); } @@ -719,7 +730,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [49] DI_ADR2_ADR3: 1 = Lanes 2 and 3 are a differential clock pair [51] DI_ADR6_ADR7: 1 = Lanes 6 and 7 are a differential clock pair */ - mca_and_or(id, mca_i, DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1, ~PPC_BITMASK(48, 63), PPC_PLACE(0x5000, 48, 16)); /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = @@ -727,7 +738,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [49-55] ADR_DELAY2 = 0x40 [57-63] ADR_DELAY3 = 0x40 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, ~PPC_BITMASK(48, 63), PPC_PLACE(0x4040, 48, 16)); /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = @@ -735,7 +746,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [49-55] ADR_DELAY6 = 0x40 [57-63] ADR_DELAY7 = 0x40 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, ~PPC_BITMASK(48, 63), PPC_PLACE(0x4040, 48, 16)); for (dp = 0; dp < 2; dp ++) { @@ -744,7 +755,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [48-51] HS_DLLMUX_SEL_0_3 = 0 [59-62] STRENGTH = 4 // 2400 MT/s */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(strength, DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH, DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH_LEN)); @@ -755,7 +766,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) // SysClk tree and the WrClk tree are equal, set this field to 60h [49-55] TSYS_WRCLK = 0x60 */ - dp_mca_and_or(id, dp, mca_i, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x60, TSYS_WRCLK, TSYS_WRCLK_LEN)); @@ -767,7 +778,7 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [56-58] RXREG_VREG_REF_SEL_DC = 0x2 [63] DLL_CAL_CKTS_ACTIVE = 0 // After VREG calibration, some analog circuits are powered down */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | @@ -788,99 +799,100 @@ static void p9n_ddrphy_scom(int mcs_i, int mca_i) [62] DDR4_VLEVEL_BANK_GROUP = 1 [63] VPROTH_PSEL_MODE = 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_CONFIG0_P0, ~PPC_BITMASK(48, 63), + mca_and_or(chip, id, mca_i, DDRPHY_PC_CONFIG0_P0, ~PPC_BITMASK(48, 63), PPC_BIT(DDR4_CMD_SIG_REDUCTION) | PPC_BIT(DDR4_VLEVEL_BANK_GROUP)); } -static void p9n_mcbist_scom(int mcs_i) +static void p9n_mcbist_scom(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0AQ = [0-47] WATCFG0AQ_CFG_WAT_EVENT_SEL = 0x400000000000 */ - scom_and_or_for_chiplet(id, WATCFG0AQ, ~PPC_BITMASK(0, 47), - PPC_PLACE(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL, - WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN)); + rscom_and_or_for_chiplet(chip, id, WATCFG0AQ, ~PPC_BITMASK(0, 47), + PPC_PLACE(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL, + WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0BQ = [0-43] WATCFG0BQ_CFG_WAT_MSKA = 0x3fbfff [44-60] WATCFG0BQ_CFG_WAT_CNTL = 0x10000 */ - scom_and_or_for_chiplet(id, WATCFG0BQ, ~PPC_BITMASK(0, 60), - PPC_PLACE(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA, - WATCFG0BQ_CFG_WAT_MSKA_LEN) | 
- PPC_PLACE(0x10000, WATCFG0BQ_CFG_WAT_CNTL, - WATCFG0BQ_CFG_WAT_CNTL_LEN)); + rscom_and_or_for_chiplet(chip, id, WATCFG0BQ, ~PPC_BITMASK(0, 60), + PPC_PLACE(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA, + WATCFG0BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10000, WATCFG0BQ_CFG_WAT_CNTL, + WATCFG0BQ_CFG_WAT_CNTL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0DQ = [0-43] WATCFG0DQ_CFG_WAT_PATA = 0x80200004000 */ - scom_and_or_for_chiplet(id, WATCFG0DQ, ~PPC_BITMASK(0, 43), - PPC_PLACE(0x80200004000, WATCFG0DQ_CFG_WAT_PATA, - WATCFG0DQ_CFG_WAT_PATA_LEN)); + rscom_and_or_for_chiplet(chip, id, WATCFG0DQ, ~PPC_BITMASK(0, 43), + PPC_PLACE(0x80200004000, WATCFG0DQ_CFG_WAT_PATA, + WATCFG0DQ_CFG_WAT_PATA_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3AQ = [0-47] WATCFG3AQ_CFG_WAT_EVENT_SEL = 0x800000000000 */ - scom_and_or_for_chiplet(id, WATCFG3AQ, ~PPC_BITMASK(0, 47), - PPC_PLACE(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL, - WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN)); + rscom_and_or_for_chiplet(chip, id, WATCFG3AQ, ~PPC_BITMASK(0, 47), + PPC_PLACE(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL, + WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3BQ = [0-43] WATCFG3BQ_CFG_WAT_MSKA = 0xfffffffffff [44-60] WATCFG3BQ_CFG_WAT_CNTL = 0x10400 */ - scom_and_or_for_chiplet(id, WATCFG3BQ, ~PPC_BITMASK(0, 60), - PPC_PLACE(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA, - WATCFG3BQ_CFG_WAT_MSKA_LEN) | - PPC_PLACE(0x10400, WATCFG3BQ_CFG_WAT_CNTL, - WATCFG3BQ_CFG_WAT_CNTL_LEN)); + rscom_and_or_for_chiplet(chip, id, WATCFG3BQ, ~PPC_BITMASK(0, 60), + PPC_PLACE(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA, + WATCFG3BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10400, WATCFG3BQ_CFG_WAT_CNTL, + WATCFG3BQ_CFG_WAT_CNTL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ = [36] MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE = 0 */ - scom_and_for_chiplet(id, MCBCFGQ, ~PPC_BIT(MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE)); + rscom_and_for_chiplet(chip, id, MCBCFGQ, ~PPC_BIT(MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG0Q = [0] DBGCFG0Q_CFG_DBG_ENABLE = 1 [23-33] DBGCFG0Q_CFG_DBG_PICK_MCBIST01 = 0x780 */ - scom_and_or_for_chiplet(id, DBGCFG0Q, ~PPC_BITMASK(23, 33), - PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | - PPC_PLACE(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01, - DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN)); + rscom_and_or_for_chiplet(chip, id, DBGCFG0Q, ~PPC_BITMASK(23, 33), + PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | + PPC_PLACE(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01, + DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG1Q = [0] DBGCFG1Q_CFG_WAT_ENABLE = 1 */ - scom_or_for_chiplet(id, DBGCFG1Q, PPC_BIT(DBGCFG1Q_CFG_WAT_ENABLE)); + rscom_or_for_chiplet(chip, id, DBGCFG1Q, PPC_BIT(DBGCFG1Q_CFG_WAT_ENABLE)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG2Q = [0-19] DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL = 0x10000 [20-39] DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL = 0x08000 */ - scom_and_or_for_chiplet(id, DBGCFG2Q, ~PPC_BITMASK(0, 39), - PPC_PLACE(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL, - DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN) | - PPC_PLACE(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL, - DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN)); + rscom_and_or_for_chiplet(chip, id, DBGCFG2Q, ~PPC_BITMASK(0, 39), + PPC_PLACE(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN) | + PPC_PLACE(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG3Q = [20-22] DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL = 0x4 [23-25] DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL = 0x4 [37-40] DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE = 0x4 */ - scom_and_or_for_chiplet(id, DBGCFG3Q, ~(PPC_BITMASK(20, 25) 
| PPC_BITMASK(37, 40)), - PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL, - DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN) | - PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL, - DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN) | - PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE, - DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN)); + rscom_and_or_for_chiplet(chip, id, DBGCFG3Q, + ~(PPC_BITMASK(20, 25) | PPC_BITMASK(37, 40)), + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE, + DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN)); } -static void set_rank_pairs(int mcs_i, int mca_i) +static void set_rank_pairs(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -913,7 +925,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) [60-62] RANK_PAIR1_SEC = 3 [63] RANK_PAIR1_SEC_V = 1: if (rank_count0 == 4) */ - mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR0_P0, ~PPC_BITMASK(48, 63), + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR0_P0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x1537 & F[mca->dimm[0].mranks], 48, 16)); /* IOM0.DDRPHY_PC_RANK_PAIR1_P0 = @@ -927,18 +939,18 @@ static void set_rank_pairs(int mcs_i, int mca_i) [60-62] RANK_PAIR3_SEC = 3 [63] RANK_PAIR3_SEC_V = 1: if (rank_count1 == 4) */ - mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR1_P0, ~PPC_BITMASK(48, 63), + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR1_P0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x1537 & F[mca->dimm[1].mranks], 48, 16)); /* IOM0.DDRPHY_PC_RANK_PAIR2_P0 = [48-63] = 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR2_P0, ~PPC_BITMASK(48, 63), 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR2_P0, ~PPC_BITMASK(48, 63), 0); /* IOM0.DDRPHY_PC_RANK_PAIR3_P0 = [48-63] = 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_RANK_PAIR3_P0, ~PPC_BITMASK(48, 63), 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR3_P0, ~PPC_BITMASK(48, 63), 0); /* IOM0.DDRPHY_PC_CSID_CFG_P0 = [0-63] 0xf000: @@ -947,7 +959,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) [50] CS2_INIT_CAL_VALUE = 1 [51] CS3_INIT_CAL_VALUE = 1 */ - mca_and_or(id, mca_i, DDRPHY_PC_CSID_CFG_P0, ~PPC_BITMASK(48, 63), + mca_and_or(chip, id, mca_i, DDRPHY_PC_CSID_CFG_P0, ~PPC_BITMASK(48, 63), PPC_PLACE(0xF000, 48, 16)); /* IOM0.DDRPHY_PC_MIRROR_CONFIG_P0 = @@ -984,7 +996,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) */ uint64_t mirr = mca->dimm[0].present ? mca->dimm[0].spd[136] : mca->dimm[1].spd[136]; - mca_and_or(id, mca_i, DDRPHY_PC_MIRROR_CONFIG_P0, ~PPC_BITMASK(48, 63), + mca_and_or(chip, id, mca_i, DDRPHY_PC_MIRROR_CONFIG_P0, ~PPC_BITMASK(48, 63), PPC_PLACE(mirr, ADDR_MIRROR_RP1_PRI, 1) | PPC_PLACE(mirr, ADDR_MIRROR_RP1_SEC, 1) | PPC_PLACE(mirr, ADDR_MIRROR_RP3_PRI, 1) | @@ -1001,7 +1013,7 @@ static void set_rank_pairs(int mcs_i, int mca_i) /* These are not valid anyway, so don't bother setting anything. 
*/ } -static void reset_data_bit_enable(int mcs_i, int mca_i) +static void reset_data_bit_enable(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -1011,14 +1023,14 @@ static void reset_data_bit_enable(int mcs_i, int mca_i) [all] = 0 [48-63] DATA_BIT_ENABLE_0_15 = 0xffff */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFFFF); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFFFF); } /* IOM0.DDRPHY_DP16_DQ_BIT_ENABLE0_P0_4 = [all] = 0 [48-63] DATA_BIT_ENABLE_0_15 = 0xff00 */ - dp_mca_and_or(id, 4, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFF00); + dp_mca_and_or(chip, id, 4, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFF00); /* IOM0.DDRPHY_DP16_DFT_PDA_CONTROL_P0_{0,1,2,3,4} = // This reg is named MCA_DDRPHY_DP16_DATA_BIT_ENABLE1_P0_n in the code. @@ -1026,7 +1038,7 @@ static void reset_data_bit_enable(int mcs_i, int mca_i) [all] = 0 */ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DFT_PDA_CONTROL_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DFT_PDA_CONTROL_P0_0, 0, 0); } } @@ -1054,7 +1066,7 @@ static const uint16_t x8_clk[8][5] = { // {0x0CC0, 0xC0C0, 0x0F00, 0xC300, 0xC000}, /* Port 7 */ }; -static void reset_clock_enable(int mcs_i, int mca_i) +static void reset_clock_enable(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -1078,36 +1090,36 @@ static void reset_clock_enable(int mcs_i, int mca_i) for (dp = 0; dp < 5; dp++) { /* Note that these correspond to valid rank pairs */ if (mranks[0] > 0) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP0_P0_0, - 0, clk[dp]); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_0, - 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP0_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_0, 0, clk[dp]); } if (mranks[0] > 1) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP1_P0_0, - 0, clk[dp]); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR1_P0_0, - 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP1_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR1_P0_0, 0, clk[dp]); } if (mranks[1] > 0) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP2_P0_0, - 0, clk[dp]); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR2_P0_0, - 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP2_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR2_P0_0, 0, clk[dp]); } if (mranks[1] > 1) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_EN_RP3_P0_0, - 0, clk[dp]); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_READ_CLOCK_RANK_PAIR3_P0_0, - 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP3_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR3_P0_0, 0, clk[dp]); } } } -static void reset_rd_vref(int mcs_i, int mca_i) +static void reset_rd_vref(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -1129,44 +1141,44 @@ static void reset_rd_vref(int mcs_i, int mca_i) for (dp = 0; dp < 5; dp++) { /* SCOM addresses are not regular for DAC, so no inner loop. 
*/ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_0_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_0_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_1_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_1_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_2_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_2_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_3_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_3_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); if (dp == 4) break; - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_4_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_4_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_5_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_5_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_6_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_6_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_7_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_7_P0_0, ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); @@ -1177,12 +1189,12 @@ static void reset_rd_vref(int mcs_i, int mca_i) */ for (dp = 0; dp < 5; dp++) { /* Is it safe to set this before VREF_DAC? If yes, may use one loop for both */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, 0, PPC_BITMASK(48, 63)); } } -static void pc_reset(int mcs_i, int mca_i) +static void pc_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; /* These are from VPD */ @@ -1213,7 +1225,7 @@ static void pc_reset(int mcs_i, int mca_i) * mem_data.speed == 2133 ? 5 : * mem_data.speed == 2400 ? 
6 : 7; */ - mca_and_or(id, mca_i, DDRPHY_PC_CONFIG1_P0, + mca_and_or(chip, id, mca_i, DDRPHY_PC_CONFIG1_P0, ~(PPC_BITMASK(48, 55) | PPC_BITMASK(59, 62)), PPC_PLACE(/* ATTR_MSS_EFF_DPHY_WLO */ 3, WRITE_LATENCY_OFFSET, WRITE_LATENCY_OFFSET_LEN) | @@ -1225,15 +1237,15 @@ static void pc_reset(int mcs_i, int mca_i) /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 = [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); } -static void wc_reset(int mcs_i, int mca_i) +static void wc_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -1277,7 +1289,7 @@ static void wc_reset(int mcs_i, int mca_i) * JEDEC way of doing it so it _should_ work. */ uint64_t tWLO_tWLOE = 12 + MAX((tWLDQSEN + tMOD), (tWLO + tWLOE)) + 1 + 1; - mca_and_or(id, mca_i, DDRPHY_WC_CONFIG0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG0_P0, 0, PPC_PLACE(tWLO_tWLOE, TWLO_TWLOE, TWLO_TWLOE_LEN) | PPC_BIT(WL_ONE_DQS_PULSE) | PPC_PLACE(0x20, FW_WR_RD, FW_WR_RD_LEN) | @@ -1289,7 +1301,7 @@ static void wc_reset(int mcs_i, int mca_i) [52-54] SMALL_STEP = 0 [55-60] WR_PRE_DLY = 0x2a (42) */ - mca_and_or(id, mca_i, DDRPHY_WC_CONFIG1_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG1_P0, 0, PPC_PLACE(7, BIG_STEP, BIG_STEP_LEN) | PPC_PLACE(0x2A, WR_PRE_DLY, WR_PRE_DLY_LEN)); @@ -1300,7 +1312,7 @@ static void wc_reset(int mcs_i, int mca_i) [58-61] IPW_WR_WR = 5 // results in 24 clock cycles */ /* There is no Additive Latency. */ - mca_and_or(id, mca_i, DDRPHY_WC_CONFIG2_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG2_P0, 0, PPC_PLACE(5, NUM_VALID_SAMPLES, NUM_VALID_SAMPLES_LEN) | PPC_PLACE(MAX(mca->nwtr_s + 11, mem_data.nrtp + 3), FW_RD_WR, FW_RD_WR_LEN) | PPC_PLACE(5, IPW_WR_WR, IPW_WR_WR_LEN)); @@ -1309,7 +1321,7 @@ static void wc_reset(int mcs_i, int mca_i) [all] 0 [55-60] MRS_CMD_DQ_OFF = 0x3f */ - mca_and_or(id, mca_i, DDRPHY_WC_CONFIG3_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG3_P0, 0, PPC_PLACE(0x3F, MRS_CMD_DQ_OFF, MRS_CMD_DQ_OFF_LEN)); /* IOM0.DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 @@ -1317,12 +1329,12 @@ static void wc_reset(int mcs_i, int mca_i) [49] WR_CTR_ENABLE_RTT_SWAP = 0 [50-59] WR_CTR_VREF_COUNTER_RESET_VAL = 150ns in clock cycles // JESD79-4C Table 67 */ - mca_and_or(id, mca_i, DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0, ~PPC_BITMASK(48, 59), + mca_and_or(chip, id, mca_i, DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0, ~PPC_BITMASK(48, 59), PPC_PLACE(ns_to_nck(150), WR_CTR_VREF_COUNTER_RESET_VAL, WR_CTR_VREF_COUNTER_RESET_VAL_LEN)); } -static void rc_reset(int mcs_i, int mca_i) +static void rc_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -1332,21 +1344,21 @@ static void rc_reset(int mcs_i, int mca_i) [48-51] GLOBAL_PHY_OFFSET = 0x5 // ATTR_MSS_VPD_MR_DPHY_GPO [62] PERFORM_RDCLK_ALIGN = 1 */ - mca_and_or(id, mca_i, DDRPHY_RC_CONFIG0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG0_P0, 0, PPC_PLACE(0x5, GLOBAL_PHY_OFFSET, GLOBAL_PHY_OFFSET_LEN) | PPC_BIT(PERFORM_RDCLK_ALIGN)); /* IOM0.DDRPHY_RC_CONFIG1_P0 [all] 0 */ - mca_and_or(id, mca_i, DDRPHY_RC_CONFIG1_P0, 0, 0); + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG1_P0, 0, 0); /* IOM0.DDRPHY_RC_CONFIG2_P0 [all] 0 [48-52] CONSEC_PASS = 8 [57-58] 3 // not documented, 
BURST_WINDOW? */ - mca_and_or(id, mca_i, DDRPHY_RC_CONFIG2_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG2_P0, 0, PPC_PLACE(8, CONSEC_PASS, CONSEC_PASS_LEN) | PPC_PLACE(3, 57, 2)); @@ -1354,7 +1366,7 @@ static void rc_reset(int mcs_i, int mca_i) [all] 0 [51-54] COARSE_CAL_STEP_SIZE = 4 // 5/128 */ - mca_and_or(id, mca_i, DDRPHY_RC_CONFIG3_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG3_P0, 0, PPC_PLACE(4, COARSE_CAL_STEP_SIZE, COARSE_CAL_STEP_SIZE_LEN)); /* IOM0.DDRPHY_RC_RDVREF_CONFIG0_P0 = @@ -1369,14 +1381,15 @@ static void rc_reset(int mcs_i, int mca_i) uint64_t wait_time = mem_data.speed == 1866 ? 0x0804 : mem_data.speed == 2133 ? 0x092A : mem_data.speed == 2400 ? 0x0A50 : 0x0B74; - mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, 0, PPC_PLACE(wait_time, 48, 16)); + mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, + 0, PPC_PLACE(wait_time, 48, 16)); /* IOM0.DDRPHY_RC_RDVREF_CONFIG1_P0 = [all] 0 [48-55] CMD_PRECEDE_TIME = (AL + CL + 15) [56-59] MPR_LOCATION = 4 // "From R. King." */ - mca_and_or(id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, 0, PPC_PLACE(mca->cl + 15, CMD_PRECEDE_TIME, CMD_PRECEDE_TIME_LEN) | PPC_PLACE(4, MPR_LOCATION, MPR_LOCATION_LEN)); } @@ -1388,7 +1401,7 @@ static inline int log2_up(uint32_t x) return 63 - lz; } -static void seq_reset(int mcs_i, int mca_i) +static void seq_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -1412,7 +1425,7 @@ static void seq_reset(int mcs_i, int mca_i) (mca->dimm[1].width == WIDTH_x4 && mca->dimm[1].density == DENSITY_16Gb)) par_a17_mask = 0; - mca_and_or(id, mca_i, DDRPHY_SEQ_CONFIG0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_CONFIG0_P0, 0, PPC_BIT(DELAYED_PAR) | par_a17_mask); /* All log2 values in timing registers are rounded up. 
*/ @@ -1432,7 +1445,7 @@ static void seq_reset(int mcs_i, int mca_i) * * https://github.com/open-power/hostboot/blob/master/src/import/chips/p9/procedures/hwp/memory/lib/phy/seq.C#L142 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, 0, PPC_PLACE(5, TMOD_CYCLES, TMOD_CYCLES_LEN) | PPC_PLACE(log2_up(mca->nrcd), TRCD_CYCLES, TRCD_CYCLES_LEN) | PPC_PLACE(log2_up(mca->nrp), TRP_CYCLES, TRP_CYCLES_LEN) | @@ -1445,7 +1458,7 @@ static void seq_reset(int mcs_i, int mca_i) [56-59] TWLDQSEN_CYCLES = 6 // log2(37) rounded up, JEDEC tables 169 and 170 [60-63] TWRMRD_CYCLES = 6 // log2(40) rounded up, JEDEC tables 169 and 170 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM1_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM1_P0, 0, PPC_PLACE(10, TZQINIT_CYCLES, TZQINIT_CYCLES_LEN) | PPC_PLACE(7, TZQCS_CYCLES, TZQCS_CYCLES_LEN) | PPC_PLACE(6, TWLDQSEN_CYCLES, TWLDQSEN_CYCLES_LEN) | @@ -1457,7 +1470,7 @@ static void seq_reset(int mcs_i, int mca_i) [52-63] reserved = 0x777 // "Reset value of SEQ_TIMING2 is lucky 7's" */ /* AL and PL are disabled (0) */ - mca_and_or(id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM2_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM2_P0, 0, PPC_PLACE(log2_up(mem_data.cwl - 2), TODTLON_OFF_CYCLES, TODTLON_OFF_CYCLES_LEN) | PPC_PLACE(0x777, 52, 12)); @@ -1466,14 +1479,14 @@ static void seq_reset(int mcs_i, int mca_i) [all] 0 [48-63] RD_RW_DATA_REG0 = 0xaa00 */ - mca_and_or(id, mca_i, DDRPHY_SEQ_RD_WR_DATA0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_RD_WR_DATA0_P0, 0, PPC_PLACE(0xAA00, RD_RW_DATA_REG0, RD_RW_DATA_REG0_LEN)); /* IOM0.DDRPHY_SEQ_RD_WR_DATA1_P0 = [all] 0 [48-63] RD_RW_DATA_REG1 = 0x00aa */ - mca_and_or(id, mca_i, DDRPHY_SEQ_RD_WR_DATA1_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_RD_WR_DATA1_P0, 0, PPC_PLACE(0x00AA, RD_RW_DATA_REG1, RD_RW_DATA_REG1_LEN)); /* @@ -1490,7 +1503,7 @@ static void seq_reset(int mcs_i, int mca_i) [48-51] ODT_RD_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][0]) [56-59] ODT_RD_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][1]) */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG0_P0, 0, PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), ODT_RD_VALUES1, @@ -1514,7 +1527,7 @@ static void seq_reset(int mcs_i, int mca_i) PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES3, ODT_RD_VALUES3_LEN); - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = @@ -1523,7 +1536,7 @@ static void seq_reset(int mcs_i, int mca_i) [48-51] ODT_WR_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][0]) [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) */ - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_WR_VALUES0, ODT_WR_VALUES0_LEN) | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_WR_VALUES1, @@ -1546,11 +1559,11 @@ static void seq_reset(int mcs_i, int mca_i) PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), ODT_WR_VALUES3, ODT_WR_VALUES3_LEN); - mca_and_or(id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, 0, val); + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, 0, val); #undef F } -static void 
reset_ac_boost_cntl(int mcs_i, int mca_i) +static void reset_ac_boost_cntl(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -1583,18 +1596,18 @@ static void reset_ac_boost_cntl(int mcs_i, int mca_i) * readability. */ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0, ~PPC_BITMASK(48, 56), PPC_PLACE(1, S0ACENSLICENDRV_DC, S0ACENSLICENDRV_DC_LEN) | PPC_PLACE(1, S0ACENSLICEPDRV_DC, S0ACENSLICEPDRV_DC_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0, ~PPC_BITMASK(48, 56), PPC_PLACE(1, S1ACENSLICENDRV_DC, S1ACENSLICENDRV_DC_LEN) | PPC_PLACE(1, S1ACENSLICEPDRV_DC, S1ACENSLICEPDRV_DC_LEN)); } } -static void reset_ctle_cntl(int mcs_i, int mca_i) +static void reset_ctle_cntl(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -1622,13 +1635,13 @@ static void reset_ctle_cntl(int mcs_i, int mca_i) * 0xb6db6db6db6db6d0 (every 3b field is 0b101 = 5). */ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0, ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), PPC_PLACE(1, NIB_0_DQSEL_CAP, NIB_0_DQSEL_CAP_LEN) | PPC_PLACE(5, NIB_0_DQSEL_RES, NIB_0_DQSEL_RES_LEN) | PPC_PLACE(1, NIB_1_DQSEL_CAP, NIB_1_DQSEL_CAP_LEN) | PPC_PLACE(5, NIB_1_DQSEL_RES, NIB_1_DQSEL_RES_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0, ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), PPC_PLACE(1, NIB_2_DQSEL_CAP, NIB_2_DQSEL_CAP_LEN) | PPC_PLACE(5, NIB_2_DQSEL_RES, NIB_2_DQSEL_RES_LEN) | @@ -1637,7 +1650,7 @@ static void reset_ctle_cntl(int mcs_i, int mca_i) } } -static void reset_delay(int mcs_i, int mca_i) +static void reset_delay(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -1662,7 +1675,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0 [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx][mca_i], @@ -1673,7 +1686,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1 [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx][mca_i], @@ -1684,7 +1697,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1 [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | 
PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx][mca_i], @@ -1695,7 +1708,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1 [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx][mca_i], @@ -1706,7 +1719,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00 [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx][mca_i], @@ -1717,7 +1730,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0 [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR0, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx][mca_i], @@ -1729,7 +1742,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13 [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR1, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR1, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx][mca_i], @@ -1740,7 +1753,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[vpd_idx][mca_i], @@ -1751,7 +1764,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17 [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR1, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR1, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx][mca_i], @@ -1762,7 +1775,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[vpd_idx][mca_i], @@ -1773,7 +1786,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2 [57-63] ADR_DELAY9 = 
ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR1, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR1, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx][mca_i], @@ -1784,7 +1797,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02 [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR1, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR1, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx][mca_i], @@ -1796,7 +1809,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0 [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR2, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR2, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx][mca_i], @@ -1807,7 +1820,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08 [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR2, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR2, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx][mca_i], @@ -1818,7 +1831,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03 [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR2, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR2, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx][mca_i], @@ -1829,7 +1842,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04 [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR2, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR2, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx][mca_i], @@ -1840,7 +1853,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09 [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR2, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR2, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx][mca_i], @@ -1851,7 +1864,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1 [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR2, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR2, 0, 
PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx][mca_i], @@ -1863,7 +1876,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR3, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR3, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx][mca_i], @@ -1874,7 +1887,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0 [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR3, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR3, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx][mca_i], @@ -1885,7 +1898,7 @@ static void reset_delay(int mcs_i, int mca_i) [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1 [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR3, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR3, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx][mca_i], @@ -1895,13 +1908,13 @@ static void reset_delay(int mcs_i, int mca_i) [all] 0 [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0 */ - mca_and_or(id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR3, 0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR3, 0, PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx][mca_i], ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN)); } -static void reset_tsys_adr(int mcs_i, int mca_i) +static void reset_tsys_adr(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int i = mem_data.speed == 1866 ? 0 : @@ -1918,13 +1931,13 @@ static void reset_tsys_adr(int mcs_i, int mca_i) // Set to '12'h for 1866 MT/s. */ /* Has the same stride as DP16. */ - dp_mca_and_or(id, 0, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + dp_mca_and_or(chip, id, 0, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); - dp_mca_and_or(id, 1, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + dp_mca_and_or(chip, id, 1, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); } -static void reset_tsys_data(int mcs_i, int mca_i) +static void reset_tsys_data(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int i = mem_data.speed == 1866 ? 0 : @@ -1942,13 +1955,13 @@ static void reset_tsys_data(int mcs_i, int mca_i) // Set to '0D'h for 1866 MT/s. 
*/ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_DATA[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); } } -static void reset_io_impedances(int mcs_i, int mca_i) +static void reset_io_impedances(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -1965,7 +1978,7 @@ static void reset_io_impedances(int mcs_i, int mca_i) * is 34 Ohms. 240/34 = 7 bits set. According to documentation this is the * default value, but set it just to be safe. */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, 0, PPC_PLACE(0x7F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN) | PPC_PLACE(0x7F, EN_SLICE_P_WR, EN_SLICE_P_WR_LEN)); @@ -1975,7 +1988,7 @@ static void reset_io_impedances(int mcs_i, int mca_i) [49-55] EN_SLICE_N_WR = ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS[{0,1,2,3,4}] */ /* 60 Ohms for all configurations, 240/60 = 4 bits set. */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_IO_TX_PFET_TERM_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_PFET_TERM_P0_0, 0, PPC_PLACE(0x0F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN)); } @@ -1987,7 +2000,7 @@ static void reset_io_impedances(int mcs_i, int mca_i) [54,52,62,60] = 0 */ /* 30 Ohms for all configurations. */ - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL3) | PPC_BIT(SLICE_SEL6) | PPC_BIT(SLICE_SEL7)); @@ -2011,22 +2024,22 @@ static void reset_io_impedances(int mcs_i, int mca_i) IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 = [48,50,52,58] = val // ACT_N, ADDR11, BG0, BG1 */ - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL5) | PPC_BIT(SLICE_SEL7)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL3)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL4)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, PPC_BIT(SLICE_SEL2)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL3) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL5) | PPC_BIT(SLICE_SEL6) | PPC_BIT(SLICE_SEL7)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL3)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL5)); @@ -2046,15 +2059,15 @@ static void reset_io_impedances(int mcs_i, int mca_i) IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 = // same as CMD/ADDR, however it uses different VPD [54,56,60,62] = val // CKE0, CKE3, CKE2, RESET_N */ - mca_and_or(id, 
mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL6)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, PPC_BIT(SLICE_SEL3)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, PPC_BIT(SLICE_SEL2)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, PPC_BIT(SLICE_SEL3) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL6) | PPC_BIT(SLICE_SEL7)); @@ -2072,13 +2085,13 @@ static void reset_io_impedances(int mcs_i, int mca_i) IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 = // same as CMD/ADDR, however it uses different VPD [48] = val // CS2 */ - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL3)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL5)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1)); - mca_and_or(id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, PPC_BIT(SLICE_SEL0)); /* @@ -2091,7 +2104,7 @@ static void reset_io_impedances(int mcs_i, int mca_i) */ } -static void reset_wr_vref_registers(int mcs_i, int mca_i) +static void reset_wr_vref_registers(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -2111,7 +2124,7 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) [57-59] WR_CTR_NUM_BITS_TO_SKIP = 0 // skip nothing [60-62] WR_CTR_NUM_NO_INC_VREF_COMP = 7 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_CONFIG0_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_CONFIG0_P0_0, 0, PPC_BIT(WR_CTR_RUN_FULL_1D) | PPC_PLACE(1, WR_CTR_2D_BIG_STEP_VAL, WR_CTR_2D_BIG_STEP_VAL_LEN) | @@ -2124,7 +2137,7 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) [49-55] WR_CTR_VREF_RANGE_CROSSOVER = 0x18 // JEDEC table 34 [56-62] WR_CTR_VREF_SINGLE_RANGE_MAX = 0x32 // JEDEC table 34 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_CONFIG1_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_CONFIG1_P0_0, 0, PPC_PLACE(0x18, WR_CTR_VREF_RANGE_CROSSOVER, WR_CTR_VREF_RANGE_CROSSOVER_LEN) | PPC_PLACE(0x32, WR_CTR_VREF_SINGLE_RANGE_MAX, @@ -2133,27 +2146,27 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) /* IOM0.DDRPHY_DP16_WR_VREF_STATUS0_P0_{0,1,2,3,4} = [all] 0 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_STATUS0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_STATUS0_P0_0, 0, 0); /* IOM0.DDRPHY_DP16_WR_VREF_STATUS1_P0_{0,1,2,3,4} = [all] 0 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_STATUS1_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_STATUS1_P0_0, 0, 0); /* 
IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK{0,1}_P0_{0,1,2,3,4} = [all] 0 [48-63] 0xffff */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0, 0, PPC_BITMASK(48, 63)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0, 0, PPC_BITMASK(48, 63)); /* IOM0.DDRPHY_DP16_WR_VREF_ERROR{0,1}_P0_{0,1,2,3,4} = [all] 0 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0, 0, 0); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0, 0, 0); /* Assume RDIMM IOM0.DDRPHY_DP16_WR_VREF_VALUE{0,1}_RANK_PAIR0_P0_{0,1,2,3,4} = @@ -2166,42 +2179,50 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) [57] WR_VREF_RANGE_DRAM{1,3} = ATTR_MSS_VPD_MT_VREF_DRAM_WR & 0x40 [58-63] WR_VREF_VALUE_DRAM{1,3} = ATTR_MSS_VPD_MT_VREF_DRAM_WR & 0x3f */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR0_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR0_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR0_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR0_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR1_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR1_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR1_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR1_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR2_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR2_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR2_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR2_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM3, WR_VREF_VALUE_DRAM3_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR3_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR3_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM0, WR_VREF_VALUE_DRAM0_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], 
WR_VREF_VALUE_DRAM1, WR_VREF_VALUE_DRAM1_LEN)); - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR3_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR3_P0_0, + 0, PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], WR_VREF_VALUE_DRAM2, WR_VREF_VALUE_DRAM2_LEN) | PPC_PLACE(ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx], @@ -2209,7 +2230,7 @@ static void reset_wr_vref_registers(int mcs_i, int mca_i) } } -static void reset_drift_limits(int mcs_i, int mca_i) +static void reset_drift_limits(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -2218,14 +2239,14 @@ static void reset_drift_limits(int mcs_i, int mca_i) /* IOM0.DDRPHY_DP16_DRIFT_LIMITS_P0_{0,1,2,3,4} = [48-49] DD2_BLUE_EXTEND_RANGE = 1 // always ONE_TO_FOUR due to red waterfall workaround */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DRIFT_LIMITS_P0_0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DRIFT_LIMITS_P0_0, ~PPC_BITMASK(48, 49), PPC_PLACE(1, DD2_BLUE_EXTEND_RANGE, DD2_BLUE_EXTEND_RANGE_LEN)); } } -static void rd_dia_config5(int mcs_i, int mca_i) +static void rd_dia_config5(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -2238,14 +2259,14 @@ static void rd_dia_config5(int mcs_i, int mca_i) [52] PER_CAL_UPDATE_DISABLE = 1 // "This bit must be set to 0 for normal operation" [59] PERCAL_PWR_DIS = 1 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_RD_DIA_CONFIG5_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_DIA_CONFIG5_P0_0, 0, PPC_BIT(DYN_MCTERM_CNTL_EN) | PPC_BIT(PER_CAL_UPDATE_DISABLE) | PPC_BIT(PERCAL_PWR_DIS)); } } -static void dqsclk_offset(int mcs_i, int mca_i) +static void dqsclk_offset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -2256,58 +2277,58 @@ static void dqsclk_offset(int mcs_i, int mca_i) [all] 0 [49-55] DQS_OFFSET = 0x08 // Config provided by S. Wyatt 9/13 */ - dp_mca_and_or(id, dp, mca_i, DDRPHY_DP16_DQSCLK_OFFSET_P0_0, 0, + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQSCLK_OFFSET_P0_0, 0, PPC_PLACE(0x08, DQS_OFFSET, DQS_OFFSET_LEN)); } } -static void phy_scominit(int mcs_i, int mca_i) +static void phy_scominit(uint8_t chip, int mcs_i, int mca_i) { /* Hostboot here sets strength, we did it in p9n_ddrphy_scom(). 
*/ - set_rank_pairs(mcs_i, mca_i); + set_rank_pairs(chip, mcs_i, mca_i); - reset_data_bit_enable(mcs_i, mca_i); + reset_data_bit_enable(chip, mcs_i, mca_i); /* Assume there are no bad bits (disabled DQ/DQS lines) for now */ // reset_bad_bits(); - reset_clock_enable(mcs_i, mca_i); - reset_rd_vref(mcs_i, mca_i); + reset_clock_enable(chip, mcs_i, mca_i); + reset_rd_vref(chip, mcs_i, mca_i); - pc_reset(mcs_i, mca_i); - wc_reset(mcs_i, mca_i); - rc_reset(mcs_i, mca_i); - seq_reset(mcs_i, mca_i); + pc_reset(chip, mcs_i, mca_i); + wc_reset(chip, mcs_i, mca_i); + rc_reset(chip, mcs_i, mca_i); + seq_reset(chip, mcs_i, mca_i); - reset_ac_boost_cntl(mcs_i, mca_i); - reset_ctle_cntl(mcs_i, mca_i); - reset_delay(mcs_i, mca_i); - reset_tsys_adr(mcs_i, mca_i); - reset_tsys_data(mcs_i, mca_i); - reset_io_impedances(mcs_i, mca_i); - reset_wr_vref_registers(mcs_i, mca_i); - reset_drift_limits(mcs_i, mca_i); + reset_ac_boost_cntl(chip, mcs_i, mca_i); + reset_ctle_cntl(chip, mcs_i, mca_i); + reset_delay(chip, mcs_i, mca_i); + reset_tsys_adr(chip, mcs_i, mca_i); + reset_tsys_data(chip, mcs_i, mca_i); + reset_io_impedances(chip, mcs_i, mca_i); + reset_wr_vref_registers(chip, mcs_i, mca_i); + reset_drift_limits(chip, mcs_i, mca_i); /* Workarounds */ /* Doesn't apply to DD2 */ // dqs_polarity(); - rd_dia_config5(mcs_i, mca_i); - dqsclk_offset(mcs_i, mca_i); + rd_dia_config5(chip, mcs_i, mca_i); + dqsclk_offset(chip, mcs_i, mca_i); /* Doesn't apply to DD2 */ // odt_config(); } -static void fir_unmask(int mcs_i, int mca_i) +static void fir_unmask(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; /* IOM0.IOM_PHY0_DDRPHY_FIR_REG = [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 = 0 // calibration errors [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 // DLL errors */ - mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, ~(PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2) | PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4)), 0); @@ -2316,30 +2337,16 @@ static void fir_unmask(int mcs_i, int mca_i) [4] MBACALFIR_RCD_PARITY_ERROR = 0 [8] MBACALFIR_DDR_MBA_EVENT_N = 0 */ - mca_and_or(id, mca_i, MBACALFIR, + mca_and_or(chip, id, mca_i, MBACALFIR, ~(PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | PPC_BIT(MBACALFIR_DDR_MBA_EVENT_N)), 0); } -/* - * 13.8 mss_scominit: Perform scom inits to MC and PHY - * - * - HW units included are MCBIST, MCA/PHY (Nimbus) or membuf, L4, MBAs (Cumulus) - * - Does not use initfiles, coded into HWP - * - Uses attributes from previous step - * - Pushes memory extent configuration into the MBA/MCAs - * - Addresses are pulled from attributes, set previously by mss_eff_config - * - MBA/MCAs always start at address 0, address map controlled by - * proc_setup_bars below - */ -void istep_13_8(void) +static void mss_scominit(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.8\n"); int mcs_i, mca_i; - report_istep(13, 8); - for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { /* No need to initialize a non-functional MCS */ if (!mem_data.mcs[mcs_i].functional) @@ -2358,14 +2365,14 @@ void istep_13_8(void) /* Some registers cannot be initialized without data from SPD */ if (mca->functional) { /* Assume DIMM mixing rules are followed - same rank config on both DIMMs*/ - p9n_mca_scom(mcs_i, mca_i); - thermal_throttle_scominit(mcs_i, mca_i); + p9n_mca_scom(chip, mcs_i, mca_i); + thermal_throttle_scominit(chip, mcs_i, mca_i); } /* The rest can and should be initialized also on magic port */ - p9n_ddrphy_scom(mcs_i, mca_i); + p9n_ddrphy_scom(chip, 
mcs_i, mca_i); } - p9n_mcbist_scom(mcs_i); + p9n_mcbist_scom(chip, mcs_i); } /* This double loop is a part of phy_scominit() in Hostboot, but this is simpler. */ @@ -2377,16 +2384,25 @@ void istep_13_8(void) mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; /* No magic for phy_scominit(). */ if (mca->functional) - phy_scominit(mcs_i, mca_i); + phy_scominit(chip, mcs_i, mca_i); - /* - * TODO: test this with DIMMs on both MCS. Maybe this has to be done - * in a separate loop, after phy_scominit()'s are done on both MCSs. - */ if (mca_i == 0 || mca->functional) - fir_unmask(mcs_i, mca_i); + fir_unmask(chip, mcs_i, mca_i); } } +} + +void istep_13_8(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.8\n"); + report_istep(13, 8); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_scominit(chip); + } printk(BIOS_EMERG, "ending istep 13.8\n"); } diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c index f8ba003b335..9facf55f3f4 100644 --- a/src/soc/ibm/power9/istep_13_9.c +++ b/src/soc/ibm/power9/istep_13_9.c @@ -6,10 +6,10 @@ #include "istep_13_scom.h" -static int test_dll_calib_done(int mcs_i, int mca_i, bool *do_workaround) +static int test_dll_calib_done(uint8_t chip, int mcs_i, int mca_i, bool *do_workaround) { chiplet_id_t id = mcs_ids[mcs_i]; - uint64_t status = mca_read(id, mca_i, DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0); + uint64_t status = mca_read(chip, id, mca_i, DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0); /* if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 [48] DP_DLL_CAL_GOOD == 1 @@ -41,7 +41,7 @@ static int test_dll_calib_done(int mcs_i, int mca_i, bool *do_workaround) return 0; } -static int test_bb_lock(int mcs_i) +static int test_bb_lock(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; uint64_t res = PPC_BIT(BB_LOCK0) | PPC_BIT(BB_LOCK1); @@ -63,18 +63,19 @@ static int test_bb_lock(int mcs_i) */ /* ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S{0,1}, BB_LOCK0 doesn't matter */ - res &= dp_mca_read(id, 0, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | + res &= dp_mca_read(chip, id, 0, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | PPC_BIT(BB_LOCK0); - res &= dp_mca_read(id, 1, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | + res &= dp_mca_read(chip, id, 1, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | PPC_BIT(BB_LOCK0); /* IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_{0,1,2,3} */ for (dp = 0; dp < 4; dp++) { - res &= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0); + res &= dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0); } /* IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_4, BB_LOCK1 doesn't matter */ - res &= dp_mca_read(id, dp, mca_i, DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0) | + res &= dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0) | PPC_BIT(BB_LOCK1); /* Do we want early return here? */ @@ -128,7 +129,7 @@ static void fix_bad_voltage_settings(int mcs_i) */ } -static void check_during_phy_reset(int mcs_i) +static void check_during_phy_reset(uint8_t chip, int mcs_i) { /* * Mostly FFDC, which to my current knowledge is just the error logging. 
If @@ -148,7 +149,7 @@ static void check_during_phy_reset(int mcs_i) [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR [10] MBACALFIR_SM_1HOT_ERR */ - val = mca_read(id, mca_i, MBACALFIR); + val = mca_read(chip, id, mca_i, MBACALFIR); if (val & (PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_SM_1HOT_ERR))) { @@ -157,7 +158,7 @@ static void check_during_phy_reset(int mcs_i) mca_i, val); } - mca_and_or(id, mca_i, MBACALFIR, + mca_and_or(chip, id, mca_i, MBACALFIR, ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_SM_1HOT_ERR)), @@ -173,18 +174,18 @@ static void check_during_phy_reset(int mcs_i) [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 */ - val = mca_read(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG); + val = mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG); if (val & PPC_BITMASK(54, 61)) { /* No idea how severe that error is... */ printk(BIOS_ERR, "Error detected in IOM_PHY%d_DDRPHY_FIR_REG: %#llx\n", mca_i , val); } - mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, ~(PPC_BITMASK(54, 61)), 0); + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, ~(PPC_BITMASK(54, 61)), 0); } } -static void fir_unmask(int mcs_i) +static void fir_unmask(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; int mca_i; @@ -242,20 +243,20 @@ static void fir_unmask(int mcs_i) [4] MBACALFIR_RCD_PARITY_ERROR = 0 // recoverable_error (0,1,0) [10] MBACALFIR_SM_1HOT_ERR = 0 // checkstop (0,0,0) */ - mca_and_or(id, mca_i, MBACALFIR_ACTION0, + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION0, ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | PPC_BIT(MBACALFIR_SM_1HOT_ERR)), 0); - mca_and_or(id, mca_i, MBACALFIR_ACTION1, + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION1, ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | PPC_BIT(MBACALFIR_SM_1HOT_ERR)), PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_RCD_PARITY_ERROR)); - mca_and_or(id, mca_i, MBACALFIR_MASK, + mca_and_or(chip, id, mca_i, MBACALFIR_MASK, ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | @@ -288,13 +289,13 @@ static void fir_unmask(int mcs_i) [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 0 // recoverable_error (0,1,0) [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 0 // recoverable_error (0,1,0) */ - mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION0_REG, + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION0_REG, ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), 0); - mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION1_REG, + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION1_REG, ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)); - mca_and_or(id, mca_i, IOM_PHY0_DDRPHY_FIR_MASK_REG, + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_MASK_REG, ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), 0); } @@ -307,7 +308,7 @@ static void fir_unmask(int mcs_i) * "I want to break free" - Freddie Mercury */ #define TEST_VREF(dp, scom) \ -if ((dp_mca_read(mcs_ids[mcs_i], dp, mca_i, scom) & PPC_BITMASK(56, 62)) == \ +if ((dp_mca_read(chip, mcs_ids[mcs_i], dp, mca_i, scom) & PPC_BITMASK(56, 62)) == \ PPC_PLACE(1, 56, 7)) { \ need_dll_workaround = true; \ break; \ @@ -327,6 +328,7 @@ void istep_13_9(void) { printk(BIOS_EMERG, "starting istep 
13.9\n"); int mcs_i, mca_i, dp; + uint8_t chip = 0; // TODO: support second CPU long time; bool need_dll_workaround; @@ -356,7 +358,7 @@ void istep_13_9(void) /* MC01.PORT0.SRQ.MBA_FARB5Q = [8] MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N = 0 */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BIT(MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N), 0); /* Drive all control signals to their inactive/idle state, or @@ -367,9 +369,11 @@ void istep_13_9(void) [48] reserved = 1 // MCA_DDRPHY_DP16_SYSCLK_PR0_P0_0_01_ENABLE */ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, - 0, PPC_BIT(48)); - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR1_P0_0, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + 0, PPC_BIT(48)); + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR1_P0_0, 0, PPC_BIT(48)); } @@ -377,7 +381,7 @@ void istep_13_9(void) MC01.PORT0.SRQ.MBA_CAL0Q = [57] MBA_CAL0Q_RESET_RECOVER = 1 */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_CAL0Q, ~0, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_CAL0Q, ~0, PPC_BIT(MBA_CAL0Q_RESET_RECOVER)); } @@ -393,7 +397,7 @@ void istep_13_9(void) MC01.PORT0.SRQ.MBA_CAL0Q = [57] MBA_CAL0Q_RESET_RECOVER = 0 */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_CAL0Q, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_CAL0Q, ~PPC_BIT(MBA_CAL0Q_RESET_RECOVER), 0); /* Flush output drivers @@ -403,10 +407,10 @@ void istep_13_9(void) [50] INIT_IO = 1 */ /* Has the same stride as DP16 */ - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, PPC_BIT(FLUSH) | PPC_BIT(INIT_IO)); - dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, PPC_BIT(FLUSH) | PPC_BIT(INIT_IO)); @@ -418,7 +422,8 @@ void istep_13_9(void) [58] DELAY_PING_PONG_HALF = 1 */ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, DDRPHY_DP16_CONFIG0_P0_0, 0, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_DP16_CONFIG0_P0_0, 0, PPC_BIT(DP16_CONFIG0_FLUSH) | PPC_BIT(DP16_CONFIG0_INIT_IO) | PPC_BIT(DP16_CONFIG0_ADVANCE_PING_PONG) | @@ -440,9 +445,9 @@ void istep_13_9(void) [50] INIT_IO = 0 */ /* Has the same stride as DP16 */ - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, 0); - dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, 0); /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0,1,2,3,4} = @@ -453,7 +458,8 @@ void istep_13_9(void) [58] DELAY_PING_PONG_HALF = 1 */ for (dp = 0; dp < 5; dp++) { - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, DDRPHY_DP16_CONFIG0_P0_0, 0, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_DP16_CONFIG0_P0_0, 0, PPC_BIT(DP16_CONFIG0_ADVANCE_PING_PONG) | PPC_BIT(DP16_CONFIG0_DELAY_PING_PONG_HALF)); } @@ -467,14 +473,15 @@ void istep_13_9(void) // Yet another documentation error: all bits in this register are marked as read-only [51] ENABLE_ZCAL = 1 */ - mca_and_or(mcs_ids[mcs_i], 0, DDRPHY_PC_RESETS_P0, ~0, PPC_BIT(ENABLE_ZCAL)); + mca_and_or(chip, mcs_ids[mcs_i], 0, DDRPHY_PC_RESETS_P0, + ~0, PPC_BIT(ENABLE_ZCAL)); /* Maybe it would be better to add another 1us later instead of this. */ delay_nck(1024); /* for each magic MCA */ /* 50*10ns, but we don't have such precision. 
*/ - time = wait_us(1, mca_read(mcs_ids[mcs_i], 0, + time = wait_us(1, mca_read(chip, mcs_ids[mcs_i], 0, DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0) & PPC_BIT(ZCAL_DONE)); if (!time) @@ -495,18 +502,21 @@ void istep_13_9(void) [48] INIT_RXDLL_CAL_RESET = 0 */ /* Has the same stride as DP16 */ - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); - dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, + DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); for (dp = 0; dp < 4; dp++) { /* IOM0.DDRPHY_DP16_DLL_CNTL{0,1}_P0_{0,1,2,3} = [48] INIT_RXDLL_CAL_RESET = 0 */ - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL0_P0_0, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_DLL_CNTL0_P0_0, ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL1_P0_0, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL1_P0_0, ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); } /* Last DP16 is different @@ -515,10 +525,12 @@ void istep_13_9(void) IOM0.DDRPHY_DP16_DLL_CNTL1_P0_4 [48] INIT_RXDLL_CAL_RESET = 1 */ - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL0_P0_0, - ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL1_P0_0, - ~0, PPC_BIT(INIT_RXDLL_CAL_RESET)); + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_DLL_CNTL0_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_DLL_CNTL1_P0_0, + ~0, PPC_BIT(INIT_RXDLL_CAL_RESET)); } /* From Hostboot's comments: @@ -561,7 +573,8 @@ void istep_13_9(void) break; } /* 50*10ns, but we don't have such precision. */ - time = wait_us(1, test_dll_calib_done(mcs_i, mca_i, &need_dll_workaround)); + time = wait_us(1, test_dll_calib_done(chip, mcs_i, mca_i, + &need_dll_workaround)); if (!time) die("DLL calibration timeout\n"); @@ -590,7 +603,7 @@ void istep_13_9(void) * This is not safe if DLL calibration takes more time for other MCAs, * but this is the way Hostboot does it. 
*/ - test_dll_calib_done(mcs_i, mca_i, &need_dll_workaround); + test_dll_calib_done(chip, mcs_i, mca_i, &need_dll_workaround); /* if (IOM0.DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0 | @@ -626,24 +639,24 @@ void istep_13_9(void) [48-63] 0x8024 // From the DDR PHY workbook */ /* Has the same stride as DP16 */ - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); - dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); for (dp = 0; dp < 4; dp++) { - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR1_P0_0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); } - dp_mca_and_or(mcs_ids[mcs_i], 4, mca_i, - DDRPHY_DP16_SYSCLK_PR0_P0_0, - ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); + dp_mca_and_or(chip, mcs_ids[mcs_i], 4, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16)); } /* @@ -665,7 +678,7 @@ void istep_13_9(void) * Increasing the timeout helps (maybe that's just luck), but * this probably isn't a proper way to do this. */ - time = wait_ms(1000, test_bb_lock(mcs_i)); + time = wait_ms(1000, test_bb_lock(chip, mcs_i)); if (!time) die("BB lock timeout\n"); @@ -679,7 +692,7 @@ void istep_13_9(void) IOM0.DDRPHY_PC_RESETS_P0 = [49] SYSCLK_RESET = 0 */ - mca_and_or(mcs_ids[mcs_i], mca_i, DDRPHY_PC_RESETS_P0, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, DDRPHY_PC_RESETS_P0, ~PPC_BIT(SYSCLK_RESET), 0); /* Reset the windage registers */ @@ -725,24 +738,24 @@ void istep_13_9(void) [48-63] 0x8020 // From the DDR PHY workbook */ /* Has the same stride as DP16 */ - dp_mca_and_or(mcs_ids[mcs_i], 0, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); - dp_mca_and_or(mcs_ids[mcs_i], 1, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, ADR_SYSCLK_CNTRL_PR_P0_ADR32S0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); for (dp = 0; dp < 4; dp++) { - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR0_P0_0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); - dp_mca_and_or(mcs_ids[mcs_i], dp, mca_i, + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_SYSCLK_PR1_P0_0, ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); } - dp_mca_and_or(mcs_ids[mcs_i], 4, mca_i, - DDRPHY_DP16_SYSCLK_PR0_P0_0, - ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); + dp_mca_and_or(chip, mcs_ids[mcs_i], 4, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16)); } /* Wait at least 32 dphy_nclk clock cycles */ @@ -759,7 +772,7 @@ void istep_13_9(void) /* MC01.PORT0.SRQ.MBA_FARB5Q = [8] MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N = 1 */ - mca_and_or(mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, PPC_BIT(MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N)); } @@ -785,8 +798,8 @@ void istep_13_9(void) // mss::adr32s::duty_cycle_distortion_calibration(); /* FIR */ - check_during_phy_reset(mcs_i); - fir_unmask(mcs_i); + check_during_phy_reset(chip, mcs_i); + fir_unmask(chip, mcs_i); } printk(BIOS_EMERG, "ending istep 13.9\n"); diff --git 
a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index 47236d94d91..cf6b79c98f4 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -9,7 +9,7 @@ #include "istep_13_scom.h" #include "mcbist.h" -static void fir_unmask(int mcs_i) +static void fir_unmask(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; int mca_i; @@ -30,7 +30,7 @@ static void fir_unmask(int mcs_i) MC01.PORT0.ECC64.SCOM.RECR [26] MBSECCQ_ENABLE_UE_NOISE_WINDOW = 1 */ - mca_and_or(id, mca_i, RECR, ~0, PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW)); + mca_and_or(chip, id, mca_i, RECR, ~0, PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW)); /* * Read out the wr_done and rd_tag delays and find min and set the RCD @@ -43,10 +43,10 @@ static void fir_unmask(int mcs_i) * MC01.PORT0.SRQ.MBA_FARB0Q * [48-53] MBA_FARB0Q_CFG_RCD_PROTECTION_TIME */ - val = mca_read(id, mca_i, MBA_DSM0Q); + val = mca_read(chip, id, mca_i, MBA_DSM0Q); val = MIN((val & PPC_BITMASK(24, 29)) >> 29, (val & PPC_BITMASK(36, 41)) >> 41); - mca_and_or(id, mca_i, MBA_FARB0Q, + mca_and_or(chip, id, mca_i, MBA_FARB0Q, ~PPC_BITMASK(48, 53), PPC_PLACE(val, MBA_FARB0Q_CFG_RCD_PROTECTION_TIME, MBA_FARB0Q_CFG_RCD_PROTECTION_TIME_LEN)); @@ -89,15 +89,15 @@ static void fir_unmask(int mcs_i) * [17] FIR_MAINLINE_IUE = 0 // recoverable_error (0,1,0) * [37] MCA_FIR_MAINTENANCE_IUE = 0 // recoverable_error (0,1,0) */ - mca_and_or(id, mca_i, ECC_FIR_ACTION0, + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION0, ~(PPC_BITMASK(13, 17) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)), 0); - mca_and_or(id, mca_i, ECC_FIR_ACTION1, + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION1, ~(PPC_BITMASK(13, 17) | PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | PPC_BITMASK(36, 37)), (is_dd20 ? 0 : PPC_BIT(FIR_MAINLINE_UE)) | PPC_BIT(FIR_MAINLINE_IUE) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)); - mca_and_or(id, mca_i, ECC_FIR_MASK, + mca_and_or(chip, id, mca_i, ECC_FIR_MASK, ~(PPC_BITMASK(13, 17) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)), (is_dd20 ? 0 : PPC_BIT(FIR_MAINLINE_RCD))); @@ -113,13 +113,13 @@ static void fir_unmask(int mcs_i) * MC01.PORT0.SRQ.MBACALFIR_MASK * [13] MBACALFIR_PORT_FAIL = 0 // *recoverable_error (0,1,0) */ - mca_and_or(id, mca_i, MBACALFIR_ACTION0, + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION0, ~PPC_BIT(13), (is_dd20 ? PPC_BIT(MBACALFIR_PORT_FAIL) : 0)); - mca_and_or(id, mca_i, MBACALFIR_ACTION1, + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION1, ~PPC_BIT(MBACALFIR_PORT_FAIL), (is_dd20 ? 
0 : PPC_BIT(MBACALFIR_PORT_FAIL))); - mca_and_or(id, mca_i, MBACALFIR_MASK, ~PPC_BIT(MBACALFIR_PORT_FAIL), 0); + mca_and_or(chip, id, mca_i, MBACALFIR_MASK, ~PPC_BIT(MBACALFIR_PORT_FAIL), 0); /* * Enable port fail and RCD recovery @@ -129,13 +129,13 @@ static void fir_unmask(int mcs_i) * [54] MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY = 0 * [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 0 */ - mca_and_or(id, mca_i, MBA_FARB0Q, + mca_and_or(chip, id, mca_i, MBA_FARB0Q, ~(PPC_BIT(MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY) | PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE)), 0); } } -static void set_fifo_mode(int mcs_i, int fifo) +static void set_fifo_mode(uint8_t chip, int mcs_i, int fifo) { chiplet_id_t id = mcs_ids[mcs_i]; int mca_i; @@ -151,16 +151,16 @@ static void set_fifo_mode(int mcs_i, int fifo) if (!mem_data.mcs[mcs_i].mca[mca_i].functional) continue; - mca_and_or(id, mca_i, MBA_RRQ0Q, ~PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE), + mca_and_or(chip, id, mca_i, MBA_RRQ0Q, ~PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE), PPC_PLACE(fifo, MBA_RRQ0Q_CFG_RRQ_FIFO_MODE, MBA_RRQ0Q_CFG_RRQ_FIFO_MODE_LEN)); - mca_and_or(id, mca_i, MBA_WRQ0Q, ~PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE), + mca_and_or(chip, id, mca_i, MBA_WRQ0Q, ~PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE), PPC_PLACE(fifo, MBA_WRQ0Q_CFG_WRQ_FIFO_MODE, MBA_WRQ0Q_CFG_WRQ_FIFO_MODE_LEN)); } } -static void load_maint_pattern(int mcs_i, const uint64_t pat[16]) +static void load_maint_pattern(uint8_t chip, int mcs_i, const uint64_t pat[16]) { chiplet_id_t id = mcs_ids[mcs_i]; /* @@ -183,21 +183,21 @@ static void load_maint_pattern(int mcs_i, const uint64_t pat[16]) * [10] AACR_AUTOINC = 1 * [11] AACR_ECCGEN = 1 */ - mca_write(id, mca_i, AACR, + mca_write(chip, id, mca_i, AACR, PPC_PLACE(0x1F0, AACR_ADDRESS, AACR_ADDRESS_LEN) | PPC_BIT(AACR_AUTOINC) | PPC_BIT(AACR_ECCGEN)); for (i = 0; i < 16; i++) { /* MC01.PORT0.ECC64.SCOM.AADR - data */ - mca_write(id, mca_i, AADR, pat[i]); + mca_write(chip, id, mca_i, AADR, pat[i]); /* * Although ECC is generated by hardware, we still have to write to * this register to have address incremented. Comments say that * the data also wouldn't be written to RMW buffer without it. */ /* MC01.PORT0.ECC64.SCOM.AAER - ECC */ - mca_write(id, mca_i, AAER, 0); + mca_write(chip, id, mca_i, AAER, 0); } } } @@ -243,7 +243,7 @@ static const uint64_t patterns[][16] = { * NOTE: Except for setting address ranges, Hostboot repeats all of this for * every subtest, even though most of the registers don't change in between. */ -static void init_mcbist(int mcs_i) +static void init_mcbist(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; uint64_t val; @@ -282,7 +282,7 @@ static void init_mcbist(int mcs_i) write_scom_for_chiplet(id, MCBISTFIR, 0); /* Enable FIFO mode */ - set_fifo_mode(mcs_i, 1); + set_fifo_mode(chip, mcs_i, 1); /* * Hostboot clears address maps, but they are not used in maintenance @@ -354,7 +354,7 @@ static void init_mcbist(int mcs_i) * ALTER and the one above for WRITE. ALTER can write 128 different bytes, * while WRITE repeats a sequence of 64B twice. ALTER is ~3-4 times slower. */ - load_maint_pattern(mcs_i, patterns[0]); + load_maint_pattern(chip, mcs_i, patterns[0]); /* * Load the data rotate config and seeds @@ -434,6 +434,7 @@ static void init_mcbist(int mcs_i) void istep_14_1(void) { int mcs_i, mca_i; + uint8_t chip = 0; // TODO: support second CPU printk(BIOS_EMERG, "starting istep 14.1\n"); report_istep(14, 1); @@ -449,7 +450,7 @@ void istep_14_1(void) * to some extent, training done in 13.12 which is TODO. 
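The fir_unmask() routines in this and the neighbouring isteps repeat one idiom: for each FIR bit, ACTION0, ACTION1 and MASK together select the error class, and the "(x,y,z)" notes in the comments list the values written to those three registers in that order, e.g. "(0,0,0)" for a checkstop and "(0,1,0)" for a recoverable error, the cleared MASK bit being what actually unmasks the FIR. The helper below is a hypothetical condensation of that idiom, built only from calls these patches already use (mca_and_or(), PPC_BIT()); it is not code from the patches.

static void set_mca_fir_class(uint8_t chip, chiplet_id_t id, int mca_i,
			      uint64_t action0, uint64_t action1, uint64_t mask,
			      int bit, int a0, int a1, int m)
{
	/* Clear the bit in all three registers, then set it to the requested class. */
	mca_and_or(chip, id, mca_i, action0, ~PPC_BIT(bit), a0 ? PPC_BIT(bit) : 0);
	mca_and_or(chip, id, mca_i, action1, ~PPC_BIT(bit), a1 ? PPC_BIT(bit) : 0);
	mca_and_or(chip, id, mca_i, mask,    ~PPC_BIT(bit), m  ? PPC_BIT(bit) : 0);
}

/* Example: mark RCD parity errors as recoverable (0,1,0), as done above:
 * set_mca_fir_class(chip, id, mca_i, MBACALFIR_ACTION0, MBACALFIR_ACTION1,
 *                   MBACALFIR_MASK, MBACALFIR_RCD_PARITY_ERROR, 0, 1, 0);
 */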
Following the * assumptions made in previous isteps, skip this for now. */ - init_mcbist(mcs_i); + init_mcbist(chip, mcs_i); /* * Add subtests. @@ -532,10 +533,10 @@ void istep_14_1(void) printk(BIOS_ERR, "MCBIST%d took %ld us\n", mcs_i, total_time); /* Unmask mainline FIRs. */ - fir_unmask(mcs_i); + fir_unmask(chip, mcs_i); /* Turn off FIFO mode to improve performance. */ - set_fifo_mode(mcs_i, 0); + set_fifo_mode(chip, mcs_i, 0); } printk(BIOS_EMERG, "ending istep 14.1\n"); diff --git a/src/soc/ibm/power9/istep_14_2.c b/src/soc/ibm/power9/istep_14_2.c index 6a973b7c548..ff94e669fe2 100644 --- a/src/soc/ibm/power9/istep_14_2.c +++ b/src/soc/ibm/power9/istep_14_2.c @@ -15,11 +15,11 @@ #define MCS_MCMODE0_DISABLE_MC_SYNC 27 #define MCS_MCMODE0_DISABLE_MC_PAIR_SYNC 28 -static void thermal_init(void) +static void thermal_init(uint8_t chip) { for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) { for (size_t mca_i = 0; mca_i < MCA_PER_MCS; ++mca_i) { - mca_and_or(mcs_ids[mcs_i], mca_i, MCA_MBA_FARB3Q, + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MCA_MBA_FARB3Q, ~PPC_BITMASK(0, 45), PPC_BIT(10) | PPC_BIT(25) | PPC_BIT(37)); } @@ -52,10 +52,11 @@ static void throttle_sync(void) void istep_14_2(void) { + uint8_t chip = 0; // TODO: support second CPU report_istep(14, 2); printk(BIOS_EMERG, "starting istep 14.2\n"); - thermal_init(); + thermal_init(chip); throttle_sync(); printk(BIOS_EMERG, "ending istep 14.2\n"); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 14619d7e017..6973bb460d7 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -394,7 +394,7 @@ void main(void) report_istep(13, 5); // no-op istep_13_6(chips); report_istep(13, 7); // no-op - istep_13_8(); + istep_13_8(chips); istep_13_9(); istep_13_10(); istep_13_11(); From 2f715eef52368144c063d3a83c14b42d498a5b77 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:30:25 +0200 Subject: [PATCH 138/213] soc/power9/istep_13_9.c: update for second CPU Change-Id: Id097740fa73b7a8209e9598f594caaf61b3cd24f Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_9.c | 71 ++++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 43 insertions(+), 32 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 5c2a656a573..32eac2f55c1 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -301,7 +301,7 @@ void istep_13_3(uint8_t chips); void istep_13_4(uint8_t chips); void istep_13_6(uint8_t chips); void istep_13_8(uint8_t chips); // TODO: takes MSS data from 7.4 -void istep_13_9(void); +void istep_13_9(uint8_t chips); void istep_13_10(void); void istep_13_11(void); void istep_13_13(void); diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c index 9facf55f3f4..d2140c393cb 100644 --- a/src/soc/ibm/power9/istep_13_9.c +++ b/src/soc/ibm/power9/istep_13_9.c @@ -1,11 +1,23 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include "istep_13_scom.h" +/* + * 13.9 mss_ddr_phy_reset: Soft reset of DDR PHY macros + * + * - Lock DDR DLLs + * - Already configured DDR DLL in scaninit + * - Sends Soft DDR Phy reset + * - Kick off internal ZQ Cal + * - Perform any config that wasn't scanned in (TBD) + * - Nothing known here + */ + static int test_dll_calib_done(uint8_t chip, int mcs_i, int mca_i, bool *do_workaround) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -206,21 +218,21 @@ static 
void fir_unmask(uint8_t chip, int mcs_i) [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 0 // recoverable_error (0,1,0) [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 // checkstop (0,0,0) */ - scom_and_or_for_chiplet(id, MCBISTFIRACT0, - ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | - PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), - 0); - scom_and_or_for_chiplet(id, MCBISTFIRACT1, - ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | - PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE)); - scom_and_or_for_chiplet(id, MCBISTFIRMASK, - ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | - PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), - 0); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE)); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { if (!mem_data.mcs[mcs_i].mca[mca_i].functional) @@ -314,26 +326,12 @@ if ((dp_mca_read(chip, mcs_ids[mcs_i], dp, mca_i, scom) & PPC_BITMASK(56, 62)) = break; \ } -/* - * 13.9 mss_ddr_phy_reset: Soft reset of DDR PHY macros - * - * - Lock DDR DLLs - * - Already configured DDR DLL in scaninit - * - Sends Soft DDR Phy reset - * - Kick off internal ZQ Cal - * - Perform any config that wasn't scanned in (TBD) - * - Nothing known here - */ -void istep_13_9(void) +static void mss_ddr_phy_reset(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.9\n"); int mcs_i, mca_i, dp; - uint8_t chip = 0; // TODO: support second CPU long time; bool need_dll_workaround; - report_istep(13, 9); - /* * Most of this istep consists of: * 1. 
asserting reset bit or starting calibration @@ -801,6 +799,19 @@ void istep_13_9(void) check_during_phy_reset(chip, mcs_i); fir_unmask(chip, mcs_i); } +} + +void istep_13_9(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.9\n"); + report_istep(13, 9); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_ddr_phy_reset(chip); + } printk(BIOS_EMERG, "ending istep 13.9\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 6973bb460d7..35b0b2d0a6e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -395,7 +395,7 @@ void main(void) istep_13_6(chips); report_istep(13, 7); // no-op istep_13_8(chips); - istep_13_9(); + istep_13_9(chips); istep_13_10(); istep_13_11(); report_istep(13, 12); // optional, not yet implemented From 3b0b68ab4f372dc1d88347e238bcd2a93d022f37 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:34:41 +0200 Subject: [PATCH 139/213] soc/power9/istep_13_10.c: update for second CPU Change-Id: I025edcee50bcbca3906c5a76f051e950865c08f0 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_10.c | 127 ++++++++++++++++++------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 76 insertions(+), 55 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 32eac2f55c1..ccd750b3e6b 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -302,6 +302,6 @@ void istep_13_4(uint8_t chips); void istep_13_6(uint8_t chips); void istep_13_8(uint8_t chips); // TODO: takes MSS data from 7.4 void istep_13_9(uint8_t chips); -void istep_13_10(void); +void istep_13_10(uint8_t chips); void istep_13_11(void); void istep_13_13(void); diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index 3900ac040bf..aa71c0e35ec 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include @@ -10,6 +11,28 @@ #define SPD_I2C_BUS 3 +/* + * 13.10 mss_draminit: Dram initialize + * + * a) p9_mss_draminit.C (mcbist) -- Nimbus + * b) p9c_mss_draminit.C (mba) -- Cumulus + * - RCD parity errors are checked before logging other errors - HWP will + * exit with RC + * - De-assert dram reset + * - De-assert bit (Scom) that forces mem clock low - dram clocks start + * - Raise CKE + * - Load RCD Control Words + * - Load MRS - for each dimm pair/ports/rank + * - ODT Values + * - MR0-MR6 + * c) Check for attentions (even if HWP has error) + * - FW + * - Call PRD + * - If finds and error, commit HWP RC as informational + * - Else commit HWP RC as normal + * - Trigger reconfig loop is anything was deconfigured + */ + static void draminit_cke_helper(uint8_t chip, chiplet_id_t id, int mca_i) { /* @@ -27,11 +50,12 @@ static void draminit_cke_helper(uint8_t chip, chiplet_id_t id, int mca_i) ccs_execute(chip, id, mca_i); } -static void rcd_load(mca_data_t *mca, int d) +static void rcd_load(uint8_t chip, mca_data_t *mca, int d) { uint8_t val; rdimm_data_t *dimm = &mca->dimm[d]; uint8_t *spd = dimm->spd; + unsigned int spd_bus = SPD_I2C_BUS + chip * 4; /* Raw card specifications are JEDEC documents MODULE4.20.28.x, where x is A-E */ @@ -50,7 +74,7 @@ static void rcd_load(mca_data_t *mca, int d) * (spd[131] & 0x1F) is 0x02 for C and 0x03 for D, this line tests for both */ val = ((spd[131] & 0x1E) == 0x02) ? 
0xC2 : 0x02; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC00_01, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC00_01, val); /* F0RC02 = @@ -64,7 +88,7 @@ static void rcd_load(mca_data_t *mca, int d) val = spd[137] & 0xF0; // F0RC03 if (dimm->density != DENSITY_16Gb || dimm->width != WIDTH_x4) val |= 1; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC02_03, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC02_03, val); /* F0RC04 = @@ -80,7 +104,7 @@ static void rcd_load(mca_data_t *mca, int d) /* First read both nibbles as they are in SPD, then swap pairs of bit fields */ val = (spd[137] & 0x0F) | ((spd[138] & 0x0F) << 4); val = ((val & 0x33) << 2) | ((val & 0xCC) >> 2); - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC04_05, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC04_05, val); /* F0RC06 = 0xf // This is a command register, either don't touch it or use NOP (F) @@ -110,7 +134,7 @@ static void rcd_load(mca_data_t *mca, int d) if (dimm->density != DENSITY_16Gb || dimm->width != WIDTH_x4) val |= 8; val |= 0xC0; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC08_09, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC08_09, val); /* F0RC0A = @@ -125,7 +149,7 @@ static void rcd_load(mca_data_t *mca, int d) mem_data.speed == 2133 ? 2 : mem_data.speed == 2400 ? 3 : 4; val |= 0xE0; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC0A_0B, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0A_0B, val); /* F0RC0C = 0 // Normal operating mode @@ -140,14 +164,14 @@ static void rcd_load(mca_data_t *mca, int d) val = 0x40; if (spd[136]) val |= 0x80; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC0C_0D, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0C_0D, val); /* F0RC0E = 0xd // Parity enable, ALERT_n assertion and re-enable F0RC0F = 0 // Normal mode */ val = 0x0D; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC0E_0F, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0E_0F, val); /* F0RC1x = 0 // Normal mode, VDD/2 @@ -164,7 +188,7 @@ static void rcd_load(mca_data_t *mca, int d) val = mem_data.speed == 1866 ? 0x1F : mem_data.speed == 2133 ? 0x2C : mem_data.speed == 2400 ? 0x39 : 0x47; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC3x, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC3x, val); /* F0RC4x = 0 // Should not be touched at all, it is used to access different function spaces @@ -189,7 +213,7 @@ static void rcd_load(mca_data_t *mca, int d) val = (dimm->mranks == dimm->log_ranks) ? 7 : (dimm->log_ranks / dimm->mranks) == 2 ? 6 : (dimm->log_ranks / dimm->mranks) == 4 ? 4 : 0; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RCBx, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RCBx, val); /* * After all RCWs are set, DRAM gets reset "to ensure it is reset properly". @@ -206,10 +230,10 @@ static void rcd_load(mca_data_t *mca, int d) delay(8000 memclocks) */ val = 0x2; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); delay_nck(8000); val = 0x3; - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); delay_nck(8000); /* @@ -223,10 +247,10 @@ static void rcd_load(mca_data_t *mca, int d) * register is changed to NOP (was "Clear DRAM Reset" in dump). 
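The change threaded through rcd_load() here (and through mss_draminit() below) is the bus number: the RCD and SPD devices of the second processor are reached at SPD_I2C_BUS + chip * 4, i.e. four I2C buses above the first processor's. A tiny helper, hypothetical and not part of the patch, that keeps this assumption in one place:

static inline unsigned int spd_i2c_bus(uint8_t chip)
{
	/* Stride of four I2C buses per processor, as used by rcd_load()/mss_draminit(). */
	return SPD_I2C_BUS + chip * 4;
}

/* Usage would mirror the calls above, e.g.:
 * rcd_write_reg(spd_i2c_bus(chip), mca->dimm[d].rcd_i2c_addr, F0RC0E_0F, val);
 */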
*/ /* - rcd_write_32b(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC00_01, 0x0201000f); - rcd_write_32b(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC08_09, 0xcbe4400d); - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RC3x, 0x47); - rcd_write_reg(SPD_I2C_BUS, mca->dimm[d].rcd_i2c_addr, F0RCBx, 0x07); + rcd_write_32b(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC00_01, 0x0201000f); + rcd_write_32b(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC08_09, 0xcbe4400d); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC3x, 0x47); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RCBx, 0x07); */ } @@ -376,34 +400,10 @@ static void mrs_load(uint8_t chip, int mcs_i, int mca_i, int d) ccs_execute(chip, id, mca_i); } -/* - * 13.10 mss_draminit: Dram initialize - * - * a) p9_mss_draminit.C (mcbist) -- Nimbus - * b) p9c_mss_draminit.C (mba) -- Cumulus - * - RCD parity errors are checked before logging other errors - HWP will - * exit with RC - * - De-assert dram reset - * - De-assert bit (Scom) that forces mem clock low - dram clocks start - * - Raise CKE - * - Load RCD Control Words - * - Load MRS - for each dimm pair/ports/rank - * - ODT Values - * - MR0-MR6 - * c) Check for attentions (even if HWP has error) - * - FW - * - Call PRD - * - If finds and error, commit HWP RC as informational - * - Else commit HWP RC as normal - * - Trigger reconfig loop is anything was deconfigured - */ -void istep_13_10(void) +static void mss_draminit(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.10\n"); + unsigned int spd_bus = SPD_I2C_BUS + chip * 4; int mcs_i, mca_i, dimm; - uint8_t chip = 0; // TODO: support second CPU - - report_istep(13, 10); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { if (!mem_data.mcs[mcs_i].functional) @@ -422,13 +422,17 @@ void istep_13_10(void) [8-23] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT = 0xffff [30-31] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT = 3 */ - scom_and_or_for_chiplet(mcs_ids[mcs_i], CCS_MODEQ, - ~(PPC_BIT(CCS_MODEQ_CCS_STOP_ON_ERR) | - PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), - PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | - PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | - PPC_PLACE(0xFFFF, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN) | - PPC_PLACE(0x3, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT, CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); + rscom_and_or_for_chiplet(chip, mcs_ids[mcs_i], CCS_MODEQ, + ~(PPC_BIT(CCS_MODEQ_CCS_STOP_ON_ERR) | + PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), + PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | + PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | + PPC_PLACE(0xFFFF, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN) | + PPC_PLACE(0x3, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -450,8 +454,12 @@ void istep_13_10(void) ~PPC_BIT(MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE), PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL)); mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BITMASK(0, 3), - PPC_PLACE(0x1, MBA_FARB5Q_CFG_DDR_DPHY_NCLK, MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN) | - PPC_PLACE(0x2, MBA_FARB5Q_CFG_DDR_DPHY_PCLK, MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN)); + PPC_PLACE(0x1, + MBA_FARB5Q_CFG_DDR_DPHY_NCLK, + MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN) | + PPC_PLACE(0x2, + MBA_FARB5Q_CFG_DDR_DPHY_PCLK, + MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN)); mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, PPC_BIT(MBA_FARB5Q_CFG_DDR_RESETN)); @@ -511,13 +519,26 @@ void istep_13_10(void) if (!mca->dimm[dimm].present) continue; - 
rcd_load(mca, dimm); + rcd_load(chip, mca, dimm); // bcw_load(); /* LRDIMM only */ mrs_load(chip, mcs_i, mca_i, dimm); - dump_rcd(SPD_I2C_BUS, mca->dimm[dimm].rcd_i2c_addr); + dump_rcd(spd_bus, mca->dimm[dimm].rcd_i2c_addr); } } } +} + +void istep_13_10(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.10\n"); + report_istep(13, 10); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_draminit(chip); + } printk(BIOS_EMERG, "ending istep 13.10\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 35b0b2d0a6e..75d6c38f9b5 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -396,7 +396,7 @@ void main(void) report_istep(13, 7); // no-op istep_13_8(chips); istep_13_9(chips); - istep_13_10(); + istep_13_10(chips); istep_13_11(); report_istep(13, 12); // optional, not yet implemented istep_13_13(); From 148b02084c09f232c1fa81e2812033f03700d05a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:38:38 +0200 Subject: [PATCH 140/213] soc/power9/istep_13_11.c: update for second CPU Change-Id: I7e8a29a24bd34775bc852feeb6d4c396eb221b62 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_11.c | 81 ++++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 48 insertions(+), 37 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index ccd750b3e6b..f3e3b860cfc 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -303,5 +303,5 @@ void istep_13_6(uint8_t chips); void istep_13_8(uint8_t chips); // TODO: takes MSS data from 7.4 void istep_13_9(uint8_t chips); void istep_13_10(uint8_t chips); -void istep_13_11(void); +void istep_13_11(uint8_t chips); void istep_13_13(void); diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 5923fd38c79..80b7580914d 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -1,12 +1,28 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include #include "istep_13_scom.h" +/* + * 13.11 mss_draminit_training: Dram training + * + * a) p9_mss_draminit_training.C (mcbist) -- Nimbus + * b) p9c_mss_draminit_training.C (mba) -- Cumulus + * - Prior to running this procedure will apply known DQ bad bits to prevent + * them from participating in training. 
This information is extracted from + * the bad DQ attribute and applied to Hardware + * - Marks the calibration fail array + * - External ZQ Calibration + * - Execute initial dram calibration (7 step - handled by HW) + * - This procedure will update the bad DQ attribute for each dimm based on + * its findings + */ + static void setup_and_execute_zqcal(uint8_t chip, int mcs_i, int mca_i, int d) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -112,7 +128,7 @@ static void clear_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), 0); } -static void dump_cal_errors(int mcs_i, int mca_i) +static void dump_cal_errors(uint8_t chip, int mcs_i, int mca_i) { #if CONFIG(DEBUG_RAM_SETUP) chiplet_id_t id = mcs_ids[mcs_i]; @@ -967,7 +983,7 @@ static void dispatch_step(uint8_t chip, struct phy_step *step, int mcs_i, int mc if (step->post) step->post(chip, mcs_i, mca_i, rp, ranks_present); - dump_cal_errors(mcs_i, mca_i); + dump_cal_errors(chip, mcs_i, mca_i); if (mca_read(chip, mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0) != 0) die("%s failed, aborting\n", step->name); @@ -1011,15 +1027,15 @@ static int process_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) * calibration engine itself. Check for latter. */ /* IOM0.IOM_PHY0_DDRPHY_FIR_REG */ - if (read_scom_for_chiplet(id, IOM_PHY0_DDRPHY_FIR_REG) & + if (read_rscom_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG) & PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2)) { /* * "Clear the PHY FIR ERROR 2 bit so we don't keep failing training and * training advance on this port" */ - scom_and_or_for_chiplet(id, IOM_PHY0_DDRPHY_FIR_REG, - ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), - 0); + rscom_and_or_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG, + ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), + 0); return 1; } @@ -1147,15 +1163,15 @@ static void fir_unmask(uint8_t chip, int mcs_i) MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 0 //recoverable_error (0,1,0) */ - scom_and_or_for_chiplet(id, MCBISTFIRACT0, - ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), - 0); - scom_and_or_for_chiplet(id, MCBISTFIRACT1, - ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), - PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT)); - scom_and_or_for_chiplet(id, MCBISTFIRMASK, - ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), - 0); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT)); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { if (!mem_data.mcs[mcs_i].mca[mca_i].functional) @@ -1221,28 +1237,10 @@ static void fir_unmask(uint8_t chip, int mcs_i) } } -/* - * 13.11 mss_draminit_training: Dram training - * - * a) p9_mss_draminit_training.C (mcbist) -- Nimbus - * b) p9c_mss_draminit_training.C (mba) -- Cumulus - * - Prior to running this procedure will apply known DQ bad bits to prevent - * them from participating in training. 
This information is extracted from - * the bad DQ attribute and applied to Hardware - * - Marks the calibration fail array - * - External ZQ Calibration - * - Execute initial dram calibration (7 step - handled by HW) - * - This procedure will update the bad DQ attribute for each dimm based on - * its findings - */ -void istep_13_11(void) +static void mss_draminit_training(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.11\n"); int mcs_i, mca_i, dimm, rp; enum rank_selection ranks_present; - uint8_t chip = 0; // TODO: support second CPU - - report_istep(13, 11); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { if (!mem_data.mcs[mcs_i].functional) @@ -1331,7 +1329,7 @@ void istep_13_11(void) if (!(ranks_present & (1 << rp))) continue; - dump_cal_errors(mcs_i, mca_i); + dump_cal_errors(chip, mcs_i, mca_i); for (int i = 0; i < ARRAY_SIZE(steps); i++) dispatch_step(chip, &steps[i], mcs_i, mca_i, rp, @@ -1360,6 +1358,19 @@ void istep_13_11(void) fir_unmask(chip, mcs_i); } +} + +void istep_13_11(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 13.11\n"); + report_istep(13, 11); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_draminit_training(chip); + } printk(BIOS_EMERG, "ending istep 13.11\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 75d6c38f9b5..497b76f9a34 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -397,7 +397,7 @@ void main(void) istep_13_8(chips); istep_13_9(chips); istep_13_10(chips); - istep_13_11(); + istep_13_11(chips); report_istep(13, 12); // optional, not yet implemented istep_13_13(); From 65c037357f5d4ee129bbc685d3cf14a7f4fe563b Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:41:41 +0200 Subject: [PATCH 141/213] soc/power9/istep_13_13.c: update for second CPU Change-Id: I88c5e144f8390aa59238fedba6c34a7f3e18890e Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 2 +- src/soc/ibm/power9/istep_13_13.c | 83 ++++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 49 insertions(+), 38 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index f3e3b860cfc..6d518fbbd5d 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -304,4 +304,4 @@ void istep_13_8(uint8_t chips); // TODO: takes MSS data from 7.4 void istep_13_9(uint8_t chips); void istep_13_10(uint8_t chips); void istep_13_11(uint8_t chips); -void istep_13_13(void); +void istep_13_13(uint8_t chips); diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index c5382ffe0af..fbbba49834e 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -1,10 +1,23 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include "istep_13_scom.h" +/* + * 13.13 mss_draminit_mc: Hand off control to MC + * + * a) p9_mss_draminit_mc.C (mcbist) - Nimbus + * b) p9c_mss_draminit_mc.C (membuf) -Cumulus + * - P9 Cumulus -- Set IML complete bit in centaur + * - Start main refresh engine + * - Refresh, periodic calibration, power controls + * - Turn on ECC checking on memory accesses + * - Note at this point memory FIRs can be monitored by PRD + */ + /* * Set up the MC port <-> DIMM address translation registers. 
* @@ -343,7 +356,7 @@ static const uint64_t xlt_tables[][3] = { /* TODO: 3DS */ }; -static void setup_xlate_map(int mcs_i, int mca_i) +static void setup_xlate_map(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; @@ -391,16 +404,16 @@ static void setup_xlate_map(int mcs_i, int mca_i) } /* MCS_PORT02_MCP0XLT0 (?) */ - write_scom_for_chiplet(nest, 0x05010820 + mca_i * mca_mul, - dimms_rank_config(mca, xlt_tables[cfg][0], update_d)); + write_rscom_for_chiplet(chip, nest, 0x05010820 + mca_i * mca_mul, + dimms_rank_config(mca, xlt_tables[cfg][0], update_d)); /* MCS_PORT02_MCP0XLT1 (?) */ - write_scom_for_chiplet(nest, 0x05010821 + mca_i * mca_mul, - xlt_tables[cfg][1]); + write_rscom_for_chiplet(chip, nest, 0x05010821 + mca_i * mca_mul, + xlt_tables[cfg][1]); /* MCS_PORT02_MCP0XLT2 (?) */ - write_scom_for_chiplet(nest, 0x05010822 + mca_i * mca_mul, - xlt_tables[cfg][2]); + write_rscom_for_chiplet(chip, nest, 0x05010822 + mca_i * mca_mul, + xlt_tables[cfg][2]); } @@ -478,18 +491,18 @@ static void fir_unmask(uint8_t chip, int mcs_i) * TODO: check if this works with bootblock in SEEPROM too. We don't have * interrupt handlers set up in that case. */ - scom_and_or_for_chiplet(id, MCBISTFIRACT0, - ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)); - scom_and_or_for_chiplet(id, MCBISTFIRACT1, - ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), - PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC)); - scom_and_or_for_chiplet(id, MCBISTFIRMASK, - ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), - 0); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC)); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { if (!mem_data.mcs[mcs_i].mca[mca_i].functional) @@ -590,24 +603,9 @@ static void fir_unmask(uint8_t chip, int mcs_i) } } -/* - * 13.13 mss_draminit_mc: Hand off control to MC - * - * a) p9_mss_draminit_mc.C (mcbist) - Nimbus - * b) p9c_mss_draminit_mc.C (membuf) -Cumulus - * - P9 Cumulus -- Set IML complete bit in centaur - * - Start main refresh engine - * - Refresh, periodic calibration, power controls - * - Turn on ECC checking on memory accesses - * - Note at this point memory FIRs can be monitored by PRD - */ -void istep_13_13(void) +static void mss_draminit_mc(uint8_t chip) { - printk(BIOS_EMERG, "starting istep 13.13\n"); int mcs_i, mca_i; - uint8_t chip = 0; // TODO: support second CPU - - report_istep(13, 13); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { /* No need to initialize a non-functional MCS */ @@ -621,7 +619,7 @@ void istep_13_13(void) if (!mca->functional) continue; - setup_xlate_map(mcs_i, mca_i); + setup_xlate_map(chip, mcs_i, mca_i); /* Set up read pointer delay */ /* MC01.PORT0.ECC64.SCOM.RECR @@ -716,6 +714,19 @@ void istep_13_13(void) fir_unmask(chip, mcs_i); } +} + +void istep_13_13(uint8_t chips) +{ + uint8_t 
chip; + + printk(BIOS_EMERG, "starting istep 13.13\n"); + report_istep(13, 13); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_draminit_mc(chip); + } printk(BIOS_EMERG, "ending istep 13.13\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 497b76f9a34..5c72e1bbe6b 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -399,7 +399,7 @@ void main(void) istep_13_10(chips); istep_13_11(chips); report_istep(13, 12); // optional, not yet implemented - istep_13_13(); + istep_13_13(chips); istep_14_1(); istep_14_2(); From 46b50d9937d657051511c5ba5904cedc92169e2f Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:46:04 +0200 Subject: [PATCH 142/213] soc/power9/istep_14_1.c: update for second CPU Change-Id: I558939dbc05fa3103310799f7a0bf89393085e6e Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_14.h | 2 +- src/soc/ibm/power9/istep_14_1.c | 128 +++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 72 insertions(+), 60 deletions(-) diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index e7e9804c075..e982d5c32c0 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -7,7 +7,7 @@ struct pci_info; -void istep_14_1(void); +void istep_14_1(uint8_t chips); void istep_14_2(void); void istep_14_3(uint8_t chips, const struct pci_info *pci_info); void istep_14_5(void); diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index cf6b79c98f4..fa3a74c3288 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -9,6 +10,34 @@ #include "istep_13_scom.h" #include "mcbist.h" +/* + * 14.1 mss_memdiag: Mainstore Pattern Testing + * + * - The following step documents the generalities of this step + * - In FW PRD will control mem diags via interrupts. It doesn't use + * mss_memdiags.C directly but the HWP subroutines + * - In cronus it will execute mss_memdiags.C directly + * b) p9_mss_memdiags.C (mcbist)--Nimbus + * c) p9_mss_memdiags.C (mba) -- Cumulus + * - Prior to running this procedure will apply known DQ bad bits to prevent + * them from participating in training. 
This information is extracted from + * the bad DQ attribute and applied to Hardware + * - Nimbus uses the mcbist engine + * - Still supports superfast read/init/scrub + * - Cumulus/Centaur uses the scrub engine + * - Modes: + * - Minimal: Write-only with 0's + * - Standard: Write of 0's followed by a Read + * - Medium: Write-followed by Read, 4 patterns, last of 0's + * - Max: Write-followed by Read, 9 patterns, last of 0's + * - Run on the host + * - This procedure will update the bad DQ attribute for each dimm based on + * its findings + * - At the end of this procedure sets FIR masks correctly for runtime + * analysis + * - All subsequent repairs are considered runtime issues + */ + static void fir_unmask(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; @@ -18,8 +47,8 @@ static void fir_unmask(uint8_t chip, int mcs_i) MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 // checkstop (0,0,0) */ - scom_and_or_for_chiplet(id, MCBISTFIRACT1, - ~PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC), 0); + rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC), 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { uint64_t val; @@ -253,11 +282,11 @@ static void init_mcbist(uint8_t chip, int mcs_i) /* MC01.MCBIST.MBA_SCOMFIR.MCBSA0Q * [0-37] MCBSA0Q_CFG_START_ADDR_0 */ - write_scom_for_chiplet(id, MCBSA0Q, 0); + write_rscom_for_chiplet(chip, id, MCBSA0Q, 0); /* MC01.MCBIST.MBA_SCOMFIR.MCBEA0Q * [0-37] MCBSA0Q_CFG_END_ADDR_0 */ - write_scom_for_chiplet(id, MCBEA0Q, PPC_BITMASK(3, 37)); + write_rscom_for_chiplet(chip, id, MCBEA0Q, PPC_BITMASK(3, 37)); /* Hostboot stops MCBIST engine, die() if it is already started instead */ /* TODO: check all bits (MCBIST was ever started) or just "in progress"? 
*/ @@ -266,7 +295,7 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [1] MCB_CNTLSTATQ_MCB_DONE * [2] MCB_CNTLSTATQ_MCB_FAIL */ - if ((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) != 0) + if ((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) != 0) die("MCBIST started already (%#16.16llx), this shouldn't happen\n", val); /* @@ -276,10 +305,10 @@ static void init_mcbist(uint8_t chip, int mcs_i) * - MBS Memory Scrub/Read Error Count Register 1 - MC01.MCBIST.MBA_SCOMFIR.MBSEC1Q * - MCBIST Fault Isolation Register - MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRQ */ - write_scom_for_chiplet(id, MCBSTATQ, 0); - write_scom_for_chiplet(id, MBSEC0Q, 0); - write_scom_for_chiplet(id, MBSEC1Q, 0); - write_scom_for_chiplet(id, MCBISTFIR, 0); + write_rscom_for_chiplet(chip, id, MCBSTATQ, 0); + write_rscom_for_chiplet(chip, id, MBSEC0Q, 0); + write_rscom_for_chiplet(chip, id, MBSEC1Q, 0); + write_rscom_for_chiplet(chip, id, MCBISTFIR, 0); /* Enable FIFO mode */ set_fifo_mode(chip, mcs_i, 1); @@ -297,9 +326,9 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [10] MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN = 1 * [12] MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES = 1 */ - write_scom_for_chiplet(id, MCBAGRAQ, - PPC_BIT(MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN) | - PPC_BIT(MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES)); + write_rscom_for_chiplet(chip, id, MCBAGRAQ, + PPC_BIT(MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN) | + PPC_BIT(MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES)); /* * Configure MCBIST @@ -322,16 +351,16 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [57-58] MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE = 0 for patterns, 0b10 for scrub * [63] MCBCFGQ_CFG_ENABLE_HOST_ATTN = see above */ - write_scom_for_chiplet(id, MCBCFGQ, - PPC_PLACE(0x2, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE, - MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN)); + write_rscom_for_chiplet(chip, id, MCBCFGQ, + PPC_PLACE(0x2, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE, + MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN)); /* * This sets up memory parameters, mostly gaps between commands. For as fast * as possible, gaps of 0 are configured here. */ /* MC01.MCBIST.MBA_SCOMFIR.MCBPARMQ */ - write_scom_for_chiplet(id, MCBPARMQ, 0); + write_rscom_for_chiplet(chip, id, MCBPARMQ, 0); /* * Steps done from this point should be moved out of this function, they @@ -342,7 +371,7 @@ static void init_mcbist(uint8_t chip, int mcs_i) /* Data pattern: 8 data registers + 1 ECC register */ /* TODO: different patterns can be used */ for (i = 0; i < 9; i++) { - write_scom_for_chiplet(id, MCBFD0Q + i, patterns[0][i]); + write_rscom_for_chiplet(chip, id, MCBFD0Q + i, patterns[0][i]); } /* TODO: random seeds */ @@ -365,9 +394,9 @@ static void init_mcbist(uint8_t chip, int mcs_i) * inverting. 
*/ /* MC01.MCBIST.MBA_SCOMFIR.MCBDRCRQ */ - write_scom_for_chiplet(id, MCBDRCRQ, 0); + write_rscom_for_chiplet(chip, id, MCBDRCRQ, 0); /* MC01.MCBIST.MBA_SCOMFIR.MCBDRSRQ */ - write_scom_for_chiplet(id, MCBDRSRQ, 0); + write_rscom_for_chiplet(chip, id, MCBDRSRQ, 0); /* * The following step may be done just once, as long as the same set of @@ -395,48 +424,18 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [56] MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE } counts all NCE * [57] MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE / */ - write_scom_for_chiplet(id, MBSTRQ, PPC_BITMASK(0, 31) | - PPC_BIT(MBSTRQ_CFG_PAUSE_ON_MPE) | - PPC_BIT(MBSTRQ_CFG_PAUSE_ON_UE) | - PPC_BIT(MBSTRQ_CFG_PAUSE_ON_AUE) | - PPC_BIT(MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE) | - PPC_BIT(MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE) | - PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); + write_rscom_for_chiplet(chip, id, MBSTRQ, PPC_BITMASK(0, 31) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_MPE) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_UE) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_AUE) | + PPC_BIT(MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE) | + PPC_BIT(MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE) | + PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); } -/* - * 14.1 mss_memdiag: Mainstore Pattern Testing - * - * - The following step documents the generalities of this step - * - In FW PRD will control mem diags via interrupts. It doesn't use - * mss_memdiags.C directly but the HWP subroutines - * - In cronus it will execute mss_memdiags.C directly - * b) p9_mss_memdiags.C (mcbist)--Nimbus - * c) p9_mss_memdiags.C (mba) -- Cumulus - * - Prior to running this procedure will apply known DQ bad bits to prevent - * them from participating in training. This information is extracted from - * the bad DQ attribute and applied to Hardware - * - Nimbus uses the mcbist engine - * - Still supports superfast read/init/scrub - * - Cumulus/Centaur uses the scrub engine - * - Modes: - * - Minimal: Write-only with 0's - * - Standard: Write of 0's followed by a Read - * - Medium: Write-followed by Read, 4 patterns, last of 0's - * - Max: Write-followed by Read, 9 patterns, last of 0's - * - Run on the host - * - This procedure will update the bad DQ attribute for each dimm based on - * its findings - * - At the end of this procedure sets FIR masks correctly for runtime - * analysis - * - All subsequent repairs are considered runtime issues - */ -void istep_14_1(void) +static void mss_memdiag(uint8_t chip) { int mcs_i, mca_i; - uint8_t chip = 0; // TODO: support second CPU - printk(BIOS_EMERG, "starting istep 14.1\n"); - report_istep(14, 1); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { if (!mem_data.mcs[mcs_i].functional) @@ -527,7 +526,7 @@ void istep_14_1(void) /* TODO: dump error/status registers on failure */ if (!time) die("MCBIST%d times out (%#16.16llx)\n", mcs_i, - read_scom_for_chiplet(mcs_ids[mcs_i], MCB_CNTLSTATQ)); + read_rscom_for_chiplet(chip, mcs_ids[mcs_i], MCB_CNTLSTATQ)); total_time += time; printk(BIOS_ERR, "MCBIST%d took %ld us\n", mcs_i, total_time); @@ -538,6 +537,19 @@ void istep_14_1(void) /* Turn off FIFO mode to improve performance. 
*/ set_fifo_mode(chip, mcs_i, 0); } +} + +void istep_14_1(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 14.1\n"); + report_istep(14, 1); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_memdiag(chip); + } printk(BIOS_EMERG, "ending istep 14.1\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 5c72e1bbe6b..ac53af73026 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -401,7 +401,7 @@ void main(void) report_istep(13, 12); // optional, not yet implemented istep_13_13(chips); - istep_14_1(); + istep_14_1(chips); istep_14_2(); istep_14_3(chips, pci_info); report_istep(14, 4); // no-op From 292b71b0cf744d4a0a7de9f2c59ad8530ccbe0f3 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 6 Feb 2022 01:24:54 +0200 Subject: [PATCH 143/213] soc/power9/mcbist.c: update for second CPU Change-Id: I3580a3a853a5046d6b630e50958bc7783e5fbf02 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/istep_14_1.c | 8 +++---- src/soc/ibm/power9/mcbist.c | 37 +++++++++++++++++---------------- src/soc/ibm/power9/mcbist.h | 10 +++++---- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index fa3a74c3288..ba7292fad66 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -471,14 +471,14 @@ static void mss_memdiag(uint8_t chip) if (!mca->dimm[dimm].present) continue; - add_fixed_pattern_write(mcs_i, mca_i*2 + dimm); + add_fixed_pattern_write(chip, mcs_i, mca_i*2 + dimm); /* * Hostboot uses separate program for scrub due to different * pausing conditions. Having it in the same program seems to * be working. */ if (!CONFIG(SKIP_INITIAL_ECC_SCRUB)) - add_scrub(mcs_i, mca_i*2 + dimm); + add_scrub(chip, mcs_i, mca_i*2 + dimm); } } @@ -500,7 +500,7 @@ static void mss_memdiag(uint8_t chip) * requirements) * - module family (but we don't support anything but RDIMM anyway) */ - mcbist_execute(mcs_i); + mcbist_execute(chip, mcs_i); } long total_time = 0; @@ -521,7 +521,7 @@ static void mss_memdiag(uint8_t chip) * (mcbist_is_done(0) || mcbist_is_done(1)) instead? Maybe even unmask * FIRs and set FIFO mode off inside mcbist_is_done()? 
*/ - long time = wait_us(1000*1000*60, (udelay(1), mcbist_is_done(mcs_i))); + long time = wait_us(1000*1000*60, (udelay(1), mcbist_is_done(chip, mcs_i))); /* TODO: dump error/status registers on failure */ if (!time) diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c index 7350de83b25..406f0714f70 100644 --- a/src/soc/ibm/power9/mcbist.c +++ b/src/soc/ibm/power9/mcbist.c @@ -59,7 +59,7 @@ enum op_type GOTO_SUBTEST_N = 0x7000, }; -static void commit_mcbist_memreg_cache(int mcs_i) +static void commit_mcbist_memreg_cache(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; int reg = (tests - 1) / MCBIST_TESTS_PER_REG; @@ -71,15 +71,15 @@ static void commit_mcbist_memreg_cache(int mcs_i) die("Too many MCBIST instructions added\n"); /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ - write_scom_for_chiplet(id, MCBMR0Q + reg, mcbist_memreg_cache); + write_rscom_for_chiplet(chip, id, MCBMR0Q + reg, mcbist_memreg_cache); mcbist_memreg_cache = 0; } -static void add_mcbist_test(int mcs_i, uint16_t test) +static void add_mcbist_test(uint8_t chip, int mcs_i, uint16_t test) { int test_i = tests % MCBIST_TESTS_PER_REG; if (test_i == 0 && tests != 0) - commit_mcbist_memreg_cache(mcs_i); + commit_mcbist_memreg_cache(chip, mcs_i); /* This assumes cache is properly cleared. */ mcbist_memreg_cache |= PPC_PLACE(test, test_i*16, 16); @@ -122,29 +122,29 @@ static void add_mcbist_test(int mcs_i, uint16_t test) * * TL;DR: ECC scrub is read operation with discarded results. */ -void add_scrub(int mcs_i, int port_dimm) +void add_scrub(uint8_t chip, int mcs_i, int port_dimm) { uint16_t test = READ | ECC_MODE | (port_dimm << 9); - add_mcbist_test(mcs_i, test); + add_mcbist_test(chip, mcs_i, test); } -void add_fixed_pattern_write(int mcs_i, int port_dimm) +void add_fixed_pattern_write(uint8_t chip, int mcs_i, int port_dimm) { /* Use ALTER instead of WRITE to use maintenance pattern. ALTER is slow. */ uint16_t test = WRITE | FIXED_DATA_MODE | ECC_MODE | (port_dimm << 9); - add_mcbist_test(mcs_i, test); + add_mcbist_test(chip, mcs_i, test); } /* -static void add_random_pattern_write(int port_dimm) +static void add_random_pattern_write(uint8_t chip, int port_dimm) { uint16_t test = WRITE | RAND_FWD_MAINT | ECC_MODE | (port_dimm << 9); - add_mcbist_test(test); + add_mcbist_test(chip, test); } */ /* TODO: calculate initial delays and timeouts */ -void mcbist_execute(int mcs_i) +void mcbist_execute(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; /* This is index of last instruction, not the new one. */ @@ -164,7 +164,8 @@ void mcbist_execute(int mcs_i) /* Check if in progress */ /* TODO: we could force it to stop, but dying will help with debugging */ - if ((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) + if ((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & + PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) die("MCBIST in progress already (%#16.16llx), this shouldn't happen\n", val); /* @@ -174,22 +175,22 @@ void mcbist_execute(int mcs_i) * named in the documentation. */ mcbist_memreg_cache |= PPC_BIT(13 + test_i*16); - commit_mcbist_memreg_cache(mcs_i); + commit_mcbist_memreg_cache(chip, mcs_i); /* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ * [0] MCB_CNTLQ_MCB_START */ - scom_and_or_for_chiplet(id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START)); + rscom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START)); /* Wait for MCBIST to start. Test for IP and DONE, it may finish early. 
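For readers following add_mcbist_test() and commit_mcbist_memreg_cache() above: an MCBIST program is just consecutive 16-bit subtest descriptors packed into the 64-bit MCBMRnQ registers, slot k landing in IBM bits [16k .. 16k+15] via PPC_PLACE(test, k*16, 16) (MCBIST_TESTS_PER_REG slots per register, presumably four), with the port/DIMM selector that callers compute as mca_i*2 + dimm shifted left by 9 inside each descriptor. The worked example below uses only identifiers from this file; the register value is illustrative, not hardware state.

	/* Two subtests aimed at the second DIMM behind port 0 (mca_i = 0, dimm = 1,
	 * so port_dimm = 1): write the fixed maintenance pattern, then ECC-scrub it
	 * (a read whose results are discarded). */
	uint16_t fill  = WRITE | FIXED_DATA_MODE | ECC_MODE | (1 << 9);
	uint16_t scrub = READ | ECC_MODE | (1 << 9);
	uint64_t mcbmr0 = 0;

	mcbmr0 |= PPC_PLACE(fill,  0 * 16, 16);	/* slot 0 -> IBM bits [0-15]  */
	mcbmr0 |= PPC_PLACE(scrub, 1 * 16, 16);	/* slot 1 -> IBM bits [16-31] */
	/* add_mcbist_test() accumulates into mcbist_memreg_cache the same way and
	 * commit_mcbist_memreg_cache() then writes it out to MCBMR0Q + reg. */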
*/ - if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & + if (((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) { /* * TODO: how long do we want to wait? Hostboot uses 10*100us polling, * but so far it seems to always be already started on the first read. */ udelay(1); - if (((val = read_scom_for_chiplet(id, MCB_CNTLSTATQ)) & + if (((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) die("MCBIST failed (%#16.16llx) to start twice\n", val); @@ -205,10 +206,10 @@ void mcbist_execute(int mcs_i) * gets set when MCBIST is paused, while 0x070123DC[0] IP stays on in that case. * This may become a problem for 3DS DIMMs. */ -int mcbist_is_done(int mcs_i) +int mcbist_is_done(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; - uint64_t val = val = read_scom_for_chiplet(id, MCB_CNTLSTATQ); + uint64_t val = val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ); /* Still in progress */ if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) diff --git a/src/soc/ibm/power9/mcbist.h b/src/soc/ibm/power9/mcbist.h index a7d686feb1c..cd10890724b 100644 --- a/src/soc/ibm/power9/mcbist.h +++ b/src/soc/ibm/power9/mcbist.h @@ -3,10 +3,12 @@ #ifndef __SOC_IBM_POWER9_MCBIST_H #define __SOC_IBM_POWER9_MCBIST_H -void add_scrub(int mcs_i, int port_dimm); -void add_fixed_pattern_write(int mcs_i, int port_dimm); +#include -void mcbist_execute(int mcs_i); -int mcbist_is_done(int mcs_i); +void add_scrub(uint8_t chip, int mcs_i, int port_dimm); +void add_fixed_pattern_write(uint8_t chip, int mcs_i, int port_dimm); + +void mcbist_execute(uint8_t chip, int mcs_i); +int mcbist_is_done(uint8_t chip, int mcs_i); #endif /* __SOC_IBM_POWER9_MCBIST_H */ From 4dec923e197e90c6667e1f35ae680cca339ba24f Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:52:59 +0200 Subject: [PATCH 144/213] soc/power9/istep_14_2.c: update for second CPU Also fix naming/formatting while at it. 
Change-Id: Ie0fa480eb06faafa68d8b7da99fd7337665aab3e Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_14.h | 2 +- src/soc/ibm/power9/istep_14_2.c | 40 ++++++++++++++++++-------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index e982d5c32c0..ea3946656c7 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -8,7 +8,7 @@ struct pci_info; void istep_14_1(uint8_t chips); -void istep_14_2(void); +void istep_14_2(uint8_t chips); void istep_14_3(uint8_t chips, const struct pci_info *pci_info); void istep_14_5(void); diff --git a/src/soc/ibm/power9/istep_14_2.c b/src/soc/ibm/power9/istep_14_2.c index ff94e669fe2..05bc4e322bc 100644 --- a/src/soc/ibm/power9/istep_14_2.c +++ b/src/soc/ibm/power9/istep_14_2.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #define MCS_MCMODE0 0x5010811 @@ -23,41 +24,46 @@ static void thermal_init(uint8_t chip) ~PPC_BITMASK(0, 45), PPC_BIT(10) | PPC_BIT(25) | PPC_BIT(37)); } - scom_and_for_chiplet(mcs_to_nest[mcs_ids[mcs_i]], - MCS_MCMODE0 + 0x80 * mcs_i, - PPC_BIT(21)); + rscom_and_for_chiplet(chip, mcs_to_nest[mcs_ids[mcs_i]], + MCS_MCMODE0 + 0x80 * mcs_i, + PPC_BIT(21)); } } -static void prog_mc_mode0(chiplet_id_t nest_target, size_t index) +static void prog_mc_mode0(uint8_t chip, chiplet_id_t nest_target, size_t index) { uint64_t mask = PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); uint64_t data = PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); - scom_and_or_for_chiplet(nest_target, MCS_MCMODE0 + 0x80 * index, ~mask, - data & mask); + rscom_and_or_for_chiplet(chip, nest_target, MCS_MCMODE0 + 0x80 * index, ~mask, + data & mask); } -static void throttle_sync(void) +static void throttle_sync(uint8_t chip) { for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) - prog_mc_mode0(mcs_to_nest[mcs_ids[mcs_i]], mcs_i); - scom_and_for_chiplet(N3_CHIPLET_ID, MCS_MCSYNC, - ~PPC_BIT(MCS_MCSYNC_SYNC_GO_CH0)); - scom_and_or_for_chiplet(N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(SUPER_SYNC_BIT), - PPC_BITMASK(0, 16)); - scom_and_for_chiplet(N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(MBA_REFRESH_SYNC_BIT)); + prog_mc_mode0(chip, mcs_to_nest[mcs_ids[mcs_i]], mcs_i); + rscom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, + ~PPC_BIT(MCS_MCSYNC_SYNC_GO_CH0)); + rscom_and_or_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(SUPER_SYNC_BIT), + PPC_BITMASK(0, 16)); + rscom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(MBA_REFRESH_SYNC_BIT)); } -void istep_14_2(void) +void istep_14_2(uint8_t chips) { - uint8_t chip = 0; // TODO: support second CPU + uint8_t chip; + report_istep(14, 2); printk(BIOS_EMERG, "starting istep 14.2\n"); - thermal_init(chip); - throttle_sync(); + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + thermal_init(chip); + throttle_sync(chip); + } + } printk(BIOS_EMERG, "ending istep 14.2\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index ac53af73026..f401e2bdc24 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -402,7 +402,7 @@ void main(void) istep_13_13(chips); istep_14_1(chips); - istep_14_2(); + istep_14_2(chips); istep_14_3(chips, pci_info); report_istep(14, 4); // no-op istep_14_5(); From 4be76151bce1c182b3dd9247dbe26a334b05996b Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 00:55:21 +0200 
Subject: [PATCH 145/213] soc/power9/istep_14_5.c: update for second CPU Change-Id: Id68915c454f01a27bae682c2f963a1b77c47866d Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_14.h | 2 +- src/soc/ibm/power9/istep_14_5.c | 160 ++++++++++++++++++------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 94 insertions(+), 70 deletions(-) diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h index ea3946656c7..877114f930b 100644 --- a/src/include/cpu/power/istep_14.h +++ b/src/include/cpu/power/istep_14.h @@ -10,6 +10,6 @@ struct pci_info; void istep_14_1(uint8_t chips); void istep_14_2(uint8_t chips); void istep_14_3(uint8_t chips, const struct pci_info *pci_info); -void istep_14_5(void); +void istep_14_5(uint8_t chips); #endif /* CPU_PPC64_ISTEP_14_H */ diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c index f46897acb39..716e84aa109 100644 --- a/src/soc/ibm/power9/istep_14_5.c +++ b/src/soc/ibm/power9/istep_14_5.c @@ -3,9 +3,43 @@ #include #include #include +#include #include "istep_13_scom.h" +/* + * 14.5 proc_setup_bars: Setup Memory BARs + * + * a) p9_mss_setup_bars.C (proc chip) -- Nimbus + * b) p9c_mss_setup_bars.C (proc chip) -- Cumulus + * - Same HWP interface for both Nimbus and Cumulus, input target is + * TARGET_TYPE_PROC_CHIP; HWP is to figure out if target is a Nimbus (MCS) + * or Cumulus (MI) internally. + * - Prior to setting the memory bars on each processor chip, this procedure + * needs to set the centaur security protection bit + * - TCM_CHIP_PROTECTION_EN_DC is SCOM Addr 0x03030000 + * - TCN_CHIP_PROTECTION_EN_DC is SCOM Addr 0x02030000 + * - Both must be set to protect Nest and Mem domains + * - Based on system memory map + * - Each MCS has its mirroring and non mirrored BARs + * - Set the correct checkerboard configs. Note that chip flushes to + * checkerboard + * - need to disable memory bar on slave otherwise base flush values will + * ack all memory accesses + * c) p9_setup_bars.C + * - Sets up Powerbus/MCD, L3 BARs on running core + * - Other cores are setup via winkle images + * - Setup dSMP and PCIe Bars + * - Setup PCIe outbound BARS (doing stores/loads from host core) + * - Addresses that PCIE responds to on powerbus (PCI init 1-7) + * - Informing PCIe of the memory map (inbound) + * - PCI Init 8-15 + * - Set up Powerbus Epsilon settings + * - Code is still running out of L3 cache + * - Use this procedure to setup runtime epsilon values + * - Must be done before memory is viable + */ + /* * Reset memory controller configuration written by SBE. * Close the MCS acker before enabling the real memory bars. @@ -18,7 +52,7 @@ * All register and field names come from code and comments only, except for the * first one. */ -static void revert_mc_hb_dcbz_config(void) +static void revert_mc_hb_dcbz_config(uint8_t chip) { int mcs_i, i; uint64_t val; @@ -32,7 +66,7 @@ static void revert_mc_hb_dcbz_config(void) * Hostboot uses - bit 10 for MCS0/1 and bit 9 for MCS2/3. */ /* TP.TCNx.Nx.CPLT_CTRL1, x = {1,3} */ - val = read_scom_for_chiplet(nest, NEST_CPLT_CTRL1); + val = read_rscom_for_chiplet(chip, nest, NEST_CPLT_CTRL1); if ((mcs_i == 0 && val & PPC_BIT(10)) || (mcs_i == 1 && val & PPC_BIT(9))) continue; @@ -45,9 +79,9 @@ static void revert_mc_hb_dcbz_config(void) [5-7] CHANNEL_0_GROUP_MEMBER_IDENTIFICATION = 0 // CHANNEL_1_GROUP_MEMBER_IDENTIFICATION not cleared? 
[13-23] GROUP_SIZE = 0 */ - scom_and_or_for_chiplet(nest, 0x0501080A + i * mul, - ~(PPC_BITMASK(0, 7) | PPC_BITMASK(13, 23)), - 0); + rscom_and_or_for_chiplet(chip, nest, 0x0501080A + i * mul, + ~(PPC_BITMASK(0, 7) | PPC_BITMASK(13, 23)), + 0); /* MCMODE1 -- enable speculation, cmd bypass, fp command bypass MCS_n_MCMODE1 // undocumented, 0x05010812, 0x05010892, 0x03010812, 0x03010892 @@ -56,24 +90,24 @@ static void revert_mc_hb_dcbz_config(void) [54-60] DISABLE_COMMAND_BYPASS = 0 [61] DISABLE_FP_COMMAND_BYPASS = 0 */ - scom_and_or_for_chiplet(nest, 0x05010812 + i * mul, - ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), - PPC_PLACE(0x40, 33, 19)); + rscom_and_or_for_chiplet(chip, nest, 0x05010812 + i * mul, + ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), + PPC_PLACE(0x40, 33, 19)); /* MCS_MCPERF1 -- enable fast path MCS_n_MCPERF1 // undocumented, 0x05010810, 0x05010890, 0x03010810, 0x03010890 [0] DISABLE_FASTPATH = 0 */ - scom_and_or_for_chiplet(nest, 0x05010810 + i * mul, - ~PPC_BIT(0), - 0); + rscom_and_or_for_chiplet(chip, nest, 0x05010810 + i * mul, + ~PPC_BIT(0), + 0); /* Re-mask MCFIR. We want to ensure all MCSs are masked until the * BARs are opened later during IPL. MCS_n_MCFIRMASK_OR // undocumented, 0x05010805, 0x05010885, 0x03010805, 0x03010885 [all] 1 */ - write_scom_for_chiplet(nest, 0x05010805 + i * mul, ~0); + write_rscom_for_chiplet(chip, nest, 0x05010805 + i * mul, ~0); } } } @@ -156,12 +190,23 @@ static void add_group(struct mc_group groups[MCA_PER_PROC], int size, uint8_t ma } /* TODO: make groups with > 1 MCA possible */ -static void fill_groups(void) +static void fill_groups(uint8_t chip) { + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + */ + uint64_t proc_base_addr = PPC_PLACE(0x0, 8, 5) // system ID + | PPC_PLACE(0x0, 13, 2) // msel + | PPC_PLACE(chip, 15, 4) // group ID + | PPC_PLACE(0x0, 19, 3); // chip ID + int mcs_i, mca_i, i; struct mc_group groups[MCA_PER_PROC] = {0}; /* This is in 4GB units, as expected by registers. */ - uint32_t cur_ba = 0; + uint32_t cur_ba = proc_base_addr >> 32; + + memset(mcfgp_regs, 0, sizeof(mcfgp_regs)); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { if (!mem_data.mcs[mcs_i].functional) @@ -235,7 +280,7 @@ static void fill_groups(void) * discarding the old one. As these registers are not documented, I can't even * tell whether it sets checkstop, recoverable error or something else. */ -static void fir_unmask(int mcs_i) +static void fir_unmask(uint8_t chip, int mcs_i) { chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; /* Stride discovered by trial and error due to lack of documentation. 
*/ @@ -246,8 +291,8 @@ static void fir_unmask(int mcs_i) [0] MC_INTERNAL_RECOVERABLE_ERROR = 1 [8] COMMAND_LIST_TIMEOUT = 1 */ - write_scom_for_chiplet(nest, 0x05010807 + mcs_i * mul, - PPC_BIT(0) | PPC_BIT(8)); + write_rscom_for_chiplet(chip, nest, 0x05010807 + mcs_i * mul, + PPC_BIT(0) | PPC_BIT(8)); /* MCS_MCFIRMASK (AND) // undocumented, 0x05010804 [all] 1 @@ -258,60 +303,23 @@ static void fir_unmask(int mcs_i) [5] INVALID_ADDRESS = 0 [8] COMMAND_LIST_TIMEOUT = 0 */ - write_scom_for_chiplet(nest, 0x05010804 + mcs_i * mul, - ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | - PPC_BIT(4) | PPC_BIT(5) | PPC_BIT(8))); + write_rscom_for_chiplet(chip, nest, 0x05010804 + mcs_i * mul, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | + PPC_BIT(4) | PPC_BIT(5) | PPC_BIT(8))); } -static void mcd_fir_mask(void) +static void mcd_fir_mask(uint8_t chip) { /* These are set always for N1 chiplet only. */ - write_scom_for_chiplet(N1_CHIPLET_ID, MCD1_FIR_MASK_REG, ~0); - write_scom_for_chiplet(N1_CHIPLET_ID, MCD0_FIR_MASK_REG, ~0); + write_rscom_for_chiplet(chip, N1_CHIPLET_ID, MCD1_FIR_MASK_REG, ~0); + write_rscom_for_chiplet(chip, N1_CHIPLET_ID, MCD0_FIR_MASK_REG, ~0); } -/* - * 14.5 proc_setup_bars: Setup Memory BARs - * - * a) p9_mss_setup_bars.C (proc chip) -- Nimbus - * b) p9c_mss_setup_bars.C (proc chip) -- Cumulus - * - Same HWP interface for both Nimbus and Cumulus, input target is - * TARGET_TYPE_PROC_CHIP; HWP is to figure out if target is a Nimbus (MCS) - * or Cumulus (MI) internally. - * - Prior to setting the memory bars on each processor chip, this procedure - * needs to set the centaur security protection bit - * - TCM_CHIP_PROTECTION_EN_DC is SCOM Addr 0x03030000 - * - TCN_CHIP_PROTECTION_EN_DC is SCOM Addr 0x02030000 - * - Both must be set to protect Nest and Mem domains - * - Based on system memory map - * - Each MCS has its mirroring and non mirrored BARs - * - Set the correct checkerboard configs. Note that chip flushes to - * checkerboard - * - need to disable memory bar on slave otherwise base flush values will - * ack all memory accesses - * c) p9_setup_bars.C - * - Sets up Powerbus/MCD, L3 BARs on running core - * - Other cores are setup via winkle images - * - Setup dSMP and PCIe Bars - * - Setup PCIe outbound BARS (doing stores/loads from host core) - * - Addresses that PCIE responds to on powerbus (PCI init 1-7) - * - Informing PCIe of the memory map (inbound) - * - PCI Init 8-15 - * - Set up Powerbus Epsilon settings - * - Code is still running out of L3 cache - * - Use this procedure to setup runtime epsilon values - * - Must be done before memory is viable - */ -void istep_14_5(void) +static void proc_setup_bars(uint8_t chip) { int mcs_i; - printk(BIOS_EMERG, "starting istep 14.5\n"); - report_istep(14, 5); - - /* Start MCS reset */ - revert_mc_hb_dcbz_config(); - fill_groups(); + fill_groups(chip); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; @@ -319,20 +327,36 @@ void istep_14_5(void) if (!mem_data.mcs[mcs_i].functional) continue; - fir_unmask(mcs_i); + fir_unmask(chip, mcs_i); /* * More undocumented registers. First two are described before * 'mcfgp_regs', last two are for setting up memory hole and SMF, they * are unused now. 
*/ - write_scom_for_chiplet(nest, 0x0501080A, mcfgp_regs[mcs_i][0]); - write_scom_for_chiplet(nest, 0x0501080C, mcfgp_regs[mcs_i][1]); - write_scom_for_chiplet(nest, 0x0501080B, 0); - write_scom_for_chiplet(nest, 0x0501080D, 0); + write_rscom_for_chiplet(chip, nest, 0x0501080A, mcfgp_regs[mcs_i][0]); + write_rscom_for_chiplet(chip, nest, 0x0501080C, mcfgp_regs[mcs_i][1]); + write_rscom_for_chiplet(chip, nest, 0x0501080B, 0); + write_rscom_for_chiplet(chip, nest, 0x0501080D, 0); } - mcd_fir_mask(); + mcd_fir_mask(chip); +} + +void istep_14_5(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 14.5\n"); + report_istep(14, 5); + + /* Start MCS reset */ + revert_mc_hb_dcbz_config(/*chip=*/0); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + proc_setup_bars(chip); + } printk(BIOS_EMERG, "ending istep 14.5\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index f401e2bdc24..e6df4171501 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -405,7 +405,7 @@ void main(void) istep_14_2(chips); istep_14_3(chips, pci_info); report_istep(14, 4); // no-op - istep_14_5(); + istep_14_5(chips); timestamp_add_now(TS_INITRAM_END); From 5a66367b11bfa6cbeef9a5bf789e7eec62c58fc6 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 18:10:53 +0200 Subject: [PATCH 146/213] soc/power9/i2c.c: add host I2C for second CPU Change-Id: I2d9796e0286501005cef6e777827b9449346d015 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/proc.h | 4 +++ src/soc/ibm/power9/i2c.c | 56 ++++++++++++++++++-------------- src/soc/ibm/power9/istep_13_10.c | 6 ++-- src/soc/ibm/power9/mvpd.c | 2 +- 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/include/cpu/power/proc.h b/src/include/cpu/power/proc.h index c27c8b0da42..ad6b21c6abc 100644 --- a/src/include/cpu/power/proc.h +++ b/src/include/cpu/power/proc.h @@ -22,6 +22,10 @@ _Static_assert(CONFIG_MAX_CPUS <= MAX_CHIPS, "Too many CPUs requested"); #define DIMMS_PER_MCA 2 #define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) +#define I2C_BUSES_PER_CPU 4 +#define SPD_I2C_BUS 3 +#define FSI_I2C_BUS 8 // one bus to the second CPU + /* cores is a 64-bit map of functional cores of a single chip */ #define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) #define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2 * (ex), 2 * (ex) + 1))) diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c index e0b7c8a7c8e..d053462bbbf 100644 --- a/src/soc/ibm/power9/i2c.c +++ b/src/soc/ibm/power9/i2c.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -39,11 +40,11 @@ #define UNRECOVERABLE 0xFC80000000000000 #define I2C_MAX_FIFO_CAPACITY 8 -#define SPD_I2C_BUS 3 enum i2c_type { - HOST_I2C, // I2C via XSCOM (first CPU) - FSI_I2C, // I2C via FSI (second CPU) + HOST_I2C_CPU0, // I2C via XSCOM (first CPU) + HOST_I2C_CPU1, // I2C via XSCOM (second CPU) + FSI_I2C, // I2C via FSI (second CPU) }; /* return -1 if SMBus errors otherwise return 0 */ @@ -99,63 +100,70 @@ void get_spd_smbus(struct spd_block *blk) static void write_i2c(enum i2c_type type, uint64_t addr, uint64_t data) { - if (type == HOST_I2C) - write_scom(addr, data); + if (type != FSI_I2C) + write_rscom(type == HOST_I2C_CPU0 ? 
0 : 1, addr, data); else write_fsi_i2c(/*chip=*/1, addr, data >> 32, /*size=*/4); } static uint64_t read_i2c(enum i2c_type type, uint64_t addr) { - if (type == HOST_I2C) - return read_scom(addr); + if (type != FSI_I2C) + return read_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr); else return (uint64_t)read_fsi_i2c(/*chip=*/1, addr, /*size=*/4) << 32; } static void write_i2c_byte(enum i2c_type type, uint64_t addr, uint8_t data) { - if (type == HOST_I2C) - write_scom(addr, (uint64_t)data << 56); + if (type != FSI_I2C) + write_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr, (uint64_t)data << 56); else write_fsi_i2c(/*chip=*/1, addr, (uint32_t)data << 24, /*size=*/1); } static uint8_t read_i2c_byte(enum i2c_type type, uint64_t addr) { - if (type == HOST_I2C) - return read_scom(addr) >> 56; + if (type != FSI_I2C) + return read_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr) >> 56; else return read_fsi_i2c(/*chip=*/1, addr, /*size=*/1) >> 24; } /* - * There are 4 buses/engines, but the function accepts bus [0-7] in order to - * allow specifying bus of the second CPU while still following coreboot's - * prototype for this function. [0-3] are buses of the first CPU and [4-7] of - * the second one (0-3 correspondingly). However, looks like only one bus is - * available through FSI, because its number is never set. + * There are 4 buses/engines, but the function accepts bus [0-8] in order to + * allow specifying buses of the second CPU and I2C FSI bus while still + * following coreboot's prototype for this function. [0-3] are buses of the + * first CPU, [4-7] of the second one (0-3 correspondingly) and 8 is FSI I2C of + * the second CPU. */ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, int seg_count) { - enum { BUSES_PER_CPU = 4 }; - int i; uint64_t r; - enum i2c_type type = HOST_I2C; - if (bus >= BUSES_PER_CPU) { - bus -= BUSES_PER_CPU; + enum i2c_type type = HOST_I2C_CPU0; + if (bus >= I2C_BUSES_PER_CPU) { + bus -= I2C_BUSES_PER_CPU; + type = HOST_I2C_CPU1; + } + if (bus >= I2C_BUSES_PER_CPU) { + bus -= I2C_BUSES_PER_CPU; type = FSI_I2C; + + /* There seems to be only one engine on FSI I2C */ + if (bus != 0) { + printk(BIOS_ERR, "FSI I2C bus out of range (%d)\n", bus); + return -1; + } } - if (bus >= BUSES_PER_CPU) { + if (bus >= I2C_BUSES_PER_CPU) { printk(BIOS_ERR, "I2C bus out of range (%d)\n", bus); return -1; } - /* There seems to be only one engine on FSI I2C */ uint32_t base = (type == FSI_I2C ? 0 : I2C_HOST_MASTER_BASE_ADDR | (bus << 12)); /* Addition is fine, because there will be no carry in bus number bits */ uint32_t fifo_reg = base + FIFO_REG; @@ -164,7 +172,7 @@ int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, uint32_t status_reg = base + STATUS_REG; uint32_t res_err_reg = base + RES_ERR_REG; - uint64_t clear_err = (type == HOST_I2C ? PPC_BIT(0) : 0); + uint64_t clear_err = (type != FSI_I2C ? 
PPC_BIT(0) : 0); /* * Divisor fields in this register are poorly documented: diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index aa71c0e35ec..258d45df961 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -9,8 +9,6 @@ #include "istep_13_scom.h" -#define SPD_I2C_BUS 3 - /* * 13.10 mss_draminit: Dram initialize * @@ -55,7 +53,7 @@ static void rcd_load(uint8_t chip, mca_data_t *mca, int d) uint8_t val; rdimm_data_t *dimm = &mca->dimm[d]; uint8_t *spd = dimm->spd; - unsigned int spd_bus = SPD_I2C_BUS + chip * 4; + unsigned int spd_bus = SPD_I2C_BUS + chip * I2C_BUSES_PER_CPU; /* Raw card specifications are JEDEC documents MODULE4.20.28.x, where x is A-E */ @@ -402,7 +400,7 @@ static void mrs_load(uint8_t chip, int mcs_i, int mca_i, int d) static void mss_draminit(uint8_t chip) { - unsigned int spd_bus = SPD_I2C_BUS + chip * 4; + unsigned int spd_bus = SPD_I2C_BUS + chip * I2C_BUSES_PER_CPU; int mcs_i, mca_i, dimm; for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c index ae42ad64bda..55b008e56dd 100644 --- a/src/soc/ibm/power9/mvpd.c +++ b/src/soc/ibm/power9/mvpd.c @@ -67,7 +67,7 @@ struct pt_record { on success. */ static int read_eeprom_chip(uint8_t cpu, uint32_t offset, void *data, uint16_t len) { - const unsigned int bus = 1 + 4 * cpu; // four I2C buses per CPU + const unsigned int bus = (cpu == 0 ? 1 : FSI_I2C_BUS); uint16_t addr = 0xA0; uint16_t slave = 0; uint16_t actual_offset = 0; From 70c8d9951f12ba88afcbb0feaf78353e7ec43e1d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 18:11:51 +0200 Subject: [PATCH 147/213] soc/power9/i2c.c: introduce get_spd_i2c() get_spd_smbus() prototype is too limiting, it doesn't allow specifying bus number which we need to be able to work with multiple CPUs. Change-Id: I445edc7d3fb02857b437ca0d5fa4596bdb4e607b Signed-off-by: Sergii Dmytruk --- src/include/spd_bin.h | 1 + src/soc/ibm/power9/i2c.c | 19 ++++++++++--------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/include/spd_bin.h b/src/include/spd_bin.h index 973eb498a7b..5133cff99bf 100644 --- a/src/include/spd_bin.h +++ b/src/include/spd_bin.h @@ -46,6 +46,7 @@ void print_spd_info(uint8_t spd[]); uintptr_t spd_cbfs_map(u8 spd_index); void dump_spd_info(struct spd_block *blk); void get_spd_smbus(struct spd_block *blk); +void get_spd_i2c(uint8_t bus, struct spd_block *blk); /* * get_spd_sn returns the SODIMM serial number. It only supports DDR3 and DDR4. diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c index d053462bbbf..5c451c0459e 100644 --- a/src/soc/ibm/power9/i2c.c +++ b/src/soc/ibm/power9/i2c.c @@ -48,7 +48,7 @@ enum i2c_type { }; /* return -1 if SMBus errors otherwise return 0 */ -static int get_spd(u8 *spd, u8 addr) +static int get_spd(uint8_t bus, u8 *spd, u8 addr) { /* * Second half of DIMMs is on the second I2C port. platform_i2c_transfer() @@ -57,27 +57,28 @@ static int get_spd(u8 *spd, u8 addr) */ uint8_t fix = addr & 0x80; - if (i2c_read_bytes(SPD_I2C_BUS, addr, 0, spd, SPD_PAGE_LEN) < 0) { + if (i2c_read_bytes(bus, addr, 0, spd, SPD_PAGE_LEN) < 0) { printk(BIOS_INFO, "No memory DIMM at address %02X\n", addr); return -1; } /* DDR4 spd is 512 byte. 
Switch to page 1 */ - i2c_writeb(SPD_I2C_BUS, SPD_PAGE_1 | fix, 0, 0); + i2c_writeb(bus, SPD_PAGE_1 | fix, 0, 0); /* No need to check again if DIMM is present */ - i2c_read_bytes(SPD_I2C_BUS, addr, 0, spd + SPD_PAGE_LEN, SPD_PAGE_LEN); + i2c_read_bytes(bus, addr, 0, spd + SPD_PAGE_LEN, SPD_PAGE_LEN); /* Restore to page 0 */ - i2c_writeb(SPD_I2C_BUS, SPD_PAGE_0 | fix, 0, 0); + i2c_writeb(bus, SPD_PAGE_0 | fix, 0, 0); return 0; } -static u8 spd_data[CONFIG_DIMM_MAX * CONFIG_DIMM_SPD_SIZE]; +static u8 spd_data[MAX_CHIPS][CONFIG_DIMM_MAX][CONFIG_DIMM_SPD_SIZE]; -void get_spd_smbus(struct spd_block *blk) +void get_spd_i2c(uint8_t bus, struct spd_block *blk) { u8 i; + u8 chip = bus / I2C_BUSES_PER_CPU; for (i = 0 ; i < CONFIG_DIMM_MAX; i++) { if (blk->addr_map[i] == 0) { @@ -85,8 +86,8 @@ void get_spd_smbus(struct spd_block *blk) continue; } - if (get_spd(&spd_data[i * CONFIG_DIMM_SPD_SIZE], blk->addr_map[i]) == 0) - blk->spd_array[i] = &spd_data[i * CONFIG_DIMM_SPD_SIZE]; + if (get_spd(bus, spd_data[chip][i], blk->addr_map[i]) == 0) + blk->spd_array[i] = spd_data[chip][i]; else blk->spd_array[i] = NULL; } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index e6df4171501..1759579004a 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -138,7 +138,7 @@ static void prepare_dimm_data(void) DIMM4 | 0x80, DIMM5 | 0x80, DIMM6 | 0x80, DIMM7 | 0x80 }, }; - get_spd_smbus(&blk); + get_spd_i2c(SPD_I2C_BUS, &blk); dump_spd_info(&blk); /* From 5328e65f437a7770c93ac9547af02836c501c9f7 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Jan 2022 19:10:48 +0200 Subject: [PATCH 148/213] soc/power9/: collect DIMM data for two CPUs Change-Id: I5c60a361b2007fc320bdc1621e21bc76dccbaf51 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_13.h | 30 +++---- src/soc/ibm/power9/ccs.c | 8 +- src/soc/ibm/power9/istep_13_10.c | 30 +++---- src/soc/ibm/power9/istep_13_11.c | 48 +++++----- src/soc/ibm/power9/istep_13_13.c | 8 +- src/soc/ibm/power9/istep_13_3.c | 6 +- src/soc/ibm/power9/istep_13_8.c | 150 ++++++++++++++++--------------- src/soc/ibm/power9/istep_13_9.c | 38 ++++---- src/soc/ibm/power9/istep_14_1.c | 12 +-- src/soc/ibm/power9/istep_14_5.c | 6 +- src/soc/ibm/power9/romstage.c | 141 ++++++++++++++++------------- 11 files changed, 250 insertions(+), 227 deletions(-) diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 6d518fbbd5d..8437cffdce8 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -97,7 +97,7 @@ typedef struct { uint8_t nrtp; // max(4 nCK, 7.5 ns) = 7.5 ns for every supported speed } mcbist_data_t; -extern mcbist_data_t mem_data; +extern mcbist_data_t mem_data[MAX_CHIPS]; static const chiplet_id_t mcs_ids[MCS_PER_PROC] = {MC01_CHIPLET_ID, MC23_CHIPLET_ID}; /* @@ -108,36 +108,36 @@ static const chiplet_id_t mcs_ids[MCS_PER_PROC] = {MC01_CHIPLET_ID, MC23_CHIPLET * These functions should not be used before setting mem_data.speed to a valid * non-0 value. */ -static inline uint64_t tck_in_ps(void) +static inline uint64_t tck_in_ps(uint8_t chip) { /* * Speed is in MT/s, we need to divide it by 2 to get MHz. * tCK(avg) should be rounded down to the next valid speed bin, which * corresponds to value obtained by using standardized MT/s values. 
*/ - return 1000000 / (mem_data.speed / 2); + return 1000000 / (mem_data[chip].speed / 2); } -static inline uint64_t ps_to_nck(uint64_t ps) +static inline uint64_t ps_to_nck(uint8_t chip, uint64_t ps) { /* Algorithm taken from JEDEC Standard No. 21-C */ - return ((ps * 1000 / tck_in_ps()) + 974) / 1000; + return ((ps * 1000 / tck_in_ps(chip)) + 974) / 1000; } -static inline uint64_t mtb_ftb_to_nck(uint64_t mtb, int8_t ftb) +static inline uint64_t mtb_ftb_to_nck(uint8_t chip, uint64_t mtb, int8_t ftb) { /* ftb is signed (always byte?) */ - return ps_to_nck(mtb * 125 + ftb); + return ps_to_nck(chip, mtb * 125 + ftb); } -static inline uint64_t ns_to_nck(uint64_t ns) +static inline uint64_t ns_to_nck(uint8_t chip, uint64_t ns) { - return ps_to_nck(ns * PSEC_PER_NSEC); + return ps_to_nck(chip, ns * PSEC_PER_NSEC); } -static inline uint64_t nck_to_ps(uint64_t nck) +static inline uint64_t nck_to_ps(uint8_t chip, uint64_t nck) { - return nck * tck_in_ps(); + return nck * tck_in_ps(chip); } /* @@ -147,14 +147,14 @@ static inline uint64_t nck_to_ps(uint64_t nck) * around 1 ns, so most smaller delays will be rounded up to 1 us. For better * resolution we would have to read TBR (Time Base Register) directly. */ -static inline uint64_t nck_to_us(uint64_t nck) +static inline uint64_t nck_to_us(uint8_t chip, uint64_t nck) { - return (nck_to_ps(nck) + PSEC_PER_USEC - 1) / PSEC_PER_USEC; + return (nck_to_ps(chip, nck) + PSEC_PER_USEC - 1) / PSEC_PER_USEC; } -static inline void delay_nck(uint64_t nck) +static inline void delay_nck(uint8_t chip, uint64_t nck) { - udelay(nck_to_us(nck)); + udelay(nck_to_us(chip, nck)); } /* TODO: consider non-RMW variants */ diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index dce6f2b4983..28574135ed1 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -169,7 +169,7 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) */ if (total_cycles < 8) total_cycles = 8; - poll_timeout = nck_to_us((total_cycles * 7 * 4) / 8); + poll_timeout = nck_to_us(chip, (total_cycles * 7 * 4) / 8); write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); time = wait_us(1, !(read_rscom_for_chiplet(chip, id, CCS_STATQ) & @@ -211,7 +211,7 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START)); /* With microsecond resolution we are probably wasting a lot of time here. */ - delay_nck(total_cycles/8); + delay_nck(chip, total_cycles / 8); /* timeout(50*10ns): if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ[0] (CCS_STATQ_CCS_IP) != 1: break @@ -226,8 +226,8 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) dump_cal_errors(chip, id, mca_i); printk(BIOS_DEBUG, "CCS took %lld us (%lld us timeout), %d instruction(s)\n", - time + nck_to_us(total_cycles/8), - poll_timeout + nck_to_us(total_cycles/8), instr); + time + nck_to_us(chip, total_cycles / 8), + poll_timeout + nck_to_us(chip, total_cycles / 8), instr); if (read_rscom_for_chiplet(chip, id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE)) die("(%#16.16llx) CCS execution error\n", diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index 258d45df961..208a605a353 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -143,9 +143,9 @@ static void rcd_load(uint8_t chip, mca_data_t *mca, int d) 4 if 2666 MT/s F0RC0B = 0xe // External VrefCA connected to QVrefCA and BVrefCA */ - val = mem_data.speed == 1866 ? 1 : - mem_data.speed == 2133 ? 
2 : - mem_data.speed == 2400 ? 3 : 4; + val = mem_data[chip].speed == 1866 ? 1 : + mem_data[chip].speed == 2133 ? 2 : + mem_data[chip].speed == 2400 ? 3 : 4; val |= 0xE0; rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0A_0B, val); @@ -183,9 +183,9 @@ static void rcd_load(uint8_t chip, mca_data_t *mca, int d) 0x39 if 2400 MT/s 0x47 if 2666 MT/s */ - val = mem_data.speed == 1866 ? 0x1F : - mem_data.speed == 2133 ? 0x2C : - mem_data.speed == 2400 ? 0x39 : 0x47; + val = mem_data[chip].speed == 1866 ? 0x1F : + mem_data[chip].speed == 2133 ? 0x2C : + mem_data[chip].speed == 2400 ? 0x39 : 0x47; rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC3x, val); /* @@ -229,10 +229,10 @@ static void rcd_load(uint8_t chip, mca_data_t *mca, int d) */ val = 0x2; rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); - delay_nck(8000); + delay_nck(chip, 8000); val = 0x3; rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); - delay_nck(8000); + delay_nck(chip, 8000); /* * Dumped values from currently installed DIMM, from Petitboot: @@ -304,7 +304,7 @@ static void rcd_load(uint8_t chip, mca_data_t *mca, int d) static void mrs_load(uint8_t chip, int mcs_i, int mca_i, int d) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int mirrored = mca->dimm[d].spd[136] & 1; mrs_cmd_t mrs; int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); @@ -375,7 +375,7 @@ static void mrs_load(uint8_t chip, int mcs_i, int mca_i, int d) /* ATTR_MSS_MRW_REFRESH_RATE_REQUEST, default DOUBLE. * Do we need to half tREFI as well? */ DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, - mem_data.cwl); + mem_data[chip].cwl); ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, @@ -404,7 +404,7 @@ static void mss_draminit(uint8_t chip) int mcs_i, mca_i, dimm; for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; /* MC01.MCBIST.MBA_SCOMFIR.CCS_MODEQ @@ -433,7 +433,7 @@ static void mss_draminit(uint8_t chip) CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; @@ -483,13 +483,13 @@ static void mss_draminit(uint8_t chip) * because CCS_ADDR_MUX_SEL is set. 
*/ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (mem_data.mcs[mcs_i].mca[mca_i].functional) + if (mem_data[chip].mcs[mcs_i].mca[mca_i].functional) break; } draminit_cke_helper(chip, mcs_ids[mcs_i], mca_i); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; @@ -508,7 +508,7 @@ static void mss_draminit(uint8_t chip) } for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 80b7580914d..0976c721654 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -26,7 +26,7 @@ static void setup_and_execute_zqcal(uint8_t chip, int mcs_i, int mca_i, int d) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int mirrored = mca->dimm[d].spd[136] & 1; /* Maybe add this to mca_data_t? */ mrs_cmd_t cmd = ddr4_get_zqcal_cmd(DDR4_ZQCAL_LONG); enum rank_selection ranks; @@ -258,7 +258,7 @@ static void wr_level_pre(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int d = rp / 2; int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); int mirrored = mca->dimm[d].spd[136] & 1; @@ -277,7 +277,7 @@ static void wr_level_pre(uint8_t chip, int mcs_i, int mca_i, int rp, mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, vpd_to_rtt_wr(0), DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, - mem_data.cwl); + mem_data[chip].cwl); ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); /* MR1 = // redo the rest of the bits @@ -380,7 +380,7 @@ static void wr_level_pre(uint8_t chip, int mcs_i, int mca_i, int rp, //ccs_execute(id, mca_i); } -static uint64_t wr_level_time(mca_data_t *mca) +static uint64_t wr_level_time(uint8_t chip, mca_data_t *mca) { /* * "Note: the following equation is taken from the PHY workbook - leaving @@ -397,7 +397,7 @@ static uint64_t wr_level_time(mca_data_t *mca) const int big_step = 7; const int small_step = 0; const int num_valid_samples = 5; - const int twlo_twloe = ps_to_nck(11500); + const int twlo_twloe = ps_to_nck(chip, 11500); return (80 + twlo_twloe) * num_valid_samples * (384 / (big_step + 1) + (2 * (big_step + 1)) / (small_step + 1)) + 20; @@ -408,7 +408,7 @@ static void wr_level_post(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int d = rp / 2; int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); int mirrored = mca->dimm[d].spd[136] & 1; @@ -459,7 +459,7 @@ static void wr_level_post(uint8_t chip, int mcs_i, int mca_i, int rp, mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, vpd_to_rtt_wr(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, - mem_data.cwl); + mem_data[chip].cwl); ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); /* MR1 = // redo the rest of the bits @@ -522,7 +522,7 @@ static void wr_level_post(uint8_t chip, int mcs_i, int mca_i, int rp, //ccs_execute(id, mca_i); } -static uint64_t 
initial_pat_wr_time(mca_data_t *mca) +static uint64_t initial_pat_wr_time(uint8_t chip, mca_data_t *mca) { /* * "Not sure how long this should take, so we're gonna use 1 to make sure we @@ -563,10 +563,10 @@ static uint64_t initial_pat_wr_time(mca_data_t *mca) * * From the lack of better ideas, return 10 us. */ - return ns_to_nck(10 * 1000); + return ns_to_nck(chip, 10 * 1000); } -static uint64_t dqs_align_time(mca_data_t *mca) +static uint64_t dqs_align_time(uint8_t chip, mca_data_t *mca) { /* * "This step runs for approximately 6 x 600 x 4 DRAM clocks per rank pair." @@ -595,7 +595,7 @@ static void rdclk_align_pre(uint8_t chip, int mcs_i, int mca_i, int rp, mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); } -static uint64_t rdclk_align_time(mca_data_t *mca) +static uint64_t rdclk_align_time(uint8_t chip, mca_data_t *mca) { /* * "This step runs for approximately 24 x ((1024/COARSE_CAL_STEP_SIZE + @@ -698,7 +698,7 @@ static void read_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, PPC_BIT(CALIBRATION_ENABLE)); } -static uint64_t read_ctr_time(mca_data_t *mca) +static uint64_t read_ctr_time(uint8_t chip, mca_data_t *mca) { /* * "This step runs for approximately 6 x (512/COARSE_CAL_STEP_SIZE + 4 x @@ -745,7 +745,7 @@ static void write_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int mirrored = mca->dimm[rp/2].spd[136] & 1; mrs_cmd_t mrs; enum rank_selection rank = 1 << rp; @@ -767,7 +767,7 @@ static void write_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, * Each step is followed by a 150ns (tVREFDQE or tVREFDQX) stream of DES * commands before next one. 
*/ - uint64_t tVREFDQ_E_X = ns_to_nck(150); + uint64_t tVREFDQ_E_X = ns_to_nck(chip, 150); /* Fill MRS command once, then flip VREFDQ training mode bit as needed */ mrs = ddr4_get_mr6(mca->nccd_l, @@ -809,7 +809,7 @@ static void write_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, } } -static uint64_t write_ctr_time(mca_data_t *mca) +static uint64_t write_ctr_time(uint8_t chip, mca_data_t *mca) { /* * "1000 + (NUM_VALID_SAMPLES * (FW_WR_RD + FW_RD_WR + 16) * @@ -845,7 +845,7 @@ static uint64_t write_ctr_time(mca_data_t *mca) const int big_step = 7; const int small_step = 0; const int num_valid_samples = 5; - int fw_rd_wr = MAX(mca->nwtr_s + 11, ps_to_nck(7500) + 3); + int fw_rd_wr = MAX(mca->nwtr_s + 11, ps_to_nck(chip, 7500) + 3); return 1000 + (num_valid_samples * (fw_rd_wr + 16) * (1024/(small_step + 1) + 128/(big_step + 1)) + 2 * (big_step + 1)/(small_step + 1)) * 24; @@ -889,7 +889,7 @@ static void write_ctr_post(uint8_t chip, int mcs_i, int mca_i, int rp, die("Write Centering post-workaround required, but not yet implemented\n"); } -static uint64_t coarse_wr_rd_time(mca_data_t *mca) +static uint64_t coarse_wr_rd_time(uint8_t chip, mca_data_t *mca) { /* * "40 cycles for WR, 32 for RD" @@ -907,7 +907,7 @@ struct phy_step { const char *name; enum cal_config cfg; phy_workaround_t *pre; - uint64_t (*time)(mca_data_t *mca); + uint64_t (*time)(uint8_t chip, mca_data_t *mca); phy_workaround_t *post; }; @@ -972,13 +972,13 @@ static struct phy_step steps[] = { static void dispatch_step(uint8_t chip, struct phy_step *step, int mcs_i, int mca_i, int rp, enum rank_selection ranks_present) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; printk(BIOS_DEBUG, "%s starting\n", step->name); if (step->pre) step->pre(chip, mcs_i, mca_i, rp, ranks_present); - ccs_phy_hw_step(chip, mcs_ids[mcs_i], mca_i, rp, step->cfg, step->time(mca)); + ccs_phy_hw_step(chip, mcs_ids[mcs_i], mca_i, rp, step->cfg, step->time(chip, mca)); if (step->post) step->post(chip, mcs_i, mca_i, rp, ranks_present); @@ -1062,7 +1062,7 @@ static int can_recover(uint8_t chip, int mcs_i, int mca_i, int rp) int bad_bits = 0; int dp; chiplet_id_t id = mcs_ids[mcs_i]; - uint8_t width = mem_data.mcs[mcs_i].mca[mca_i].dimm[rp/2].width; + uint8_t width = mem_data[chip].mcs[mcs_i].mca[mca_i].dimm[rp/2].width; for (dp = 0; dp < 5; dp++) { uint64_t reg; @@ -1174,7 +1174,7 @@ static void fir_unmask(uint8_t chip, int mcs_i) 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* @@ -1243,11 +1243,11 @@ static void mss_draminit_training(uint8_t chip) enum rank_selection ranks_present; for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index fbbba49834e..602e41d4de3 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -359,7 +359,7 @@ static const uint64_t xlt_tables[][3] = { static void setup_xlate_map(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = 
&mem_data[chip].mcs[mcs_i].mca[mca_i]; const int mca_mul = 0x10; /* * Mixing rules: @@ -505,7 +505,7 @@ static void fir_unmask(uint8_t chip, int mcs_i) 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* From broadcast_out_of_sync() workaround: @@ -609,12 +609,12 @@ static void mss_draminit_mc(uint8_t chip) for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { /* No need to initialize a non-functional MCS */ - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index 92b26bd5f8d..5ee7a863dac 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -35,7 +35,7 @@ static void mem_pll_initf(uint8_t chip) /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ - switch (mem_data.speed) { + switch (mem_data[chip].speed) { case 2666: ring_id = RING_ID_2666; break; @@ -49,7 +49,7 @@ static void mem_pll_initf(uint8_t chip) ring_id = RING_ID_1866; break; default: - die("Unsupported memory speed (%d MT/s)\n", mem_data.speed); + die("Unsupported memory speed (%d MT/s)\n", mem_data[chip].speed); } /* @@ -72,7 +72,7 @@ static void mem_pll_initf(uint8_t chip) for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { long time; - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index bdb2922e4c7..04b89eb5964 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -36,7 +36,7 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) const struct powerbus_cfg *pb_cfg = powerbus_cfg(); chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; const int mca_mul = 0x10; /* * Mixing rules: @@ -171,18 +171,18 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CL + 9 */ /* ATTR_MSS_EFF_DPHY_WLO = 1 from VPD, 3 from dump? */ - uint64_t rdtag_dly = mem_data.speed == 2666 ? 9 : - mem_data.speed == 2400 ? 8 : 7; + uint64_t rdtag_dly = mem_data[chip].speed == 2666 ? 9 : + mem_data[chip].speed == 2400 ? 
8 : 7; mca_and_or(chip, id, mca_i, MBA_DSM0Q, ~PPC_BITMASK(0, 41), - PPC_PLACE(mca->cl - mem_data.cwl, MBA_DSM0Q_CFG_RODT_START_DLY, + PPC_PLACE(mca->cl - mem_data[chip].cwl, MBA_DSM0Q_CFG_RODT_START_DLY, MBA_DSM0Q_CFG_RODT_START_DLY_LEN) | - PPC_PLACE(mca->cl - mem_data.cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY, + PPC_PLACE(mca->cl - mem_data[chip].cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY, MBA_DSM0Q_CFG_RODT_END_DLY_LEN) | PPC_PLACE(5, MBA_DSM0Q_CFG_WODT_END_DLY, MBA_DSM0Q_CFG_WODT_END_DLY_LEN) | PPC_PLACE(24, MBA_DSM0Q_CFG_WRDONE_DLY, MBA_DSM0Q_CFG_WRDONE_DLY_LEN) | - PPC_PLACE(mem_data.cwl + /* 1 */ 3 - 8, MBA_DSM0Q_CFG_WRDATA_DLY, + PPC_PLACE(mem_data[chip].cwl + /* 1 */ 3 - 8, MBA_DSM0Q_CFG_WRDATA_DLY, MBA_DSM0Q_CFG_WRDATA_DLY_LEN) | PPC_PLACE(mca->cl + rdtag_dly, MBA_DSM0Q_CFG_RDTAG_DLY, MBA_DSM0Q_CFG_RDTAG_DLY_LEN)); @@ -219,9 +219,9 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) [51-56] MBA_TMR0Q_WRSMSR_DLY = // same as below [57-62] MBA_TMR0Q_WRSMDR_DLY = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWTR_S + 4 */ - uint64_t var_dly = mem_data.speed == 2666 ? 11 : - mem_data.speed == 2400 ? 10 : - mem_data.speed == 2133 ? 9 : 8; + uint64_t var_dly = mem_data[chip].speed == 2666 ? 11 : + mem_data[chip].speed == 2400 ? 10 : + mem_data[chip].speed == 2133 ? 9 : 8; mca_and_or(chip, id, mca_i, MBA_TMR0Q, PPC_BIT(63), PPC_PLACE(var_dly, MBA_TMR0Q_RRDM_DLY, MBA_TMR0Q_RRDM_DLY_LEN) | PPC_PLACE(4, MBA_TMR0Q_RRSMSR_DLY, MBA_TMR0Q_RRSMSR_DLY_LEN) | @@ -231,17 +231,17 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) PPC_PLACE(4, MBA_TMR0Q_WWSMSR_DLY, MBA_TMR0Q_WWSMSR_DLY_LEN) | PPC_PLACE(4, MBA_TMR0Q_WWSMDR_DLY, MBA_TMR0Q_WWSMDR_DLY_LEN) | PPC_PLACE(mca->nccd_l, MBA_TMR0Q_WWOP_DLY, MBA_TMR0Q_WWOP_DLY_LEN) | - PPC_PLACE(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWDM_DLY, + PPC_PLACE(mca->cl - mem_data[chip].cwl + var_dly, MBA_TMR0Q_RWDM_DLY, MBA_TMR0Q_RWDM_DLY_LEN) | - PPC_PLACE(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMSR_DLY, + PPC_PLACE(mca->cl - mem_data[chip].cwl + var_dly, MBA_TMR0Q_RWSMSR_DLY, MBA_TMR0Q_RWSMSR_DLY_LEN) | - PPC_PLACE(mca->cl - mem_data.cwl + var_dly, MBA_TMR0Q_RWSMDR_DLY, + PPC_PLACE(mca->cl - mem_data[chip].cwl + var_dly, MBA_TMR0Q_RWSMDR_DLY, MBA_TMR0Q_RWSMDR_DLY_LEN) | - PPC_PLACE(mem_data.cwl - mca->cl + var_dly, MBA_TMR0Q_WRDM_DLY, + PPC_PLACE(mem_data[chip].cwl - mca->cl + var_dly, MBA_TMR0Q_WRDM_DLY, MBA_TMR0Q_WRDM_DLY_LEN) | - PPC_PLACE(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMSR_DLY, + PPC_PLACE(mem_data[chip].cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMSR_DLY, MBA_TMR0Q_WRSMSR_DLY_LEN) | - PPC_PLACE(mem_data.cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMDR_DLY, + PPC_PLACE(mem_data[chip].cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMDR_DLY, MBA_TMR0Q_WRSMDR_DLY_LEN)); /* MC01.PORT0.SRQ.MBA_TMR1Q = @@ -263,15 +263,17 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) */ mca_and_or(chip, id, mca_i, MBA_TMR1Q, 0, PPC_PLACE(mca->nccd_l, MBA_TMR1Q_RRSBG_DLY, MBA_TMR1Q_RRSBG_DLY_LEN) | - PPC_PLACE(mem_data.cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY, + PPC_PLACE(mem_data[chip].cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY, MBA_TMR1Q_WRSBG_DLY_LEN) | PPC_PLACE(mca->nfaw, MBA_TMR1Q_CFG_TFAW, MBA_TMR1Q_CFG_TFAW_LEN) | PPC_PLACE(mca->nrcd, MBA_TMR1Q_CFG_TRCD, MBA_TMR1Q_CFG_TRCD_LEN) | PPC_PLACE(mca->nrp, MBA_TMR1Q_CFG_TRP, MBA_TMR1Q_CFG_TRP_LEN) | PPC_PLACE(mca->nras, MBA_TMR1Q_CFG_TRAS, MBA_TMR1Q_CFG_TRAS_LEN) | - PPC_PLACE(mem_data.cwl + mca->nwr + 4, MBA_TMR1Q_CFG_WR2PRE, + PPC_PLACE(mem_data[chip].cwl + mca->nwr + 4, MBA_TMR1Q_CFG_WR2PRE, 
MBA_TMR1Q_CFG_WR2PRE_LEN) | - PPC_PLACE(mem_data.nrtp, MBA_TMR1Q_CFG_RD2PRE, MBA_TMR1Q_CFG_RD2PRE_LEN) | + PPC_PLACE(mem_data[chip].nrtp, + MBA_TMR1Q_CFG_RD2PRE, + MBA_TMR1Q_CFG_RD2PRE_LEN) | PPC_PLACE(mca->nrrd_s, MBA_TMR1Q_TRRD, MBA_TMR1Q_TRRD_LEN) | PPC_PLACE(mca->nrrd_l, MBA_TMR1Q_TRRD_SBG, MBA_TMR1Q_TRRD_SBG_LEN) | PPC_PLACE(var_dly, MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY, @@ -416,7 +418,7 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) PPC_PLACE(3, MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD, MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD_LEN) | - PPC_PLACE(mem_data.nrefi / (8 * 2 * log_ranks), + PPC_PLACE(mem_data[chip].nrefi / (8 * 2 * log_ranks), MBAREF0Q_CFG_REFRESH_INTERVAL, MBAREF0Q_CFG_REFRESH_INTERVAL_LEN) | PPC_PLACE(mca->nrfc, @@ -425,7 +427,7 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) PPC_PLACE(mca->nrfc_dlr, MBAREF0Q_CFG_REFR_TSV_STACK, MBAREF0Q_CFG_REFR_TSV_STACK_LEN) | - PPC_PLACE(((mem_data.nrefi / 8) * 6) / 5, + PPC_PLACE(((mem_data[chip].nrefi / 8) * 6) / 5, MBAREF0Q_CFG_REFR_CHECK_INTERVAL, MBAREF0Q_CFG_REFR_CHECK_INTERVAL_LEN)); @@ -450,11 +452,11 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) (l_def_MASTER_RANKS_DIMM0 != 4): 0 */ /* Perhaps these can be done by ns_to_nck(), but Hostboot used a forest of ifs */ - uint64_t pup_avail = mem_data.speed == 1866 ? 6 : - mem_data.speed == 2133 ? 7 : - mem_data.speed == 2400 ? 8 : 9; - uint64_t p_up_dn = mem_data.speed == 1866 ? 5 : - mem_data.speed == 2666 ? 7 : 6; + uint64_t pup_avail = mem_data[chip].speed == 1866 ? 6 : + mem_data[chip].speed == 2133 ? 7 : + mem_data[chip].speed == 2400 ? 8 : 9; + uint64_t p_up_dn = mem_data[chip].speed == 1866 ? 5 : + mem_data[chip].speed == 2666 ? 7 : 6; mca_and_or(chip, id, mca_i, MBARPC0Q, ~PPC_BITMASK(6, 21), PPC_PLACE(pup_avail, MBARPC0Q_CFG_PUP_AVAIL, MBARPC0Q_CFG_PUP_AVAIL_LEN) | PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PDN_PUP, MBARPC0Q_CFG_PDN_PUP_LEN) | @@ -480,17 +482,17 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) MSS_FREQ_EQ_2666: 939 [46-56] MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL = ATTR_EFF_DRAM_TREFI / (8 * (DIMM0 + DIMM1 logical ranks)) */ - uint64_t tcksr_ex = mem_data.speed == 1866 ? 10 : - mem_data.speed == 2133 ? 11 : - mem_data.speed == 2400 ? 12 : 14; - uint64_t txsdll = mem_data.speed == 1866 ? 597 : - mem_data.speed == 2666 ? 939 : 768; + uint64_t tcksr_ex = mem_data[chip].speed == 1866 ? 10 : + mem_data[chip].speed == 2133 ? 11 : + mem_data[chip].speed == 2400 ? 12 : 14; + uint64_t txsdll = mem_data[chip].speed == 1866 ? 597 : + mem_data[chip].speed == 2666 ? 939 : 768; mca_and_or(chip, id, mca_i, MBASTR0Q, ~(PPC_BITMASK(12, 37) | PPC_BITMASK(46, 56)), PPC_PLACE(5, MBASTR0Q_CFG_TCKESR, MBASTR0Q_CFG_TCKESR_LEN) | PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRE, MBASTR0Q_CFG_TCKSRE_LEN) | PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRX, MBASTR0Q_CFG_TCKSRX_LEN) | PPC_PLACE(txsdll, MBASTR0Q_CFG_TXSDLL, MBASTR0Q_CFG_TXSDLL_LEN) | - PPC_PLACE(mem_data.nrefi / + PPC_PLACE(mem_data[chip].nrefi / (8 * (mca->dimm[0].log_ranks + mca->dimm[1].log_ranks)), MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL, MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL_LEN)); @@ -526,7 +528,7 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) * ATTR_MSS_FREQ is in MT/s (sigh). */ uint32_t pb_freq = pb_cfg->fabric_freq; - uint64_t mn_freq_ratio = 1000 * mem_data.speed / pb_freq; + uint64_t mn_freq_ratio = 1000 * mem_data[chip].speed / pb_freq; uint64_t val_to_data = mn_freq_ratio < 915 ? 3 : mn_freq_ratio < 1150 ? 4 : mn_freq_ratio < 1300 ? 
5 : 6; @@ -639,9 +641,9 @@ static void p9n_ddrphy_scom(uint8_t chip, int mcs_i, int mca_i) * Hostboot sets this to proper value in phy_scominit(), but I don't see * why. Speed is the same for whole MCBIST anyway. */ - uint64_t strength = mem_data.speed == 1866 ? 1 : - mem_data.speed == 2133 ? 2 : - mem_data.speed == 2400 ? 4 : 8; + uint64_t strength = mem_data[chip].speed == 1866 ? 1 : + mem_data[chip].speed == 2133 ? 2 : + mem_data[chip].speed == 2400 ? 4 : 8; for (dp = 0; dp < 5; dp++) { /* IOM0.DDRPHY_DP16_DLL_VREG_CONTROL0_P0_{0,1,2,3,4} = @@ -895,7 +897,7 @@ static void p9n_mcbist_scom(uint8_t chip, int mcs_i) static void set_rank_pairs(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* * Assumptions: * - non-LR DIMMs (platform wiki), @@ -1069,7 +1071,7 @@ static const uint16_t x8_clk[8][5] = { static void reset_clock_enable(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* Assume the same rank configuration for both DIMMs */ int dp; int width = mca->dimm[0].present ? mca->dimm[0].width : @@ -1122,7 +1124,7 @@ static void reset_clock_enable(uint8_t chip, int mcs_i, int mca_i) static void reset_rd_vref(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int dp; int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : @@ -1199,10 +1201,10 @@ static void pc_reset(uint8_t chip, int mcs_i, int mca_i) chiplet_id_t id = mcs_ids[mcs_i]; /* These are from VPD */ /* - uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data.speed == 1866 ? 1 : 2; - uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data.speed == 1866 ? 4 : - mem_data.speed == 2133 ? 5 : - mem_data.speed == 2400 ? 6 : 7; + uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data[chip].speed == 1866 ? 1 : 2; + uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data[chip].speed == 1866 ? 4 : + mem_data[chip].speed == 2133 ? 5 : + mem_data[chip].speed == 2400 ? 6 : 7; */ /* IOM0.DDRPHY_PC_CONFIG0_P0 has been reset in p9n_ddrphy_scom() */ @@ -1220,10 +1222,10 @@ static void pc_reset(uint8_t chip, int mcs_i, int mca_i) * be the same as in VPD, yet WLO is 3 and RLO is 5 when written to SCOM... * * These are from VPD: - * uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data.speed == 1866 ? 1 : 2; - * uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data.speed == 1866 ? 4 : - * mem_data.speed == 2133 ? 5 : - * mem_data.speed == 2400 ? 6 : 7; + * uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data[chip].speed == 1866 ? 1 : 2; + * uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data[chip].speed == 1866 ? 4 : + * mem_data[chip].speed == 2133 ? 5 : + * mem_data[chip].speed == 2400 ? 6 : 7; */ mca_and_or(chip, id, mca_i, DDRPHY_PC_CONFIG1_P0, ~(PPC_BITMASK(48, 55) | PPC_BITMASK(59, 62)), @@ -1248,7 +1250,7 @@ static void pc_reset(uint8_t chip, int mcs_i, int mca_i) static void wc_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* IOM0.DDRPHY_WC_CONFIG0_P0 = [all] 0 @@ -1274,9 +1276,9 @@ static void wc_reset(uint8_t chip, int mcs_i, int mca_i) * FIXME: again, tWLO = 3 in Hostboot. Why? * This is still much smaller than tWLDQSEN so leave it, for now. 
*/ - uint64_t tWLO = mem_data.speed == 1866 ? 1 : 2; - uint64_t tWLOE = ns_to_nck(2); - uint64_t tWLDQSEN = MAX(25, tMOD + (mem_data.cwl - 2) + 1); + uint64_t tWLO = mem_data[chip].speed == 1866 ? 1 : 2; + uint64_t tWLOE = ns_to_nck(chip, 2); + uint64_t tWLDQSEN = MAX(25, tMOD + (mem_data[chip].cwl - 2) + 1); /* * Use the version from the code, it may be longer than necessary but it * works. Note that MAX() always expands to CWL + 23 + 24 = 47 + CWL, which @@ -1314,7 +1316,9 @@ static void wc_reset(uint8_t chip, int mcs_i, int mca_i) /* There is no Additive Latency. */ mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG2_P0, 0, PPC_PLACE(5, NUM_VALID_SAMPLES, NUM_VALID_SAMPLES_LEN) | - PPC_PLACE(MAX(mca->nwtr_s + 11, mem_data.nrtp + 3), FW_RD_WR, FW_RD_WR_LEN) | + PPC_PLACE(MAX(mca->nwtr_s + 11, mem_data[chip].nrtp + 3), + FW_RD_WR, + FW_RD_WR_LEN) | PPC_PLACE(5, IPW_WR_WR, IPW_WR_WR_LEN)); /* IOM0.DDRPHY_WC_CONFIG3_P0 = @@ -1330,14 +1334,14 @@ static void wc_reset(uint8_t chip, int mcs_i, int mca_i) [50-59] WR_CTR_VREF_COUNTER_RESET_VAL = 150ns in clock cycles // JESD79-4C Table 67 */ mca_and_or(chip, id, mca_i, DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0, ~PPC_BITMASK(48, 59), - PPC_PLACE(ns_to_nck(150), WR_CTR_VREF_COUNTER_RESET_VAL, + PPC_PLACE(ns_to_nck(chip, 150), WR_CTR_VREF_COUNTER_RESET_VAL, WR_CTR_VREF_COUNTER_RESET_VAL_LEN)); } static void rc_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* IOM0.DDRPHY_RC_CONFIG0_P0 [all] 0 @@ -1378,9 +1382,9 @@ static void rc_reset(uint8_t chip, int mcs_i, int mca_i) MSS_FREQ_EQ_2400: 0x0a50 MSS_FREQ_EQ_2666: 0x0b74 // use this value for all freqs maybe? */ - uint64_t wait_time = mem_data.speed == 1866 ? 0x0804 : - mem_data.speed == 2133 ? 0x092A : - mem_data.speed == 2400 ? 0x0A50 : 0x0B74; + uint64_t wait_time = mem_data[chip].speed == 1866 ? 0x0804 : + mem_data[chip].speed == 2133 ? 0x092A : + mem_data[chip].speed == 2400 ? 0x0A50 : 0x0B74; mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, 0, PPC_PLACE(wait_time, 48, 16)); @@ -1404,7 +1408,7 @@ static inline int log2_up(uint32_t x) static void seq_reset(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : (mca->dimm[1].mranks == 2 ? 2 : 0); if (mca->dimm[0].present && mca->dimm[1].present) @@ -1471,7 +1475,7 @@ static void seq_reset(uint8_t chip, int mcs_i, int mca_i) */ /* AL and PL are disabled (0) */ mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM2_P0, 0, - PPC_PLACE(log2_up(mem_data.cwl - 2), TODTLON_OFF_CYCLES, + PPC_PLACE(log2_up(mem_data[chip].cwl - 2), TODTLON_OFF_CYCLES, TODTLON_OFF_CYCLES_LEN) | PPC_PLACE(0x777, 52, 12)); @@ -1653,12 +1657,12 @@ static void reset_ctle_cntl(uint8_t chip, int mcs_i, int mca_i) static void reset_delay(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* See comments in ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0 for layout */ - int speed_idx = mem_data.speed == 1866 ? 0 : - mem_data.speed == 2133 ? 8 : - mem_data.speed == 2400 ? 16 : 24; + int speed_idx = mem_data[chip].speed == 1866 ? 0 : + mem_data[chip].speed == 2133 ? 8 : + mem_data[chip].speed == 2400 ? 
16 : 24; int dimm_idx = (mca->dimm[0].present && mca->dimm[1].present) ? 4 : 0; /* TODO: second CPU not supported */ int vpd_idx = speed_idx + dimm_idx + mcs_i; @@ -1917,9 +1921,9 @@ static void reset_delay(uint8_t chip, int mcs_i, int mca_i) static void reset_tsys_adr(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - int i = mem_data.speed == 1866 ? 0 : - mem_data.speed == 2133 ? 1 : - mem_data.speed == 2400 ? 2 : 3; + int i = mem_data[chip].speed == 1866 ? 0 : + mem_data[chip].speed == 2133 ? 1 : + mem_data[chip].speed == 2400 ? 2 : 3; /* IOM0.DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S{0,1} = [all] 0 @@ -1940,9 +1944,9 @@ static void reset_tsys_adr(uint8_t chip, int mcs_i, int mca_i) static void reset_tsys_data(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - int i = mem_data.speed == 1866 ? 0 : - mem_data.speed == 2133 ? 1 : - mem_data.speed == 2400 ? 2 : 3; + int i = mem_data[chip].speed == 1866 ? 0 : + mem_data[chip].speed == 2133 ? 1 : + mem_data[chip].speed == 2400 ? 2 : 3; int dp; /* IOM0.DDRPHY_DP16_WRCLK_PR_P0_{0,1,2,3,4} = @@ -2107,7 +2111,7 @@ static void reset_io_impedances(uint8_t chip, int mcs_i, int mca_i) static void reset_wr_vref_registers(uint8_t chip, int mcs_i, int mca_i) { chiplet_id_t id = mcs_ids[mcs_i]; - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int dp; int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : (mca->dimm[1].mranks == 2 ? 2 : 0); @@ -2349,11 +2353,11 @@ static void mss_scominit(uint8_t chip) for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { /* No need to initialize a non-functional MCS */ - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* * 0th MCA is 'magic' - it has a logic PHY block that is not contained * in other MCA. The magic MCA must be always initialized, even when it @@ -2377,11 +2381,11 @@ static void mss_scominit(uint8_t chip) /* This double loop is a part of phy_scominit() in Hostboot, but this is simpler. */ for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; /* No magic for phy_scominit(). */ if (mca->functional) phy_scominit(chip, mcs_i, mca_i); diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c index d2140c393cb..9f133f3f4f3 100644 --- a/src/soc/ibm/power9/istep_13_9.c +++ b/src/soc/ibm/power9/istep_13_9.c @@ -60,7 +60,7 @@ static int test_bb_lock(uint8_t chip, int mcs_i) int mca_i, dp; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* @@ -153,7 +153,7 @@ static void check_during_phy_reset(uint8_t chip, int mcs_i) /* If any of these bits is set, report error. Clear them unconditionally. 
*/ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (mca_i != 0 && !mem_data.mcs[mcs_i].mca[mca_i].functional) + if (mca_i != 0 && !mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* MC01.PORT0.SRQ.MBACALFIR @@ -235,7 +235,7 @@ static void fir_unmask(uint8_t chip, int mcs_i) 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* @@ -344,11 +344,11 @@ static void mss_ddr_phy_reset(uint8_t chip) * loop, unclear if we can break it into pieces too. */ for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (mca_i != 0 && !mca->functional) continue; @@ -383,10 +383,10 @@ static void mss_ddr_phy_reset(uint8_t chip) PPC_BIT(MBA_CAL0Q_RESET_RECOVER)); } - delay_nck(32); + delay_nck(chip, 32); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (mca_i != 0 && !mca->functional) continue; @@ -429,10 +429,10 @@ static void mss_ddr_phy_reset(uint8_t chip) } } - delay_nck(32); + delay_nck(chip, 32); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (mca_i != 0 && !mca->functional) continue; @@ -475,7 +475,7 @@ static void mss_ddr_phy_reset(uint8_t chip) ~0, PPC_BIT(ENABLE_ZCAL)); /* Maybe it would be better to add another 1us later instead of this. */ - delay_nck(1024); + delay_nck(chip, 1024); /* for each magic MCA */ /* 50*10ns, but we don't have such precision. */ @@ -491,7 +491,7 @@ static void mss_ddr_phy_reset(uint8_t chip) * that make whole MCBIST non-functional? */ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; @@ -537,7 +537,7 @@ static void mss_ddr_phy_reset(uint8_t chip) * * Why assume worst case instead of making the next timeout bigger? */ - delay_nck(37382); + delay_nck(chip, 37382); /* * The comment before poll says: @@ -567,7 +567,7 @@ static void mss_ddr_phy_reset(uint8_t chip) */ need_dll_workaround = false; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (mem_data.mcs[mcs_i].mca[mca_i].functional) + if (mem_data[chip].mcs[mcs_i].mca[mca_i].functional) break; } /* 50*10ns, but we don't have such precision. */ @@ -584,7 +584,7 @@ static void mss_ddr_phy_reset(uint8_t chip) * be written to while hardware calibration is in progress. */ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (need_dll_workaround) break; @@ -623,7 +623,7 @@ static void mss_ddr_phy_reset(uint8_t chip) /* Start bang-bang-lock */ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; @@ -661,7 +661,7 @@ static void mss_ddr_phy_reset(uint8_t chip) * Wait at least 5932 dphy_nclk clock cycles to allow the dphy_nclk/SysClk * alignment circuit to perform initial alignment. 
*/ - delay_nck(5932); + delay_nck(chip, 5932); /* Check for LOCK in {DP16,ADR}_SYSCLK_PR_VALUE */ /* 50*10ns, but we don't have such precision. */ @@ -681,7 +681,7 @@ static void mss_ddr_phy_reset(uint8_t chip) die("BB lock timeout\n"); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; @@ -757,12 +757,12 @@ static void mss_ddr_phy_reset(uint8_t chip) } /* Wait at least 32 dphy_nclk clock cycles */ - delay_nck(32); + delay_nck(chip, 32); /* Done bang-bang-lock */ /* Per J. Bialas, force_mclk_low can be dasserted */ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index ba7292fad66..62e6dffa47a 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -52,7 +52,7 @@ static void fir_unmask(uint8_t chip, int mcs_i) for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { uint64_t val; - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* From broadcast_out_of_sync() workaround: @@ -177,7 +177,7 @@ static void set_fifo_mode(uint8_t chip, int mcs_i, int fifo) * [5] MBA_WRQ0Q_CFG_WRQ_FIFO_MODE = fifo */ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; mca_and_or(chip, id, mca_i, MBA_RRQ0Q, ~PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE), @@ -204,7 +204,7 @@ static void load_maint_pattern(uint8_t chip, int mcs_i, const uint64_t pat[16]) for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { int i; - if (!mem_data.mcs[mcs_i].mca[mca_i].functional) + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) continue; /* MC01.PORT0.ECC64.SCOM.AACR @@ -438,7 +438,7 @@ static void mss_memdiag(uint8_t chip) int mcs_i, mca_i; for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; /* @@ -462,7 +462,7 @@ static void mss_memdiag(uint8_t chip) * maintenance pattern write can succeed for the same configuration. 
*/ for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; int dimm; if (!mca->functional) continue; @@ -506,7 +506,7 @@ static void mss_memdiag(uint8_t chip) long total_time = 0; for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; /* diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c index 716e84aa109..fdee67e9e62 100644 --- a/src/soc/ibm/power9/istep_14_5.c +++ b/src/soc/ibm/power9/istep_14_5.c @@ -209,11 +209,11 @@ static void fill_groups(uint8_t chip) memset(mcfgp_regs, 0, sizeof(mcfgp_regs)); for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data.mcs[mcs_i].mca[mca_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; if (!mca->functional) continue; @@ -324,7 +324,7 @@ static void proc_setup_bars(uint8_t chip) for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; - if (!mem_data.mcs[mcs_i].functional) + if (!mem_data[chip].mcs[mcs_i].functional) continue; fir_unmask(chip, mcs_i); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 1759579004a..ec1fe47aa80 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -29,7 +29,7 @@ #define DIMM6 0x56 #define DIMM7 0x57 -mcbist_data_t mem_data; +mcbist_data_t mem_data[MAX_CHIPS]; static void dump_mca_data(mca_data_t *mca) { @@ -85,31 +85,32 @@ static inline bool is_proper_dimm(spd_raw_data spd, int slot) return true; } -static void mark_nonfunctional(int mcs, int mca) +static void mark_nonfunctional(uint8_t chip, int mcs, int mca) { - mem_data.mcs[mcs].mca[mca].functional = false; + mem_data[chip].mcs[mcs].mca[mca].functional = false; /* Propagate upwards */ - if (!mem_data.mcs[mcs].mca[mca ^ 1].functional) { - mem_data.mcs[mcs].functional = false; - if (!mem_data.mcs[mcs ^ 1].functional) - die("No functional MCS left"); + if (!mem_data[chip].mcs[mcs].mca[mca ^ 1].functional) { + mem_data[chip].mcs[mcs].functional = false; + if (!mem_data[chip].mcs[mcs ^ 1].functional) + printk(BIOS_INFO, "No functional MCS left on chip %d\n", chip); } } -static uint64_t find_min_mtb_ftb(rdimm_data_t *dimm, int mtb_idx, int ftb_idx) +static uint64_t find_min_mtb_ftb(uint8_t chip, rdimm_data_t *dimm, int mtb_idx, int ftb_idx) { uint64_t val0 = 0, val1 = 0; if (dimm[0].present) - val0 = mtb_ftb_to_nck(dimm[0].spd[mtb_idx], (int8_t)dimm[0].spd[ftb_idx]); + val0 = mtb_ftb_to_nck(chip, dimm[0].spd[mtb_idx], (int8_t)dimm[0].spd[ftb_idx]); if (dimm[1].present) - val1 = mtb_ftb_to_nck(dimm[1].spd[mtb_idx], (int8_t)dimm[1].spd[ftb_idx]); + val1 = mtb_ftb_to_nck(chip, dimm[1].spd[mtb_idx], (int8_t)dimm[1].spd[ftb_idx]); return (val0 < val1) ? val1 : val0; } -static uint64_t find_min_multi_mtb(rdimm_data_t *dimm, int mtb_l, int mtb_h, uint8_t mask, int shift) +static uint64_t find_min_multi_mtb(uint8_t chip, rdimm_data_t *dimm, int mtb_l, int mtb_h, + uint8_t mask, int shift) { uint64_t val0 = 0, val1 = 0; @@ -118,14 +119,15 @@ static uint64_t find_min_multi_mtb(rdimm_data_t *dimm, int mtb_l, int mtb_h, uin if (dimm[1].present) val1 = dimm[1].spd[mtb_l] | ((dimm[1].spd[mtb_h] & mask) << shift); - return (val0 < val1) ? 
mtb_ftb_to_nck(val1, 0) : mtb_ftb_to_nck(val0, 0); + return (val0 < val1) ? mtb_ftb_to_nck(chip, val1, 0) : mtb_ftb_to_nck(chip, val0, 0); } /* This is most of step 7 condensed into one function */ -static void prepare_dimm_data(void) +static void prepare_cpu_dimm_data(uint8_t chip) { int i, mcs, mca; int tckmin = 0x06; // Platform limit + unsigned int spd_bus = I2C_BUSES_PER_CPU * chip + SPD_I2C_BUS; /* * DIMMs 4-7 are under a different port. This is not the same as bus, but we @@ -138,7 +140,7 @@ static void prepare_dimm_data(void) DIMM4 | 0x80, DIMM5 | 0x80, DIMM6 | 0x80, DIMM7 | 0x80 }, }; - get_spd_i2c(SPD_I2C_BUS, &blk); + get_spd_i2c(spd_bus, &blk); dump_spd_info(&blk); /* @@ -157,13 +159,13 @@ static void prepare_dimm_data(void) /* Maximum for 2 DIMMs on one port (channel, MCA) is 2400 MT/s */ - if (tckmin < 0x07 && mem_data.mcs[mcs].mca[mca].functional) + if (tckmin < 0x07 && mem_data[chip].mcs[mcs].mca[mca].functional) tckmin = 0x07; - mem_data.mcs[mcs].functional = true; - mem_data.mcs[mcs].mca[mca].functional = true; + mem_data[chip].mcs[mcs].functional = true; + mem_data[chip].mcs[mcs].mca[mca].functional = true; - rdimm_data_t *dimm = &mem_data.mcs[mcs].mca[mca].dimm[dimm_idx]; + rdimm_data_t *dimm = &mem_data[chip].mcs[mcs].mca[mca].dimm[dimm_idx]; dimm->present = true; dimm->spd = blk.spd_array[i]; @@ -191,54 +193,45 @@ static void prepare_dimm_data(void) } } - /* - * There is one (?) MCBIST per CPU. Fail if there are no supported DIMMs - * connected, otherwise assume it is functional. There is no reason to redo - * this test in the rest of isteps. - * - * TODO: 2 CPUs with one DIMM (in total) will not work with this code. - */ - if (!mem_data.mcs[0].functional && !mem_data.mcs[1].functional) - die("No DIMMs detected, aborting\n"); - switch (tckmin) { /* For CWL assume 1tCK write preamble */ case 0x06: - mem_data.speed = 2666; - mem_data.cwl = 14; + mem_data[chip].speed = 2666; + mem_data[chip].cwl = 14; break; case 0x07: - mem_data.speed = 2400; - mem_data.cwl = 12; + mem_data[chip].speed = 2400; + mem_data[chip].cwl = 12; break; case 0x08: - mem_data.speed = 2133; - mem_data.cwl = 11; + mem_data[chip].speed = 2133; + mem_data[chip].cwl = 11; break; case 0x09: - mem_data.speed = 1866; - mem_data.cwl = 10; + mem_data[chip].speed = 1866; + mem_data[chip].cwl = 10; break; default: die("Unsupported tCKmin: %d ps (+/- 125)\n", tckmin * 125); } /* Now that we know our speed, we can calculate the rest of the data */ - mem_data.nrefi = ns_to_nck(7800); - mem_data.nrtp = ps_to_nck(7500); + mem_data[chip].nrefi = ns_to_nck(chip, 7800); + mem_data[chip].nrtp = ps_to_nck(chip, 7500); printk(BIOS_SPEW, "Common memory parameters:\n" "\tspeed =\t%d MT/s\n" "\tREFI =\t%d clock cycles\n" "\tCWL =\t%d clock cycles\n" "\tRTP =\t%d clock cycles\n", - mem_data.speed, mem_data.nrefi, mem_data.cwl, mem_data.nrtp); + mem_data[chip].speed, mem_data[chip].nrefi, + mem_data[chip].cwl, mem_data[chip].nrtp); for (mcs = 0; mcs < MCS_PER_PROC; mcs++) { - if (!mem_data.mcs[mcs].functional) continue; + if (!mem_data[chip].mcs[mcs].functional) continue; for (mca = 0; mca < MCA_PER_MCS; mca++) { - if (!mem_data.mcs[mcs].mca[mca].functional) continue; + if (!mem_data[chip].mcs[mcs].mca[mca].functional) continue; - rdimm_data_t *dimm = mem_data.mcs[mcs].mca[mca].dimm; + rdimm_data_t *dimm = mem_data[chip].mcs[mcs].mca[mca].dimm; uint32_t val0, val1, common; int min; /* Minimum compatible with both DIMMs is the bigger value */ @@ -249,7 +242,7 @@ static void prepare_dimm_data(void) common = val0 & val1; /* 
tAAmin - minimum CAS latency time */ - min = find_min_mtb_ftb(dimm, 24, 123); + min = find_min_mtb_ftb(chip, dimm, 24, 123); while (min <= 36 && ((common >> (min - 7)) & 1) == 0) min++; @@ -257,11 +250,11 @@ static void prepare_dimm_data(void) /* Maybe just die() instead? */ printk(BIOS_WARNING, "Cannot find CL supported by all DIMMs under MCS%d, MCA%d." " Marking as nonfunctional.\n", mcs, mca); - mark_nonfunctional(mcs, mca); + mark_nonfunctional(chip, mcs, mca); continue; } - mem_data.mcs[mcs].mca[mca].cl = min; + mem_data[chip].mcs[mcs].mca[mca].cl = min; /* * There are also minimal values in Table 170 of JEDEC Standard No. 79-4C which @@ -272,38 +265,38 @@ static void prepare_dimm_data(void) */ /* Minimum CAS to CAS Delay Time, Same Bank Group */ - mem_data.mcs[mcs].mca[mca].nccd_l = find_min_mtb_ftb(dimm, 40, 117); + mem_data[chip].mcs[mcs].mca[mca].nccd_l = find_min_mtb_ftb(chip, dimm, 40, 117); /* Minimum Write to Read Time, Different Bank Group */ - mem_data.mcs[mcs].mca[mca].nwtr_s = find_min_multi_mtb(dimm, 44, 43, 0x0F, 8); + mem_data[chip].mcs[mcs].mca[mca].nwtr_s = find_min_multi_mtb(chip, dimm, 44, 43, 0x0F, 8); /* Minimum Write to Read Time, Same Bank Group */ - mem_data.mcs[mcs].mca[mca].nwtr_l = find_min_multi_mtb(dimm, 45, 43, 0xF0, 4); + mem_data[chip].mcs[mcs].mca[mca].nwtr_l = find_min_multi_mtb(chip, dimm, 45, 43, 0xF0, 4); /* Minimum Four Activate Window Delay Time */ - mem_data.mcs[mcs].mca[mca].nfaw = find_min_multi_mtb(dimm, 37, 36, 0x0F, 8); + mem_data[chip].mcs[mcs].mca[mca].nfaw = find_min_multi_mtb(chip, dimm, 37, 36, 0x0F, 8); /* Minimum RAS to CAS Delay Time */ - mem_data.mcs[mcs].mca[mca].nrcd = find_min_mtb_ftb(dimm, 25, 122); + mem_data[chip].mcs[mcs].mca[mca].nrcd = find_min_mtb_ftb(chip, dimm, 25, 122); /* Minimum Row Precharge Delay Time */ - mem_data.mcs[mcs].mca[mca].nrp = find_min_mtb_ftb(dimm, 26, 121); + mem_data[chip].mcs[mcs].mca[mca].nrp = find_min_mtb_ftb(chip, dimm, 26, 121); /* Minimum Active to Precharge Delay Time */ - mem_data.mcs[mcs].mca[mca].nras = find_min_multi_mtb(dimm, 28, 27, 0x0F, 8); + mem_data[chip].mcs[mcs].mca[mca].nras = find_min_multi_mtb(chip, dimm, 28, 27, 0x0F, 8); /* Minimum Write Recovery Time */ - mem_data.mcs[mcs].mca[mca].nwr = find_min_multi_mtb(dimm, 42, 41, 0x0F, 8); + mem_data[chip].mcs[mcs].mca[mca].nwr = find_min_multi_mtb(chip, dimm, 42, 41, 0x0F, 8); /* Minimum Activate to Activate Delay Time, Different Bank Group */ - mem_data.mcs[mcs].mca[mca].nrrd_s = find_min_mtb_ftb(dimm, 38, 119); + mem_data[chip].mcs[mcs].mca[mca].nrrd_s = find_min_mtb_ftb(chip, dimm, 38, 119); /* Minimum Activate to Activate Delay Time, Same Bank Group */ - mem_data.mcs[mcs].mca[mca].nrrd_l = find_min_mtb_ftb(dimm, 39, 118); + mem_data[chip].mcs[mcs].mca[mca].nrrd_l = find_min_mtb_ftb(chip, dimm, 39, 118); /* Minimum Refresh Recovery Delay Time */ /* Assuming no fine refresh mode. 
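For illustration, the (30, 31, 0xFF, 8) arguments in the nrfc line just below
follow the JEDEC DDR4 SPD layout: byte 30 holds the tRFC1min LSB and byte 31
the MSB, in 125 ps medium-timebase (MTB) units, and find_min_multi_mtb()
reconstructs them as spd[30] | ((spd[31] & 0xFF) << 8). Assuming, as an
example, an 8 Gb device with tRFC1 = 350 ns:

	/* 350 ns / 125 ps = 2800 MTB units = 0x0AF0, i.e.        */
	/*   spd[30] = 0xF0 (LSB), spd[31] = 0x0A (MSB)           */
	/*   -> 2800 MTB, roughly 420 clock cycles at 2400 MT/s   */
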
*/ - mem_data.mcs[mcs].mca[mca].nrfc = find_min_multi_mtb(dimm, 30, 31, 0xFF, 8); + mem_data[chip].mcs[mcs].mca[mca].nrfc = find_min_multi_mtb(chip, dimm, 30, 31, 0xFF, 8); /* Minimum Refresh Recovery Delay Time for Different Logical Rank (3DS only) */ /* @@ -319,24 +312,50 @@ static void prepare_dimm_data(void) switch (min) { case 0x4: - mem_data.mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(90); + mem_data[chip].mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(chip, 90); break; case 0x5: - mem_data.mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(120); + mem_data[chip].mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(chip, 120); break; case 0x6: - mem_data.mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(185); + mem_data[chip].mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(chip, 185); break; default: die("Unsupported DRAM density\n"); } printk(BIOS_SPEW, "MCS%d, MCA%d times (in clock cycles):\n", mcs, mca); - dump_mca_data(&mem_data.mcs[mcs].mca[mca]); + dump_mca_data(&mem_data[chip].mcs[mcs].mca[mca]); } } } +/* This is most of step 7 condensed into one function */ +static void prepare_dimm_data(uint8_t chips) +{ + bool have_dimms = false; + + uint8_t chip; + + for (chip = 0; chip < MAX_CHIPS; chip++) { + int mcs; + + if (chips & (1 << chip)) + prepare_cpu_dimm_data(chip); + + for (mcs = 0; mcs < MCS_PER_PROC; mcs++) + have_dimms |= mem_data[chip].mcs[mcs].functional; + } + + /* + * There is one (?) MCBIST per CPU. Fail if there are no supported DIMMs + * connected, otherwise assume it is functional. There is no reason to redo + * this test in the rest of isteps. + */ + if (!have_dimms) + die("No DIMMs detected, aborting\n"); +} + void main(void) { uint8_t chips; @@ -385,7 +404,7 @@ void main(void) timestamp_add_now(TS_INITRAM_START); vpd_pnor_main(); - prepare_dimm_data(); + prepare_dimm_data(chips); report_istep(13, 1); // no-op istep_13_2(chips); From cb11891fecc00260f75815aeb824d654a42952ae Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 9 Feb 2022 23:56:36 +0200 Subject: [PATCH 149/213] soc/power9: extract PROC_BASE_ADDR() macro Used in three places already with at least one more coming. Change-Id: Ibe075f02a8c17061e55cf1851aed8c9f483e3caa Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/proc.h | 13 +++++++++++++ src/soc/ibm/power9/homer.c | 5 +---- src/soc/ibm/power9/istep_14_3.c | 13 ++----------- src/soc/ibm/power9/istep_14_5.c | 11 +---------- 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/src/include/cpu/power/proc.h b/src/include/cpu/power/proc.h index ad6b21c6abc..ccf504e0e75 100644 --- a/src/include/cpu/power/proc.h +++ b/src/include/cpu/power/proc.h @@ -34,4 +34,17 @@ _Static_assert(CONFIG_MAX_CPUS <= MAX_CHIPS, "Too many CPUs requested"); /* Frequency of XBus for Nimbus */ #define FREQ_X_MHZ 2000 +/* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + * + * "nm" means non-mirrored. 
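As a quick sanity check (assuming PPC_PLACE(val, pos, len) left-justifies val
into big-endian bits pos..pos+len-1 of a 64-bit value), the macro defined just
below evaluates to, for example:

	/* PROC_BASE_ADDR(0, 0x0) = 0x0000000000000000   chip 0 non-mirrored base */
	/* PROC_BASE_ADDR(1, 0x0) = 0x0000200000000000   chip 1 base, i.e. 32 TiB */
	/* PROC_BASE_ADDR(0, 0x3) = 0x0006000000000000   chip 0 MMIO base         */
	/* PROC_BASE_ADDR(1, 0x3) = 0x0006200000000000   chip 1 MMIO base         */

which is consistent with fill_groups() further below shifting the non-mirrored
base right by 32 to express it in 4 GiB units (0x0 for chip 0, 0x2000 for
chip 1).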
+ */ +#define PROC_BASE_ADDR(chip, msel) ( \ + PPC_PLACE(0x0, 8, 5) | /* system ID */ \ + PPC_PLACE(msel, 13, 2) | /* msel (nm = 0b00/01, m = 0b10, mmio = 0b11) */ \ + PPC_PLACE(chip, 15, 4) | /* group ID */ \ + PPC_PLACE(0x0, 19, 3) /* chip ID */ \ + ) + #endif /* __SOC_IBM_POWER9_PROC_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 8f93f40d143..3dde47f5158 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2110,10 +2110,7 @@ static void populate_ncu_rng_bar_scom_reg(struct homer_st *homer) uint8_t ex = 0; - uint64_t regNcuRngBarData = PPC_PLACE(0x0, 8, 5) // system ID - | PPC_PLACE(0x3, 13, 2) // msel - | PPC_PLACE(0x0, 15, 4) // group ID - | PPC_PLACE(0x0, 19, 3); // chip ID + uint64_t regNcuRngBarData = PROC_BASE_ADDR(/*chip=*/0, /*msel=*/0x3); regNcuRngBarData += NX_RANGE_BAR_ADDR_OFFSET; diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index 86eef066d9d..7d33254444d 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -302,17 +302,8 @@ static void init_phbs(uint8_t chip, uint8_t phb_active_mask, const uint8_t *iova /* ATTR_PROC_PCIE_BAR_SIZE */ const uint64_t bar_sizes[3] = { 0 }; - /* - * Determine base address of chip MMIO range. - * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, - * when chip ID is actually a group ID and "chip ID" field is zero. - */ - uint64_t base_addr_mmio = 0; - base_addr_mmio |= PPC_PLACE(0, 8, 5); // ATTR_PROC_FABRIC_SYSTEM_ID - base_addr_mmio |= PPC_PLACE(chip, 15, 4); // ATTR_PROC_EFF_FABRIC_GROUP_ID - base_addr_mmio |= PPC_PLACE(0, 19, 3); // ATTR_PROC_EFF_FABRIC_CHIP_ID - base_addr_mmio |= PPC_PLACE(3, 13, 2); // FABRIC_ADDR_MSEL - // nm = 0b00/01, m = 0b10, mmio = 0b11 + /* Base address of chip MMIO range */ + const uint64_t base_addr_mmio = PROC_BASE_ADDR(chip, /*msel=*/0x3); uint8_t phb = 0; for (phb = 0; phb < MAX_PHB_PER_PROC; ++phb) { diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c index fdee67e9e62..c4523342ee1 100644 --- a/src/soc/ibm/power9/istep_14_5.c +++ b/src/soc/ibm/power9/istep_14_5.c @@ -192,19 +192,10 @@ static void add_group(struct mc_group groups[MCA_PER_PROC], int size, uint8_t ma /* TODO: make groups with > 1 MCA possible */ static void fill_groups(uint8_t chip) { - /* - * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, - * when chip ID is actually a group ID and "chip ID" field is zero. - */ - uint64_t proc_base_addr = PPC_PLACE(0x0, 8, 5) // system ID - | PPC_PLACE(0x0, 13, 2) // msel - | PPC_PLACE(chip, 15, 4) // group ID - | PPC_PLACE(0x0, 19, 3); // chip ID - int mcs_i, mca_i, i; struct mc_group groups[MCA_PER_PROC] = {0}; /* This is in 4GB units, as expected by registers. 
*/ - uint32_t cur_ba = proc_base_addr >> 32; + uint32_t cur_ba = PROC_BASE_ADDR(chip, /*msel=*/0x0) >> 32; memset(mcfgp_regs, 0, sizeof(mcfgp_regs)); From 85788d9037ff01281b57088b3fef5e87a2e50167 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 10 Feb 2022 01:32:35 +0200 Subject: [PATCH 150/213] soc/power9/chip.c: account for memory on the second CPU Change-Id: Ifeda279ab8ba5ccd20f5b5c0e2843d3704d42b8f Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 49 ++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 8cdd5ca3c94..9a545e74d79 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -14,6 +14,7 @@ #include "istep_13_scom.h" #include "chip.h" +#include "fsi.h" static uint64_t nominal_freq; @@ -390,28 +391,38 @@ static void rng_init(void) static void enable_soc_dev(struct device *dev) { - int mcs_i, idx = 0; + int chip, idx = 0; unsigned long reserved_size, top = 0; + uint8_t chips = fsi_get_present_chips(); - for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - uint64_t reg; - chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; - - /* These registers are undocumented, see istep 14.5. */ - /* MCS_MCFGP */ - reg = read_scom_for_chiplet(nest, 0x0501080A); - if (reg & PPC_BIT(0)) { - ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); - if (base_k(reg) + size_k(reg) > top) - top = base_k(reg) + size_k(reg); - } + for (chip = 0; chip < MAX_CHIPS; chip++) { + int mcs_i; + + if (!(chips & (1 << chip))) + continue; - /* MCS_MCFGPM */ - reg = read_scom_for_chiplet(nest, 0x0501080C); - if (reg & PPC_BIT(0)) { - ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); - if (base_k(reg) + size_k(reg) > top) - top = base_k(reg) + size_k(reg); + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + uint64_t reg; + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + /* These registers are undocumented, see istep 14.5. */ + /* MCS_MCFGP */ + reg = read_rscom_for_chiplet(chip, nest, 0x0501080A); + if (reg & PPC_BIT(0)) { + uint64_t end = base_k(reg) + size_k(reg); + ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); + if (end > top) + top = end; + } + + /* MCS_MCFGPM */ + reg = read_rscom_for_chiplet(chip, nest, 0x0501080C); + if (reg & PPC_BIT(0)) { + uint64_t end = base_k(reg) + size_k(reg); + ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); + if (end > top) + top = end; + } } } From 8e821b887004437e89fe4730eb2dddc7575b9989 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 01:16:26 +0200 Subject: [PATCH 151/213] soc/power9/homer.c: make build_homer_image() more manageable By extracting parts of it into functions. Change-Id: I163237407b167c2f58bd362e1846bf5c065f0676 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 183 ++++++++++++++++++++----------------- 1 file changed, 101 insertions(+), 82 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 3dde47f5158..ce5ab060cd5 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2270,10 +2270,7 @@ const struct voltage_bucket_data * get_voltage_data(void) return bucket; } -/* - * This logic is for SMF disabled only! 
- */ -uint64_t build_homer_image(void *homer_bar) +static void layout_rings(struct homer_st *homer, uint8_t dd, uint64_t cores) { static uint8_t rings_buf[300 * KiB]; @@ -2287,51 +2284,9 @@ uint64_t build_homer_image(void *homer_bar) .work_buf2 = work_buf2, .work_buf2_size = sizeof(work_buf2), .work_buf3 = work_buf3, .work_buf3_size = sizeof(work_buf3), }; - enum ring_variant ring_variant; - - struct mmap_helper_region_device mdev = {0}; - struct homer_st *homer = homer_bar; - struct xip_hw_header *hw = homer_bar; - uint8_t dd = get_dd(); - int this_core = -1; - uint64_t cores = get_available_cores(&this_core); - - if (this_core == -1) - die("Couldn't found active core\n"); - - printk(BIOS_ERR, "DD%2.2x, boot core: %d\n", dd, this_core); - - /* HOMER must be aligned to 4M because CME HRMOR has bit for 2M set */ - if (!IS_ALIGNED((uint64_t) homer_bar, 4 * MiB)) - die("HOMER (%p) is not aligned to 4MB\n", homer_bar); - - memset(homer_bar, 0, 4 * MiB); - - /* - * This will work as long as we don't call mmap(). mmap() calls - * mem_poll_alloc() which doesn't check if mdev->pool is valid or at least - * not NULL. - */ - mount_part_from_pnor("HCODE", &mdev); - /* First MB of HOMER is unused, we can write OCC image from PNOR there. */ - rdev_readat(&mdev.rdev, hw, 0, 1 * MiB); - - assert(hw->magic == XIP_MAGIC_HW); - assert(hw->image_size <= 1 * MiB); - - build_sgpe(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), - dd); - - build_self_restore(homer, - (struct xip_restore_header *)(homer_bar + hw->restore.offset), - dd, cores); - - build_cme(homer, (struct xip_cme_header *)(homer_bar + hw->cme.offset), dd); - build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), - dd); - - ring_variant = (dd < 0x23 ? RV_BASE : RV_RL4); + struct xip_hw_header *hw = (void *)homer; + enum ring_variant ring_variant = (dd < 0x23 ? RV_BASE : RV_RL4); get_ppe_scan_rings(hw, dd, PT_CME, &ring_data); layout_rings_for_cme(homer, &ring_data, cores, ring_variant); @@ -2343,35 +2298,12 @@ uint64_t build_homer_image(void *homer_bar) ring_data.work_buf3_size = sizeof(work_buf3); get_ppe_scan_rings(hw, dd, PT_SGPE, &ring_data); layout_rings_for_sgpe(homer, &ring_data, - (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), + (struct xip_sgpe_header *)((uint8_t *)homer + hw->sgpe.offset), cores, ring_variant); +} - build_parameter_blocks(homer, cores); - - update_headers(homer, cores); - - populate_epsilon_l2_scom_reg(homer); - populate_epsilon_l3_scom_reg(homer); - - /* Update L3 Refresh Timer Control SCOM Registers */ - populate_l3_refresh_scom_reg(homer, dd); - - /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ - populate_ncu_rng_bar_scom_reg(homer); - - /* Update flag fields in image headers */ - ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; - ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xF100; - ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xF032; - - // Set the Fabric IDs - // setFabricIds( pChipHomer, i_procTgt ); - // - doesn't modify anything? 
- - // Customize magic word based on endianness - // customizeMagicWord( pChipHomer ); - - /* Set up wakeup mode */ +static void setup_wakeup_mode(uint64_t cores) +{ for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { if (!IS_EC_FUNCTIONAL(i, cores)) continue; @@ -2386,16 +2318,20 @@ uint64_t build_homer_image(void *homer_bar) write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F0108, PPC_BIT(3) | PPC_BIT(4)); } +} - /* 15.2 set HOMER BAR */ - report_istep(15, 2); +/* 15.2 set HOMER BAR */ +static void istep_15_2(struct homer_st *homer) +{ write_scom(0x05012B00, (uint64_t)homer); write_scom(0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); write_scom(0x05012B02, (uint64_t)homer + 8 * 4 * MiB); // FIXME write_scom(0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); +} - /* 15.3 establish EX chiplet */ - report_istep(15, 3); +/* 15.3 establish EX chiplet */ +static void istep_15_3(uint64_t cores) +{ /* Multicast groups for cores were assigned in get_available_cores() */ for (int i = 0; i < MAX_QUADS_PER_CHIP; i++) { if (IS_EQ_FUNCTIONAL(i, cores) && @@ -2416,10 +2352,16 @@ uint64_t build_homer_image(void *homer_bar) qcsr |= PPC_BIT(i); } write_scom(0x0006C094, qcsr); +} - /* 15.4 start STOP engine */ - report_istep(15, 4); - +/* + * 15.4 start STOP engine + * + * SGPE startup is actually done as part of istep 21.1 after all + * preparations here to not have to restart it there. + */ +static void istep_15_4(uint64_t cores) +{ /* Initialize the PFET controllers */ for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { if (IS_EC_FUNCTIONAL(i, cores)) { @@ -2511,6 +2453,83 @@ uint64_t build_homer_image(void *homer_bar) [30] 1 // OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ */ write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); +} + +/* + * This logic is for SMF disabled only! + */ +uint64_t build_homer_image(void *homer_bar) +{ + struct mmap_helper_region_device mdev = {0}; + struct homer_st *homer = homer_bar; + struct xip_hw_header *hw = homer_bar; + uint8_t dd = get_dd(); + int this_core = -1; + uint64_t cores = get_available_cores(&this_core); + + if (this_core == -1) + die("Couldn't found active core\n"); + + printk(BIOS_ERR, "DD%2.2x, boot core: %d\n", dd, this_core); + + /* HOMER must be aligned to 4M because CME HRMOR has bit for 2M set */ + if (!IS_ALIGNED((uint64_t) homer_bar, 4 * MiB)) + die("HOMER (%p) is not aligned to 4MB\n", homer_bar); + + memset(homer_bar, 0, 4 * MiB); + + /* + * This will work as long as we don't call mmap(). mmap() calls + * mem_poll_alloc() which doesn't check if mdev->pool is valid or at least + * not NULL. + */ + mount_part_from_pnor("HCODE", &mdev); + /* First MB of HOMER is unused, we can write OCC image from PNOR there. 
*/ + rdev_readat(&mdev.rdev, hw, 0, 1 * MiB); + + assert(hw->magic == XIP_MAGIC_HW); + assert(hw->image_size <= 1 * MiB); + + build_sgpe(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), + dd); + + build_self_restore(homer, + (struct xip_restore_header *)(homer_bar + hw->restore.offset), + dd, cores); + + build_cme(homer, (struct xip_cme_header *)(homer_bar + hw->cme.offset), dd); + + build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), + dd); + + layout_rings(homer, dd, cores); + build_parameter_blocks(homer, cores); + update_headers(homer, cores); + + populate_epsilon_l2_scom_reg(homer); + populate_epsilon_l3_scom_reg(homer); + /* Update L3 Refresh Timer Control SCOM Registers */ + populate_l3_refresh_scom_reg(homer, dd); + /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ + populate_ncu_rng_bar_scom_reg(homer); + + /* Update flag fields in image headers */ + ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; + ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xf100; + ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xf032; + + // Set the Fabric IDs + // setFabricIds( pChipHomer, i_procTgt ); + // - doesn't modify anything? + + setup_wakeup_mode(cores); + + report_istep(15, 2); + istep_15_2(homer); + report_istep(15, 3); + istep_15_3(cores); + report_istep(15, 4); + istep_15_4(cores); /* Boot OCC here and activate SGPE at the same time */ istep_21_1(homer, cores); From f557b49c03f2e3add510fe6ed9e27374768865d5 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 18:13:30 +0200 Subject: [PATCH 152/213] soc/power9/homer.c: set fabric IDs in HOMER Change-Id: I843bc2d33b011e70d03f95095a9ed7a49202801b Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index ce5ab060cd5..561f9ea6d3d 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2302,6 +2302,33 @@ static void layout_rings(struct homer_st *homer, uint8_t dd, uint64_t cores) cores, ring_variant); } +/* Set the Fabric System, Group and Chip IDs into SGPE and CME headers */ +static void set_fabric_ids(uint8_t chip, struct homer_st *homer) +{ + struct cme_img_header *cme_hdr = (void *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + struct sgpe_img_header *sgpe_hdr = (void *) + &homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + + /* + * Location Ids has the form of: + * 0:3 Group ID (loaded from ATTR_PROC_FABRIC_GROUP_ID) + * 4:6 Chip ID (loaded from ATTR_PROC_FABRIC_CHIP_ID) + * 7 0 + * 8:12 System ID (loaded from ATTR_PROC_FABRIC_SYSTEM_ID) + * 13:15 00 + * + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. 
+ */ + uint16_t location_id = chip << 12; + + cme_hdr->location_id = location_id; + sgpe_hdr->location_id = location_id; + + /* Extended addressing is supported, but it's all zeros for both chips */ + sgpe_hdr->addr_extension = 0; +} + static void setup_wakeup_mode(uint64_t cores) { for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { @@ -2518,9 +2545,7 @@ uint64_t build_homer_image(void *homer_bar) ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xf100; ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xf032; - // Set the Fabric IDs - // setFabricIds( pChipHomer, i_procTgt ); - // - doesn't modify anything? + set_fabric_ids(/*chip=*/0, homer); setup_wakeup_mode(cores); From 6a6b6a3dc1942ae31988086cdb1e482736438264 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 12 Feb 2022 19:16:00 +0200 Subject: [PATCH 153/213] soc/power9/homer.c: simplify get_available_cores() Don't hide setting of multicast groups in it. That's part of istep 15.3 and we have a function for it now. Change-Id: Iff51eab68c3c3871b85c9d54907f9a79873440a2 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 48 +++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 561f9ea6d3d..50e4140ee33 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -906,22 +906,10 @@ static uint64_t get_available_cores(int *me) if (val & PPC_BIT(0)) { printk(BIOS_SPEW, "Core %d is functional%s\n", i, (val & PPC_BIT(1)) ? "" : " and running"); + ret |= PPC_BIT(i); if ((val & PPC_BIT(1)) == 0 && me != NULL) *me = i; - - /* Might as well set multicast groups for cores */ - if ((read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0001) & PPC_BITMASK(3,5)) - == PPC_BITMASK(3,5)) - scom_and_or_for_chiplet(EC00_CHIPLET_ID + i, 0xF0001, - ~(PPC_BITMASK(3,5) | PPC_BITMASK(16,23)), - PPC_BITMASK(19,21)); - - if ((read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0002) & PPC_BITMASK(3,5)) - == PPC_BITMASK(3,5)) - scom_and_or_for_chiplet(EC00_CHIPLET_ID + i, 0xF0002, - ~(PPC_BITMASK(3,5) | PPC_BITMASK(16,23)), - PPC_BIT(5) | PPC_BITMASK(19,21)); } } return ret; @@ -2359,13 +2347,35 @@ static void istep_15_2(struct homer_st *homer) /* 15.3 establish EX chiplet */ static void istep_15_3(uint64_t cores) { - /* Multicast groups for cores were assigned in get_available_cores() */ + const uint64_t group_mask = PPC_BITMASK(3,5); + + /* Assign multicast groups for cores */ + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { + const chiplet_id_t chiplet = EC00_CHIPLET_ID + i; + + if (!IS_EC_FUNCTIONAL(i, cores)) + continue; + + if ((read_scom_for_chiplet(chiplet, 0xF0001) & group_mask) == group_mask) + scom_and_or_for_chiplet(chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); + + if ((read_scom_for_chiplet(chiplet, 0xF0002) & group_mask) == group_mask) + scom_and_or_for_chiplet(chiplet, 0xF0002, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BIT(5) | PPC_BITMASK(19,21)); + } + for (int i = 0; i < MAX_QUADS_PER_CHIP; i++) { - if (IS_EQ_FUNCTIONAL(i, cores) && - (read_scom_for_chiplet(EP00_CHIPLET_ID + i, 0xF0001) & PPC_BITMASK(3,5)) - == PPC_BITMASK(3,5)) - scom_and_or_for_chiplet(EP00_CHIPLET_ID + i, 0xF0001, - ~(PPC_BITMASK(3,5) | PPC_BITMASK(16,23)), + const chiplet_id_t chiplet = EP00_CHIPLET_ID + i; + + if (!IS_EQ_FUNCTIONAL(i, cores)) + continue; + + if ((read_scom_for_chiplet(chiplet, 0xF0001) & group_mask) == group_mask) + 
scom_and_or_for_chiplet(chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), PPC_BITMASK(19,21)); } From 0b026723d2137f15e84c26ce53bf5bfc252e67b1 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 18:14:45 +0200 Subject: [PATCH 154/213] soc/power9/homer.c: start building HOMER for 2 CPUs Change-Id: I3aec1a46d671167b028930ce321298fc28d771c6 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 88 +++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 24 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 50e4140ee33..44fd838e9a9 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -18,6 +18,7 @@ #include "chip.h" #include "homer.h" +#include "fsi.h" #include "ops.h" #include "tor.h" #include "xip.h" @@ -898,11 +899,11 @@ static void stop_gpe_init(struct homer_st *homer) die("Timeout while waiting for SGPE activation\n"); } -static uint64_t get_available_cores(int *me) +static uint64_t get_available_cores(uint8_t chip, int *me) { uint64_t ret = 0; for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { - uint64_t val = read_scom_for_chiplet(EC00_CHIPLET_ID + i, 0xF0040); + uint64_t val = read_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0xF0040); if (val & PPC_BIT(0)) { printk(BIOS_SPEW, "Core %d is functional%s\n", i, (val & PPC_BIT(1)) ? "" : " and running"); @@ -2317,6 +2318,28 @@ static void set_fabric_ids(uint8_t chip, struct homer_st *homer) sgpe_hdr->addr_extension = 0; } +static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd, + uint64_t cores) +{ + layout_rings(homer, dd, cores); + build_parameter_blocks(homer, cores); + update_headers(homer, cores); + + populate_epsilon_l2_scom_reg(homer); + populate_epsilon_l3_scom_reg(homer); + /* Update L3 Refresh Timer Control SCOM Registers */ + populate_l3_refresh_scom_reg(homer, dd); + /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ + populate_ncu_rng_bar_scom_reg(homer); + + /* Update flag fields in image headers */ + ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; + ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xf100; + ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xf032; + + set_fabric_ids(chip, homer); +} + static void setup_wakeup_mode(uint64_t cores) { for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { @@ -2497,12 +2520,17 @@ static void istep_15_4(uint64_t cores) */ uint64_t build_homer_image(void *homer_bar) { + const uint8_t chips = fsi_get_present_chips(); + struct mmap_helper_region_device mdev = {0}; struct homer_st *homer = homer_bar; struct xip_hw_header *hw = homer_bar; - uint8_t dd = get_dd(); + uint8_t dd = get_dd(); // XXX: does this need to be chip-specific? int this_core = -1; - uint64_t cores = get_available_cores(&this_core); + uint64_t cores[MAX_CHIPS] = { + get_available_cores(0, &this_core), + (chips & 0x02) ? 
get_available_cores(1, NULL) : 0, + }; if (this_core == -1) die("Couldn't found active core\n"); @@ -2532,44 +2560,56 @@ uint64_t build_homer_image(void *homer_bar) build_self_restore(homer, (struct xip_restore_header *)(homer_bar + hw->restore.offset), - dd, cores); + dd, cores[0]); build_cme(homer, (struct xip_cme_header *)(homer_bar + hw->cme.offset), dd); build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), dd); - layout_rings(homer, dd, cores); - build_parameter_blocks(homer, cores); - update_headers(homer, cores); + /* + * Until this point, only self restore part is CPU specific, use current + * state of the first HOMER image as a base for the second one. + */ + if (chips & 0x02) { + uint8_t *homer_bar2 = (void *)&homer[1]; + struct cme_img_header *hdr; - populate_epsilon_l2_scom_reg(homer); - populate_epsilon_l3_scom_reg(homer); - /* Update L3 Refresh Timer Control SCOM Registers */ - populate_l3_refresh_scom_reg(homer, dd); - /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ - populate_ncu_rng_bar_scom_reg(homer); + memcpy(&homer[1], &homer[0], sizeof(*homer)); - /* Update flag fields in image headers */ - ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; - ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xf100; - ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xf032; + /* Patch part of data initialized by build_cme() */ + hdr = (struct cme_img_header *)&homer[1].cpmr.cme_sram_region[INT_VECTOR_SIZE]; + hdr->cpmr_phy_addr = (uint64_t)&homer[1] | 2 * MiB; + hdr->unsec_cpmr_phy_addr = hdr->cpmr_phy_addr; - set_fabric_ids(/*chip=*/0, homer); + /* Override data from the other CPU */ + build_self_restore(&homer[1], + (struct xip_restore_header *)(homer_bar2 + hw->restore.offset), + dd, cores[1]); + } + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + fill_homer_for_chip(chip, &homer[chip], dd, cores[chip]); + } - setup_wakeup_mode(cores); + setup_wakeup_mode(cores[0]); report_istep(15, 2); - istep_15_2(homer); + istep_15_2(&homer[0]); report_istep(15, 3); - istep_15_3(cores); + istep_15_3(cores[0]); report_istep(15, 4); - istep_15_4(cores); + istep_15_4(cores[0]); /* Boot OCC here and activate SGPE at the same time */ - istep_21_1(homer, cores); + /* TODO: initialize OCC for the second CPU when it's present */ + istep_21_1(homer, cores[0]); istep_16_1(this_core); + /* TODO: this should probably be chip-specific, need output parameter instead */ return (uint64_t)get_voltage_data()->nominal.freq * MHz; } From 61b9ac83d6f49e54b2d5a589f390c3695a33b737 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 9 Jul 2022 19:43:23 +0300 Subject: [PATCH 155/213] soc/power9/: disable WOF on table search failure There are two reasons it can be disabled: - chip doesn't support WOF - no WOF table for the chip was found Change-Id: I048c25d3e51a3580968df222c429d64a18f86ca7 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 22 ++++++- src/soc/ibm/power9/pstates.c | 121 +++++++++++++++++++++-------------- 2 files changed, 93 insertions(+), 50 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 44fd838e9a9..c77e874ef93 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2321,6 +2321,15 @@ static void set_fabric_ids(uint8_t chip, struct homer_st *homer) static void fill_homer_for_chip(uint8_t chip, struct 
homer_st *homer, uint8_t dd, uint64_t cores) { + enum { + CME_QM_FLAG_SYS_WOF_ENABLE = 0x1000, + PGPE_FLAG_WOF_ENABLE = 0x1000, + }; + + const OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; + uint16_t qm_mode_flags; + uint16_t pgpe_flags; + layout_rings(homer, dd, cores); build_parameter_blocks(homer, cores); update_headers(homer, cores); @@ -2333,9 +2342,18 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd populate_ncu_rng_bar_scom_reg(homer); /* Update flag fields in image headers */ + + qm_mode_flags = 0xE100; + pgpe_flags = 0xE032; + + if (oppb->wof.wof_enabled) { + qm_mode_flags |= CME_QM_FLAG_SYS_WOF_ENABLE; + pgpe_flags |= PGPE_FLAG_WOF_ENABLE; + } + ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; - ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = 0xf100; - ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = 0xf032; + ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = qm_mode_flags; + ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = pgpe_flags; set_fabric_ids(chip, homer); } diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c index 03ff932fde9..0c4cd26c7fe 100644 --- a/src/soc/ibm/power9/pstates.c +++ b/src/soc/ibm/power9/pstates.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include // memcpy @@ -187,22 +188,30 @@ static void copy_poundW_v2_to_v3(PoundW_data_per_quad *v3, PoundW_data *v2) v3->resistance_data.r_undervolt_allowed = v2->undervolt_tested; } -static void check_valid_poundV(struct voltage_bucket_data *bucket) +static void check_valid_poundV(struct voltage_bucket_data *bucket, int wof_enabled) { + int num_op_points = NUM_OP_POINTS; // skip powerbus + struct voltage_data *data = &bucket->nominal; assert(bucket != NULL); - for (int i = 0; i < NUM_OP_POINTS; i++) { // skip powerbus + /* Skip UltraTurbo if WOF is disabled */ + if (!wof_enabled) + --num_op_points; + + for (int i = 0; i < num_op_points; i++) { if (data[i].freq == 0 || data[i].vdd_voltage == 0 || data[i].idd_current == 0 || data[i].vcs_voltage == 0 || data[i].ics_current == 0) die("Bad #V data\n"); } // TODO: check if values increase with operating points + // (skipping UltraTurbo if WOF is disabled) } static void check_valid_poundW(PoundW_data_per_quad *poundW_bucket, - uint64_t functional_cores) + uint64_t functional_cores, + int wof_enabled) { uint8_t prev_vid_compare_per_quad[MAXIMUM_QUADS] = {}; /* @@ -211,10 +220,12 @@ static void check_valid_poundW(PoundW_data_per_quad *poundW_bucket, */ for (int op = 0; op < NUM_OP_POINTS; op++) { - /* Assuming WOF is enabled - check that TDP VDD currents are nonzero */ - if (poundW_bucket->poundw[op].ivdd_tdp_ac_current_10ma == 0 || - poundW_bucket->poundw[op].ivdd_tdp_dc_current_10ma == 0) - die("TDP VDD current equals zero\n"); + if (wof_enabled) { + /* Check that TDP VDD currents are nonzero */ + if (poundW_bucket->poundw[op].ivdd_tdp_ac_current_10ma == 0 || + poundW_bucket->poundw[op].ivdd_tdp_dc_current_10ma == 0) + die("TDP VDD current equals zero\n"); + } /* Assuming VDM is enabled - validate threshold values */ for (int quad = 0; quad < MAXIMUM_QUADS; quad++) { @@ -534,9 +545,10 @@ static void wof_extract(uint8_t *buf, struct wof_image_entry entry, die("Failed to unmap WOF section!\n"); } -static void wof_init(uint8_t *buf, uint32_t core_count, - const OCCPstateParmBlock 
*oppb, - const struct voltage_bucket_data *poundV_bucket) +/* Returns WOF state */ +static uint8_t wof_init(uint8_t *buf, uint32_t core_count, + const OCCPstateParmBlock *oppb, + const struct voltage_bucket_data *poundV_bucket) { const struct region_device *wof_device = NULL; @@ -566,12 +578,14 @@ static void wof_init(uint8_t *buf, uint32_t core_count, entry_idx = wof_find(entries, hdr->entry_count, core_count, poundV_bucket); if (entry_idx == -1) - die("Failed to find a matching WOF tables section!\n"); - - wof_extract(buf, entries[entry_idx], oppb); + printk(BIOS_NOTICE, "Matching WOF tables section not found, disabling WOF\n"); + else + wof_extract(buf, entries[entry_idx], oppb); if (rdev_munmap(wof_device, entries)) die("Failed to unmap section table of WOF!\n"); + + return (entry_idx == -1 ? 0 : 1); } /* Assumption: no bias is applied to operating points */ @@ -614,21 +628,13 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) LocalPstateParmBlock *lppb = (LocalPstateParmBlock *) &homer->cpmr.cme_sram_region[cme_hdr->pstate_offset * 32]; + /* Start with the assumption that WOF will work if it's supported by the chip */ + oppb->wof.wof_enabled = get_dd() > 0x20; + /* OPPB - constant fields */ oppb->magic = OCC_PARMSBLOCK_MAGIC; // "OCCPPB00" oppb->frequency_step_khz = FREQ_STEP_KHZ; - oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR 0 from talos.xml - oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml - - oppb->wof.wof_enabled = 1; // Assuming wof_init() succeeds or dies - oppb->wof.tdp_rdp_factor = 0; // ATTR_TDP_RDP_CURRENT_FACTOR from talos.xml - oppb->nest_leakage_percent = 60; // ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml - /* - * As the Vdn dimension is not supported in the WOF tables, hardcoding this - * value to the OCC as non-zero to keep it happy. - */ - oppb->ceff_tdp_vdn = 1; /* Default values are from talos.xml */ oppb->vdd_sysparm.loadline_uohm = 254; @@ -736,7 +742,7 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) } } - check_valid_poundV(bucket); + check_valid_poundV(bucket, oppb->wof.wof_enabled); if (poundV_bucket.id == 0) { memcpy(£V_bucket, bucket, sizeof(poundV_bucket)); @@ -773,6 +779,29 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) oppb->frequency_max_khz = vd[VPD_PV_ULTRA].freq * 1000; oppb->nest_frequency_mhz = vd[VPD_PV_POWERBUS].freq; + /* If WOF is supported, try initializing it. Disable WOF if initialization fails. */ + if (oppb->wof.wof_enabled) { + uint32_t core_count = __builtin_popcount((uint32_t)functional_cores) + + __builtin_popcount(functional_cores >> 32); + /* wof_init() only needs two fields of oppb, both of which are + * initialized by now. */ + oppb->wof.wof_enabled = wof_init(homer->ppmr.wof_tables, core_count, oppb, + £V_bucket); + } + + if (oppb->wof.wof_enabled) { + /* ATTR_TDP_RDP_CURRENT_FACTOR from talos.xml */ + oppb->wof.tdp_rdp_factor = 0; + /* ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml */ + oppb->nest_leakage_percent = 60; + + /* + * As the Vdn dimension is not supported in the WOF tables, hardcoding this + * value to the OCC as non-zero to keep it happy. 
+ */ + oppb->ceff_tdp_vdn = 1; + } + for (int op = 0; op < NUM_OP_POINTS; op++) { /* Assuming no bias */ oppb->operating_points[op].frequency_mhz = vd[op].freq; @@ -923,15 +952,16 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) poundW_bucket.poundw[NOMINAL] = nom; } - check_valid_poundW(£W_bucket, functional_cores); - + check_valid_poundW(£W_bucket, functional_cores, oppb->wof.wof_enabled); /* OPPB - #W data */ - oppb->lac_tdp_vdd_turbo_10ma = - poundW_bucket.poundw[TURBO].ivdd_tdp_ac_current_10ma; - oppb->lac_tdp_vdd_nominal_10ma = - poundW_bucket.poundw[NOMINAL].ivdd_tdp_ac_current_10ma; + if (oppb->wof.wof_enabled) { + oppb->lac_tdp_vdd_turbo_10ma = + poundW_bucket.poundw[TURBO].ivdd_tdp_ac_current_10ma; + oppb->lac_tdp_vdd_nominal_10ma = + poundW_bucket.poundw[NOMINAL].ivdd_tdp_ac_current_10ma; + } /* Calculate safe mode frequency/pstate/voltage */ { @@ -992,20 +1022,20 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) memcpy(lppb->PsVDMJumpSlopes, gppb->PsVDMJumpSlopes, sizeof(lppb->PsVDMJumpSlopes)); + if (oppb->wof.wof_enabled) { + /* + * IDDQ - can't read straight to IddqTable, see comment before spare bytes + * in struct definition. + */ + size = sizeof(buf); + /* TODO: don't hard-code chip if values are not the same among them */ + if (!mvpd_extract_keyword(/*chip=*/0, "CRP0", "IQ", buf, &size)) + die("Failed to read %s record from MVPD", "CRP0"); + assert(size >= sizeof(IddqTable)); + memcpy(&oppb->iddq, buf, sizeof(IddqTable)); - /* - * IDDQ - can't read straight to IddqTable, see comment before spare bytes - * in struct definition. - */ - size = sizeof(buf); - /* TODO: don't hard-code chip if values are not the same among them */ - if (!mvpd_extract_keyword(/*chip=*/0, "CRP0", "IQ", buf, &size)) { - die("Failed to read %s record from MVPD", "CRP0"); + check_valid_iddq(&oppb->iddq); } - assert(size >= sizeof(IddqTable)); - memcpy(&oppb->iddq, buf, sizeof(IddqTable)); - - check_valid_iddq(&oppb->iddq); /* * Pad was re-purposed, Hostboot developers created additional union. 
The @@ -1016,11 +1046,6 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) ((GPPBOptionsPadUse *)&gppb->options.pad)->fields.good_cores_in_sort = oppb->iddq.good_normal_cores_per_sort; - wof_init(homer->ppmr.wof_tables, - __builtin_popcount((uint32_t)functional_cores) + - __builtin_popcount(functional_cores >> 32), - oppb, £V_bucket); - /* Copy LPPB to functional CMEs */ for (int cme = 1; cme < MAX_CMES_PER_CHIP; cme++) { if (!IS_EX_FUNCTIONAL(cme, functional_cores)) From b1cced25668608c18ec27ab43d2841c965ee3586 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 19:26:57 +0200 Subject: [PATCH 156/213] soc/power9/: fetch chip-specific TOR rings Change-Id: Ia36ff52faab6841f3c382e651f66a1ec0cfd565f Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 15 ++++++++------- src/soc/ibm/power9/tor.c | 21 +++++++++++++-------- src/soc/ibm/power9/tor.h | 3 ++- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index c77e874ef93..0711fdb686c 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -1534,8 +1534,8 @@ static void istep_21_1(struct homer_st *homer, uint64_t cores) } /* Extracts rings for a specific Programmable PowerPC-lite Engine */ -static void get_ppe_scan_rings(struct xip_hw_header *hw, uint8_t dd, enum ppe_type ppe, - struct ring_data *ring_data) +static void get_ppe_scan_rings(uint8_t chip, struct xip_hw_header *hw, uint8_t dd, + enum ppe_type ppe, struct ring_data *ring_data) { const uint32_t max_rings_buf_size = ring_data->rings_buf_size; @@ -1559,7 +1559,8 @@ static void get_ppe_scan_rings(struct xip_hw_header *hw, uint8_t dd, enum ppe_ty assert(ring_data->work_buf2_size == MAX_RING_BUF_SIZE); assert(ring_data->work_buf3_size == MAX_RING_BUF_SIZE); - tor_fetch_and_insert_vpd_rings((struct tor_hdr *)ring_data->rings_buf, + tor_fetch_and_insert_vpd_rings(chip, + (struct tor_hdr *)ring_data->rings_buf, &ring_data->rings_buf_size, max_rings_buf_size, overlays, ppe, ring_data->work_buf1, @@ -2259,7 +2260,7 @@ const struct voltage_bucket_data * get_voltage_data(void) return bucket; } -static void layout_rings(struct homer_st *homer, uint8_t dd, uint64_t cores) +static void layout_rings(uint8_t chip, struct homer_st *homer, uint8_t dd, uint64_t cores) { static uint8_t rings_buf[300 * KiB]; @@ -2277,7 +2278,7 @@ static void layout_rings(struct homer_st *homer, uint8_t dd, uint64_t cores) struct xip_hw_header *hw = (void *)homer; enum ring_variant ring_variant = (dd < 0x23 ? 
RV_BASE : RV_RL4); - get_ppe_scan_rings(hw, dd, PT_CME, &ring_data); + get_ppe_scan_rings(chip, hw, dd, PT_CME, &ring_data); layout_rings_for_cme(homer, &ring_data, cores, ring_variant); /* Reset buffer sizes to maximum values before reusing the structure */ @@ -2285,7 +2286,7 @@ static void layout_rings(struct homer_st *homer, uint8_t dd, uint64_t cores) ring_data.work_buf1_size = sizeof(work_buf1); ring_data.work_buf2_size = sizeof(work_buf2); ring_data.work_buf3_size = sizeof(work_buf3); - get_ppe_scan_rings(hw, dd, PT_SGPE, &ring_data); + get_ppe_scan_rings(chip, hw, dd, PT_SGPE, &ring_data); layout_rings_for_sgpe(homer, &ring_data, (struct xip_sgpe_header *)((uint8_t *)homer + hw->sgpe.offset), cores, ring_variant); @@ -2330,7 +2331,7 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd uint16_t qm_mode_flags; uint16_t pgpe_flags; - layout_rings(homer, dd, cores); + layout_rings(chip, homer, dd, cores); build_parameter_blocks(homer, cores); update_headers(homer, cores); diff --git a/src/soc/ibm/power9/tor.c b/src/soc/ibm/power9/tor.c index c5623d4c202..876b11b3267 100644 --- a/src/soc/ibm/power9/tor.c +++ b/src/soc/ibm/power9/tor.c @@ -621,7 +621,8 @@ static void tor_append_ring(struct tor_hdr *ring_section, * applying overlay if necessary. All buffers must be be at least * MAX_RING_BUF_SIZE bytes in length. Indicates result by setting *ring_status. */ -static void tor_fetch_and_insert_vpd_ring(struct tor_hdr *ring_section, +static void tor_fetch_and_insert_vpd_ring(uint8_t chip, + struct tor_hdr *ring_section, uint32_t *ring_section_size, const struct ring_query *query, uint32_t max_ring_section_size, @@ -639,8 +640,7 @@ static void tor_fetch_and_insert_vpd_ring(struct tor_hdr *ring_section, uint8_t instance_id = 0; struct ring_hdr *ring = NULL; - /* TODO: don't hard-code chip if values are not the same among them */ - success = mvpd_extract_ring(/*chip=*/0, "CP00", query->kwd_name, + success = mvpd_extract_ring(chip, "CP00", query->kwd_name, chiplet_id, even_odd, query->ring_id, buf1, MAX_RING_BUF_SIZE); if (!success) { @@ -680,7 +680,8 @@ static void tor_fetch_and_insert_vpd_ring(struct tor_hdr *ring_section, *ring_status = RING_FOUND; } -void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, +void tor_fetch_and_insert_vpd_rings(uint8_t chip, + struct tor_hdr *ring_section, uint32_t *ring_section_size, uint32_t max_ring_section_size, struct tor_hdr *overlays_section, @@ -739,7 +740,8 @@ void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, instance <= max_instance_id; ++instance) { enum ring_status ring_status; - tor_fetch_and_insert_vpd_ring(ring_section, + tor_fetch_and_insert_vpd_ring(chip, + ring_section, ring_section_size, query, max_ring_section_size, @@ -777,7 +779,8 @@ void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, enum ring_status ring_status; - tor_fetch_and_insert_vpd_ring(ring_section, + tor_fetch_and_insert_vpd_ring(chip, + ring_section, ring_section_size, eq_query, max_ring_section_size, @@ -801,7 +804,8 @@ void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, enum ring_status ring_status; - tor_fetch_and_insert_vpd_ring(ring_section, + tor_fetch_and_insert_vpd_ring(chip, + ring_section, ring_section_size, ex_queries[i], max_ring_section_size, @@ -826,7 +830,8 @@ void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, enum ring_status ring_status; - tor_fetch_and_insert_vpd_ring(ring_section, + tor_fetch_and_insert_vpd_ring(chip, + ring_section, ring_section_size, ec_query, 
max_ring_section_size, diff --git a/src/soc/ibm/power9/tor.h b/src/soc/ibm/power9/tor.h index 0bdc2df56a1..266ebb74c40 100644 --- a/src/soc/ibm/power9/tor.h +++ b/src/soc/ibm/power9/tor.h @@ -445,7 +445,8 @@ bool tor_access_ring(struct tor_hdr *ring_section, uint16_t ring_id, * applying overlay if necessary. All buffers must be be at least * MAX_RING_BUF_SIZE bytes in length. */ -void tor_fetch_and_insert_vpd_rings(struct tor_hdr *ring_section, +void tor_fetch_and_insert_vpd_rings(uint8_t chip, + struct tor_hdr *ring_section, uint32_t *ring_section_size, uint32_t max_ring_section_size, struct tor_hdr *overlays_section, From d49bf0ef00387fc4216ec6a1d08324a07fe53c6f Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 19:36:08 +0200 Subject: [PATCH 157/213] soc/power9/pstates.c: build chip-specific parameter blocks By using CPU-specific MVPD partition. Change-Id: I00323cc8d1b4f1e45f1b671f625cc83f25db85ef Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 2 +- src/soc/ibm/power9/homer.h | 2 +- src/soc/ibm/power9/pstates.c | 11 ++++------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 0711fdb686c..4b2f8aaf96d 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2332,7 +2332,7 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd uint16_t pgpe_flags; layout_rings(chip, homer, dd, cores); - build_parameter_blocks(homer, cores); + build_parameter_blocks(chip, homer, cores); update_headers(homer, cores); populate_epsilon_l2_scom_reg(homer); diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 0682f29d797..8ca8c5166f1 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -326,7 +326,7 @@ check_member(homer_st, ppmr, 3 * MiB); struct voltage_bucket_data; -void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores); +void build_parameter_blocks(uint8_t chip, struct homer_st *homer, uint64_t functional_cores); void configure_xive(int tgt_core); const struct voltage_bucket_data * get_voltage_data(void); diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c index 0c4cd26c7fe..e8db9e3c6a1 100644 --- a/src/soc/ibm/power9/pstates.c +++ b/src/soc/ibm/power9/pstates.c @@ -589,7 +589,7 @@ static uint8_t wof_init(uint8_t *buf, uint32_t core_count, } /* Assumption: no bias is applied to operating points */ -void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) +void build_parameter_blocks(uint8_t chip, struct homer_st *homer, uint64_t functional_cores) { uint8_t buf[512]; uint32_t size = sizeof(buf); @@ -718,8 +718,7 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) record[3] = '0' + quad; size = sizeof(buf); - /* TODO: don't hard-code chip if values are not the same among them */ - if (!mvpd_extract_keyword(/*chip=*/0, record, "#V", buf, &size)) { + if (!mvpd_extract_keyword(chip, record, "#V", buf, &size)) { die("Failed to read %s record from MVPD", record); } @@ -904,8 +903,7 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) * first parses/writes, then tests if bucket ID even match. 
*/ size = sizeof(buf); - /* TODO: don't hard-code chip if values are not the same among them */ - if (!mvpd_extract_keyword(/*chip=*/0, "CRP0", "#W", buf, &size)) { + if (!mvpd_extract_keyword(chip, "CRP0", "#W", buf, &size)) { die("Failed to read %s record from MVPD", "CRP0"); } @@ -1028,8 +1026,7 @@ void build_parameter_blocks(struct homer_st *homer, uint64_t functional_cores) * in struct definition. */ size = sizeof(buf); - /* TODO: don't hard-code chip if values are not the same among them */ - if (!mvpd_extract_keyword(/*chip=*/0, "CRP0", "IQ", buf, &size)) + if (!mvpd_extract_keyword(chip, "CRP0", "IQ", buf, &size)) die("Failed to read %s record from MVPD", "CRP0"); assert(size >= sizeof(IddqTable)); memcpy(&oppb->iddq, buf, sizeof(IddqTable)); From 4bf800daa892bbeb58a2d7735f478e7bf0cca3f9 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 20:26:46 +0200 Subject: [PATCH 158/213] soc/power9: make powerbus data chip-specific Change-Id: Idc264c5c1ae74b8c7d8fecf23cc4cdd6b608dcfa Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/powerbus.h | 2 +- src/soc/ibm/power9/homer.c | 63 ++++++++++++++++---------------- src/soc/ibm/power9/istep_10_1.c | 4 +- src/soc/ibm/power9/istep_13_8.c | 2 +- src/soc/ibm/power9/istep_8_9.c | 2 +- src/soc/ibm/power9/powerbus.c | 29 ++++++++------- 6 files changed, 53 insertions(+), 49 deletions(-) diff --git a/src/include/cpu/power/powerbus.h b/src/include/cpu/power/powerbus.h index d2598b9648a..1c4298fdbce 100644 --- a/src/include/cpu/power/powerbus.h +++ b/src/include/cpu/power/powerbus.h @@ -47,6 +47,6 @@ struct powerbus_cfg uint32_t eps_w[NUM_EPSILON_WRITE_TIERS]; }; -const struct powerbus_cfg *powerbus_cfg(void); +const struct powerbus_cfg *powerbus_cfg(uint8_t chip); #endif // CPU_PPC64_POWERBUS_H diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 4b2f8aaf96d..6dbad33ca1a 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -1122,7 +1122,7 @@ static void load_occ_image_to_homer(struct homer_st *homer) } /* Writes information about the host to be read by OCC */ -static void load_host_data_to_homer(struct homer_st *homer) +static void load_host_data_to_homer(uint8_t chip, struct homer_st *homer) { enum { OCC_HOST_DATA_VERSION = 0x00000090, @@ -1133,13 +1133,13 @@ static void load_host_data_to_homer(struct homer_st *homer) (void *)&homer->occ_host_area[HOMER_OFFSET_TO_OCC_HOST_DATA]; config_data->version = OCC_HOST_DATA_VERSION; - config_data->nest_freq = powerbus_cfg()->fabric_freq; + config_data->nest_freq = powerbus_cfg(chip)->fabric_freq; config_data->interrupt_type = USE_PSIHB_COMPLEX; config_data->is_fir_master = false; config_data->is_smf_mode = false; } -static void load_pm_complex(struct homer_st *homer) +static void load_pm_complex(uint8_t chip, struct homer_st *homer) { /* * Hostboot resets OCC here, but we haven't started it yet, so reset @@ -1147,7 +1147,7 @@ static void load_pm_complex(struct homer_st *homer) */ load_occ_image_to_homer(homer); - load_host_data_to_homer(homer); + load_host_data_to_homer(chip, homer); } static void pm_corequad_init(uint64_t cores) @@ -1427,7 +1427,7 @@ static void check_proc_config(struct homer_st *homer) *conf_vector = vector_value; } -static void pm_pss_init(void) +static void pm_pss_init(uint8_t chip) { enum { PU_SPIPSS_ADC_CTRL_REG0 = 0x00070000, @@ -1497,16 +1497,16 @@ static void pm_pss_init(void) */ scom_and_or(PU_SPIPSS_100NS_REG, PPC_BITMASK(0, 31), - PPC_PLACE(powerbus_cfg()->fabric_freq / 40, 0, 32)); + 
PPC_PLACE(powerbus_cfg(chip)->fabric_freq / 40, 0, 32)); } /* Initializes power-management and starts OCC */ -static void start_pm_complex(struct homer_st *homer, uint64_t cores) +static void start_pm_complex(uint8_t chip, struct homer_st *homer, uint64_t cores) { enum { STOP_RECOVERY_TRIGGER_ENABLE = 29 }; pm_corequad_init(cores); - pm_pss_init(); + pm_pss_init(chip); pm_occ_fir_init(); pm_pba_fir_init(); stop_gpe_init(homer); @@ -1520,12 +1520,12 @@ static void start_pm_complex(struct homer_st *homer, uint64_t cores) write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); } -static void istep_21_1(struct homer_st *homer, uint64_t cores) +static void istep_21_1(uint8_t chip, struct homer_st *homer, uint64_t cores) { - load_pm_complex(homer); + load_pm_complex(chip, homer); printk(BIOS_ERR, "Starting PM complex...\n"); - start_pm_complex(homer, cores); + start_pm_complex(chip, homer, cores); printk(BIOS_ERR, "Done starting PM complex\n"); printk(BIOS_ERR, "Activating OCC...\n"); @@ -1716,9 +1716,9 @@ static void layout_rings_for_cme(struct homer_st *homer, } } -static enum ring_id resolve_eq_inex_bucket(void) +static enum ring_id resolve_eq_inex_bucket(uint8_t chip) { - switch (powerbus_cfg()->core_floor_ratio) { + switch (powerbus_cfg(chip)->core_floor_ratio) { case FABRIC_CORE_FLOOR_RATIO_RATIO_8_8: return EQ_INEX_BUCKET_4; @@ -1735,7 +1735,8 @@ static enum ring_id resolve_eq_inex_bucket(void) die("Failed to resolve EQ_INEX_BUCKET_*!\n"); } -static void layout_cmn_rings_for_sgpe(struct homer_st *homer, +static void layout_cmn_rings_for_sgpe(uint8_t chip, + struct homer_st *homer, struct ring_data *ring_data, enum ring_variant ring_variant) { @@ -1760,7 +1761,7 @@ static void layout_cmn_rings_for_sgpe(struct homer_st *homer, EQ_ANA_BNDY_BUCKET_39, EQ_ANA_BNDY_BUCKET_40, EQ_ANA_BNDY_BUCKET_41 }; - const enum ring_id eq_index_bucket_id = resolve_eq_inex_bucket(); + const enum ring_id eq_index_bucket_id = resolve_eq_inex_bucket(chip); struct qpmr_header *qpmr_hdr = &homer->qpmr.sgpe.header; struct sgpe_cmn_ring_list *tmp = @@ -1866,7 +1867,7 @@ static void layout_inst_rings_for_sgpe(struct homer_st *homer, qpmr_hdr->spec_ring_len = payload - start; } -static void layout_rings_for_sgpe(struct homer_st *homer, +static void layout_rings_for_sgpe(uint8_t chip, struct homer_st *homer, struct ring_data *ring_data, struct xip_sgpe_header *sgpe, uint64_t cores, @@ -1876,7 +1877,7 @@ static void layout_rings_for_sgpe(struct homer_st *homer, struct sgpe_img_header *sgpe_img_hdr = (void *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; - layout_cmn_rings_for_sgpe(homer, ring_data, ring_variant); + layout_cmn_rings_for_sgpe(chip, homer, ring_data, ring_variant); layout_inst_rings_for_sgpe(homer, ring_data, cores, RV_BASE); if (qpmr_hdr->common_ring_len == 0) { @@ -1979,9 +1980,9 @@ static void stop_save_scom(struct homer_st *homer, uint32_t scom_address, entry->data = scom_data; } -static void populate_epsilon_l2_scom_reg(struct homer_st *homer) +static void populate_epsilon_l2_scom_reg(uint8_t chip, struct homer_st *homer) { - const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); uint32_t eps_r_t0 = pb_cfg->eps_r[0] / 8 / L2_EPS_DIVIDER + 1; uint32_t eps_r_t1 = pb_cfg->eps_r[1] / 8 / L2_EPS_DIVIDER + 1; @@ -2025,9 +2026,9 @@ static void populate_epsilon_l2_scom_reg(struct homer_st *homer) } } -static void populate_epsilon_l3_scom_reg(struct homer_st *homer) +static void populate_epsilon_l3_scom_reg(uint8_t chip, struct homer_st 
*homer) { - const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); uint32_t eps_r_t0 = pb_cfg->eps_r[0] / 8 / L3_EPS_DIVIDER + 1; uint32_t eps_r_t1 = pb_cfg->eps_r[1] / 8 / L3_EPS_DIVIDER + 1; @@ -2071,14 +2072,14 @@ static void populate_epsilon_l3_scom_reg(struct homer_st *homer) } } -static void populate_l3_refresh_scom_reg(struct homer_st *homer, uint8_t dd) +static void populate_l3_refresh_scom_reg(uint8_t chip, struct homer_st *homer, uint8_t dd) { uint64_t refresh_val = 0x2000000000000000ULL; uint8_t quad = 0; /* ATTR_CHIP_EC_FEATURE_HW408892 === (DD <= 0x20) */ - if (powerbus_cfg()->fabric_freq >= 2000 && dd > 0x20) + if (powerbus_cfg(chip)->fabric_freq >= 2000 && dd > 0x20) refresh_val |= PPC_PLACE(0x2, 8, 4); for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { @@ -2116,7 +2117,7 @@ static void populate_ncu_rng_bar_scom_reg(struct homer_st *homer) } } -static void update_headers(struct homer_st *homer, uint64_t cores) +static void update_headers(uint8_t chip, struct homer_st *homer, uint64_t cores) { /* * Update CPMR Header with Scan Ring details @@ -2181,7 +2182,7 @@ static void update_headers(struct homer_st *homer, uint64_t cores) cme_hdr->scom_len = 512; /* Timebase frequency */ - cme_hdr->timebase_hz = powerbus_cfg()->fabric_freq * MHz / 64; + cme_hdr->timebase_hz = powerbus_cfg(chip)->fabric_freq * MHz / 64; /* * Update QPMR Header area in HOMER @@ -2287,7 +2288,7 @@ static void layout_rings(uint8_t chip, struct homer_st *homer, uint8_t dd, uint6 ring_data.work_buf2_size = sizeof(work_buf2); ring_data.work_buf3_size = sizeof(work_buf3); get_ppe_scan_rings(chip, hw, dd, PT_SGPE, &ring_data); - layout_rings_for_sgpe(homer, &ring_data, + layout_rings_for_sgpe(chip, homer, &ring_data, (struct xip_sgpe_header *)((uint8_t *)homer + hw->sgpe.offset), cores, ring_variant); } @@ -2333,12 +2334,12 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd layout_rings(chip, homer, dd, cores); build_parameter_blocks(chip, homer, cores); - update_headers(homer, cores); + update_headers(chip, homer, cores); - populate_epsilon_l2_scom_reg(homer); - populate_epsilon_l3_scom_reg(homer); + populate_epsilon_l2_scom_reg(chip, homer); + populate_epsilon_l3_scom_reg(chip, homer); /* Update L3 Refresh Timer Control SCOM Registers */ - populate_l3_refresh_scom_reg(homer, dd); + populate_l3_refresh_scom_reg(chip, homer, dd); /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ populate_ncu_rng_bar_scom_reg(homer); @@ -2625,7 +2626,7 @@ uint64_t build_homer_image(void *homer_bar) /* Boot OCC here and activate SGPE at the same time */ /* TODO: initialize OCC for the second CPU when it's present */ - istep_21_1(homer, cores[0]); + istep_21_1(/*chip=*/0, homer, cores[0]); istep_16_1(this_core); diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c index ee6be37f0a4..335714c576c 100644 --- a/src/soc/ibm/power9/istep_10_1.c +++ b/src/soc/ibm/power9/istep_10_1.c @@ -163,7 +163,7 @@ static const uint64_t PB_HPA_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { */ static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) { - const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); const uint32_t pb_freq_mhz = pb_cfg->fabric_freq; /* Frequency of XBus for Nimbus DD2 */ @@ -586,7 +586,7 @@ static void p9_build_smp_sequence_adu(uint8_t chips, enum build_smp_adu_action a static void p9_fbc_ab_hp_scom(uint8_t chip, bool 
is_xbus_active) { - const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); const uint32_t pb_freq_mhz = pb_cfg->fabric_freq; /* Frequency of XBus for Nimbus DD2 */ diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index 04b89eb5964..097a73dd567 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -33,7 +33,7 @@ */ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) { - const struct powerbus_cfg *pb_cfg = powerbus_cfg(); + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); chiplet_id_t id = mcs_ids[mcs_i]; mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c index 965126a8558..5446b8bcbf7 100644 --- a/src/soc/ibm/power9/istep_8_9.c +++ b/src/soc/ibm/power9/istep_8_9.c @@ -151,7 +151,7 @@ static void p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) * Meaning that X1 is present and X0 and X2 aren't. */ - const uint64_t pb_freq_mhz = powerbus_cfg()->fabric_freq; + const uint64_t pb_freq_mhz = powerbus_cfg(chip)->fabric_freq; const uint64_t dd2_lo_limit_d = (FREQ_X_MHZ * 10); const uint64_t dd2_lo_limit_n = pb_freq_mhz * 82; diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c index 55905294c89..c8c863a37f2 100644 --- a/src/soc/ibm/power9/powerbus.c +++ b/src/soc/ibm/power9/powerbus.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -23,7 +24,7 @@ static const uint32_t EPSILON_W_T1_LE[] = { 15, 16, 17, 19, 21, 33 } /* See get_first_valid_pdV_pbFreq() in Hostboot */ -static bool read_voltage_data(struct powerbus_cfg *cfg) +static bool read_voltage_data(uint8_t chip, struct powerbus_cfg *cfg) { int i = 0; const struct voltage_kwd *voltage = NULL; @@ -41,8 +42,7 @@ static bool read_voltage_data(struct powerbus_cfg *cfg) uint32_t freq_floor = 0; /* Using LRP0 because frequencies are the same in all LRP records */ - /* TODO: don't hard-code chip if values are not the same among them */ - voltage = mvpd_get_voltage_data(/*chip=*/0, /*lrp=*/0); + voltage = mvpd_get_voltage_data(chip, /*lrp=*/0); for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { const struct voltage_bucket_data *bucket = &voltage->buckets[i]; @@ -225,22 +225,25 @@ static void calculate_epsilons(struct powerbus_cfg *cfg) printk(BIOS_WARNING, "Invalid relationship between base epsilon values\n"); } -const struct powerbus_cfg *powerbus_cfg(void) +const struct powerbus_cfg *powerbus_cfg(uint8_t chip) { - static struct powerbus_cfg cfg; + static struct powerbus_cfg cfg[2]; + static bool init_done[2]; - static bool init_done; - if (init_done) - return &cfg; + if (chip >= MAX_CHIPS) + die("Unsupported CPU number for powerbus config query: %d.\n", chip); - if (!read_voltage_data(&cfg)) + if (init_done[chip]) + return &cfg[chip]; + + if (!read_voltage_data(chip, &cfg[chip])) die("Failed to read voltage data"); - if (!calculate_frequencies(&cfg)) + if (!calculate_frequencies(&cfg[chip])) die("Incorrect core or PowerBus frequency"); - calculate_epsilons(&cfg); + calculate_epsilons(&cfg[chip]); - init_done = true; - return &cfg; + init_done[chip] = true; + return &cfg[chip]; } From 0c77d1e7de4a9e15de8cbed64e7e19dd2dc356c3 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Thu, 20 Jan 2022 20:32:38 +0200 Subject: [PATCH 159/213] soc/power9/homer.c: use group ID in two places This makes data CPU-specific. 
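
To illustrate the direction, a hypothetical helper along these lines would give each
socket its own PBAX identity. It is only a sketch: the presence-bitmask test and the
MAX_CHIPS bound are assumptions based on how fsi_get_present_chips() is used elsewhere,
and the bit positions mirror the ones already programmed in pm_pba_init().

	/*
	 * Sketch: give each present socket a distinct PBAX chip ID so
	 * power-bus messages are routed to the right processor.
	 */
	static void assign_pbax_ids(void)
	{
		/* Assumed: one presence bit per chip, bit 'chip' set when present */
		uint8_t chips = fsi_get_present_chips();

		for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
			uint64_t data = 0;

			if (!(chips & (1 << chip)))
				continue;

			data |= PPC_PLACE(/*groupid=*/0, 4, 4);	/* group ID left at 0 */
			data |= PPC_PLACE(chip, 8, 3);		/* chip ID = chip number */
			write_rscom(chip, PU_PBAXCFG_SCOM, data);
		}
	}

Only the chip ID varies here; the group ID stays 0, as in pm_pba_init().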
Change-Id: I236850ddae9b55e465d4c93079d725b37566532f Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 171 ++++++++++++++++++------------------- 1 file changed, 85 insertions(+), 86 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 6dbad33ca1a..790eab94b9b 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -622,7 +622,7 @@ static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe, hdr->aux_controls = 1 << 24; } -static void pba_slave_setup_runtime_phase(void) +static void pba_slave_setup_runtime_phase(uint8_t chip) { enum { OCI_MASTER_ID_GPE2 = 0x2, @@ -668,7 +668,7 @@ static void pba_slave_setup_runtime_phase(void) data |= PPC_BIT(27); // en_slv_fairness data |= PPC_BIT(10); // en_second_wrbuf - write_scom(PU_PBAMODE_SCOM, data); + write_rscom(chip, PU_PBAMODE_SCOM, data); /* * Slave 0 (SGPE STOP). This is a read/write slave in the event that @@ -687,7 +687,7 @@ static void pba_slave_setup_runtime_phase(void) data |= PPC_BIT(22); // buf_alloc_c data |= PPC_BIT(19); // buf_alloc_w - write_scom(PU_PBASLVCTL0_SCOM, data); + write_rscom(chip, PU_PBASLVCTL0_SCOM, data); /* * Slave 1 (GPE 1, PPC405 booting). This is a read/write slave. Write gathering is @@ -707,7 +707,7 @@ static void pba_slave_setup_runtime_phase(void) data |= PPC_BIT(22); // buf_alloc_c data |= PPC_BIT(19); // buf_alloc_w - write_scom(PU_PBASLVCTL1_SCOM, data); + write_rscom(chip, PU_PBASLVCTL1_SCOM, data); /* * Slave 2 (PGPE Boot, Pstates/WOF). This is a read/write slave. Write gethering is @@ -728,12 +728,12 @@ static void pba_slave_setup_runtime_phase(void) data |= PPC_BIT(22); // buf_alloc_c data |= PPC_BIT(19); // buf_alloc_w - write_scom(PU_PBASLVCTL2_SCOM, data); + write_rscom(chip, PU_PBASLVCTL2_SCOM, data); /* Slave 3 is not modified by this function, because it is owned by SBE */ } -static void pba_reset(void) +static void pba_reset(uint8_t chip) { long time; /* Stopping Block Copy Download Engine @@ -741,14 +741,14 @@ static void pba_reset(void) [all] 0 [0] 1 */ - write_scom(0x00068010, PPC_BIT(0)); + write_rscom(chip, 0x00068010, PPC_BIT(0)); /* Stopping Block Copy Upload Engine *0x00068015 // undocumented, PU_BCUE_CTL_SCOM [all] 0 [0] 1 */ - write_scom(0x00068015, PPC_BIT(0)); + write_rscom(chip, 0x00068015, PPC_BIT(0)); /* Polling on, to verify that BCDE & BCUE are indeed stopped timeout(256*256us): @@ -759,15 +759,15 @@ static void pba_reset(void) if both bits are clear: break */ time = wait_us(256*256, - (((read_scom(0x00068012) & PPC_BIT(0)) == 0) && - ((read_scom(0x00068017) & PPC_BIT(0)) == 0))); + (((read_rscom(chip, 0x00068012) & PPC_BIT(0)) == 0) && + ((read_rscom(chip, 0x00068017) & PPC_BIT(0)) == 0))); if (!time) die("Timed out waiting for stopping of BCDE/BCUE\n"); /* Clear the BCDE and BCUE stop bits */ - write_scom(0x00068010, 0); - write_scom(0x00068015, 0); + write_rscom(chip, 0x00068010, 0); + write_rscom(chip, 0x00068015, 0); /* Reset each slave and wait for completion timeout(16*1us): @@ -782,10 +782,10 @@ static void pba_reset(void) */ for (int sl = 0; sl < 3; sl++) { // Fourth is owned by SBE, do not reset time = wait_us(16, - (write_scom(0x00068001, PPC_BIT(0) | PPC_PLACE(sl, 1, 2)), - (read_scom(0x00068001) & PPC_BIT(4 + sl)) == 0)); + (write_rscom(chip, 0x00068001, PPC_BIT(0) | PPC_PLACE(sl, 1, 2)), + (read_rscom(chip, 0x00068001) & PPC_BIT(4 + sl)) == 0)); - if (!time || read_scom(0x00068001) & PPC_BIT(8 + sl)) + if (!time || read_rscom(chip, 0x00068001) & PPC_BIT(8 + sl)) die("Timed out waiting 
for slave %d reset\n", sl); } @@ -807,28 +807,28 @@ static void pba_reset(void) BRIDGE.PBA.PBAERRRPT0 // 0x0501284C [all] 0 */ - write_scom(0x00068013, 0); - write_scom(0x00068014, 0); - write_scom(0x00068015, 0); - write_scom(0x00068016, 0); - write_scom(0x00068018, 0); - write_scom(0x00068019, 0); - write_scom(0x00068026, 0); - write_scom(0x0006802A, 0); - write_scom(0x00068027, 0); - write_scom(0x0006802B, 0); - write_scom(0x00068004, 0); - write_scom(0x00068005, 0); - write_scom(0x00068006, 0); - write_scom(0x05012840, 0); - write_scom(0x0501284C, 0); + write_rscom(chip, 0x00068013, 0); + write_rscom(chip, 0x00068014, 0); + write_rscom(chip, 0x00068015, 0); + write_rscom(chip, 0x00068016, 0); + write_rscom(chip, 0x00068018, 0); + write_rscom(chip, 0x00068019, 0); + write_rscom(chip, 0x00068026, 0); + write_rscom(chip, 0x0006802A, 0); + write_rscom(chip, 0x00068027, 0); + write_rscom(chip, 0x0006802B, 0); + write_rscom(chip, 0x00068004, 0); + write_rscom(chip, 0x00068005, 0); + write_rscom(chip, 0x00068006, 0); + write_rscom(chip, 0x05012840, 0); + write_rscom(chip, 0x0501284C, 0); /* Perform non-zero reset operations BRIDGE.PBA.PBACFG // 0x0501284B [all] 0 [38] PBACFG_CHSW_DIS_GROUP_SCOPE = 1 */ - write_scom(0x0501284B, PPC_BIT(38)); + write_rscom(chip, 0x0501284B, PPC_BIT(38)); /* *0x00068021 // Undocumented, PU_PBAXCFG_SCOM @@ -836,9 +836,9 @@ static void pba_reset(void) [2] 1 // PBAXCFG_SND_RESET? [3] 1 // PBAXCFG_RCV_RESET? */ - write_scom(PU_PBAXCFG_SCOM, PPC_BIT(2) | PPC_BIT(3)); + write_rscom(chip, PU_PBAXCFG_SCOM, PPC_BIT(2) | PPC_BIT(3)); - pba_slave_setup_runtime_phase(); + pba_slave_setup_runtime_phase(chip); } static void stop_gpe_init(struct homer_st *homer) @@ -1114,10 +1114,7 @@ static void load_occ_image_to_homer(struct homer_st *homer) * not NULL. */ mount_part_from_pnor("OCC", &mdev); - /* - * Common OCC area is located right after HOMER image. 0x120000 is the - * size of OCC partition in PNOR, last 0x2000 bytes aren't important? 
- */ + rdev_readat(&mdev.rdev, &homer->occ_host_area, 0, 1 * MiB); } @@ -1357,7 +1354,7 @@ static void pstate_gpe_init(struct homer_st *homer, uint64_t cores) } } -static void pm_pba_init(void) +static void pm_pba_init(uint8_t chip) { enum { PU_PBACFG = 0x0501284B, @@ -1373,9 +1370,9 @@ static void pm_pba_init(void) }; uint64_t data = 0; - /* Assuming all these attributes have zero values */ + /* These group and chip IDs aren't affected by pump mode */ uint8_t attr_pbax_groupid = 0; - uint8_t attr_pbax_chipid = 0; + uint8_t attr_pbax_chipid = chip; uint8_t attr_pbax_broadcast_vector = 0; /* Assuming ATTR_CHIP_EC_FEATURE_HW423589_OPTION1 == true */ @@ -1393,10 +1390,10 @@ static void pm_pba_init(void) write_scom(PU_PBAXCFG_SCOM, data); } -static void pm_pstate_gpe_init(struct homer_st *homer, uint64_t cores) +static void pm_pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores) { pstate_gpe_init(homer, cores); - pm_pba_init(); + pm_pba_init(chip); } /* Generates host configuration vector and updates the value in HOMER */ @@ -1510,7 +1507,7 @@ static void start_pm_complex(uint8_t chip, struct homer_st *homer, uint64_t core pm_occ_fir_init(); pm_pba_fir_init(); stop_gpe_init(homer); - pm_pstate_gpe_init(homer, cores); + pm_pstate_gpe_init(chip, homer, cores); check_proc_config(homer); clear_occ_special_wakeups(cores); @@ -2095,15 +2092,13 @@ static void populate_l3_refresh_scom_reg(uint8_t chip, struct homer_st *homer, u } } -static void populate_ncu_rng_bar_scom_reg(struct homer_st *homer) +static void populate_ncu_rng_bar_scom_reg(uint8_t chip, struct homer_st *homer) { enum { NX_RANGE_BAR_ADDR_OFFSET = 0x00000302031D0000 }; uint8_t ex = 0; - uint64_t regNcuRngBarData = PROC_BASE_ADDR(/*chip=*/0, /*msel=*/0x3); - - regNcuRngBarData += NX_RANGE_BAR_ADDR_OFFSET; + uint64_t data = PROC_BASE_ADDR(chip, /*msel=*/0x3) + NX_RANGE_BAR_ADDR_OFFSET; for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { /* Create restore entry for NCU RNG register */ @@ -2112,8 +2107,7 @@ static void populate_ncu_rng_bar_scom_reg(struct homer_st *homer) | ((ex / 2) << 24) | ((ex % 2) ? 
0x0400 : 0x0000); - stop_save_scom(homer, scom_addr, regNcuRngBarData, - STOP_SECTION_EQ_SCOM, SCOM_REPLACE); + stop_save_scom(homer, scom_addr, data, STOP_SECTION_EQ_SCOM, SCOM_REPLACE); } } @@ -2341,7 +2335,7 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd /* Update L3 Refresh Timer Control SCOM Registers */ populate_l3_refresh_scom_reg(chip, homer, dd); /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ - populate_ncu_rng_bar_scom_reg(homer); + populate_ncu_rng_bar_scom_reg(chip, homer); /* Update flag fields in image headers */ @@ -2360,7 +2354,7 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd set_fabric_ids(chip, homer); } -static void setup_wakeup_mode(uint64_t cores) +static void setup_wakeup_mode(uint8_t chip, uint64_t cores) { for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { if (!IS_EC_FUNCTIONAL(i, cores)) @@ -2373,22 +2367,23 @@ static void setup_wakeup_mode(uint64_t cores) [4] CPPM_CPMMR_RESERVED_2_9 = 1 */ /* SCOM2 - OR, 0x200F0108 */ - write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F0108, - PPC_BIT(3) | PPC_BIT(4)); + write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(3) | PPC_BIT(4)); } } /* 15.2 set HOMER BAR */ -static void istep_15_2(struct homer_st *homer) +static void istep_15_2(uint8_t chip, struct homer_st *homer) { - write_scom(0x05012B00, (uint64_t)homer); - write_scom(0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); - write_scom(0x05012B02, (uint64_t)homer + 8 * 4 * MiB); // FIXME - write_scom(0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); + write_rscom(chip, 0x05012B00, (uint64_t)homer); + write_rscom(chip, 0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); + + write_rscom(chip, 0x05012B02, (uint64_t)homer + (8 - chip) * 4 * MiB); // FIXME + write_rscom(chip, 0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); } /* 15.3 establish EX chiplet */ -static void istep_15_3(uint64_t cores) +static void istep_15_3(uint8_t chip, uint64_t cores) { const uint64_t group_mask = PPC_BITMASK(3,5); @@ -2416,14 +2411,14 @@ static void istep_15_3(uint64_t cores) if (!IS_EQ_FUNCTIONAL(i, cores)) continue; - if ((read_scom_for_chiplet(chiplet, 0xF0001) & group_mask) == group_mask) - scom_and_or_for_chiplet(chiplet, 0xF0001, - ~(group_mask | PPC_BITMASK(16,23)), - PPC_BITMASK(19,21)); + if ((read_rscom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) + rscom_and_or_for_chiplet(chip, chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); } /* Writing OCC CCSR */ - write_scom(0x0006C090, cores); + write_rscom(chip, 0x0006C090, cores); /* Writing OCC QCSR */ uint64_t qcsr = 0; @@ -2431,7 +2426,7 @@ static void istep_15_3(uint64_t cores) if (IS_EX_FUNCTIONAL(i, cores)) qcsr |= PPC_BIT(i); } - write_scom(0x0006C094, qcsr); + write_rscom(chip, 0x0006C094, qcsr); } /* @@ -2440,7 +2435,7 @@ static void istep_15_3(uint64_t cores) * SGPE startup is actually done as part of istep 21.1 after all * preparations here to not have to restart it there. 
*/ -static void istep_15_4(uint64_t cores) +static void istep_15_4(uint8_t chip, uint64_t cores) { /* Initialize the PFET controllers */ for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { @@ -2451,7 +2446,8 @@ static void istep_15_4(uint64_t cores) [all] 0 [2] CPPM_CPMMR_RESERVED_2 = 1 */ - write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F0108, PPC_BIT(2)); + write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(2)); /* TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFDLY // 0x200F011B @@ -2459,16 +2455,16 @@ static void istep_15_4(uint64_t cores) [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded [4-7] PPM_PFDLY_POWUP_DLY = 0x9 */ - write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011B, - PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); + write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011B, + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); /* TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFOF // 0x200F011D [all] 0 [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ - write_scom_for_chiplet(EC00_CHIPLET_ID + i, 0x200F011D, - PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); + write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011D, + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); } if ((i % 4) == 0 && IS_EQ_FUNCTIONAL(i/4, cores)) { @@ -2478,21 +2474,21 @@ static void istep_15_4(uint64_t cores) [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded [4-7] PPM_PFDLY_POWUP_DLY = 0x9 */ - write_scom_for_chiplet(EP00_CHIPLET_ID + i/4, 0x100F011B, - PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); + write_rscom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011B, + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); /* TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFOF // 0x100F011D [all] 0 [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ - write_scom_for_chiplet(EP00_CHIPLET_ID + i/4, 0x100F011D, - PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); + write_rscom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011D, + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); } } /* Condition the PBA back to the base boot configuration */ - pba_reset(); + pba_reset(chip); /* * TODO: this is tested only if (ATTR_VDM_ENABLED || ATTR_IVRM_ENABLED), @@ -2502,7 +2498,7 @@ static void istep_15_4(uint64_t cores) /* TP.TPCHIP.TPC.ITR.FMU.KVREF_AND_VMEAS_MODE_STATUS_REG // 0x01020007 if ([16] == 0): die() */ - if (!(read_scom(0x01020007) & PPC_BIT(16))) + if (!(read_rscom(chip, 0x01020007) & PPC_BIT(16))) die("VDMs/IVRM are enabled but necessary VREF calibration failed\n"); /* First mask bit 7 in OIMR and then clear bit 7 in OISR @@ -2513,8 +2509,8 @@ static void istep_15_4(uint64_t cores) [all] 0 [7] OCB_OCI_OISR0_GPE2_ERROR = 1 */ - write_scom(0x0006C006, PPC_BIT(7)); - write_scom(0x0006C001, PPC_BIT(7)); + write_rscom(chip, 0x0006C006, PPC_BIT(7)); + write_rscom(chip, 0x0006C001, PPC_BIT(7)); /* * Setup the SGPE Timer Selects @@ -2525,14 +2521,14 @@ static void istep_15_4(uint64_t cores) [0-3] GPETSEL_FIT_SEL = 0x1 // FIT - fixed interval timer [4-7] GPETSEL_WATCHDOG_SEL = 0xA */ - write_scom(0x00066000, PPC_PLACE(0x1, 0, 4) | PPC_PLACE(0xA, 4, 4)); + write_rscom(chip, 0x00066000, PPC_PLACE(0x1, 0, 4) | PPC_PLACE(0xA, 4, 4)); /* Clear error injection bits *0x0006C18B // Undocumented, PU_OCB_OCI_OCCFLG2_CLEAR [all] 0 [30] 1 // OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ */ - write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); + write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); } /* @@ -2569,7 +2565,10 @@ uint64_t 
build_homer_image(void *homer_bar) * not NULL. */ mount_part_from_pnor("HCODE", &mdev); - /* First MB of HOMER is unused, we can write OCC image from PNOR there. */ + /* + * First MB of HOMER is unused at first, we can write OCC image from PNOR there. + * TODO: try putting HCODE somewhere else and load OCC host area right here. + */ rdev_readat(&mdev.rdev, hw, 0, 1 * MiB); assert(hw->magic == XIP_MAGIC_HW); @@ -2615,14 +2614,14 @@ uint64_t build_homer_image(void *homer_bar) fill_homer_for_chip(chip, &homer[chip], dd, cores[chip]); } - setup_wakeup_mode(cores[0]); + setup_wakeup_mode(/*chip=*/0, cores[0]); report_istep(15, 2); - istep_15_2(&homer[0]); + istep_15_2(/*chip=*/0, &homer[0]); report_istep(15, 3); - istep_15_3(cores[0]); + istep_15_3(/*chip=*/0, cores[0]); report_istep(15, 4); - istep_15_4(cores[0]); + istep_15_4(/*chip=*/0, cores[0]); /* Boot OCC here and activate SGPE at the same time */ /* TODO: initialize OCC for the second CPU when it's present */ From 1d8fe76dfb9872bfffb99e9b9f6438468a94e868 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 21 Jan 2022 00:49:00 +0200 Subject: [PATCH 160/213] soc/power9: make voltage data CPU-specific Partially ignoring OCC code at the moment as it will be updated separately later. Change-Id: I65ffb42e94eedd00063ba6cbfcdbbd8bd4613de8 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 11 ++++++----- src/soc/ibm/power9/chip.h | 2 +- src/soc/ibm/power9/homer.c | 11 ++++------- src/soc/ibm/power9/homer.h | 2 +- src/soc/ibm/power9/occ.c | 6 ++++-- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 9a545e74d79..9bc42028d6f 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -16,7 +16,7 @@ #include "chip.h" #include "fsi.h" -static uint64_t nominal_freq; +static uint64_t nominal_freq[MAX_CHIPS]; /* * These are various definitions of the page sizes and segment sizes supported @@ -207,14 +207,15 @@ static void fill_cpu_node(struct device_tree *tree, * Old-style core clock frequency. Only create this property if the * frequency fits in a 32-bit number. Do not create it if it doesn't. */ - if ((nominal_freq >> 32) == 0) - dt_add_u32_prop(node, "clock-frequency", nominal_freq); + /* TODO: update these 3 uses of nominal_freq to be chip-specific */ + if ((nominal_freq[0] >> 32) == 0) + dt_add_u32_prop(node, "clock-frequency", nominal_freq[0]); /* * Mandatory: 64-bit version of the core clock frequency, always create * this property. */ - dt_add_u64_prop(node, "ibm,extended-clock-frequency", nominal_freq); + dt_add_u64_prop(node, "ibm,extended-clock-frequency", nominal_freq[0]); /* Timebase freq has a fixed value, always use that */ dt_add_u32_prop(node, "timebase-frequency", 512 * MHz); @@ -439,7 +440,7 @@ static void enable_soc_dev(struct device *dev) * Assumption: OCC boots successfully or coreboot die()s, booting in safe * mode without runtime power management is not supported. 
*/ - nominal_freq = build_homer_image((void *)(top * 1024)); + build_homer_image((void *)(top * 1024), nominal_freq); rng_init(); istep_18_11(); diff --git a/src/soc/ibm/power9/chip.h b/src/soc/ibm/power9/chip.h index 4d223ab2fcb..2793b38897e 100644 --- a/src/soc/ibm/power9/chip.h +++ b/src/soc/ibm/power9/chip.h @@ -6,6 +6,6 @@ struct soc_ibm_power9_config { }; -uint64_t build_homer_image(void *homer_bar); +void build_homer_image(void *homer_bar, uint64_t nominal_freq[]); #endif /* __SOC_CAVIUM_CN81XX_CHIP_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 790eab94b9b..6a6abe930ed 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2232,7 +2232,7 @@ static void update_headers(uint8_t chip, struct homer_st *homer, uint64_t cores) pgpe_hdr->magic = 0x504750455f312e30; // PGPE_1.0 } -const struct voltage_bucket_data * get_voltage_data(void) +const struct voltage_bucket_data * get_voltage_data(uint8_t chip) { const struct voltage_kwd *voltage = NULL; const struct voltage_bucket_data *bucket = NULL; @@ -2240,8 +2240,7 @@ const struct voltage_bucket_data * get_voltage_data(void) uint8_t i = 0; /* Using LRP0 because frequencies are the same in all LRP records */ - /* TODO: don't hard-code chip if values are not the same among them */ - voltage = mvpd_get_voltage_data(/*chip=*/0, /*lrp=*/0); + voltage = mvpd_get_voltage_data(chip, /*lrp=*/0); for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { bucket = &voltage->buckets[i]; @@ -2534,7 +2533,7 @@ static void istep_15_4(uint8_t chip, uint64_t cores) /* * This logic is for SMF disabled only! */ -uint64_t build_homer_image(void *homer_bar) +void build_homer_image(void *homer_bar, uint64_t nominal_freq[]) { const uint8_t chips = fsi_get_present_chips(); @@ -2612,6 +2611,7 @@ uint64_t build_homer_image(void *homer_bar) continue; fill_homer_for_chip(chip, &homer[chip], dd, cores[chip]); + nominal_freq[chip] = get_voltage_data(chip)->nominal.freq * MHz; } setup_wakeup_mode(/*chip=*/0, cores[0]); @@ -2628,7 +2628,4 @@ uint64_t build_homer_image(void *homer_bar) istep_21_1(/*chip=*/0, homer, cores[0]); istep_16_1(this_core); - - /* TODO: this should probably be chip-specific, need output parameter instead */ - return (uint64_t)get_voltage_data()->nominal.freq * MHz; } diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h index 8ca8c5166f1..f774f7d92fc 100644 --- a/src/soc/ibm/power9/homer.h +++ b/src/soc/ibm/power9/homer.h @@ -328,6 +328,6 @@ struct voltage_bucket_data; void build_parameter_blocks(uint8_t chip, struct homer_st *homer, uint64_t functional_cores); void configure_xive(int tgt_core); -const struct voltage_bucket_data * get_voltage_data(void); +const struct voltage_bucket_data * get_voltage_data(uint8_t chip); #endif /* __SOC_IBM_POWER9_HOMER_H */ diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index 010641a1851..7a446dfc89e 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -584,7 +584,8 @@ static void get_freq_point_msg_data(struct homer_st *homer, uint8_t *data, uint1 enum { OCC_CFGDATA_FREQ_POINT_VERSION = 0x20 }; OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; - const struct voltage_bucket_data *bucket = get_voltage_data(); + /* TODO: don't hard-code chip number here */ + const struct voltage_bucket_data *bucket = get_voltage_data(/*chip=*/0); uint16_t index = 0; uint16_t min_freq = 0; @@ -951,7 +952,8 @@ static void get_avs_bus_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint static void get_power_data(struct homer_st 
*homer, uint16_t *power_max, uint16_t *power_drop) { - const struct voltage_bucket_data *bucket = get_voltage_data(); + /* TODO: don't hard-code chip number here */ + const struct voltage_bucket_data *bucket = get_voltage_data(/*chip=*/0); /* All processor chips (do not have to be functional) */ const uint8_t num_procs = 2; // from Hostboot log From 48da74ddf2e2796e51ad08dd2a0e9aae9b14248b Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 8 Mar 2022 01:23:54 +0200 Subject: [PATCH 161/213] soc/power9/chip.c: allocate only necessary amount of HOMERs Rather than copying what Hostboot does. Change-Id: I60168789fd2e1664aa746d4cf849338a37d2e036 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 10 ++++++---- src/soc/ibm/power9/chip.h | 2 +- src/soc/ibm/power9/homer.c | 8 ++++---- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 9bc42028d6f..d1f9120dcf7 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -393,7 +393,7 @@ static void rng_init(void) static void enable_soc_dev(struct device *dev) { int chip, idx = 0; - unsigned long reserved_size, top = 0; + unsigned long reserved_size, homers_size, occ_area, top = 0; uint8_t chips = fsi_get_present_chips(); for (chip = 0; chip < MAX_CHIPS; chip++) { @@ -430,9 +430,10 @@ static void enable_soc_dev(struct device *dev) /* * Reserve top 8M (OCC common area) + 4M (HOMER). * - * TODO: 8M + (4M per CPU), hostboot reserves always 8M + 8 * 4M. + * 8M + (4M per CPU), hostboot always reserves 8M + 8 * 4M. */ - reserved_size = 8*1024 + 4*1024 *8 /* * num_of_cpus */; + homers_size = 4*1024 * __builtin_popcount(chips); + reserved_size = 8*1024 + homers_size; top -= reserved_size; reserved_ram_resource_kb(dev, idx++, top, reserved_size); @@ -440,7 +441,8 @@ static void enable_soc_dev(struct device *dev) * Assumption: OCC boots successfully or coreboot die()s, booting in safe * mode without runtime power management is not supported. */ - build_homer_image((void *)(top * 1024), nominal_freq); + occ_area = top + homers_size; + build_homer_image((void *)(top * 1024), (void *)(occ_area * 1024), nominal_freq); rng_init(); istep_18_11(); diff --git a/src/soc/ibm/power9/chip.h b/src/soc/ibm/power9/chip.h index 2793b38897e..89ab3a9733f 100644 --- a/src/soc/ibm/power9/chip.h +++ b/src/soc/ibm/power9/chip.h @@ -6,6 +6,6 @@ struct soc_ibm_power9_config { }; -void build_homer_image(void *homer_bar, uint64_t nominal_freq[]); +void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_freq[]); #endif /* __SOC_CAVIUM_CN81XX_CHIP_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 6a6abe930ed..f693aecb373 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2372,12 +2372,12 @@ static void setup_wakeup_mode(uint8_t chip, uint64_t cores) } /* 15.2 set HOMER BAR */ -static void istep_15_2(uint8_t chip, struct homer_st *homer) +static void istep_15_2(uint8_t chip, struct homer_st *homer, void *common_occ_area) { write_rscom(chip, 0x05012B00, (uint64_t)homer); write_rscom(chip, 0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); - write_rscom(chip, 0x05012B02, (uint64_t)homer + (8 - chip) * 4 * MiB); // FIXME + write_rscom(chip, 0x05012B02, (uint64_t)common_occ_area); write_rscom(chip, 0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); } @@ -2533,7 +2533,7 @@ static void istep_15_4(uint8_t chip, uint64_t cores) /* * This logic is for SMF disabled only! 
*/ -void build_homer_image(void *homer_bar, uint64_t nominal_freq[]) +void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_freq[]) { const uint8_t chips = fsi_get_present_chips(); @@ -2617,7 +2617,7 @@ void build_homer_image(void *homer_bar, uint64_t nominal_freq[]) setup_wakeup_mode(/*chip=*/0, cores[0]); report_istep(15, 2); - istep_15_2(/*chip=*/0, &homer[0]); + istep_15_2(/*chip=*/0, &homer[0], common_occ_area); report_istep(15, 3); istep_15_3(/*chip=*/0, cores[0]); report_istep(15, 4); From f42ffab13f419974bca4daedf42e31d2f31b3de0 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 22 Nov 2021 00:23:04 +0200 Subject: [PATCH 162/213] soc/power9: update SCOM accesses in OCC code Change-Id: Ic33d97fa41ee88dd1833084cbca670ec984700c1 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/occ.h | 13 +-- src/soc/ibm/power9/homer.c | 201 ++++++++++++++++++------------------ src/soc/ibm/power9/occ.c | 188 +++++++++++++++++---------------- 3 files changed, 207 insertions(+), 195 deletions(-) diff --git a/src/include/cpu/power/occ.h b/src/include/cpu/power/occ.h index 49fb6231ec9..fa7df9d6120 100644 --- a/src/include/cpu/power/occ.h +++ b/src/include/cpu/power/occ.h @@ -3,17 +3,18 @@ #ifndef CPU_PPC64_OCC_H #define CPU_PPC64_OCC_H +#include #include struct homer_st; -void clear_occ_special_wakeups(uint64_t cores); -void special_occ_wakeup_disable(uint64_t cores); -void occ_start_from_mem(void); +void clear_occ_special_wakeups(uint8_t chip, uint64_t cores); +void special_occ_wakeup_disable(uint8_t chip, uint64_t cores); +void occ_start_from_mem(uint8_t chip); /* Moves OCC to active state */ -void activate_occ(struct homer_st *homer); +void activate_occ(uint8_t chip, struct homer_st *homer, bool is_master); -void pm_occ_fir_init(void); -void pm_pba_fir_init(void); +void pm_occ_fir_init(uint8_t chip); +void pm_pba_fir_init(uint8_t chip); #endif /* CPU_PPC64_OCC_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index f693aecb373..d8816dd6276 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -841,7 +841,7 @@ static void pba_reset(uint8_t chip) pba_slave_setup_runtime_phase(chip); } -static void stop_gpe_init(struct homer_st *homer) +static void stop_gpe_init(uint8_t chip, struct homer_st *homer) { /* First check if SGPE_ACTIVE is not set in OCCFLAG register if (TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1): // 0x0006C08A @@ -849,9 +849,9 @@ static void stop_gpe_init(struct homer_st *homer) [all] 0 [8] 1 // SGPE_ACTIVE, bits in this register are defined by OCC firmware */ - if (read_scom(0x0006C08A) & PPC_BIT(8)) { + if (read_rscom(chip, 0x0006C08A) & PPC_BIT(8)) { printk(BIOS_WARNING, "SGPE_ACTIVE is set in OCCFLAG register, clearing it\n"); - write_scom(0x0006C08B, PPC_BIT(8)); + write_rscom(chip, 0x0006C08B, PPC_BIT(8)); } /* @@ -864,7 +864,7 @@ static void stop_gpe_init(struct homer_st *homer) */ uint32_t ivpr = 0x80000000 + homer->qpmr.sgpe.header.l1_offset + offsetof(struct homer_st, qpmr); - write_scom(0x00066001, PPC_PLACE(ivpr, 0, 32)); + write_rscom(chip, 0x00066001, PPC_PLACE(ivpr, 0, 32)); /* Program XCR to ACTIVATE SGPE TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 @@ -877,9 +877,9 @@ static void stop_gpe_init(struct homer_st *homer) [all] 0 [1-3] PPE_XIXCR_XCR = 2 // resume */ - write_scom(0x00066010, PPC_PLACE(6, 1, 3)); - write_scom(0x00066010, PPC_PLACE(4, 1, 3)); - write_scom(0x00066010, PPC_PLACE(2, 1, 3)); + write_rscom(chip, 0x00066010, PPC_PLACE(6, 1, 3)); + write_rscom(chip, 0x00066010, 
PPC_PLACE(4, 1, 3)); + write_rscom(chip, 0x00066010, PPC_PLACE(2, 1, 3)); /* * Now wait for SGPE to not be halted and for the HCode to indicate to be @@ -892,8 +892,8 @@ static void stop_gpe_init(struct homer_st *homer) if ((TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1) && // 0x0006C08A (TP.TPCHIP.OCC.OCI.GPE3.GPEXIXSR[0] == 0)): break // 0x00066021 */ - long time = wait_us(125*20, ((read_scom(0x0006C08A) & PPC_BIT(8)) && - !(read_scom(0x00066021) & PPC_BIT(0)))); + long time = wait_us(125*20, ((read_rscom(chip, 0x0006C08A) & PPC_BIT(8)) && + !(read_rscom(chip, 0x00066021) & PPC_BIT(0)))); if (!time) die("Timeout while waiting for SGPE activation\n"); @@ -1147,7 +1147,7 @@ static void load_pm_complex(uint8_t chip, struct homer_st *homer) load_host_data_to_homer(chip, homer); } -static void pm_corequad_init(uint64_t cores) +static void pm_corequad_init(uint8_t chip, uint64_t cores) { enum { EQ_QPPM_QPMMR_CLEAR = 0x100F0104, @@ -1189,33 +1189,35 @@ static void pm_corequad_init(uint64_t cores) * 18 - 19 : PCB interrupt * 20,22,24,26: InterPPM Ivrm/Aclk/Vdata/Dpll enable */ - write_scom_for_chiplet(quad_chiplet, EQ_QPPM_QPMMR_CLEAR, - PPC_BIT(0) | - PPC_BITMASK(1, 11) | - PPC_BIT(12) | - PPC_BIT(13) | - PPC_BIT(14) | - PPC_BITMASK(18, 19) | - PPC_BIT(20) | - PPC_BIT(22) | - PPC_BIT(24) | - PPC_BIT(26)); + write_rscom_for_chiplet(chip, quad_chiplet, EQ_QPPM_QPMMR_CLEAR, + PPC_BIT(0) | + PPC_BITMASK(1, 11) | + PPC_BIT(12) | + PPC_BIT(13) | + PPC_BIT(14) | + PPC_BITMASK(18, 19) | + PPC_BIT(20) | + PPC_BIT(22) | + PPC_BIT(24) | + PPC_BIT(26)); /* Clear QUAD PPM ERROR Register */ - write_scom_for_chiplet(quad_chiplet, EQ_QPPM_ERR, 0); + write_rscom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERR, 0); /* Restore Quad PPM Error Mask */ err_mask = 0xFFFFFF00; // from Hostboot's log - write_scom_for_chiplet(quad_chiplet, EQ_QPPM_ERRMSK, - PPC_PLACE(err_mask, 0, 32)); + write_rscom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERRMSK, + PPC_PLACE(err_mask, 0, 32)); for (int core = quad * 4; core < (quad + 1) * 4; ++core) { chiplet_id_t core_chiplet = EC00_CHIPLET_ID + core; /* Clear the Core PPM CME DoorBells */ - for (int i = 0; i < DOORBELLS_COUNT; ++i) - write_scom_for_chiplet(core_chiplet, CME_DOORBELL_CLEAR[i], - PPC_BITMASK(0, 63)); + for (int i = 0; i < DOORBELLS_COUNT; ++i) { + write_rscom_for_chiplet(chip, core_chiplet, + CME_DOORBELL_CLEAR[i], + PPC_BITMASK(0, 63)); + } /* * Setup Core PPM Mode register @@ -1234,15 +1236,15 @@ static void pm_corequad_init(uint64_t cores) * 10 : STOP_EXIT_TYPE_SEL * 13 : WKUP_NOTIFY_SELECT */ - write_scom_for_chiplet(core_chiplet, C_CPPM_CPMMR_CLEAR, - PPC_BIT(1) | - PPC_BIT(11) | - PPC_BIT(12) | - PPC_BIT(14) | - PPC_BIT(15)); + write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_CPMMR_CLEAR, + PPC_BIT(1) | + PPC_BIT(11) | + PPC_BIT(12) | + PPC_BIT(14) | + PPC_BIT(15)); /* Clear Core PPM Errors */ - write_scom_for_chiplet(core_chiplet, C_CPPM_ERR, 0); + write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_ERR, 0); /* * Clear Hcode Error Injection and other CSAR settings: @@ -1255,21 +1257,21 @@ static void pm_corequad_init(uint64_t cores) * DISABLE_CME_NACK_ON_PROLONGED_DROOP is NOT cleared * as this is a persistent, characterization setting. 
*/ - write_scom_for_chiplet(core_chiplet, C_CPPM_CSAR_CLEAR, - PPC_BIT(27) | - PPC_BIT(28) | - PPC_BIT(30) | - PPC_BIT(31)); + write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_CSAR_CLEAR, + PPC_BIT(27) | + PPC_BIT(28) | + PPC_BIT(30) | + PPC_BIT(31)); /* Restore CORE PPM Error Mask */ err_mask = 0xFFF00000; // from Hostboot's log - write_scom_for_chiplet(core_chiplet, C_CPPM_ERRMSK, - PPC_PLACE(err_mask, 0, 32)); + write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_ERRMSK, + PPC_PLACE(err_mask, 0, 32)); } } } -static void pstate_gpe_init(struct homer_st *homer, uint64_t cores) +static void pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores) { enum { /* The following constants hold approximate values */ @@ -1308,32 +1310,32 @@ static void pstate_gpe_init(struct homer_st *homer, uint64_t cores) uint8_t avsbus_rail = 0; uint64_t ivpr = 0x80000000 + offsetof(struct homer_st, ppmr.l1_bootloader); - write_scom(PU_GPE2_GPEIVPR_SCOM, ivpr << 32); + write_rscom(chip, PU_GPE2_GPEIVPR_SCOM, ivpr << 32); /* Set up the OCC Scratch 2 register before PGPE boot */ - occ_scratch = read_scom(PU_OCB_OCI_OCCS2_SCOM); + occ_scratch = read_rscom(chip, PU_OCB_OCI_OCCS2_SCOM); occ_scratch &= ~PPC_BIT(PGPE_ACTIVE); occ_scratch &= ~PPC_BITMASK(27, 31); occ_scratch |= PPC_PLACE(avsbus_number, 27, 1); occ_scratch |= PPC_PLACE(avsbus_rail, 28, 4); - write_scom(PU_OCB_OCI_OCCS2_SCOM, occ_scratch); + write_rscom(chip, PU_OCB_OCI_OCCS2_SCOM, occ_scratch); - write_scom(PU_GPE2_GPETSEL_SCOM, 0x1A00000000000000); + write_rscom(chip, PU_GPE2_GPETSEL_SCOM, 0x1A00000000000000); /* OCCFLG2_PGPE_HCODE_FIT_ERR_INJ | OCCFLG2_PGPE_HCODE_PSTATE_REQ_ERR_INJ */ - write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); + write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); printk(BIOS_ERR, "Attempting PGPE activation...\n"); - write_scom(PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); - write_scom(PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); - write_scom(PU_GPE2_PPE_XIXCR, PPC_PLACE(RESUME, 1, 3)); + write_rscom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); + write_rscom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); + write_rscom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(RESUME, 1, 3)); wait_ms(PGPE_POLLTIME_MS * TIMEOUT_COUNT, - (read_scom(PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) || - (read_scom(PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); + (read_rscom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) || + (read_rscom(chip, PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); - if (read_scom(PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) + if (read_rscom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) printk(BIOS_ERR, "PGPE was activated successfully\n"); else die("Failed to activate PGPE\n"); @@ -1348,9 +1350,9 @@ static void pstate_gpe_init(struct homer_st *homer, uint64_t cores) if (!IS_EQ_FUNCTIONAL(quad, cores)) continue; - scom_and_or_for_chiplet(EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, - ~PPC_BITMASK(1, 11), - PPC_PLACE(safe_mode_freq, 1, 11)); + rscom_and_or_for_chiplet(chip, EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, + ~PPC_BITMASK(1, 11), + PPC_PLACE(safe_mode_freq, 1, 11)); } } @@ -1376,9 +1378,9 @@ static void pm_pba_init(uint8_t chip) uint8_t attr_pbax_broadcast_vector = 0; /* Assuming ATTR_CHIP_EC_FEATURE_HW423589_OPTION1 == true */ - write_scom(PU_PBACFG, PPC_BIT(PU_PBACFG_CHSW_DIS_GROUP_SCOPE)); + write_rscom(chip, PU_PBACFG, PPC_BIT(PU_PBACFG_CHSW_DIS_GROUP_SCOPE)); - write_scom(PU_PBAFIR, 0); + write_rscom(chip, PU_PBAFIR, 0); data |= PPC_PLACE(attr_pbax_groupid, 
4, 4); data |= PPC_PLACE(attr_pbax_chipid, 8, 3); @@ -1387,17 +1389,17 @@ static void pm_pba_init(uint8_t chip) data |= PPC_PLACE(PBAX_SND_RETRY_COMMIT_OVERCOMMIT, 27, 1); data |= PPC_PLACE(PBAX_SND_RETRY_THRESHOLD, 28, 8); data |= PPC_PLACE(PBAX_SND_TIMEOUT, 36, 5); - write_scom(PU_PBAXCFG_SCOM, data); + write_rscom(chip, PU_PBAXCFG_SCOM, data); } static void pm_pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores) { - pstate_gpe_init(homer, cores); + pstate_gpe_init(chip, homer, cores); pm_pba_init(chip); } /* Generates host configuration vector and updates the value in HOMER */ -static void check_proc_config(struct homer_st *homer) +static void check_proc_config(uint8_t chip, struct homer_st *homer) { uint64_t vector_value = INIT_CONFIG_VALUE; uint64_t *conf_vector = (void *)((uint8_t *)&homer->qpmr + QPMR_PROC_CONFIG_POS); @@ -1408,8 +1410,8 @@ static void check_proc_config(struct homer_st *homer) chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; /* MCS_MCFGP and MCS_MCFGPM registers are undocumented, see istep 14.5. */ - if ((read_scom_for_chiplet(nest, 0x0501080A) & PPC_BIT(0)) || - (read_scom_for_chiplet(nest, 0x0501080C) & PPC_BIT(0))) { + if ((read_rscom_for_chiplet(chip, nest, 0x0501080A) & PPC_BIT(0)) || + (read_rscom_for_chiplet(chip, nest, 0x0501080C) & PPC_BIT(0))) { uint8_t pos = MCS_POS + mcs_i; *conf_vector |= PPC_BIT(pos); @@ -1438,9 +1440,9 @@ static void pm_pss_init(uint8_t chip) * 0-5 frame size * 12-17 in delay */ - scom_and_or(PU_SPIPSS_ADC_CTRL_REG0, - ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), - PPC_PLACE(0x20, 0, 6)); + rscom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0, + ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), + PPC_PLACE(0x20, 0, 6)); /* * 0 adc_fsm_enable = 1 @@ -1452,23 +1454,23 @@ static void pm_pss_init(uint8_t chip) * * Truncating last value to 4 bits gives 0. 
*/ - scom_and_or(PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17), - PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_PLACE(0, 14, 4)); + rscom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17), + PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_PLACE(0, 14, 4)); /* * 0-16 inter frame delay */ - scom_and(PU_SPIPSS_ADC_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); + rscom_and(chip, PU_SPIPSS_ADC_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); - write_scom(PU_SPIPSS_ADC_WDATA_REG, 0); + write_rscom(chip, PU_SPIPSS_ADC_WDATA_REG, 0); /* * 0-5 frame size * 12-17 in delay */ - scom_and_or(PU_SPIPSS_P2S_CTRL_REG0, - ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), - PPC_PLACE(0x20, 0, 6)); + rscom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0, + ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), + PPC_PLACE(0x20, 0, 6)); /* * 0 p2s_fsm_enable = 1 @@ -1478,23 +1480,23 @@ static void pm_pss_init(uint8_t chip) * 4-13 p2s_clock_divider = set to 10Mhz * 17 p2s_nr_of_frames = 1 (for auto 2 mode) */ - scom_and_or(PU_SPIPSS_P2S_CTRL_REG0 + 1, - ~(PPC_BITMASK(0, 13) | PPC_BIT(17)), - PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_BIT(17)); + rscom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0 + 1, + ~(PPC_BITMASK(0, 13) | PPC_BIT(17)), + PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_BIT(17)); /* * 0-16 inter frame delay */ - scom_and(PU_SPIPSS_P2S_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); + rscom_and(chip, PU_SPIPSS_P2S_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); - write_scom(PU_SPIPSS_P2S_WDATA_REG, 0); + write_rscom(chip, PU_SPIPSS_P2S_WDATA_REG, 0); /* * 0-31 100ns value */ - scom_and_or(PU_SPIPSS_100NS_REG, - PPC_BITMASK(0, 31), - PPC_PLACE(powerbus_cfg(chip)->fabric_freq / 40, 0, 32)); + rscom_and_or(chip, PU_SPIPSS_100NS_REG, + PPC_BITMASK(0, 31), + PPC_PLACE(powerbus_cfg(chip)->fabric_freq / 40, 0, 32)); } /* Initializes power-management and starts OCC */ @@ -1502,19 +1504,19 @@ static void start_pm_complex(uint8_t chip, struct homer_st *homer, uint64_t core { enum { STOP_RECOVERY_TRIGGER_ENABLE = 29 }; - pm_corequad_init(cores); + pm_corequad_init(chip, cores); pm_pss_init(chip); - pm_occ_fir_init(); - pm_pba_fir_init(); - stop_gpe_init(homer); + pm_occ_fir_init(chip); + pm_pba_fir_init(chip); + stop_gpe_init(chip, homer); pm_pstate_gpe_init(chip, homer, cores); - check_proc_config(homer); - clear_occ_special_wakeups(cores); - special_occ_wakeup_disable(cores); - occ_start_from_mem(); + check_proc_config(chip, homer); + clear_occ_special_wakeups(chip, cores); + special_occ_wakeup_disable(chip, cores); + occ_start_from_mem(chip); - write_scom(PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); + write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); } static void istep_21_1(uint8_t chip, struct homer_st *homer, uint64_t cores) @@ -1526,7 +1528,8 @@ static void istep_21_1(uint8_t chip, struct homer_st *homer, uint64_t cores) printk(BIOS_ERR, "Done starting PM complex\n"); printk(BIOS_ERR, "Activating OCC...\n"); - activate_occ(homer); + /* Note: only OCCs of chips connected to APSS can be masters */ + activate_occ(chip, homer, /*is_master=*/(chip == 0)); printk(BIOS_ERR, "Done activating OCC\n"); } @@ -2393,15 +2396,15 @@ static void istep_15_3(uint8_t chip, uint64_t cores) if (!IS_EC_FUNCTIONAL(i, cores)) continue; - if ((read_scom_for_chiplet(chiplet, 0xF0001) & group_mask) == group_mask) - scom_and_or_for_chiplet(chiplet, 0xF0001, - ~(group_mask | PPC_BITMASK(16,23)), - PPC_BITMASK(19,21)); + if ((read_rscom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) + rscom_and_or_for_chiplet(chip, chiplet, 0xF0001, + 
~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); - if ((read_scom_for_chiplet(chiplet, 0xF0002) & group_mask) == group_mask) - scom_and_or_for_chiplet(chiplet, 0xF0002, - ~(group_mask | PPC_BITMASK(16,23)), - PPC_BIT(5) | PPC_BITMASK(19,21)); + if ((read_rscom_for_chiplet(chip, chiplet, 0xF0002) & group_mask) == group_mask) + rscom_and_or_for_chiplet(chip, chiplet, 0xF0002, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BIT(5) | PPC_BITMASK(19,21)); } for (int i = 0; i < MAX_QUADS_PER_CHIP; i++) { diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index 7a446dfc89e..eee9cac9fd9 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -95,16 +95,16 @@ struct occ_poll_response { uint8_t sensor_data[]; // 4049 bytes } __attribute__((packed)); -static void pm_ocb_setup(uint32_t ocb_bar) +static void pm_ocb_setup(uint8_t chip, uint32_t ocb_bar) { - write_scom(PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); - write_scom(PU_OCB_PIB_OCBCSR0_CLEAR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE)); - write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)ocb_bar << 32); + write_rscom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + write_rscom(chip, PU_OCB_PIB_OCBCSR0_CLEAR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE)); + write_rscom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)ocb_bar << 32); } -static void check_ocb_mode(uint64_t ocb_csr_address, uint64_t ocb_shcs_address) +static void check_ocb_mode(uint8_t chip, uint64_t ocb_csr_address, uint64_t ocb_shcs_address) { - uint64_t ocb_pib = read_scom(ocb_csr_address); + uint64_t ocb_pib = read_rscom(chip, ocb_csr_address); /* * The following check for circular mode is an additional check @@ -117,7 +117,7 @@ static void check_ocb_mode(uint64_t ocb_csr_address, uint64_t ocb_shcs_address) * anyway to let the PIB error response return occur. (That is * what will happen if this checking code were not here.) 
*/ - uint64_t stream_push_ctrl = read_scom(ocb_shcs_address); + uint64_t stream_push_ctrl = read_rscom(chip, ocb_shcs_address); if (stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_ENABLE)) { uint8_t counter = 0; @@ -126,7 +126,7 @@ static void check_ocb_mode(uint64_t ocb_csr_address, uint64_t ocb_shcs_address) if (!(stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_FULL))) break; - stream_push_ctrl = read_scom(ocb_shcs_address); + stream_push_ctrl = read_rscom(chip, ocb_shcs_address); } if (counter == 4) @@ -135,53 +135,54 @@ static void check_ocb_mode(uint64_t ocb_csr_address, uint64_t ocb_shcs_address) } } -static void put_ocb_indirect(uint32_t ocb_req_length, uint32_t oci_address, - uint64_t *ocb_buffer) +static void put_ocb_indirect(uint8_t chip, uint32_t ocb_req_length, + uint32_t oci_address, uint64_t *ocb_buffer) { - write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + write_rscom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); - check_ocb_mode(PU_OCB_PIB_OCBCSR0_RO, PU_OCB_OCI_OCBSHCS0_SCOM); + check_ocb_mode(chip, PU_OCB_PIB_OCBCSR0_RO, PU_OCB_OCI_OCBSHCS0_SCOM); for (uint32_t index = 0; index < ocb_req_length; index++) - write_scom(PU_OCB_PIB_OCBDR0, ocb_buffer[index]); + write_rscom(chip, PU_OCB_PIB_OCBDR0, ocb_buffer[index]); } -static void get_ocb_indirect(uint32_t ocb_req_length, uint32_t oci_address, - uint64_t *ocb_buffer) +static void get_ocb_indirect(uint8_t chip, uint32_t ocb_req_length, + uint32_t oci_address, uint64_t *ocb_buffer) { - write_scom(PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + write_rscom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); for (uint32_t loopCount = 0; loopCount < ocb_req_length; loopCount++) - ocb_buffer[loopCount] = read_scom(PU_OCB_PIB_OCBDR0); + ocb_buffer[loopCount] = read_rscom(chip, PU_OCB_PIB_OCBDR0); } -static void write_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length) +static void write_occ_sram(uint8_t chip, uint32_t address, uint64_t *buffer, size_t data_length) { - pm_ocb_setup(address); - put_ocb_indirect(data_length / 8, address, buffer); + pm_ocb_setup(chip, address); + put_ocb_indirect(chip, data_length / 8, address, buffer); } -static void read_occ_sram(uint32_t address, uint64_t *buffer, size_t data_length) +static void read_occ_sram(uint8_t chip, uint32_t address, uint64_t *buffer, size_t data_length) { - pm_ocb_setup(address); - get_ocb_indirect(data_length / 8, address, buffer); + pm_ocb_setup(chip, address); + get_ocb_indirect(chip, data_length / 8, address, buffer); } -static void write_occ_command(uint64_t write_data) +static void write_occ_command(uint8_t chip, uint64_t write_data) { - check_ocb_mode(PU_OCB_PIB_OCBCSR1_RO, PU_OCB_OCI_OCBSHCS1_SCOM); - write_scom(PU_OCB_PIB_OCBDR1, write_data); + check_ocb_mode(chip, PU_OCB_PIB_OCBCSR1_RO, PU_OCB_OCI_OCBSHCS1_SCOM); + write_rscom(chip, PU_OCB_PIB_OCBDR1, write_data); } -void clear_occ_special_wakeups(uint64_t cores) +void clear_occ_special_wakeups(uint8_t chip, uint64_t cores) { for (size_t i = 0; i < MAX_CORES_PER_CHIP; i += 2) { if (!IS_EX_FUNCTIONAL(i, cores)) continue; - scom_and_for_chiplet(EC00_CHIPLET_ID + i, EX_PPM_SPWKUP_OCC, ~PPC_BIT(0)); + rscom_and_for_chiplet(chip, EC00_CHIPLET_ID + i, EX_PPM_SPWKUP_OCC, + ~PPC_BIT(0)); } } -void special_occ_wakeup_disable(uint64_t cores) +void special_occ_wakeup_disable(uint8_t chip, uint64_t cores) { enum { PPM_SPWKUP_FSP = 0x200F010B }; @@ -189,14 +190,14 @@ void special_occ_wakeup_disable(uint64_t cores) if (!IS_EC_FUNCTIONAL(i, cores)) continue; - 
write_scom_for_chiplet(EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP, 0); + write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP, 0); /* This puts an inherent delay in the propagation of the reset transition */ - (void)read_scom_for_chiplet(EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP); + (void)read_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP); } } /* Sets up boot loader in SRAM and returns 32-bit jump instruction to it */ -static uint64_t setup_memory_boot(void) +static uint64_t setup_memory_boot(uint8_t chip) { enum { OCC_BOOT_OFFSET = 0x40, @@ -220,12 +221,12 @@ static uint64_t setup_memory_boot(void) sram_program[1] |= ppc_bctr(); /* Write to SRAM */ - write_occ_sram(OCC_SRAM_BOOT_ADDR, sram_program, sizeof(sram_program)); + write_occ_sram(chip, OCC_SRAM_BOOT_ADDR, sram_program, sizeof(sram_program)); return ((uint64_t)ppc_b(OCC_SRAM_BOOT_ADDR2) << 32); } -void occ_start_from_mem(void) +void occ_start_from_mem(uint8_t chip) { enum { OCB_PIB_OCR_CORE_RESET_BIT = 0, @@ -238,25 +239,25 @@ void occ_start_from_mem(void) PU_OCB_PIB_OCR_OR = 0x0006D002, }; - write_scom(PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + write_rscom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); /* * Set up Boot Vector Registers in SRAM: * - set bv0-2 to all 0's (illegal instructions) * - set bv3 to proper branch instruction */ - write_scom(PU_SRAM_SRBV0_SCOM, 0); - write_scom(PU_SRAM_SRBV0_SCOM + 1, 0); - write_scom(PU_SRAM_SRBV0_SCOM + 2, 0); - write_scom(PU_SRAM_SRBV0_SCOM + 3, setup_memory_boot()); - - write_scom(PU_JTG_PIB_OJCFG_AND, ~PPC_BIT(JTG_PIB_OJCFG_DBG_HALT_BIT)); - write_scom(PU_OCB_PIB_OCR_OR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); - write_scom(PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); + write_rscom(chip, PU_SRAM_SRBV0_SCOM, 0); + write_rscom(chip, PU_SRAM_SRBV0_SCOM + 1, 0); + write_rscom(chip, PU_SRAM_SRBV0_SCOM + 2, 0); + write_rscom(chip, PU_SRAM_SRBV0_SCOM + 3, setup_memory_boot(chip)); + + write_rscom(chip, PU_JTG_PIB_OJCFG_AND, ~PPC_BIT(JTG_PIB_OJCFG_DBG_HALT_BIT)); + write_rscom(chip, PU_OCB_PIB_OCR_OR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); + write_rscom(chip, PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); } /* Wait for OCC to reach communications checkpoint */ -static void wait_for_occ_checkpoint(void) +static void wait_for_occ_checkpoint(uint8_t chip) { enum { /* Wait up to 15 seconds for OCC to be ready (1500 * 10ms = 15s) */ @@ -279,7 +280,7 @@ static void wait_for_occ_checkpoint(void) udelay(US_BETWEEN_READ); /* Read SRAM response buffer to check for OCC checkpoint */ - read_occ_sram(OCC_RSP_SRAM_ADDR, (uint64_t *)response, sizeof(response)); + read_occ_sram(chip, OCC_RSP_SRAM_ADDR, (uint64_t *)response, sizeof(response)); /* Pull status from response (byte 2) */ status = response[2]; @@ -446,7 +447,7 @@ static bool parse_occ_response(struct homer_st *homer, uint8_t occ_cmd, return true; } -static bool write_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, +static bool write_occ_cmd(uint8_t chip, struct homer_st *homer, uint8_t occ_cmd, const uint8_t *data, uint16_t data_len, uint8_t *response, uint32_t *response_len) { @@ -462,7 +463,7 @@ static bool write_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, build_occ_cmd(homer, occ_cmd, cmd_seq_num, data, data_len); /* Sender: HTMGT; command: Command Write Attention */ - write_occ_command(0x1001000000000000); + write_occ_command(chip, 0x1001000000000000); /* Wait for OCC to process command and send response (timeout is the same for all commands) */ @@ 
-493,7 +494,7 @@ static bool write_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, return true; } -static void send_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, +static void send_occ_cmd(uint8_t chip, struct homer_st *homer, uint8_t occ_cmd, const uint8_t *data, uint16_t data_len, uint8_t *response, uint32_t *response_len) { @@ -502,7 +503,7 @@ static void send_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, uint8_t i = 0; for (i = 0; i < MAX_TRIES; ++i) { - if (write_occ_cmd(homer, occ_cmd, data, data_len, response, response_len)) + if (write_occ_cmd(chip, homer, occ_cmd, data, data_len, response, response_len)) break; if (i < MAX_TRIES - 1) @@ -514,7 +515,7 @@ static void send_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, } /* Reports OCC error to the user and clears it on OCC's side */ -static void handle_occ_error(struct homer_st *homer, +static void handle_occ_error(uint8_t chip, struct homer_st *homer, const struct occ_poll_response *response) { static uint8_t error_log_buf[4096]; @@ -535,18 +536,18 @@ static void handle_occ_error(struct homer_st *homer, error_length = sizeof(error_log_buf); } - read_occ_sram(response->error_address, (uint64_t *)error_log_buf, error_length); + read_occ_sram(chip, response->error_address, (uint64_t *)error_log_buf, error_length); printk(BIOS_WARNING, "OCC error log:\n"); hexdump(error_log_buf, error_length); /* Confirm to OCC that we've read the log */ - send_occ_cmd(homer, OCC_CMD_CLEAR_ERROR_LOG, + send_occ_cmd(chip, homer, OCC_CMD_CLEAR_ERROR_LOG, clear_log_data, sizeof(clear_log_data), NULL, &response_len); } -static void poll_occ(struct homer_st *homer, bool flush_all_errors, +static void poll_occ(uint8_t chip, struct homer_st *homer, bool flush_all_errors, struct occ_poll_response *response) { enum { OCC_POLL_DATA_MIN_SIZE = 40 }; @@ -556,7 +557,7 @@ static void poll_occ(struct homer_st *homer, bool flush_all_errors, const uint8_t poll_data[1] = { 0x20 /*version*/ }; uint32_t response_len = sizeof(*response); - send_occ_cmd(homer, OCC_CMD_POLL, poll_data, sizeof(poll_data), + send_occ_cmd(chip, homer, OCC_CMD_POLL, poll_data, sizeof(poll_data), (uint8_t *)response, &response_len); if (response_len < OCC_POLL_DATA_MIN_SIZE) @@ -568,7 +569,7 @@ static void poll_occ(struct homer_st *homer, bool flush_all_errors, if (response->error_id == 0) break; - handle_occ_error(homer, response); + handle_occ_error(chip, homer, response); --max_more_errors; if (max_more_errors == 0) { @@ -1041,7 +1042,7 @@ static void get_gpu_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *si *size = index; } -static void send_occ_config_data(struct homer_st *homer) +static void send_occ_config_data(uint8_t chip, struct homer_st *homer) { /* * Order in which these are sent is important! 
@@ -1074,21 +1075,22 @@ static void send_occ_config_data(struct homer_st *homer) if (data_len > sizeof(data)) die("Buffer for OCC data is too small!\n"); - send_occ_cmd(homer, OCC_CMD_SETUP_CFG_DATA, data, data_len, NULL, - &response_len); - poll_occ(homer, /*flush_all_errors=*/false, &poll_response); + send_occ_cmd(chip, homer, OCC_CMD_SETUP_CFG_DATA, data, data_len, + NULL, &response_len); + poll_occ(chip, homer, /*flush_all_errors=*/false, &poll_response); } } -static void send_occ_user_power_cap(struct homer_st *homer) +static void send_occ_user_power_cap(uint8_t chip, struct homer_st *homer) { /* No power limit */ const uint8_t data[2] = { 0x00, 0x00 }; uint32_t response_len = 0; - send_occ_cmd(homer, OCC_CMD_SET_POWER_CAP, data, sizeof(data), NULL, &response_len); + send_occ_cmd(chip, homer, OCC_CMD_SET_POWER_CAP, data, sizeof(data), + NULL, &response_len); } -static void wait_for_occ_status(struct homer_st *homer, uint8_t status_bit) +static void wait_for_occ_status(uint8_t chip, struct homer_st *homer, uint8_t status_bit) { enum { MAX_POLLS = 200, @@ -1099,7 +1101,7 @@ static void wait_for_occ_status(struct homer_st *homer, uint8_t status_bit) struct occ_poll_response poll_response; for (num_polls = 0; num_polls < MAX_POLLS; ++num_polls) { - poll_occ(homer, /*flush_all_errors=*/false, &poll_response); + poll_occ(chip, homer, /*flush_all_errors=*/false, &poll_response); if (poll_response.status & status_bit) break; @@ -1116,7 +1118,7 @@ static void wait_for_occ_status(struct homer_st *homer, uint8_t status_bit) die("Failed to wait until OCC has reached state 0x%02x\n", status_bit); } -static void set_occ_state(struct homer_st *homer, uint8_t state) +static void set_occ_state(uint8_t chip, struct homer_st *homer, uint8_t state) { struct occ_poll_response poll_response; @@ -1125,53 +1127,59 @@ static void set_occ_state(struct homer_st *homer, uint8_t state) uint32_t response_len = 0; /* Send poll cmd to confirm comm has been established and flush old errors */ - poll_occ(homer, /*flush_all_errors=*/true, &poll_response); + poll_occ(chip, homer, /*flush_all_errors=*/true, &poll_response); /* Try to switch to a new state */ - send_occ_cmd(homer, OCC_CMD_SET_STATE, data, sizeof(data), NULL, &response_len); + send_occ_cmd(chip, homer, OCC_CMD_SET_STATE, data, sizeof(data), NULL, &response_len); /* Send poll to query state of all OCC and flush any errors */ - poll_occ(homer, /*flush_all_errors=*/true, &poll_response); + poll_occ(chip, homer, /*flush_all_errors=*/true, &poll_response); if (poll_response.state != state) die("State of OCC is 0x%02x instead of 0x%02x.\n", poll_response.state, state); } -static void set_occ_active_state(struct homer_st *homer) +static void set_occ_active_state(uint8_t chip, struct homer_st *homer) { enum { OCC_STATUS_ACTIVE_READY = 0x01, OCC_STATE_ACTIVE = 0x03, }; - wait_for_occ_status(homer, OCC_STATUS_ACTIVE_READY); - set_occ_state(homer, OCC_STATE_ACTIVE); + wait_for_occ_status(chip, homer, OCC_STATUS_ACTIVE_READY); + set_occ_state(chip, homer, OCC_STATE_ACTIVE); } -void activate_occ(struct homer_st *homer) +void activate_occ(uint8_t chip, struct homer_st *homer, bool is_master) { + /* TODO: Hostboot performs some steps below for every OCC before moving + * to the next step (looks like performing it for master OCC + * first), so will need to loop over OCCs for some steps. + * All this after starting PM complex for every chip outside + * of this function. 
*/ + struct occ_poll_response poll_response; /* Make sure OCCs are ready for communication */ - wait_for_occ_checkpoint(); + wait_for_occ_checkpoint(chip); /* Send initial poll to all OCCs to establish communication */ - poll_occ(homer, /*flush_all_errors=*/false, &poll_response); + poll_occ(chip, homer, /*flush_all_errors=*/false, &poll_response); /* Send OCC's config data */ - send_occ_config_data(homer); + send_occ_config_data(chip, homer); /* Set the User PCAP */ - send_occ_user_power_cap(homer); + send_occ_user_power_cap(chip, homer); /* Switch for OCC to active state */ - set_occ_active_state(homer); + set_occ_active_state(chip, homer); /* Hostboot sets active sensors for all OCCs here, so BMC can start - communication with OCCs. */ + * communication with OCCs. */ } -void pm_occ_fir_init(void) +void pm_occ_fir_init(uint8_t chip) { enum { PERV_TP_OCC_SCOM_OCCLFIR = 0x01010800, @@ -1264,18 +1272,18 @@ void pm_occ_fir_init(void) | PPC_BIT(SRAM_WRITE_ERR) | PPC_BIT(SRT_FSM_ERR) | PPC_BIT(STOP_RCV_NOTIFY_PRD) | PPC_BIT(C405_ECC_UE); - uint64_t mask = read_scom(PERV_TP_OCC_SCOM_OCCLFIR + MASK_INCR); + uint64_t mask = read_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_INCR); mask &= ~action0_bits; mask &= ~action1_bits; - write_scom(PERV_TP_OCC_SCOM_OCCLFIR, 0); - write_scom(PERV_TP_OCC_SCOM_OCCLFIR + ACTION0_INCR, action0_bits); - write_scom(PERV_TP_OCC_SCOM_OCCLFIR + ACTION1_INCR, action1_bits); - write_scom(PERV_TP_OCC_SCOM_OCCLFIR + MASK_WOR_INCR, mask); - write_scom(PERV_TP_OCC_SCOM_OCCLFIR + MASK_WAND_INCR, mask); + write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR, 0); + write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION0_INCR, action0_bits); + write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION1_INCR, action1_bits); + write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WOR_INCR, mask); + write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WAND_INCR, mask); } -void pm_pba_fir_init(void) +void pm_pba_fir_init(uint8_t chip) { enum { PU_PBAFIR = 0x05012840, @@ -1343,9 +1351,9 @@ void pm_pba_fir_init(void) mask &= ~action0_bits; mask &= ~action1_bits; - write_scom(PU_PBAFIR, 0); - write_scom(PU_PBAFIR + ACTION0_INCR, action0_bits); - write_scom(PU_PBAFIR + ACTION1_INCR, action1_bits); - write_scom(PU_PBAFIR + MASK_WOR_INCR, mask); - write_scom(PU_PBAFIR + MASK_WAND_INCR, mask); + write_rscom(chip, PU_PBAFIR, 0); + write_rscom(chip, PU_PBAFIR + ACTION0_INCR, action0_bits); + write_rscom(chip, PU_PBAFIR + ACTION1_INCR, action1_bits); + write_rscom(chip, PU_PBAFIR + MASK_WOR_INCR, mask); + write_rscom(chip, PU_PBAFIR + MASK_WAND_INCR, mask); } From 7ab0c20fc3564e58d594bb5d6853d28414d3484a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 21 Nov 2021 19:43:23 +0200 Subject: [PATCH 163/213] soc/power9/: prepare sending data to slave OCCs Change-Id: I104ed3e1acd518dc3a03d4483ee6b65ca1f45e4c Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/occ.h | 2 +- src/soc/ibm/power9/homer.c | 3 +- src/soc/ibm/power9/occ.c | 97 +++++++++++++++++++++++++------------ 3 files changed, 68 insertions(+), 34 deletions(-) diff --git a/src/include/cpu/power/occ.h b/src/include/cpu/power/occ.h index fa7df9d6120..cd16a2a2b89 100644 --- a/src/include/cpu/power/occ.h +++ b/src/include/cpu/power/occ.h @@ -12,7 +12,7 @@ void clear_occ_special_wakeups(uint8_t chip, uint64_t cores); void special_occ_wakeup_disable(uint8_t chip, uint64_t cores); void occ_start_from_mem(uint8_t chip); /* Moves OCC to active state */ -void activate_occ(uint8_t chip, struct homer_st *homer, bool is_master); +void activate_occ(uint8_t chip, 
struct homer_st *homer); void pm_occ_fir_init(uint8_t chip); void pm_pba_fir_init(uint8_t chip); diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index d8816dd6276..9945bed5f56 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -1528,8 +1528,7 @@ static void istep_21_1(uint8_t chip, struct homer_st *homer, uint64_t cores) printk(BIOS_ERR, "Done starting PM complex\n"); printk(BIOS_ERR, "Activating OCC...\n"); - /* Note: only OCCs of chips connected to APSS can be masters */ - activate_occ(chip, homer, /*is_master=*/(chip == 0)); + activate_occ(chip, homer); printk(BIOS_ERR, "Done activating OCC\n"); } diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index eee9cac9fd9..dec8538f267 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -70,9 +70,16 @@ enum fir_offset { ACTION1_INCR = 7 }; +struct occ_cfg_inputs { + struct homer_st *homer; + uint8_t chip; + bool is_master_occ; +}; + struct occ_cfg_info { const char *name; - void (*func)(struct homer_st *homer, uint8_t *data, uint16_t *size); + void (*func)(const struct occ_cfg_inputs *inputs, uint8_t *data, uint16_t *size); + bool to_master_only; }; struct occ_poll_response { @@ -580,13 +587,13 @@ static void poll_occ(uint8_t chip, struct homer_st *homer, bool flush_all_errors } } -static void get_freq_point_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_freq_point_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_FREQ_POINT_VERSION = 0x20 }; - OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; + OCCPstateParmBlock *oppb = (void *)inputs->homer->ppmr.occ_parm_block; - /* TODO: don't hard-code chip number here */ - const struct voltage_bucket_data *bucket = get_voltage_data(/*chip=*/0); + const struct voltage_bucket_data *bucket = get_voltage_data(inputs->chip); uint16_t index = 0; uint16_t min_freq = 0; @@ -622,17 +629,22 @@ static void get_freq_point_msg_data(struct homer_st *homer, uint8_t *data, uint1 *size = index; } -static void get_occ_role_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_occ_role_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { - enum { OCC_ROLE_MASTER = 0x01 }; + enum { + OCC_ROLE_SLAVE = 0x00, + OCC_ROLE_MASTER = 0x01, + }; data[0] = OCC_CFGDATA_OCC_ROLE; - data[1] = OCC_ROLE_MASTER; + data[1] = (inputs->is_master_occ ? 
OCC_ROLE_MASTER : OCC_ROLE_SLAVE); *size = 2; } -static void get_apss_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_apss_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_APSS_VERSION = 0x20 }; @@ -691,7 +703,8 @@ static void get_apss_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *s *size = index; } -static void get_mem_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_mem_cfg_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_MEM_CONFIG_VERSION = 0x21 }; @@ -735,7 +748,8 @@ static void add_sensor_id(uint8_t *data, uint16_t *index, uint32_t sensor_id) #define VRM_VDD_CALLOUT_ID 0x8C #define VRM_VDD_TEMP_ID 0xFF -static void get_sys_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_sys_cfg_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_SYS_CONFIG_VERSION = 0x21, @@ -791,7 +805,8 @@ static void get_sys_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size = index; } -static void get_thermal_ctrl_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_thermal_ctrl_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_TCT_CONFIG_VERSION = 0x20, @@ -887,7 +902,8 @@ static void get_thermal_ctrl_msg_data(struct homer_st *homer, uint8_t *data, uin *size = index; } -static void get_power_cap_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_power_cap_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_PCAP_CONFIG_VERSION = 0x20 }; @@ -925,7 +941,8 @@ static void get_power_cap_msg_data(struct homer_st *homer, uint8_t *data, uint16 *size = index; } -static void get_avs_bus_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void get_avs_bus_cfg_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_AVSBUS_CONFIG_VERSION = 0x01 }; @@ -951,10 +968,10 @@ static void get_avs_bus_cfg_msg_data(struct homer_st *homer, uint8_t *data, uint *size = index; } -static void get_power_data(struct homer_st *homer, uint16_t *power_max, uint16_t *power_drop) +static void get_power_data(const struct occ_cfg_inputs *inputs, + uint16_t *power_max, uint16_t *power_drop) { - /* TODO: don't hard-code chip number here */ - const struct voltage_bucket_data *bucket = get_voltage_data(/*chip=*/0); + const struct voltage_bucket_data *bucket = get_voltage_data(inputs->chip); /* All processor chips (do not have to be functional) */ const uint8_t num_procs = 2; // from Hostboot log @@ -974,7 +991,7 @@ static void get_power_data(struct homer_st *homer, uint16_t *power_max, uint16_t *power_max = proc_socket_power * num_procs; *power_max += mem_power_min_throttles + misc_power; - OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; + OCCPstateParmBlock *oppb = (void *)inputs->homer->ppmr.occ_parm_block; uint16_t min_freq_mhz = oppb->frequency_min_khz / 1000; const uint16_t mhz_per_watt = 28; // ATTR_PROC_MHZ_PER_WATT, from talos.xml /* Drop is always calculated from Turbo to Min (not ultra) */ @@ -985,7 +1002,7 @@ static void get_power_data(struct homer_st *homer, uint16_t *power_max, uint16_t *power_drop = proc_drop + memory_drop; } -static void get_gpu_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *size) +static void 
get_gpu_msg_data(const struct occ_cfg_inputs *inputs, uint8_t *data, uint16_t *size) { enum { OCC_CFGDATA_GPU_CONFIG_VERSION = 0x01, @@ -1000,7 +1017,7 @@ static void get_gpu_msg_data(struct homer_st *homer, uint8_t *data, uint16_t *si data[index++] = OCC_CFGDATA_GPU_CONFIG; data[index++] = OCC_CFGDATA_GPU_CONFIG_VERSION; - get_power_data(homer, &power_max, &power_drop); + get_power_data(inputs, &power_max, &power_drop); memcpy(&data[index], &power_max, 2); // Total non-GPU max power (W) index += 2; @@ -1044,20 +1061,31 @@ static void send_occ_config_data(uint8_t chip, struct homer_st *homer) { + enum { + TO_ALL = 0, /* to_master_only = false */ + TO_MASTER = 1, /* to_master_only = true */ + }; + /* * Order in which these are sent is important! * Not every order works. */ struct occ_cfg_info cfg_info[] = { - { "System config", &get_sys_cfg_msg_data }, - { "APSS config", &get_apss_msg_data }, - { "OCC role", &get_occ_role_msg_data }, - { "Frequency points", &get_freq_point_msg_data }, - { "Memory config", &get_mem_cfg_msg_data }, - { "Power cap", &get_power_cap_msg_data }, - { "Thermal control", &get_thermal_ctrl_msg_data }, - { "AVS", &get_avs_bus_cfg_msg_data }, - { "GPU", &get_gpu_msg_data }, + { "System config", &get_sys_cfg_msg_data, TO_ALL }, + { "APSS config", &get_apss_msg_data, TO_ALL }, + { "OCC role", &get_occ_role_msg_data, TO_ALL }, + { "Frequency points", &get_freq_point_msg_data, TO_MASTER }, + { "Memory config", &get_mem_cfg_msg_data, TO_ALL }, + { "Power cap", &get_power_cap_msg_data, TO_MASTER }, + { "Thermal control", &get_thermal_ctrl_msg_data, TO_ALL }, + { "AVS", &get_avs_bus_cfg_msg_data, TO_ALL }, + { "GPU", &get_gpu_msg_data, TO_ALL }, + }; + + const struct occ_cfg_inputs inputs = { + .homer = homer, + .chip = chip, + .is_master_occ = (chip == 0), }; uint8_t i; @@ -1071,7 +1099,14 @@ static void send_occ_config_data(uint8_t chip, struct homer_st *homer) /* Poll is sent between configuration packets to flush errors */ struct occ_poll_response poll_response; - cfg_info[i].func(homer, data, &data_len); + /* + * Certain kinds of configuration data are broadcast to slave + * OCCs by the master and must not be sent to them directly + */ + if (cfg_info[i].to_master_only && !inputs.is_master_occ) + continue; + + cfg_info[i].func(&inputs, data, &data_len); if (data_len > sizeof(data)) die("Buffer for OCC data is too small!\n"); @@ -1150,7 +1185,7 @@ static void set_occ_active_state(uint8_t chip, struct homer_st *homer) set_occ_state(chip, homer, OCC_STATE_ACTIVE); } -void activate_occ(uint8_t chip, struct homer_st *homer, bool is_master) +void activate_occ(uint8_t chip, struct homer_st *homer) { /* TODO: Hostboot performs some steps below for every OCC before moving * to the next step (looks like performing it for master OCC From 815ccaf9897f0975e2118c523fcbca7100267863 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 13 Feb 2022 16:25:29 +0200 Subject: [PATCH 164/213] soc/power9/homer.c: block CME startup on SGPE init for slave CPUs Otherwise SGPE won't start. Our initialization sequence is missing something that enables SCOM access to CMEs; even for the master CPU only two out of four work. This isn't an issue, however, as skiboot seems to reset SGPEs in a way that enables everything.
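For reference, a minimal self-contained sketch of the value this change writes to the OR form
of PU_OCB_OCI_QSSR_SCOM2 (0x0006C09A) in the hunk below. It assumes the usual IBM big-endian
bit numbering (bit 0 is the MSB of the 64-bit register); the macros here are illustrative
stand-ins, not the project's PPC_BIT()/PPC_BITMASK() definitions:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-ins; IBM numbering: bit 0 is the most significant bit. */
    #define BIT_BE(n)        (1ULL << (63 - (n)))
    #define BITMASK_BE(a, b) ((~0ULL >> (a)) & (~0ULL << (63 - (b))))

    int main(void)
    {
            /* Set bits request stop state: bits 0-11 cover the CMEs, 14-19 the EQs. */
            uint64_t qssr_or = BITMASK_BE(0, 11) | BITMASK_BE(14, 19);

            assert(qssr_or == 0xFFF3F00000000000ULL);
            printf("QSSR OR value: 0x%016llx\n", (unsigned long long)qssr_or);
            return 0;
    }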
Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 9945bed5f56..f52fef37d6a 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2428,6 +2428,15 @@ static void istep_15_3(uint8_t chip, uint64_t cores) qcsr |= PPC_BIT(i); } write_rscom(chip, 0x0006C094, qcsr); + + if (chip != 0) { + /* + * PU_OCB_OCI_QSSR_SCOM2 (OR) + * Start no CMEs on slave CPUs (set bit implies stop state). + */ + write_rscom(chip, 0x0006C09A, PPC_BITMASK(0, 11) | /* CMEs */ + PPC_BITMASK(14, 19) /* EQs */); + } } /* From 46fbd763b2f649f26c36738424b0ad02fdeef0b2 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 13 Feb 2022 16:21:35 +0200 Subject: [PATCH 165/213] soc/power9/homer.c: execute 15.* isteps for each CPU Change-Id: I4904ae931652aac1be268fb4a0be66adcd0d22f7 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index f52fef37d6a..1a8406f1327 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2625,14 +2625,28 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ nominal_freq[chip] = get_voltage_data(chip)->nominal.freq * MHz; } - setup_wakeup_mode(/*chip=*/0, cores[0]); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + setup_wakeup_mode(chip, cores[chip]); + } report_istep(15, 2); - istep_15_2(/*chip=*/0, &homer[0], common_occ_area); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + istep_15_2(chip, &homer[chip], common_occ_area); + } + report_istep(15, 3); - istep_15_3(/*chip=*/0, cores[0]); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + istep_15_3(chip, cores[chip]); + } + report_istep(15, 4); - istep_15_4(/*chip=*/0, cores[0]); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + istep_15_4(chip, cores[chip]); + } /* Boot OCC here and activate SGPE at the same time */ /* TODO: initialize OCC for the second CPU when it's present */ From daedca23a692253a193ae86cb14c92515eefc17e Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 13 Feb 2022 16:23:50 +0200 Subject: [PATCH 166/213] soc/power9/: re-organize istep 21.1 Loop over CPUs on each step except for those which target master OCC only. 
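A rough, self-contained illustration of that split (the step functions below are placeholders,
not the project's calls, and MAX_CHIPS = 2 is assumed for the two-socket board): per-chip steps
iterate over the presence bitmask returned by fsi_get_present_chips(), while steps that only the
master OCC accepts (user power cap, state change) go to chip 0, per the earlier note that only
OCCs of chips connected to the APSS can be masters:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_CHIPS 2

    /* Placeholder steps standing in for the real per-chip/master-only calls. */
    static void per_chip_step(uint8_t chip)    { printf("per-chip step, chip %u\n", (unsigned)chip); }
    static void master_only_step(uint8_t chip) { printf("master-only step, chip %u\n", (unsigned)chip); }

    int main(void)
    {
            /* Presence bitmask as used in these patches: 0x01 = chip 0 only, 0x03 = both. */
            uint8_t chips = 0x03;

            for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
                    if (chips & (1 << chip))
                            per_chip_step(chip);
            }

            /* Master-OCC-only steps always target chip 0. */
            master_only_step(0);
            return 0;
    }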
Change-Id: I3b9f7f09cffa2de5372b9d07c45d47cfd16bebc9 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 17 ++++++++++------ src/soc/ibm/power9/occ.c | 40 +++++++++++++++++++++----------------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 1a8406f1327..f156862f7bd 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -1519,16 +1519,22 @@ static void start_pm_complex(uint8_t chip, struct homer_st *homer, uint64_t core write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); } -static void istep_21_1(uint8_t chip, struct homer_st *homer, uint64_t cores) +static void istep_21_1(uint8_t chips, struct homer_st *homers, const uint64_t *cores) { - load_pm_complex(chip, homer); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + load_pm_complex(chip, &homers[chip]); + } printk(BIOS_ERR, "Starting PM complex...\n"); - start_pm_complex(chip, homer, cores); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + start_pm_complex(chip, &homers[chip], cores[chip]); + } printk(BIOS_ERR, "Done starting PM complex\n"); printk(BIOS_ERR, "Activating OCC...\n"); - activate_occ(chip, homer); + activate_occ(chips, homers); printk(BIOS_ERR, "Done activating OCC\n"); } @@ -2649,8 +2655,7 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ } /* Boot OCC here and activate SGPE at the same time */ - /* TODO: initialize OCC for the second CPU when it's present */ - istep_21_1(/*chip=*/0, homer, cores[0]); + istep_21_1(chips, homer, cores); istep_16_1(this_core); } diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index dec8538f267..38d83914815 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -1185,33 +1185,37 @@ static void set_occ_active_state(uint8_t chip, struct homer_st *homer) set_occ_state(chip, homer, OCC_STATE_ACTIVE); } -void activate_occ(uint8_t chip, struct homer_st *homer) +void activate_occ(uint8_t chips, struct homer_st *homers) { - /* TODO: Hostboot performs some steps below for every OCC before moving - * to the next step (looks like performing it for master OCC - * first), so will need to loop over OCCs for some steps. - * All this after starting PM complex for every chip outside - * of this function. 
*/ - - struct occ_poll_response poll_response; - /* Make sure OCCs are ready for communication */ - wait_for_occ_checkpoint(chip); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + wait_for_occ_checkpoint(chip); + } /* Send initial poll to all OCCs to establish communication */ - poll_occ(chip, homer, /*flush_all_errors=*/false, &poll_response); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + struct occ_poll_response poll_response; + poll_occ(chip, &homers[chip], /*flush_all_errors=*/false, + &poll_response); + } + } /* Send OCC's config data */ - send_occ_config_data(chip, homer); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + send_occ_config_data(chip, &homers[chip]); + } - /* Set the User PCAP */ - send_occ_user_power_cap(chip, homer); + /* Set the User PCAP (sent only to master OCC) */ + send_occ_user_power_cap(/*chip=*/0, &homers[0]); - /* Switch for OCC to active state */ - set_occ_active_state(chip, homer); + /* Switch for OCC to active state (sent only to master OCC) */ + set_occ_active_state(/*chip=*/0, &homers[0]); - /* Hostboot sets active sensors for all OCCs here, so BMC can start - * communication with OCCs. */ + /* TODO: Hostboot sets active sensors for all OCCs here, so BMC can start + communication with OCCs. */ } void pm_occ_fir_init(uint8_t chip) From 184755996cf920aa4295f28e5319bb97fade3a57 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 9 Mar 2022 02:10:47 +0200 Subject: [PATCH 167/213] src/power9/: read OCC partition only once Change-Id: If3acd996d74406ed20a7f1ab143e007014345419 Signed-off-by: Sergii Dmytruk --- src/mainboard/raptor-cs/talos-2/Kconfig | 4 ++ src/mainboard/raptor-cs/talos-2/memlayout.ld | 2 +- src/soc/ibm/power9/homer.c | 44 +++++++++----------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index e719dc86f80..62148ff0cd4 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -41,6 +41,10 @@ config MAINBOARD_DIR string default "raptor-cs/talos-2" +config HEAP_SIZE + hex + default 0x200000 + config MAINBOARD_PART_NUMBER string default "Talos II" diff --git a/src/mainboard/raptor-cs/talos-2/memlayout.ld b/src/mainboard/raptor-cs/talos-2/memlayout.ld index 7eb2cbe67c1..e4422b63e97 100644 --- a/src/mainboard/raptor-cs/talos-2/memlayout.ld +++ b/src/mainboard/raptor-cs/talos-2/memlayout.ld @@ -75,5 +75,5 @@ SECTIONS */ CBFS_CACHE( 0xF8380000, 6M) - RAMSTAGE( 0xF9000000, 2M) + RAMSTAGE( 0xF9000000, 3M) } diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index f156862f7bd..94b1c236514 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -1143,7 +1144,8 @@ static void load_pm_complex(uint8_t chip, struct homer_st *homer) * shouldn't be necessary. 
*/ - load_occ_image_to_homer(homer); + /* OCC image is pre-loaded for us earlier */ + load_host_data_to_homer(chip, homer); } @@ -2262,7 +2264,8 @@ const struct voltage_bucket_data * get_voltage_data(uint8_t chip) return bucket; } -static void layout_rings(uint8_t chip, struct homer_st *homer, uint8_t dd, uint64_t cores) +static void layout_rings(uint8_t chip, struct homer_st *homer, struct xip_hw_header *hw, + uint8_t dd, uint64_t cores) { static uint8_t rings_buf[300 * KiB]; @@ -2277,7 +2280,6 @@ static void layout_rings(uint8_t chip, struct homer_st *homer, uint8_t dd, uint6 .work_buf3 = work_buf3, .work_buf3_size = sizeof(work_buf3), }; - struct xip_hw_header *hw = (void *)homer; enum ring_variant ring_variant = (dd < 0x23 ? RV_BASE : RV_RL4); get_ppe_scan_rings(chip, hw, dd, PT_CME, &ring_data); @@ -2290,7 +2292,7 @@ static void layout_rings(uint8_t chip, struct homer_st *homer, uint8_t dd, uint6 ring_data.work_buf3_size = sizeof(work_buf3); get_ppe_scan_rings(chip, hw, dd, PT_SGPE, &ring_data); layout_rings_for_sgpe(chip, homer, &ring_data, - (struct xip_sgpe_header *)((uint8_t *)homer + hw->sgpe.offset), + (struct xip_sgpe_header *)((uint8_t *)hw + hw->sgpe.offset), cores, ring_variant); } @@ -2321,8 +2323,8 @@ static void set_fabric_ids(uint8_t chip, struct homer_st *homer) sgpe_hdr->addr_extension = 0; } -static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd, - uint64_t cores) +static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, struct xip_hw_header *hw, + uint8_t dd, uint64_t cores) { enum { CME_QM_FLAG_SYS_WOF_ENABLE = 0x1000, @@ -2333,7 +2335,7 @@ static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, uint8_t dd uint16_t qm_mode_flags; uint16_t pgpe_flags; - layout_rings(chip, homer, dd, cores); + layout_rings(chip, homer, hw, dd, cores); build_parameter_blocks(chip, homer, cores); update_headers(chip, homer, cores); @@ -2556,13 +2558,14 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ struct mmap_helper_region_device mdev = {0}; struct homer_st *homer = homer_bar; - struct xip_hw_header *hw = homer_bar; uint8_t dd = get_dd(); // XXX: does this need to be chip-specific? int this_core = -1; uint64_t cores[MAX_CHIPS] = { get_available_cores(0, &this_core), (chips & 0x02) ? get_available_cores(1, NULL) : 0, }; + struct xip_hw_header *hw = xmalloc(1 * MiB); + uint8_t *hw_addr = (void *)hw; if (this_core == -1) die("Couldn't found active core\n"); @@ -2581,33 +2584,24 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ * not NULL. */ mount_part_from_pnor("HCODE", &mdev); - /* - * First MB of HOMER is unused at first, we can write OCC image from PNOR there. - * TODO: try putting HCODE somewhere else and load OCC host area right here. 
- */ rdev_readat(&mdev.rdev, hw, 0, 1 * MiB); assert(hw->magic == XIP_MAGIC_HW); assert(hw->image_size <= 1 * MiB); - build_sgpe(homer, (struct xip_sgpe_header *)(homer_bar + hw->sgpe.offset), - dd); - - build_self_restore(homer, - (struct xip_restore_header *)(homer_bar + hw->restore.offset), + build_sgpe(homer, (struct xip_sgpe_header *)(hw_addr + hw->sgpe.offset), dd); + build_self_restore(homer, (struct xip_restore_header *)(hw_addr + hw->restore.offset), dd, cores[0]); + build_cme(homer, (struct xip_cme_header *)(hw_addr + hw->cme.offset), dd); + build_pgpe(homer, (struct xip_pgpe_header *)(hw_addr + hw->pgpe.offset), dd); - build_cme(homer, (struct xip_cme_header *)(homer_bar + hw->cme.offset), dd); - - build_pgpe(homer, (struct xip_pgpe_header *)(homer_bar + hw->pgpe.offset), - dd); + load_occ_image_to_homer(homer); /* * Until this point, only self restore part is CPU specific, use current * state of the first HOMER image as a base for the second one. */ if (chips & 0x02) { - uint8_t *homer_bar2 = (void *)&homer[1]; struct cme_img_header *hdr; memcpy(&homer[1], &homer[0], sizeof(*homer)); @@ -2619,7 +2613,7 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ /* Override data from the other CPU */ build_self_restore(&homer[1], - (struct xip_restore_header *)(homer_bar2 + hw->restore.offset), + (struct xip_restore_header *)(hw_addr + hw->restore.offset), dd, cores[1]); } @@ -2627,10 +2621,12 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ if (!(chips & (1 << chip))) continue; - fill_homer_for_chip(chip, &homer[chip], dd, cores[chip]); + fill_homer_for_chip(chip, &homer[chip], hw, dd, cores[chip]); nominal_freq[chip] = get_voltage_data(chip)->nominal.freq * MHz; } + free(hw); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) setup_wakeup_mode(chip, cores[chip]); From 83347bc885509a29d5719c818df142e6b9b4dbf0 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 15 Feb 2022 23:40:50 +0200 Subject: [PATCH 168/213] soc/power9/istep_18_11.c: update for second CPU Change-Id: I6612e72b1a44e9bd50d51a10ee8632a016a89588 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_18.h | 4 +- src/soc/ibm/power9/chip.c | 2 +- src/soc/ibm/power9/istep_18_11.c | 312 +++++++++++++++++++++++-------- 3 files changed, 236 insertions(+), 82 deletions(-) diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h index 3e9c154f75b..344d9bdc408 100644 --- a/src/include/cpu/power/istep_18.h +++ b/src/include/cpu/power/istep_18.h @@ -3,7 +3,9 @@ #ifndef CPU_PPC64_ISTEP18_H #define CPU_PPC64_ISTEP18_H -void istep_18_11(void); +#include + +void istep_18_11(uint8_t chips); void istep_18_12(void); #endif /* CPU_PPC64_ISTEP18_H */ diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index d1f9120dcf7..30594b22447 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -445,7 +445,7 @@ static void enable_soc_dev(struct device *dev) build_homer_image((void *)(top * 1024), (void *)(occ_area * 1024), nominal_freq); rng_init(); - istep_18_11(); + istep_18_11(chips); istep_18_12(); } diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index e1453e2e80f..3ef7ad89500 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -113,9 +113,6 @@ // Slave path-01: remote sync: maximum of SYNC miss counts: 0 - 255 syncs. 
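/*
 * Illustrative aside, not part of this patch: the bus-delay to TOD-grid-cycle
 * conversion done further down in calculate_topology_delay() can be kept in
 * integer math (the FIXME there asks whether floating point can be avoided).
 * Integer "1 + num / den" mirrors the original's "+1 and truncate" rounding.
 * This sketch is not the project's helper: XBUS_LINK_FACTOR (8) and TOD_GRID_PS
 * (400) are written as literals so it stands alone, <stdint.h> types are
 * assumed, and the MDMT staging delay is still added separately by the caller.
 */
static inline uint32_t bus_delay_to_tod_grid_cycles(uint64_t bus_delay, uint64_t freq_x_mhz)
{
	/* bus cycles -> picoseconds -> 400ps TOD-grid-cycles, rounded up */
	uint64_t num = bus_delay * 1000000ULL;
	uint64_t den = 2ULL * 8 /* XBUS_LINK_FACTOR */ * freq_x_mhz * 400 /* TOD_GRID_PS */;

	return (uint32_t)(1 + num / den);
}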
#define PERV_TOD_S_PATH_CTRL_REG 0x00040005 -/* TODO: this one will change if we add more than one processor */ -#define MDMT (1) - /* * 2 CPU topology * @@ -128,105 +125,182 @@ * CHIP0 <--RX-- XBUS1 <-- XBUS1 <--TX-- CHIP1 */ -static uint32_t calculate_topology_delay(void) +static uint32_t calculate_topology_delay(uint8_t chip, uint8_t chips, uint8_t mdmt) { + enum { XBUS_LINK_FACTOR = 8 }; + + uint32_t delay; + /* - * In simple topology with one proc it is enough to assign 0. + * In simple topology with one proc it is enough to assign node delay to 0. * With multiple processors this will get more complicated, * see calculate_node_link_delay() in Hostboot */ - - if(MDMT) + if (chips == 0x01) return MDMT_TOD_GRID_CYCLE_STAGING_DELAY; - /* TODO: check again if this really is write, not RMW */ - write_scom(PU_PB_ELINK_RT_DELAY_CTL_REG, PPC_BITMASK(2, 3)); - uint64_t l_bus_mode_reg = read_scom(PU_PB_ELINK_DLY_0123_REG); + /* The only non-trivial topology supported is a two-node one */ + if (chips != 0x03) + die("%s() supports only two-chip configuration\n", __func__); + + /* There are no nodes connected to non-MDMT chip, hence no delay */ + if (chip != mdmt) + return 0; + + /* + * Find the most-delayed path in the topology; this is the MDMT's delay. + * + * A more advanced topologies would require building a tree and finding + * the longest/slowest path from top node to one of the leaves, which in + * our special case equals to delay to the next chip. + */ + + /* For some reason this is a write, not RMW */ + write_rscom(chip, PU_PB_ELINK_RT_DELAY_CTL_REG, PPC_BITMASK(2, 3)); + uint64_t bus_mode_reg = read_rscom(chip, PU_PB_ELINK_DLY_0123_REG); - uint32_t bus_delay = ((l_bus_mode_reg & BUS_DELAY_47) >> 16) + - (l_bus_mode_reg & BUS_DELAY_63); + uint32_t bus_delay = (((bus_mode_reg & BUS_DELAY_47) >> 16) + + (bus_mode_reg & BUS_DELAY_63)) / 2; + /* + * By default, the TOD grid runs at 400ps; TOD counts its delay based on this. + * + * Example: Bus round trip delay is 35 cycles and the bus is running at 4800MHz + * - Divide by 2 to get one-way delay time + * - Divide by 4800 * 10^6 to get delay in seconds + * - Multiply by 10^12 to get delay in picoseconds + * - Divide by 400ps to get TOD-grid-cycles + * - (To avoid including math.h) Add 1 and cast to uint32_t to round up to nearest TOD-grid-cycle + * - (To avoid including math.h) 10^12/10^6=1000000 + * - (uint32_t)(( 35 / 2 / (4800 * 10^6) * 10^12 / 400 ) + 1) = 10 TOD-grid-cycles + */ /* * FIXME: floating point wasn't fully configured, see if we can skip it. * Testing requires bigger topology, i.e. more CPUs. */ - return (uint32_t)(1 + ((double)(bus_delay * 8 * 1000000) - / (double)(4 * FREQ_X_MHZ * TOD_GRID_PS))); + delay = (uint32_t)(1 + ((double)(bus_delay * 1000000) + / (double)(2 * XBUS_LINK_FACTOR * FREQ_X_MHZ * TOD_GRID_PS))); + + /* The MDMT delay must include additional TOD-grid-cycles to account for + * staging latches in slaves */ + if (chip == mdmt) + delay += MDMT_TOD_GRID_CYCLE_STAGING_DELAY; + + return delay; } -static void calculate_m_path(void) +static void calculate_m_path(uint8_t chip, uint8_t mdmt) { uint64_t dual_edge_disable = - (read_scom(PERV_ROOT_CTRL8_SCOM) & PPC_BIT(PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC)) + (read_rscom(chip, PERV_ROOT_CTRL8_SCOM) & + PPC_BIT(PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC)) ? 
PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) : 0; - if(MDMT) { - scom_and_or(PERV_TOD_M_PATH_CTRL_REG, - ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | - PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | - PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | - PPC_PLACE(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, - M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | - PPC_PLACE(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET, - M_PATH_SYNC_CREATE_SPS_SELECT_LEN) | - PPC_PLACE(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, - M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | - PPC_PLACE(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET, - M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN)), - PPC_BIT(M_PATH_1_OSC_NOT_VALID) | - PPC_PLACE(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, - M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | - PPC_PLACE(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, - M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | - dual_edge_disable); + if (chip == mdmt) { + rscom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, + ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | + PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | + PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | + PPC_PLACE(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + PPC_PLACE(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET, + M_PATH_SYNC_CREATE_SPS_SELECT_LEN) | + PPC_PLACE(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET, + M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN)), + PPC_BIT(M_PATH_1_OSC_NOT_VALID) | + PPC_PLACE(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + dual_edge_disable); } else { - scom_and_or(PERV_TOD_M_PATH_CTRL_REG, - ~PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE), - dual_edge_disable); + rscom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, + ~PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE), + dual_edge_disable); } } -void istep_18_11(void) +static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t sec_mdmt) { - printk(BIOS_EMERG, "starting istep 18.11\n"); - report_istep(18, 11); + uint32_t topology_delay = calculate_topology_delay(chip, chips, pri_mdmt); /* Clear previous primary topology */ - write_scom(PERV_TOD_PRI_PORT_0_CTRL_REG, 0); - write_scom(PERV_TOD_SEC_PORT_0_CTRL_REG, 0); + write_rscom(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, 0); + write_rscom(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, 0); /* Workaround for HW480181: Init remote sync checker tolerance to maximum * [26-27] REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0x3 (factor 8) * [28-31] REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xF (93.75%) */ - scom_or(PERV_TOD_S_PATH_CTRL_REG, PPC_PLACE(0x3, 26, 2) | PPC_PLACE(0xF, 28, 4)); + rscom_or(chip, PERV_TOD_S_PATH_CTRL_REG, PPC_PLACE(0x3, 26, 2) | PPC_PLACE(0xF, 28, 4)); /* - * Set PSS_MSS_CTRL_REG for primary configuration, assumptions: - * - MDMT = 1 + * Set PSS_MSS_CTRL_REG for primary configuration, assumption: * - valid oscillator is attached to master path-0, but not path-1 * [0] PRI_M_PATH_SELECT = 0 (path-0 selected) * [1] PRI_M_S_TOD_SELECT = 1 (TOD is master) * [2] PRI_M_S_DRAWER_SELECT = 1 (drawer is master) */ - scom_and_or(PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(0), - PPC_BIT(1) | PPC_BIT(2)); + if (chip == pri_mdmt) { + rscom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, + ~PPC_BIT(0), PPC_BIT(1) | PPC_BIT(2)); 
+ } else { + rscom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(1)); + } - /* Configure PORT_CTRL_REGs (primary) */ /* - * TODO: this touches XBUS/OBUS, but Hostboot doesn't modify registers so - * skip it for now and fix if needed. + * Set CPS deviation to 75% (CPS deviation bits = 0xC, factor=1), + * 8 valid steps to enable step check. + * + * [0] S_PATH_CTRL_REG_PRI_SELECT = 0 (slave path 0) + * [6-7] S_PATH_CTRL_REG_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [8-11] S_PATH_CTRL_REG_0_STEP_CHECK_CPS_DEVIATION = 0xC (75%) + * [13-15] S_PATH_CTRL_REG_0_STEP_CHECK_VALIDITY_COUNT = 3 (8 valid steps) + * [26-27] S_PATH_CTRL_REG_REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [28-31] S_PATH_CTRL_REG_REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xC (75%) + * [32-39] TOD_S_PATH_CTRL_REG_REMOTE_SYNC_MISS_COUNT_2 = 0x5 */ - //scom_and_or(PERV_TOD_PRI_PORT_0_CTRL_REG, ???, ???); - //scom_and_or(PERV_TOD_SEC_PORT_0_CTRL_REG, ???, ???); + if (chip != pri_mdmt) { + rscom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15) | + PPC_BITMASK(26, 39)), + PPC_PLACE(0xC, 8, 4) | PPC_PLACE(0x3, 13, 3) | + PPC_PLACE(0xC, 28, 4) | PPC_PLACE(0x5, 32, 8)); + } - /* Configure M_PATH_CTRL_REG */ + /* + * Configure PORT_CTRL_REGs (primary), assumptions: + * - XBUS1 link is used on both chips + * - oscillator connected to OSC0 and not to OSC1 + * + * [0-2] PRI_PORT_0_CTRL_REG_RX_SELECT = 1 (link number) + * [6-7] TOD_PORT_CTRL_REG_TX_X1_SEL = TX sel + * [21] TOD_PORT_CTRL_REG_TX_X1_EN = enable + */ + if (chips != 0x01) { + /* MDMT has no RX */ + int rx_sel = (chip == pri_mdmt ? 0 : 1); + /* Only MDMT has TX */ + int tx_sel = (chip == pri_mdmt ? 2 /*m_path_0*/ : 0); + uint64_t tx_en = (chip == pri_mdmt ? PPC_BIT(21) : 0); + + rscom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + rscom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + } + + /* Configure M_PATH_CTRL_REG (primary) */ /* * TODO: check this again. Value is correct, not sure whether fields are * correctly cleared. Also comment the values written. 
*/ - calculate_m_path(); + calculate_m_path(chip, pri_mdmt); /* Configure I_PATH_CTRL_REG (primary) */ /* PERV_TOD_PRI_PORT_0_CTRL_REG: @@ -238,11 +312,11 @@ void istep_18_11(void) * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) */ - scom_and_or(PERV_TOD_PRI_PORT_0_CTRL_REG, ~PPC_BITMASK(32, 39), - PPC_PLACE(calculate_topology_delay(), 32, 8)); - scom_and_or(PERV_TOD_I_PATH_CTRL_REG, - ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), - PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); + rscom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, + ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); + rscom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); /* Configure INIT_CHIP_CTRL_REG (primary) */ /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) @@ -253,32 +327,75 @@ void istep_18_11(void) * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) * [30] XSTOP_GATE = 0 (stop TOD on checkstop) */ - scom_and_or(PERV_TOD_CHIP_CTRL_REG, - ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), - PPC_PLACE(0x3F, 10, 6)); + rscom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_PLACE(0x3F, 10, 6)); /* TODO: test if we can skip repeated writes (M_PATH, I_PATH, CHIP) */ + topology_delay = calculate_topology_delay(chip, chips, sec_mdmt); + /* Clear previous secondary topology */ /* NOTE: order is swapped wrt primary, does it matter? */ - write_scom(PERV_TOD_SEC_PORT_1_CTRL_REG, 0); - write_scom(PERV_TOD_PRI_PORT_1_CTRL_REG, 0); + write_rscom(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, 0); + write_rscom(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, 0); /* * Set PSS_MSS_CTRL_REG for secondary configuration, assumptions as before + * [0] PRI_M_PATH_SELECT = 0 (path-0 selected) (SW440224) * [8] SEC_M_PATH_SELECT = 0 (path-0 selected) * [9] SEC_M_S_TOD_SELECT = 1 (TOD is master) * [10] SEC_M_S_DRAWER_SELECT = 1 (drawer is master) */ - scom_and_or(PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(8), - PPC_BIT(9) | PPC_BIT(10)); + if (chip == sec_mdmt) { + rscom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~(PPC_BIT(0) | PPC_BIT(8)), + PPC_BIT(9) | PPC_BIT(10)); + } else { + rscom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(9)); + } + + /* + * Set CPS deviation to 75% (CPS deviation bits = 0xC, factor=1), + * 8 valid steps to enable step check. + * + * [0] S_PATH_CTRL_REG_PRI_SELECT = 0 (slave path 0) + * [4] S_PATH_CTRL_REG_SEC_SELECT = 1 (slave path 1) + * [6-7] S_PATH_CTRL_REG_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [16-19] S_PATH_CTRL_REG_1_STEP_CHECK_CPS_DEVIATION = 0xC (75%) + * [21-23] PERV_TOD_S_PATH_CTRL_REG_1_STEP_CHECK_VALIDITY_COUNT = 3 (8 valid steps) + * [32-39] TOD_S_PATH_CTRL_REG_REMOTE_SYNC_MISS_COUNT_2 = 0x5 + */ + if (chip != sec_mdmt) { + rscom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BITMASK(6, 7) | PPC_BITMASK(16, 19) | + PPC_BITMASK(21, 23) | PPC_BITMASK(32, 39)), + PPC_BIT(4) | PPC_PLACE(0xC, 16, 4) | PPC_PLACE(0x3, 21, 3) | + PPC_PLACE(0x5, 32, 8)); + } - /* Configure PORT_CTRL_REGs (secondary) */ - //scom_and_or(PERV_TOD_SEC_PORT_1_CTRL_REG, ???, ???); - //scom_and_or(PERV_TOD_PRI_PORT_1_CTRL_REG, ???, ???); + /* Configure PORT_CTRL_REGs (secondary), assumptions as above. 
+ * + * [0-2] PRI_PORT_0_CTRL_REG_RX_SELECT = 1 (link number) + * [6-7] TOD_PORT_CTRL_REG_TX_X1_SEL = TX sel + * [21] TOD_PORT_CTRL_REG_TX_X1_EN = enable + */ + if (chips != 0x01) { + /* MDMT has no RX */ + int rx_sel = (chip == sec_mdmt ? 0 : 1); + /* Only MDMT has TX */ + int tx_sel = (chip == sec_mdmt ? 2 /*m_path_0*/ : 0); + uint64_t tx_en = (chip == sec_mdmt ? PPC_BIT(21) : 0); + + rscom_and_or(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + rscom_and_or(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + } - /* Configure M_PATH_CTRL_REG */ - calculate_m_path(); + /* Configure M_PATH_CTRL_REG (secondary) */ + calculate_m_path(chip, sec_mdmt); /* Configure I_PATH_CTRL_REG (secondary) */ /* PERV_TOD_SEC_PORT_0_CTRL_REG: @@ -290,11 +407,11 @@ void istep_18_11(void) * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) */ - scom_and_or(PERV_TOD_SEC_PORT_0_CTRL_REG, ~PPC_BITMASK(32, 39), - PPC_PLACE(calculate_topology_delay(), 32, 8)); - scom_and_or(PERV_TOD_I_PATH_CTRL_REG, - ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), - PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); + rscom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, + ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); + rscom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); /* Configure INIT_CHIP_CTRL_REG (secondary) */ /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) @@ -305,9 +422,44 @@ void istep_18_11(void) * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) * [30] XSTOP_GATE = 0 (stop TOD on checkstop) */ - scom_and_or(PERV_TOD_CHIP_CTRL_REG, - ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), - PPC_PLACE(0x3F, 10, 6)); + rscom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_PLACE(0x3F, 10, 6)); +} + +static int core_count(uint8_t chip) +{ + uint64_t cores = read_rscom(chip, 0x0006C090); + return __builtin_popcount((uint32_t)cores) + __builtin_popcount(cores >> 32); +} + +void istep_18_11(uint8_t chips) +{ + uint8_t pri_mdmt; + uint8_t sec_mdmt; + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 18.11\n"); + report_istep(18, 11); + + if (chips != 0x01 && chips != 0x03) + die("Unsupported number of chips for TOD: 0x%02x\n", chips); + + pri_mdmt = 0; + sec_mdmt = 0; + + if (chips == 0x3) { + uint64_t cores[2] = { core_count(0), core_count(1) }; + /* CPU with max amount of cores is primary MDMT */ + pri_mdmt = (cores[0] <= cores[1] ? 0 : 1); + /* The other one will be secondary MDMT */ + sec_mdmt = (cores[0] <= cores[1] ? 
1 : 0); + } + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + configure_tod(chip, chips, pri_mdmt, sec_mdmt); + } printk(BIOS_EMERG, "ending istep 18.11\n"); } From 234f3b1645af5583e051cb84088d1e2ff68c39d5 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Tue, 15 Feb 2022 23:42:34 +0200 Subject: [PATCH 169/213] soc/power9/istep_18_12.c: update for second CPU Change-Id: If99ab2942eb4a456586870cde653ce89da344d81 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_18.h | 4 +- src/soc/ibm/power9/chip.c | 5 +- src/soc/ibm/power9/istep_18_11.c | 4 +- src/soc/ibm/power9/istep_18_12.c | 128 ++++++++++++++++--------------- 4 files changed, 74 insertions(+), 67 deletions(-) diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h index 344d9bdc408..bc43d2fc0e9 100644 --- a/src/include/cpu/power/istep_18.h +++ b/src/include/cpu/power/istep_18.h @@ -5,7 +5,7 @@ #include -void istep_18_11(uint8_t chips); -void istep_18_12(void); +void istep_18_11(uint8_t chips, uint8_t *mdmt); +void istep_18_12(uint8_t chips, uint8_t mdmt); #endif /* CPU_PPC64_ISTEP18_H */ diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 30594b22447..040aca1fd7a 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -395,6 +395,7 @@ static void enable_soc_dev(struct device *dev) int chip, idx = 0; unsigned long reserved_size, homers_size, occ_area, top = 0; uint8_t chips = fsi_get_present_chips(); + uint8_t tod_mdmt; for (chip = 0; chip < MAX_CHIPS; chip++) { int mcs_i; @@ -445,8 +446,8 @@ static void enable_soc_dev(struct device *dev) build_homer_image((void *)(top * 1024), (void *)(occ_area * 1024), nominal_freq); rng_init(); - istep_18_11(chips); - istep_18_12(); + istep_18_11(chips, &tod_mdmt); + istep_18_12(chips, tod_mdmt); } static void activate_slave_cores(void) diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index 3ef7ad89500..114a2f235d2 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -433,7 +433,7 @@ static int core_count(uint8_t chip) return __builtin_popcount((uint32_t)cores) + __builtin_popcount(cores >> 32); } -void istep_18_11(uint8_t chips) +void istep_18_11(uint8_t chips, uint8_t *mdmt) { uint8_t pri_mdmt; uint8_t sec_mdmt; @@ -456,6 +456,8 @@ void istep_18_11(uint8_t chips) sec_mdmt = (cores[0] <= cores[1] ? 1 : 0); } + *mdmt = pri_mdmt; + for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) configure_tod(chip, chips, pri_mdmt, sec_mdmt); diff --git a/src/soc/ibm/power9/istep_18_12.c b/src/soc/ibm/power9/istep_18_12.c index eced1e18f74..375a5514a1b 100644 --- a/src/soc/ibm/power9/istep_18_12.c +++ b/src/soc/ibm/power9/istep_18_12.c @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -58,84 +59,87 @@ // as in the previous (PERV_TOD_ERROR_REG) register #define PERV_TOD_ERROR_MASK_REG 0x00040032 -/* TODO: this one will change if we add more than one processor */ -#define MDMT (1) - /* See istep 18.11 for 2 CPU topology diagram */ -/* TODO: this will be much more complicated for different topology */ -static void init_tod_node(void) +static void init_tod_node(uint8_t chips, uint8_t mdmt) { - uint64_t error_reg; + uint8_t chip; /* Clear the TOD error register by writing all bits to 1 */ /* * Probably documentation issue, all bits in this register are described as * RW, but code treats them as if they were write-1-to-clear. 
*/ - write_scom(PERV_TOD_ERROR_REG, ~0); - - /* Assumption: node is MDMT */ - if (MDMT) { - /* Chip TOD step checkers enable */ - write_scom(PERV_TOD_TX_TTYPE_2_REG, - PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); - - /* Switch local Chip TOD to 'Not Set' state */ - write_scom(PERV_TOD_LOAD_TOD_MOD_REG, - PPC_BIT(PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER)); - - /* Switch all Chip TOD in the system to 'Not Set' state */ - write_scom(PERV_TOD_TX_TTYPE_5_REG, - PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); - - /* Chip TOD load value (move TB to TOD) */ - write_scom(PERV_TOD_LOAD_TOD_REG, - PPC_PLACE(0x3FF, 0, 60) | PPC_PLACE(0xC, 60, 4)); - - /* Chip TOD start_tod (switch local Chip TOD to 'Running' state) */ - write_scom(PERV_TOD_START_TOD_REG, - PPC_BIT(PERV_TOD_START_TOD_REG_FSM_TRIGGER)); - - /* Send local Chip TOD value to all Chip TODs */ - write_scom(PERV_TOD_TX_TTYPE_4_REG, - PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); - } - - /* Wait until TOD is running */ - if (!wait_us(1000, - read_scom(PERV_TOD_FSM_REG) & PPC_BIT(PERV_TOD_FSM_REG_IS_RUNNING))) { - printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", - read_scom(PERV_TOD_ERROR_REG)); - die("Error: TOD is not running!\n"); + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + write_rscom(chip, PERV_TOD_ERROR_REG, ~0); } - /* Clear TTYPE#2, TTYPE#4, and TTYPE#5 status */ - write_scom(PERV_TOD_ERROR_REG, - PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 2) | - PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 4) | - PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 5)); - - /* Check for real errors */ - error_reg = read_scom(PERV_TOD_ERROR_REG); - if (error_reg != 0) { - printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", - read_scom(PERV_TOD_ERROR_REG)); - die("Error: TOD initialization failed!\n"); + /* Configure MDMT */ + + /* Chip TOD step checkers enable */ + write_rscom(mdmt, PERV_TOD_TX_TTYPE_2_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* Switch local Chip TOD to 'Not Set' state */ + write_rscom(mdmt, PERV_TOD_LOAD_TOD_MOD_REG, + PPC_BIT(PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER)); + + /* Switch all Chip TOD in the system to 'Not Set' state */ + write_rscom(mdmt, PERV_TOD_TX_TTYPE_5_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* Chip TOD load value (move TB to TOD) */ + write_rscom(mdmt, PERV_TOD_LOAD_TOD_REG, + PPC_PLACE(0x3FF, 0, 60) | PPC_PLACE(0xC, 60, 4)); + + /* Chip TOD start_tod (switch local Chip TOD to 'Running' state) */ + write_rscom(mdmt, PERV_TOD_START_TOD_REG, + PPC_BIT(PERV_TOD_START_TOD_REG_FSM_TRIGGER)); + + /* Send local Chip TOD value to all Chip TODs */ + write_rscom(mdmt, PERV_TOD_TX_TTYPE_4_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* In case of larger topology, replace loops with a recursion */ + for (chip = 0; chip < MAX_CHIPS; chip++) { + uint64_t error_reg; + + if (!(chips & (1 << chip))) + continue; + + /* Wait until TOD is running */ + if (!wait_us(1000, read_rscom(chip, PERV_TOD_FSM_REG) & + PPC_BIT(PERV_TOD_FSM_REG_IS_RUNNING))) { + printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", + read_rscom(chip, PERV_TOD_ERROR_REG)); + die("Error on chip#%d: TOD is not running!\n", chip); + } + + /* Clear TTYPE#2, TTYPE#4, and TTYPE#5 status */ + write_rscom(chip, PERV_TOD_ERROR_REG, + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 2) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 4) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 5)); + + /* Check for real errors */ + error_reg = read_rscom(chip, PERV_TOD_ERROR_REG); + if (error_reg != 0) { + printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", error_reg); + 
die("Error: TOD initialization failed!\n"); + } + + /* Set error mask to runtime configuration (mask TTYPE informational bits) */ + write_rscom(chip, PERV_TOD_ERROR_MASK_REG, + PPC_BITMASK(PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0, + PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 + 5)); } - - /* Set error mask to runtime configuration (mask TTYPE informational bits) */ - write_scom(PERV_TOD_ERROR_MASK_REG, - PPC_BITMASK(PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0, - PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 + 5)); - - /* In case of multinode system, configure child nodes recursively here */ } -void istep_18_12(void) +void istep_18_12(uint8_t chips, uint8_t mdmt) { printk(BIOS_EMERG, "starting istep 18.12\n"); report_istep(18, 12); - init_tod_node(); + init_tod_node(chips, mdmt); printk(BIOS_EMERG, "ending istep 18.12\n"); } From b37646fe71c5437e7c9e3f04df4ce89b70cfe58d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 20 Feb 2022 00:56:34 +0200 Subject: [PATCH 170/213] soc/power9/istep_10_13.c: update for second CPU Change-Id: Ie15652de1127de7ce5c8060b1a07566499874615 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_10.h | 2 +- src/soc/ibm/power9/istep_10_13.c | 64 +++++++++++++++++++------------- src/soc/ibm/power9/romstage.c | 2 +- 3 files changed, 41 insertions(+), 27 deletions(-) diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h index c51ae593107..a9dbc3bf156 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -10,6 +10,6 @@ struct pci_info; void istep_10_1(uint8_t chips); void istep_10_10(uint8_t chips, struct pci_info *pci_info); void istep_10_12(uint8_t chips); -void istep_10_13(void); +void istep_10_13(uint8_t chips); #endif /* CPU_PPC64_ISTEP10_H */ diff --git a/src/soc/ibm/power9/istep_10_13.c b/src/soc/ibm/power9/istep_10_13.c index 229c1dbdf0d..7a923888e5d 100644 --- a/src/soc/ibm/power9/istep_10_13.c +++ b/src/soc/ibm/power9/istep_10_13.c @@ -1,8 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include + #include #include #include +#include /* * 10.13 host_rng_bist: Trigger Built In Self Test for RNG @@ -11,18 +14,15 @@ * - Trigger the Random Number Generator Built In Self Test (BIST). 
Results * are checked later in step 16 when RNG is secured */ -void istep_10_13(void) -{ - printk(BIOS_EMERG, "starting istep 10.13\n"); - - report_istep(10, 13); +static void host_rng_bist(uint8_t chip) +{ /* Assume DD2.0 or newer */ /* PU_NX_RNG_CFG [44] COND_STARTUP_TEST_FAIL */ - if (read_scom_for_chiplet(N0_CHIPLET_ID, 0x020110E0) & PPC_BIT(44)) + if (read_rscom_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0) & PPC_BIT(44)) die("RNG Conditioner startup test failed\n"); /* PU_NX_RNG_ST0 @@ -34,20 +34,20 @@ void istep_10_13(void) [36-47] ADAPTEST_CRN_RNG0_MATCH_TH = 0x32 (50; Assuming H = 5) [48-59] ADAPTEST_CRN_RNG1_MATCH_TH = 0x32 (50; Assuming H = 5) */ - scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E1, - ~(PPC_BITMASK(0, 1) | PPC_BITMASK(7, 63)), - PPC_PLACE(1, 0, 2) | PPC_PLACE(2, 7, 2) | PPC_PLACE(1, 9, 3) - | PPC_PLACE(0x32, 12, 12) | PPC_PLACE(0x32, 24, 12) - | PPC_PLACE(0x32, 36, 12) | PPC_PLACE(0x32, 48, 12)); + rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E1, + ~(PPC_BITMASK(0, 1) | PPC_BITMASK(7, 63)), + PPC_PLACE(1, 0, 2) | PPC_PLACE(2, 7, 2) | PPC_PLACE(1, 9, 3) + | PPC_PLACE(0x32, 12, 12) | PPC_PLACE(0x32, 24, 12) + | PPC_PLACE(0x32, 36, 12) | PPC_PLACE(0x32, 48, 12)); /* PU_NX_RNG_ST1 [0-6] ADAPTEST_SOFT_FAIL_TH = 2 [7-22] ADAPTEST_1BIT_MATCH_TH_MIN = 100 [23-38] ADAPTEST_1BIT_MATCH_TH_MAX = 415 */ - scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), - PPC_PLACE(2, 0, 7) | PPC_PLACE(100, 7, 16) - | PPC_PLACE(415, 23, 16)); + rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), + PPC_PLACE(2, 0, 7) | PPC_PLACE(100, 7, 16) + | PPC_PLACE(415, 23, 16)); /* PU_NX_RNG_ST3 [0] SAMPTEST_RRN_ENABLE = 1 @@ -55,16 +55,16 @@ void istep_10_13(void) [4-19] SAMPTEST_MATCH_TH_MIN = 0x6D60 (28,000) [20-35] SAMPTEST_MATCH_TH_MAX = 0x988A (39,050) */ - scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), - PPC_BIT(0) | PPC_PLACE(7, 1, 3) | PPC_PLACE(0x6D60, 4, 16) - | PPC_PLACE(0x988A, 20, 16)); + rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), + PPC_BIT(0) | PPC_PLACE(7, 1, 3) | PPC_PLACE(0x6D60, 4, 16) + | PPC_PLACE(0x988A, 20, 16)); /* PU_NX_RNG_RDELAY [6] LFSR_RESEED_EN = 1 [7-11] READ_RTY_RATIO = 0x1D (1/16) */ - scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), - PPC_BIT(6) | PPC_PLACE(0x1D, 7, 5)); + rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), + PPC_BIT(6) | PPC_PLACE(0x1D, 7, 5)); /* PU_NX_RNG_CFG [30-37] ST2_RESET_PERIOD = 0x1B @@ -76,12 +76,26 @@ void istep_10_13(void) [46-61] PACE_RATE = 0x07D0 (2000) [63] ENABLE = 1 */ - scom_and_or_for_chiplet(N0_CHIPLET_ID, 0x020110E0, - ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) - | PPC_BITMASK(46, 61) | PPC_BIT(63)), - PPC_PLACE(0x1B, 30, 8) | PPC_BIT(40) | PPC_BIT(41) - | PPC_BIT(42) | PPC_BIT(43) | PPC_PLACE(0x07D0, 46, 16) - | PPC_BIT(63)); + rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0, + ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) + | PPC_BITMASK(46, 61) | PPC_BIT(63)), + PPC_PLACE(0x1B, 30, 8) | PPC_BIT(40) | PPC_BIT(41) + | PPC_BIT(42) | PPC_BIT(43) | PPC_PLACE(0x07D0, 46, 16) + | PPC_BIT(63)); +} + +void istep_10_13(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "starting istep 10.13\n"); + + report_istep(10, 13); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + host_rng_bist(chip); + } printk(BIOS_EMERG, "ending istep 10.13\n"); } diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 
ec1fe47aa80..9ecb4388d63 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -399,7 +399,7 @@ void main(void) istep_10_1(chips); istep_10_10(chips, pci_info); istep_10_12(chips); - istep_10_13(); + istep_10_13(chips); timestamp_add_now(TS_INITRAM_START); From 817e0695906def6a2a2e21ef240ecb7ae0a4e8ac Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 20 Feb 2022 00:58:06 +0200 Subject: [PATCH 171/213] src/ibm/power9/chip.c: init RNG for second CPU Change-Id: Ic6246efd766b8bbf8c8ef5b9589f5702d7a02d2c Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 79 +++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 040aca1fd7a..e2cd1f4c12a 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -350,44 +350,51 @@ static int dt_platform_update(struct device_tree *tree) return 0; } -static void rng_init(void) +static void rng_init(uint8_t chips) { - /* - * RNG is allowed to run for M cycles (M = enough time to complete init; - * recommend 1 second of time). - * - * The only thing that ensures this is delay between istep 10.13 and now. - * 14.1 is the most time-consuming istep, its duration depends on the amount - * of installed RAM under the bigger of MCBISTs (i.e. sides of CPU on the - * board). This is more than enough in Hostboot. - * - * TODO: test if this is enough for coreboot with initial ECC scrubbing - * skipped, low amount of RAM and no debug output. - */ - /* NX.PBI.PBI_RNG.NX_RNG_CFG - * [0-9] FAIL_REG - abort if any of these bits is set - * [17] BIST_COMPLETE - should be 1 at this point - */ - uint64_t rng_status = read_scom(0x020110E0); - assert(rng_status & PPC_BIT(17)); - while (!((rng_status = read_scom(0x020110E0)) & PPC_BIT(17))); - - if (rng_status & PPC_BITMASK(0, 9)) - die("RNG initialization failed, NX_RNG_CFG = %#16.16llx\n", rng_status); + uint8_t chip; - /* - * Hostboot sets 'enable' bit again even though it was already set. - * Following that behavior just in case. - */ - write_scom(0x020110E0, rng_status | PPC_BIT(63)); - - /* - * This would be the place to set BARs, but it is done as part of quad SCOM - * restore. - */ + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; - /* Lock NX RNG configuration */ - scom_or(0x00010005, PPC_BIT(9)); + /* + * RNG is allowed to run for M cycles (M = enough time to complete init; + * recommend 1 second of time). + * + * The only thing that ensures this is delay between istep 10.13 and now. + * 14.1 is the most time-consuming istep, its duration depends on the amount + * of installed RAM under the bigger of MCBISTs (i.e. sides of CPU on the + * board). This is more than enough in Hostboot. + * + * TODO: test if this is enough for coreboot with initial ECC scrubbing + * skipped, low amount of RAM and no debug output. + */ + /* NX.PBI.PBI_RNG.NX_RNG_CFG + * [0-9] FAIL_REG - abort if any of these bits is set + * [17] BIST_COMPLETE - should be 1 at this point + */ + uint64_t rng_status = read_rscom(chip, 0x020110E0); + assert(rng_status & PPC_BIT(17)); + while (!((rng_status = read_rscom(chip, 0x020110E0)) & PPC_BIT(17))); + + if (rng_status & PPC_BITMASK(0, 9)) + die("RNG initialization failed, NX_RNG_CFG = %#16.16llx\n", rng_status); + + /* + * Hostboot sets 'enable' bit again even though it was already set. + * Following that behavior just in case. 
+ */ + write_rscom(chip, 0x020110E0, rng_status | PPC_BIT(63)); + + /* + * This would be the place to set BARs, but it is done as part of quad SCOM + * restore. + */ + + /* Lock NX RNG configuration */ + rscom_or(chip, 0x00010005, PPC_BIT(9)); + } } static void enable_soc_dev(struct device *dev) @@ -445,7 +452,7 @@ static void enable_soc_dev(struct device *dev) occ_area = top + homers_size; build_homer_image((void *)(top * 1024), (void *)(occ_area * 1024), nominal_freq); - rng_init(); + rng_init(chips); istep_18_11(chips, &tod_mdmt); istep_18_12(chips, tod_mdmt); } From 0dac7dbd45e7599353c1fc35ae8bc79f48c358bd Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 18 Feb 2022 20:01:21 +0200 Subject: [PATCH 172/213] soc/power9/istep_10_6.c: scom inits to all chiplets (sans Quad) Without this istep, skiboot checkstops for two CPUs during XIVE initialization. Change-Id: Ia445d25cb489267f0b72676083d1034cc11a0b99 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/istep_10.h | 1 + src/soc/ibm/power9/Makefile.inc | 1 + src/soc/ibm/power9/istep_10_6.c | 424 +++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 1 + 4 files changed, 427 insertions(+) create mode 100644 src/soc/ibm/power9/istep_10_6.c diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h index a9dbc3bf156..5c74840b90c 100644 --- a/src/include/cpu/power/istep_10.h +++ b/src/include/cpu/power/istep_10.h @@ -8,6 +8,7 @@ struct pci_info; void istep_10_1(uint8_t chips); +void istep_10_6(uint8_t chips); void istep_10_10(uint8_t chips, struct pci_info *pci_info); void istep_10_12(uint8_t chips); void istep_10_13(uint8_t chips); diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 2798396c68a..512fe40e93b 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -21,6 +21,7 @@ romstage-y += istep_9_4.c romstage-y += istep_9_6.c romstage-y += istep_9_7.c romstage-y += istep_10_1.c +romstage-y += istep_10_6.c romstage-y += istep_10_10.c romstage-y += istep_10_12.c romstage-y += istep_10_13.c diff --git a/src/soc/ibm/power9/istep_10_6.c b/src/soc/ibm/power9/istep_10_6.c new file mode 100644 index 00000000000..9c43f1dddb6 --- /dev/null +++ b/src/soc/ibm/power9/istep_10_6.c @@ -0,0 +1,424 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include + +static void mcs_scom(uint8_t chip, chiplet_id_t chiplet) +{ + uint64_t data; + + { + data = read_rscom_for_chiplet(chip, chiplet, 0x5010810); + + PPC_INSERT(data, 25, 32, 7); + PPC_INSERT(data, 0x7, 46, 4); + PPC_INSERT(data, 0xF, 55, 6); + /* MC01_PBI01_SCOMFIR_MCPERF1_ENABLE_PF_DROP_CMDLIST_ON */ + data |= PPC_BIT(61); + PPC_INSERT(data, 0x0, 62, 1); + /* MC01_PBI01_SCOMFIR_MCPERF1_ENABLE_PREFETCH_PROMOTE_ON */ + data |= PPC_BIT(63); + + write_rscom_for_chiplet(chip, chiplet, 0x5010810, data); + } + + { + data = read_rscom_for_chiplet(chip, chiplet, 0x5010811); + + /* MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_CENTAUR_SYNC_ON */ + data |= PPC_BIT(20); + /* MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_64_128B_READ_ON */ + data |= PPC_BIT(9); + /* MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_DROP_FP_DYN64_ACTIVE_ON */ + data |= PPC_BIT(8); + /* MC01_PBI01_SCOMFIR_MCMODE0_CENTAURP_ENABLE_ECRESP_OFF */ + data &= ~PPC_BIT(7); + /* MC01_PBI01_SCOMFIR_MCMODE0_DISABLE_MC_SYNC_ON */ + data |= PPC_BIT(27); + /* MC01_PBI01_SCOMFIR_MCMODE0_DISABLE_MC_PAIR_SYNC_ON */ + data |= PPC_BIT(28); + /* MC01_PBI01_SCOMFIR_MCMODE0_FORCE_COMMANDLIST_VALID_ON */ + data |= PPC_BIT(17); + + 
write_rscom_for_chiplet(chip, chiplet, 0x5010811, data); + } + { + data = read_rscom_for_chiplet(chip, chiplet, 0x5010812); + + /* MC01_PBI01_SCOMFIR_MCMODE1_DISABLE_FP_M_BIT_ON */ + data |= PPC_BIT(10); + PPC_INSERT(data, 0x40, 33, 19); + + write_rscom_for_chiplet(chip, chiplet, 0x5010812, data); + } + { + data = read_rscom_for_chiplet(chip, chiplet, 0x5010813); + PPC_INSERT(data, 0x8, 24, 16); + write_rscom_for_chiplet(chip, chiplet, 0x5010813, data); + } + { + data = read_rscom_for_chiplet(chip, chiplet, 0x501081B); + + /* MC01_PBI01_SCOMFIR_MCTO_SELECT_PB_HANG_PULSE_ON */ + data |= PPC_BIT(0); + /* MC01_PBI01_SCOMFIR_MCTO_SELECT_LOCAL_HANG_PULSE_OFF */ + data &= ~PPC_BIT(1); + /* MC01_PBI01_SCOMFIR_MCTO_ENABLE_NONMIRROR_HANG_ON */ + data |= PPC_BIT(32); + /* MC01_PBI01_SCOMFIR_MCTO_ENABLE_APO_HANG_ON */ + data |= PPC_BIT(34); + PPC_INSERT(data, 0x1, 2, 2); + PPC_INSERT(data, 0x1, 24, 8); + PPC_INSERT(data, 0x7, 5, 3); + + write_rscom_for_chiplet(chip, chiplet, 0x501081B, data); + } +} + +static void fbc_ioo_tl_scom(uint8_t chip) +{ + uint64_t data; + + /* PB_IOO_SCOM_A0_MODE_BLOCKED */ + rscom_or(chip, 0x501380A, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* PB_IOO_SCOM_A1_MODE_BLOCKED */ + rscom_or(chip, 0x501380B, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* PB_IOO_SCOM_A2_MODE_BLOCKED */ + rscom_or(chip, 0x501380C, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* PB_IOO_SCOM_A3_MODE_BLOCKED */ + rscom_or(chip, 0x501380D, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* 0x5013810, 0x5013811, 0x5013812 and 0x5013813 are not modified */ + + data = read_rscom(chip, 0x5013823); + + data &= ~PPC_BIT(0); // PB_IOO_SCOM_PB_CFG_IOO01_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(1); // PB_IOO_SCOM_PB_CFG_IOO23_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(2); // PB_IOO_SCOM_PB_CFG_IOO45_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(3); // PB_IOO_SCOM_PB_CFG_IOO67_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(8); // PB_IOO_SCOM_LINKS01_TOD_ENABLE_OFF + data &= ~PPC_BIT(9); // PB_IOO_SCOM_LINKS23_TOD_ENABLE_OFF + data &= ~PPC_BIT(10); // PB_IOO_SCOM_LINKS45_TOD_ENABLE_OFF + data &= ~PPC_BIT(11); // PB_IOO_SCOM_LINKS67_TOD_ENABLE_OFF + + write_rscom(chip, 0x5013823, data); + + /* 0x5013824 is not modified */ +} + +static void nx_scom(uint8_t chip, uint8_t dd) +{ + uint64_t data; + + { + data = read_rscom(chip, 0x2011041); + + data |= PPC_BIT(63); // NX_DMA_CH0_EFT_ENABLE_ON + data |= PPC_BIT(62); // NX_DMA_CH1_EFT_ENABLE_ON + data |= PPC_BIT(58); // NX_DMA_CH2_SYM_ENABLE_ON + data |= PPC_BIT(57); // NX_DMA_CH3_SYM_ENABLE_ON + data |= PPC_BIT(61); // NX_DMA_CH4_GZIP_ENABLE_ON + + write_rscom(chip, 0x2011041, data); + } + { + data = read_rscom(chip, 0x2011042); + + PPC_INSERT(data, 0xF, 8, 4); // NX_DMA_GZIPCOMP_MAX_INRD_MAX_15_INRD + PPC_INSERT(data, 0xF, 12, 4); // NX_DMA_GZIPDECOMP_MAX_INRD_MAX_15_INRD + PPC_INSERT(data, 0x3, 25, 4); // NX_DMA_SYM_MAX_INRD_MAX_3_INRD + PPC_INSERT(data, 0xF, 33, 4); // NX_DMA_EFTCOMP_MAX_INRD_MAX_15_INRD = 0xf; + PPC_INSERT(data, 0xF, 37, 4); // NX_DMA_EFTDECOMP_MAX_INRD_MAX_15_INRD + + data |= PPC_BIT(23); // NX_DMA_EFT_COMP_PREFETCH_ENABLE_ON + data |= PPC_BIT(24); // NX_DMA_EFT_DECOMP_PREFETCH_ENABLE_ON + data |= PPC_BIT(16); // NX_DMA_GZIP_COMP_PREFETCH_ENABLE_ON + data |= PPC_BIT(17); // NX_DMA_GZIP_DECOMP_PREFETCH_ENABLE_ON + data &= ~PPC_BIT(56); // NX_DMA_EFT_SPBC_WRITE_ENABLE_OFF + + write_rscom(chip, 0x2011042, data); + } + { + data = read_rscom(chip, 0x201105C); + + PPC_INSERT(data, 0x9, 1, 4); // 
NX_DMA_CH0_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 6, 4); // NX_DMA_CH1_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 11, 4); // NX_DMA_CH2_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 16, 4); // NX_DMA_CH3_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 21, 4); // NX_DMA_CH4_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x8, 26, 4); // NX_DMA_DMA_HANG_TIMER_REF_DIV_DIVIDE_BY_1024 + + data |= PPC_BIT(0); // NX_DMA_CH0_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(5); // NX_DMA_CH1_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(10); // NX_DMA_CH2_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(15); // NX_DMA_CH3_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(20); // NX_DMA_CH4_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(25); // NX_DMA_DMA_HANG_TIMER_ENBL_ON + + write_rscom(chip, 0x201105C, data); + } + { + data = read_rscom(chip, 0x2011087); + + data &= ~0x93EFDFFF3FF00000; + data |= 0x48102000C0000000; + + if (dd == 0x20) + data &= ~0x2400000000000000; + else + data |= 0x2400000000000000; + + write_rscom(chip, 0x2011087, data); + } + { + data = read_rscom(chip, 0x2011095); + + data |= PPC_BIT(24); // NX_PBI_CQ_WRAP_NXCQ_SCOM_SKIP_G_ON + data |= PPC_BIT(1); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_WR_DISABLE_GROUP_ON + data |= PPC_BIT(5); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_RD_DISABLE_GROUP_ON + data |= PPC_BIT(9); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_WR_DISABLE_GROUP_ON + data |= PPC_BIT(13); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_RD_DISABLE_GROUP_ON + data |= PPC_BIT(2); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_WR_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(6); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_RD_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(10); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_WR_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(14); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_RD_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(22); // NX_PBI_CQ_WRAP_NXCQ_SCOM_RD_GO_M_QOS_ON + data &= ~PPC_BIT(23); // NX_PBI_CQ_WRAP_NXCQ_SCOM_ADDR_BAR_MODE_OFF + + PPC_INSERT(data, 0x0, 56, 4); // TGT1_ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + PPC_INSERT(data, 0x0, 60, 3); // TGT1_ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + PPC_INSERT(data, 0x1, 25, 2); + PPC_INSERT(data, 0xFC, 40, 8); + PPC_INSERT(data, 0xFC, 48, 8); + + write_rscom(chip, 0x2011095, data); + } + { + data = read_rscom(chip, 0x20110D6); + + PPC_INSERT(data, 0x2, 9, 3); + data |= PPC_BIT(6); // NX_PBI_DISABLE_PROMOTE_ON + + write_rscom(chip, 0x20110D6, data); + } + { + data = read_rscom(chip, 0x2011107); + + data &= ~0xF0839FFFC2FFC000; + data |= 0x0A7400003D000000; + + if (dd == 0x20) + data &= ~0x0508600000000000; + else + data |= 0x0508600000000000; + + write_rscom(chip, 0x2011107, data); + } + + rscom_and_or(chip, 0x2011083, ~0xEEF8FF9CFD000000, 0x1107006302F00000); + rscom_and(chip, 0x2011086, ~0xFFFFFFFFFFF00000); + rscom_and_or(chip, 0x20110A8, ~0x0FFFF00000000000, 0x0888800000000000); + rscom_and_or(chip, 0x20110C3, ~0x0000001F00000000, 0x0000000080000000); + rscom_and_or(chip, 0x20110C4, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); + rscom_and_or(chip, 0x20110C5, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); + rscom_or(chip, 0x20110D5, PPC_BIT(1)); // NX_PBI_PBI_UMAC_CRB_READS_ENBL_ON + rscom_and_or(chip, 0x2011103, ~0xCF7DEF81BF003000, 0x3082107E40FFC000); + rscom_and(chip, 0x2011106, ~0xFFFFFFFFFFFFC000); +} + +static void cxa_scom(uint8_t chip, uint8_t dd) +{ + uint64_t data; + + data = read_rscom(chip, 0x2010803); + data &= ~PPC_BITMASK(0, 52); + data |= (dd == 0x20 ? 
0x801B1F98C8717000 : 0x801B1F98D8717000); + write_rscom(chip, 0x2010803, data); + + data = read_rscom(chip, 0x2010818); + data &= ~PPC_BIT(1); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_ADR_BAR_MODE_OFF + data |= PPC_BIT(6); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_SKIP_G_ON + data &= ~PPC_BITMASK(21, 24); // ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + data &= ~PPC_BITMASK(25, 27); // ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + data |= PPC_BIT(4); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_DISABLE_G_ON + data |= PPC_BIT(3); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_DISABLE_VG_NOT_SYS_ON + write_rscom(chip, 0x2010818, data); + + rscom_and(chip, 0x2010806, ~PPC_BITMASK(0, 52)); + rscom_or(chip, 0x2010807, PPC_BIT(2) | PPC_BIT(8) | PPC_BIT(34) | PPC_BIT(44)); + rscom_and(chip, 0x2010819, ~PPC_BITMASK(4, 7)); + rscom_and_or(chip, 0x201081B, + ~PPC_BITMASK(45, 51), PPC_PLACE(0x7, 45, 3) | PPC_PLACE(0x2, 48, 4)); + rscom_and_or(chip, 0x201081C, ~PPC_BITMASK(18, 21), PPC_PLACE(0x1, 18, 4)); +} + +static void int_scom(uint8_t chip, uint8_t dd) +{ + /* + * [0] = 0 + * [1] = 1 + * [5-8] ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + * [9-11] ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + */ + rscom_and_or(chip, 0x501300A, ~(PPC_BITMASK(0, 1) | PPC_BITMASK(5, 11)), PPC_BIT(1)); + + rscom_or(chip, 0x5013021, + PPC_BIT(46) | // INT_CQ_PBO_CTL_DISABLE_VG_NOT_SYS_ON + PPC_BIT(47) | // INT_CQ_PBO_CTL_DISABLE_G_ON + PPC_BIT(49)); + + if (dd <= 0x20) + write_rscom(chip, 0x5013033, 0x2000005C040281C3); + else + write_rscom(chip, 0x5013033, 0x0000005C040081C3); + + write_rscom(chip, 0x5013036, 0); + write_rscom(chip, 0x5013037, 0x9554021F80110E0C); + + rscom_and_or(chip, 0x5013130, + ~(PPC_BITMASK(2, 7) | PPC_BITMASK(10, 15)), + PPC_PLACE(0x18, 2, 6) | PPC_PLACE(0x18, 10, 6)); + + write_rscom(chip, 0x5013140, 0x050043EF00100020); + write_rscom(chip, 0x5013141, 0xFADFBB8CFFAFFFD7); + write_rscom(chip, 0x5013178, 0x0002000610000000); + + rscom_and_or(chip, 0x501320E, ~PPC_BITMASK(0, 47), PPC_PLACE(0x626222024216, 0, 48)); + rscom_and_or(chip, 0x5013214, ~PPC_BITMASK(16, 31), PPC_PLACE(0x5BBF, 16, 16)); + rscom_and_or(chip, 0x501322B, ~PPC_BITMASK(58, 63), PPC_PLACE(0x18, 58, 6)); + + if (dd == 0x20) { + rscom_and_or(chip, 0x5013272, + ~PPC_BITMASK(0, 43), PPC_PLACE(0x0002C018006, 0, 44)); + rscom_and_or(chip, 0x5013273, + ~PPC_BITMASK(0, 43), PPC_PLACE(0xFFFCFFEFFFA, 0, 44)); + } +} + +static void vas_scom(uint8_t chip, uint8_t dd) +{ + uint64_t data; + + rscom_and_or(chip, 0x3011803, ~PPC_BITMASK(0, 53), 0x00210102540D7C00); + rscom_and(chip, 0x3011806, ~PPC_BITMASK(0, 53)); + + data = read_rscom(chip, 0x3011807); + data &= ~PPC_BITMASK(0, 53); + data |= (dd == 0x20 ? 
0x00DD020180000000 : 0x00DF020180000000); + write_rscom(chip, 0x3011807, data); + + /* + * [0-3] ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + * [4-6] ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + */ + rscom_and(chip, 0x301184D, ~PPC_BITMASK(0, 6)); + + data = read_rscom(chip, 0x301184E); + data &= ~PPC_BIT(13); // SOUTH_VA_EG_SCF_ADDR_BAR_MODE_OFF + data |= PPC_BIT(14); // SOUTH_VA_EG_SCF_SKIP_G_ON + data |= PPC_BIT(1); // SOUTH_VA_EG_SCF_DISABLE_G_WR_ON + data |= PPC_BIT(5); // SOUTH_VA_EG_SCF_DISABLE_G_RD_ON + data |= PPC_BIT(2); // SOUTH_VA_EG_SCF_DISABLE_VG_WR_ON + data |= PPC_BIT(6); // SOUTH_VA_EG_SCF_DISABLE_VG_RD_ON + PPC_INSERT(data, 0xFC, 20, 8); + PPC_INSERT(data, 0xFC, 28, 8); + write_rscom(chip, 0x301184E, data); + + if (dd == 0x20) + rscom_or(chip, 0x301184F, PPC_BIT(0)); +} + +static void chiplet_scominit(uint8_t chip, uint8_t dd) +{ + enum { + PU_PB_CENT_SM0_PB_CENT_FIR_REG = 0x05011C00, + PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 = 13, + + PU_PB_IOE_FIR_MASK_REG_OR = 0x05013405, + PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR = 0x05011C33, + + FBC_IOE_TL_FIR_MASK_X0_NF = 0x00C00C0C00000880, + FBC_IOE_TL_FIR_MASK_X2_NF = 0x000300C0C0000220, + FBC_EXT_FIR_MASK_X0_NF = 0x8000000000000000, + FBC_EXT_FIR_MASK_X1_NF = 0x4000000000000000, + FBC_EXT_FIR_MASK_X2_NF = 0x2000000000000000, + + PU_NMMU_MM_EPSILON_COUNTER_VALUE = 0x5012C1D, + }; + + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); + + int mcs_i; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) + mcs_scom(chip, mcs_to_nest[mcs_ids[mcs_i]]); + + /* + * Read spare FBC FIR bit -- if set, SBE has configured XBUS FIR resources for all + * present units, and code here will be run to mask resources associated with + * non-functional units. + */ + if (read_rscom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & + PPC_BIT(PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13)) { + /* Masking XBUS FIR resources for unused links */ + + /* XBUS0 FBC TL */ + write_rscom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X0_NF); + /* XBUS0 EXTFIR */ + write_rscom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X0_NF); + + /* XBUS2 FBC TL */ + write_rscom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X2_NF); + /* XBUS2 EXTFIR */ + write_rscom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X2_NF); + } + + fbc_ioo_tl_scom(chip); + nx_scom(chip, dd); + cxa_scom(chip, dd); // CAPP + int_scom(chip, dd); + vas_scom(chip, dd); + + /* Setup NMMU epsilon write cycles */ + rscom_and_or(chip, PU_NMMU_MM_EPSILON_COUNTER_VALUE, + ~(PPC_BITMASK(0, 11) | PPC_BITMASK(16, 27)), + PPC_PLACE(pb_cfg->eps_w[0], 0, 12) | PPC_PLACE(pb_cfg->eps_w[1], 16, 12)); +} + +static void psi_scom(uint8_t chip) +{ + rscom_or(chip, 0x4011803, PPC_BITMASK(0, 6)); + rscom_and(chip, 0x4011806, ~PPC_BITMASK(0, 6)); + rscom_and(chip, 0x4011807, ~PPC_BITMASK(0, 6)); + + rscom_and_or(chip, 0x5012903, ~PPC_BITMASK(0, 28), PPC_PLACE(0x7E040DF, 0, 29)); + rscom_and_or(chip, 0x5012906, ~PPC_BITMASK(0, 28), PPC_PLACE(0x0, 0, 29)); + rscom_and_or(chip, 0x5012907, ~PPC_BITMASK(0, 28), PPC_PLACE(0x18050020, 0, 29)); + + rscom_and(chip, 0x501290F, ~(PPC_BITMASK(16, 27) | PPC_BITMASK(48, 52))); +} + +void istep_10_6(uint8_t chips) +{ + uint8_t dd = get_dd(); // XXX: this should probably be chip-specific + + printk(BIOS_EMERG, "starting istep 10.6\n"); + report_istep(10, 6); + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + chiplet_scominit(chip, dd); + psi_scom(chip); + } + } + + printk(BIOS_EMERG, "ending istep 10.6\n"); +} diff --git 
a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 9ecb4388d63..a36d0bf04a6 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -397,6 +397,7 @@ void main(void) istep_9_7(chips); istep_10_1(chips); + istep_10_6(chips); istep_10_10(chips, pci_info); istep_10_12(chips); istep_10_13(chips); From 60b1cf0f9edbab9e5a90452b044d7b8b303a51c5 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 20 Feb 2022 15:38:29 +0200 Subject: [PATCH 173/213] soc/power9/chip.c: update DT for two CPUs Change-Id: Ib1c8831c792ccce61925408ea19fddca682b476e Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 60 +++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 18 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index e2cd1f4c12a..d0c7b118a69 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -126,7 +126,7 @@ static void fill_l2_node(struct device_tree *tree, } static void fill_cpu_node(struct device_tree *tree, - struct device_tree_node *node, uint32_t pir, + struct device_tree_node *node, uint8_t chip, uint32_t pir, uint32_t next_lvl_phandle) { /* Mandatory/standard properties */ @@ -152,8 +152,8 @@ static void fill_cpu_node(struct device_tree *tree, dt_add_u32_prop(node, "reg", pir); dt_add_u32_prop(node, "ibm,pir", pir); - /* Chip ID of this core */ - dt_add_u32_prop(node, "ibm,chip-id", 0); /* FIXME for second CPU */ + /* Chip ID of this core, not sure why it's shifted (group id + chip id?) */ + dt_add_u32_prop(node, "ibm,chip-id", chip << 3); /* * Interrupt server numbers (aka HW processor numbers) of all threads @@ -207,15 +207,14 @@ static void fill_cpu_node(struct device_tree *tree, * Old-style core clock frequency. Only create this property if the * frequency fits in a 32-bit number. Do not create it if it doesn't. */ - /* TODO: update these 3 uses of nominal_freq to be chip-specific */ - if ((nominal_freq[0] >> 32) == 0) - dt_add_u32_prop(node, "clock-frequency", nominal_freq[0]); + if ((nominal_freq[chip] >> 32) == 0) + dt_add_u32_prop(node, "clock-frequency", nominal_freq[chip]); /* * Mandatory: 64-bit version of the core clock frequency, always create * this property. */ - dt_add_u64_prop(node, "ibm,extended-clock-frequency", nominal_freq[0]); + dt_add_u64_prop(node, "ibm,extended-clock-frequency", nominal_freq[chip]); /* Timebase freq has a fixed value, always use that */ dt_add_u32_prop(node, "timebase-frequency", 512 * MHz); @@ -269,23 +268,44 @@ static inline unsigned long size_k(uint64_t reg) */ static int dt_platform_update(struct device_tree *tree) { + uint8_t chips = fsi_get_present_chips(); + struct device_tree_node *cpus, *xscom; - uint64_t cores = read_scom(0x0006C090); - assert(cores != 0); /* Find xscom node, halt if not found */ /* TODO: is the address always the same? 
*/ xscom = dt_find_node_by_path(tree, "/xscom@603fc00000000", NULL, NULL, 0); if (xscom == NULL) - die("No 'xscom' node in device tree!\n"); + die("No 'xscom' node for chip#0 in device tree!\n"); + + /* Check for xscom node of the second CPU (assuming group pump mode) */ + xscom = dt_find_node_by_path(tree, "/xscom@623fc00000000", NULL, NULL, 0); + if (chips & 0x2) { + if (xscom == NULL) + die("No 'xscom' node for chip#1 in device tree!\n"); + } else { + if (xscom != NULL) + die("Found 'xscom' node for missing chip#1 in device tree!\n"); + } /* Find "cpus" node */ cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0); if (cpus == NULL) die("No 'cpus' node in device tree!\n"); - for (int core_id = 0; core_id <= 24; core_id++) { - if (IS_EC_FUNCTIONAL(core_id, cores)) { + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + uint64_t cores; + + if (!(chips & (1 << chip))) + continue; + + cores = read_rscom(chip, 0x0006C090); + assert(cores != 0); + + for (int core_id = 0; core_id < MAX_CORES_PER_CHIP; core_id++) { + if (!IS_EC_FUNCTIONAL(core_id, cores)) + continue; + /* * Not sure who is the original author of this comment, it is * duplicated in Hostboot and Skiboot, and now also here. It @@ -293,12 +313,12 @@ static int dt_platform_update(struct device_tree *tree) * of thread 0 of _first_ core in pair, both for L2 and L3. */ /* - * Cache nodes. Those are siblings of the processor nodes under /cpus and - * represent the various level of caches. + * Cache nodes. Those are siblings of the processor nodes under /cpus + * and represent the various level of caches. * * The unit address (and reg property) is mostly free-for-all as long as - * there is no collisions. On HDAT machines we use the following encoding - * which I encourage you to also follow to limit surprises: + * there is no collisions. On HDAT machines we use the following + * encoding which I encourage you to also follow to limit surprises: * * L2 : (0x20 << 24) | PIR (PIR is PIR value of thread 0 of core) * L3 : (0x30 << 24) | PIR @@ -308,7 +328,11 @@ static int dt_platform_update(struct device_tree *tree) * own "l2-cache" (or "next-level-cache") property, so the core node * points to the L2, the L2 points to the L3 etc... */ - uint32_t pir = core_id * 4; + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + */ + uint32_t pir = PPC_PLACE(chip, 49, 4) | PPC_PLACE(core_id, 57, 5); uint32_t l2_pir = (0x20 << 24) | (pir & ~7); uint32_t l3_pir = (0x30 << 24) | (pir & ~7); /* "/cpus/l?-cache@12345678" -> 23 characters + terminator */ @@ -341,7 +365,7 @@ static int dt_platform_update(struct device_tree *tree) fill_l2_node(tree, l2_node, l2_pir, l3_node->phandle); } - fill_cpu_node(tree, cpu_node, pir, l2_node->phandle); + fill_cpu_node(tree, cpu_node, chip, pir, l2_node->phandle); } } From 4bae1596a258f94f3ea5e80becbbd10cd106a09d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 20 Feb 2022 15:40:51 +0200 Subject: [PATCH 174/213] soc/power9/chip.c: activate slave cores on two CPUs Main thing to note is that we're not skipping the first thread of the first core on the slave CPU, because it wasn't yet started. 
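
For reference, a minimal sketch of how the doorbell message ends up being
composed for a given chip, core and thread (illustrative only — it just
restates what the code below does; PPC_PLACE(chip, 49, 4) is the same
group/chip field that the PIR values built for the device tree use):

	/* Illustrative only, not part of the change itself */
	uint64_t msg = DOORBELL_MSG_TYPE	/* 0x0000000028000000 */
		     | PPC_PLACE(chip, 49, 4)	/* group/chip ID field of the PIR */
		     | (core << 2)		/* core ID; threads occupy the low 2 bits */
		     | thread;			/* 0..3 */
	asm volatile("msgsnd %0" :: "r" (msg));
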
Change-Id: I51915169640631ab9406202ea3112044aec07dd7 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index d0c7b118a69..3ba4c6aa4dd 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -481,14 +481,20 @@ static void enable_soc_dev(struct device *dev) istep_18_12(chips, tod_mdmt); } -static void activate_slave_cores(void) +static void activate_slave_cores(uint8_t chip) { enum { DOORBELL_MSG_TYPE = 0x0000000028000000 }; uint8_t i; /* Read OCC CCSR written by the code earlier */ - const uint64_t functional_cores = read_scom(0x0006C090); + const uint64_t functional_cores = read_rscom(chip, 0x0006C090); + + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + */ + const uint64_t chip_msg = DOORBELL_MSG_TYPE | PPC_PLACE(chip, 49, 4); /* Find and process the first core in a separate loop to slightly * simplify processing of all the other cores by removing a conditional */ @@ -500,10 +506,10 @@ static void activate_slave_cores(void) continue; /* Message value for thread 0 of the current core */ - core_msg = DOORBELL_MSG_TYPE | (i << 2); + core_msg = chip_msg | (i << 2); /* Skip sending doorbell to the current thread of the current core */ - for (thread = 1; thread < 4; ++thread) { + for (thread = (chip == 0 ? 1 : 0); thread < 4; ++thread) { register uint64_t msg = core_msg | thread; asm volatile("msgsnd %0" :: "r" (msg)); } @@ -519,7 +525,7 @@ static void activate_slave_cores(void) continue; /* Message value for thread 0 of the i-th core */ - core_msg = DOORBELL_MSG_TYPE | (i << 2); + core_msg = chip_msg | (i << 2); for (thread = 0; thread < 4; ++thread) { register uint64_t msg = core_msg | thread; @@ -552,6 +558,8 @@ static void *load_fdt(const char *dtb_file) void platform_prog_run(struct prog *prog) { + uint8_t chips = fsi_get_present_chips(); + void *fdt; fdt = load_fdt("1-cpu.dtb"); @@ -581,7 +589,10 @@ void platform_prog_run(struct prog *prog) * activated threads start execution before current thread jumps into * the payload. */ - activate_slave_cores(); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + activate_slave_cores(chip); + } } struct chip_operations soc_ibm_power9_ops = { From b35e904d3fe5987bfe0d60853bc1dac4f69b0066 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Mon, 21 Mar 2022 21:30:30 +0200 Subject: [PATCH 175/213] soc/power9/chip.c: validate PCIe nodes of DT Put existing xscom nodes validation and the new code into a separate function. Change-Id: Ie04da44b1beb6010afe8cda527340e5d234cc4e1 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 70 +++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 3ba4c6aa4dd..99876c3d1cb 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -15,6 +15,13 @@ #include "istep_13_scom.h" #include "chip.h" #include "fsi.h" +#include "pci.h" + +/* + * Chip ID, not sure why it's shifted (group id + chip id?). + * Might be specific to group pump mode. 
+ */
+#define CHIP_ID(chip) ((chip) << 3)

 static uint64_t nominal_freq[MAX_CHIPS];

@@ -152,8 +159,8 @@ static void fill_cpu_node(struct device_tree *tree,
 dt_add_u32_prop(node, "reg", pir);
 dt_add_u32_prop(node, "ibm,pir", pir);

- /* Chip ID of this core, not sure why it's shifted (group id + chip id?) */
- dt_add_u32_prop(node, "ibm,chip-id", chip << 3);
+ /* Chip ID of this core */
+ dt_add_u32_prop(node, "ibm,chip-id", CHIP_ID(chip));

 /*
 * Interrupt server numbers (aka HW processor numbers) of all threads
@@ -261,16 +268,32 @@ static inline unsigned long size_k(uint64_t reg)
 return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1);
 }

-/*
- * Device tree passed to Skiboot has to have phandles set either for all nodes
- * or none at all. Because relative phandles are set for cpu->l2_cache->l3_cache
- * chain, only first option is possible.
- */
-static int dt_platform_update(struct device_tree *tree)
+/* Returns true if the DT contains at least one root complex node for the given chip */
+static bool dt_contains_pcie(struct device_tree *tree, uint8_t chip_id)
 {
- uint8_t chips = fsi_get_present_chips();
+ int phb;
+
+ /* See comment before pec0_lane_cfg global variable in istep_10_10.c */
+ for (phb = 0; phb < MAX_PHB_PER_PROC; phb++) {
+ struct device_tree_node *node;

- struct device_tree_node *cpus, *xscom;
+ char path[40];
+ snprintf(path, sizeof(path), "/ibm,pcie-slots/root-complex@%d,%d", chip_id,
+ phb);
+
+ node = dt_find_node_by_path(tree, path, NULL, NULL, 0);
+ if (node != NULL)
+ return true;
+ }
+
+ return false;
+}
+
+/* Checks input device tree for sanity and dies on failure */
+static void validate_dt(struct device_tree *tree, uint8_t chips)
+{
+ struct device_tree_node *xscom;
+ bool found_pcie;

 /* Find xscom node, halt if not found */
 /* TODO: is the address always the same? */
@@ -288,6 +311,33 @@ static int dt_platform_update(struct device_tree *tree)
 die("Found 'xscom' node for missing chip#1 in device tree!\n");
 }

+ if (!dt_contains_pcie(tree, /*chip_id=*/0))
+ die("No 'root-complex' nodes for chip#0 in device tree!\n");
+
+ /* Check for root complexes of the second CPU (assuming group pump mode) */
+ found_pcie = dt_contains_pcie(tree, /*chip_id=*/CHIP_ID(1));
+ if (chips & 0x2) {
+ if (!found_pcie)
+ die("No 'root-complex' node for chip#1 in device tree!\n");
+ } else {
+ if (found_pcie)
+ die("Found 'root-complex' node for missing chip#1 in device tree!\n");
+ }
+}
+
+/*
+ * Device tree passed to Skiboot has to have phandles set either for all nodes
+ * or none at all. Because relative phandles are set for cpu->l2_cache->l3_cache
+ * chain, only first option is possible.
+ */
+static int dt_platform_update(struct device_tree *tree)
+{
+ uint8_t chips = fsi_get_present_chips();
+
+ struct device_tree_node *cpus;
+
+ validate_dt(tree, chips);
+
 /* Find "cpus" node */
 cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0);
 if (cpus == NULL)

From 10d74b1fa7c076c9308beeee1c2156977d8b8786 Mon Sep 17 00:00:00 2001
From: Sergii Dmytruk
Date: Wed, 23 Mar 2022 16:16:24 +0200
Subject: [PATCH 176/213] soc/power9/: pass mem_data from romstage to ramstage

It's needed as a source of information for building the device tree.
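
For reference, a rough sketch of the layout of the CBMEM_ID_MEMINFO entry used
for this handoff (illustrative only; it simply mirrors store_mem_data() and
restore_mem_data() below):

	/* cbmem_entry_start(entry) points here */
	mcbist_data_t mem_data[MAX_CHIPS];	/* the global, copied verbatim */
	/*
	 * ...followed by CONFIG_DIMM_SPD_SIZE bytes of SPD data for every DIMM
	 * whose spd pointer is non-NULL, packed in (chip, mcs, mca, dimm) order.
	 * restore_mem_data() re-points each dimm's spd at these copies.
	 */
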
Change-Id: I306d384186c4677452d531d490cde495b1129204 Signed-off-by: Sergii Dmytruk --- src/include/cpu/power/proc.h | 1 + src/soc/ibm/power9/chip.c | 41 +++++++++++++++++++++++++++++++++++ src/soc/ibm/power9/romstage.c | 37 +++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/src/include/cpu/power/proc.h b/src/include/cpu/power/proc.h index ccf504e0e75..1af8ee48ebb 100644 --- a/src/include/cpu/power/proc.h +++ b/src/include/cpu/power/proc.h @@ -21,6 +21,7 @@ _Static_assert(CONFIG_MAX_CPUS <= MAX_CHIPS, "Too many CPUs requested"); #define MCA_PER_PROC (MCA_PER_MCS * MCS_PER_PROC) #define DIMMS_PER_MCA 2 #define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) +#define DIMMS_PER_PROC (DIMMS_PER_MCS * MCS_PER_PROC) #define I2C_BUSES_PER_CPU 4 #define SPD_I2C_BUS 3 diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 99876c3d1cb..835f459e06f 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include +#include #include #include #include @@ -23,6 +25,9 @@ */ #define CHIP_ID(chip) ((chip) << 3) +/* Copy of data put together by the romstage */ +mcbist_data_t mem_data[MAX_CHIPS]; + static uint64_t nominal_freq[MAX_CHIPS]; /* @@ -649,3 +654,39 @@ struct chip_operations soc_ibm_power9_ops = { CHIP_NAME("POWER9") .enable_dev = enable_soc_dev, }; + +/* Restores global mem_data variable from cbmem */ +static void restore_mem_data(int is_recovery) +{ + const struct cbmem_entry *entry; + uint8_t *data; + int dimm_i; + + (void)is_recovery; /* unused */ + + entry = cbmem_entry_find(CBMEM_ID_MEMINFO); + if (entry == NULL) + die("Failed to find mem_data entry in CBMEM in ramstage!"); + + /* Layout: mem_data itself then SPD data of each dimm which has it */ + data = cbmem_entry_start(entry); + + memcpy(&mem_data, data, sizeof(mem_data)); + data += sizeof(mem_data); + + for (dimm_i = 0; dimm_i < MAX_CHIPS * DIMMS_PER_PROC; dimm_i++) { + int chip = dimm_i / DIMMS_PER_PROC; + int mcs = (dimm_i % DIMMS_PER_PROC) / DIMMS_PER_MCS; + int mca = (dimm_i % DIMMS_PER_MCS) / DIMMS_PER_MCA; + int dimm = dimm_i % DIMMS_PER_MCA; + + rdimm_data_t *dimm_data = &mem_data[chip].mcs[mcs].mca[mca].dimm[dimm]; + if (dimm_data->spd == NULL) + continue; + + /* We're not deleting the entry so this is valid */ + dimm_data->spd = data; + data += CONFIG_DIMM_SPD_SIZE; + } +} +CBMEM_READY_HOOK(restore_mem_data); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index a36d0bf04a6..bc61c05ec0e 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "fsi.h" @@ -442,3 +443,39 @@ void main(void) cbmem_initialize_empty(); run_ramstage(); } + +/* Stores global mem_data variable into cbmem for future use by ramstage */ +static void store_mem_data(int is_recovery) +{ + const struct cbmem_entry *entry; + uint8_t *data; + int dimm_i; + + (void)is_recovery; /* unused */ + + /* Layout: mem_data itself then SPD data of each dimm which has it */ + entry = cbmem_entry_add(CBMEM_ID_MEMINFO, sizeof(mem_data) + + MAX_CHIPS * DIMMS_PER_PROC * CONFIG_DIMM_SPD_SIZE); + if (entry == NULL) + die("Failed to add mem_data entry to CBMEM in romstage!"); + + data = cbmem_entry_start(entry); + + memcpy(data, &mem_data, sizeof(mem_data)); + data += sizeof(mem_data); + + for (dimm_i = 0; dimm_i < MAX_CHIPS * DIMMS_PER_PROC; dimm_i++) { + int chip = dimm_i / DIMMS_PER_PROC; + int mcs = (dimm_i % DIMMS_PER_PROC) / 
DIMMS_PER_MCS; + int mca = (dimm_i % DIMMS_PER_MCS) / DIMMS_PER_MCA; + int dimm = dimm_i % DIMMS_PER_MCA; + + rdimm_data_t *dimm_data = &mem_data[chip].mcs[mcs].mca[mca].dimm[dimm]; + if (dimm_data->spd == NULL) + continue; + + memcpy(data, dimm_data->spd, CONFIG_DIMM_SPD_SIZE); + data += CONFIG_DIMM_SPD_SIZE; + } +} +CBMEM_CREATION_HOOK(store_mem_data); From b7ab59a53ba1212e3168969728d7753dedb5cdb0 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 23 Mar 2022 16:19:30 +0200 Subject: [PATCH 177/213] soc/power9/chip.c: generate DIMM sensor nodes for DT Two nodes per installed DIMM: state (presence) and temperature sensors. Change-Id: I297c4820679491699fcdfea173d6fbd01eddbe75 Signed-off-by: Sergii Dmytruk --- src/mainboard/raptor-cs/talos-2/1-cpu.dts | 48 ----------------------- src/soc/ibm/power9/chip.c | 45 +++++++++++++++++++++ 2 files changed, 45 insertions(+), 48 deletions(-) diff --git a/src/mainboard/raptor-cs/talos-2/1-cpu.dts b/src/mainboard/raptor-cs/talos-2/1-cpu.dts index 7b8124ae925..9a749e4eb36 100644 --- a/src/mainboard/raptor-cs/talos-2/1-cpu.dts +++ b/src/mainboard/raptor-cs/talos-2/1-cpu.dts @@ -63,54 +63,6 @@ ipmi-sensor-type = <0xc1>; }; - sensor@b { - compatible = "ibm,ipmi-sensor"; - reg = <0x0b>; - ipmi-sensor-type = <0x0c>; - }; - - sensor@d { - compatible = "ibm,ipmi-sensor"; - reg = <0x0d>; - ipmi-sensor-type = <0x0c>; - }; - - sensor@f { - compatible = "ibm,ipmi-sensor"; - reg = <0x0f>; - ipmi-sensor-type = <0x0c>; - }; - - sensor@11 { - compatible = "ibm,ipmi-sensor"; - reg = <0x11>; - ipmi-sensor-type = <0x0c>; - }; - - sensor@1b { - compatible = "ibm,ipmi-sensor"; - reg = <0x1b>; - ipmi-sensor-type = <0x01>; - }; - - sensor@1d { - compatible = "ibm,ipmi-sensor"; - reg = <0x1d>; - ipmi-sensor-type = <0x01>; - }; - - sensor@1f { - compatible = "ibm,ipmi-sensor"; - reg = <0x1f>; - ipmi-sensor-type = <0x01>; - }; - - sensor@21 { - compatible = "ibm,ipmi-sensor"; - reg = <0x21>; - ipmi-sensor-type = <0x01>; - }; - sensor@8b { compatible = "ibm,ipmi-sensor"; reg = <0x8b>; diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 835f459e06f..cf2637f9fd8 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -330,6 +330,49 @@ static void validate_dt(struct device_tree *tree, uint8_t chips) } } +/* Mind that this function creates nodes without chip ID, but some types of sensors need it */ +static void add_sensor_node(struct device_tree *tree, uint8_t number, uint8_t ipmi_type) +{ + char path[32]; + struct device_tree_node *node; + + snprintf(path, sizeof(path), "/bmc/sensors/sensor@%x", number); + node = dt_find_node_by_path(tree, path, NULL, NULL, 1); + + dt_add_string_prop(node, "compatible", "ibm,ipmi-sensor"); + dt_add_u32_prop(node, "reg", number); + dt_add_u32_prop(node, "ipmi-sensor-type", ipmi_type); +} + +static void add_dimm_sensor_nodes(struct device_tree *tree, uint8_t chips) +{ + enum { + /* Base numbers for sensor ids */ + DIMM_STATE_BASE = 0x0B, + DIMM_TEMP_BASE = 0x1B, + + /* IPMI sensor types */ + STATE_IPMI_SENSOR = 0x0C, + TEMP_IPMI_SENSOR = 0x01, + }; + + int dimm_i; + for (dimm_i = 0; dimm_i < MAX_CHIPS * DIMMS_PER_PROC; dimm_i++) { + int chip = dimm_i / DIMMS_PER_PROC; + int mcs = (dimm_i % DIMMS_PER_PROC) / DIMMS_PER_MCS; + int mca = (dimm_i % DIMMS_PER_MCS) / DIMMS_PER_MCA; + int dimm = dimm_i % DIMMS_PER_MCA; + + if (!(chips & (1 << chip))) + continue; + if (!mem_data[chip].mcs[mcs].mca[mca].dimm[dimm].present) + continue; + + add_sensor_node(tree, DIMM_STATE_BASE + dimm_i, STATE_IPMI_SENSOR); + 
add_sensor_node(tree, DIMM_TEMP_BASE + dimm_i, TEMP_IPMI_SENSOR); + } +} + /* * Device tree passed to Skiboot has to have phandles set either for all nodes * or none at all. Because relative phandles are set for cpu->l2_cache->l3_cache @@ -343,6 +386,8 @@ static int dt_platform_update(struct device_tree *tree) validate_dt(tree, chips); + add_dimm_sensor_nodes(tree, chips); + /* Find "cpus" node */ cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0); if (cpus == NULL) From 8a03a5fd8545d4b93e0ba7fd5dfa16d3ca9f3e9d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 23 Feb 2022 01:19:57 +0200 Subject: [PATCH 178/213] mb/raptor-cs/talos-2/2-cpus.dts: add Change-Id: Id1d1bf193c507d11cefbd05461a5fbef958276bd Signed-off-by: Sergii Dmytruk --- src/mainboard/raptor-cs/talos-2/2-cpus.dts | 372 +++++++++++++++++++++ src/soc/ibm/power9/Makefile.inc | 7 + 2 files changed, 379 insertions(+) create mode 100644 src/mainboard/raptor-cs/talos-2/2-cpus.dts diff --git a/src/mainboard/raptor-cs/talos-2/2-cpus.dts b/src/mainboard/raptor-cs/talos-2/2-cpus.dts new file mode 100644 index 00000000000..d22660f23f3 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/2-cpus.dts @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* This file adds nodes specific to systems with 2 CPUs */ + +/dts-v1/; + +/include/ "1-cpu.dts" + +/ { + bmc { + sensors { + sensor@4 { + compatible = "ibm,ipmi-sensor"; + ibm,chip-id = <0x08>; + ipmi-sensor-type = <0x07>; + reg = <0x04>; + }; + + sensor@7 { + compatible = "ibm,ipmi-sensor"; + ibm,chip-id = <0x08>; + ipmi-sensor-type = <0x01>; + reg = <0x07>; + }; + + sensor@9 { + compatible = "ibm,ipmi-sensor"; + ibm,chip-id = <0x08>; + ipmi-sensor-type = <0x07>; + reg = <0x09>; + }; + }; + }; + + ibm,pcie-slots { + root-complex@8,0 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + ibm,slot-label = "SLOT2"; + reg = <0x08 0x00>; + + pluggable { + ibm,slot-label = "SLOT2"; + lane-mask = <0xffff>; + max-power = <0x00>; + mrw-slot-id = <0x02>; + }; + }; + + root-complex@8,3 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + ibm,slot-label = "SLOT1"; + reg = <0x08 0x03>; + + pluggable { + ibm,slot-label = "SLOT1"; + lane-mask = <0xff00>; + max-power = <0x00>; + mrw-slot-id = <0x01>; + }; + }; + + root-complex@8,4 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + ibm,slot-label = "SLOT0"; + reg = <0x08 0x04>; + + pluggable { + ibm,slot-label = "SLOT0"; + lane-mask = <0xf0>; + max-power = <0x00>; + }; + }; + + root-complex@8,5 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + lane-mask = <0x0f>; + reg = <0x08 0x05>; + + switch-up@10b5,8725 { + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,pluggable; + reg = <0x10b5 0x8725>; + upstream-port = <0x01>; + + down-port@4 { + compatible = "ibm,pcie-port"; + ibm,pluggable; + ibm,slot-label = "GPU3"; + reg = <0x04>; + + builtin { + ibm,slot-label = "GPU3"; + }; + }; + + down-port@5 { + compatible = "ibm,pcie-port"; + ibm,pluggable; + ibm,slot-label = "GPU4"; + reg = <0x05>; + + builtin { + ibm,slot-label = "GPU4"; + }; + }; + + down-port@d { + compatible = "ibm,pcie-port"; + ibm,pluggable; + ibm,slot-label = "GPU5"; + reg = <0x0d>; + + builtin { + ibm,slot-label = "GPU5"; + }; + }; + }; + }; + }; + 
+ xscom@623fc00000000 { + #address-cells = <0x01>; + #size-cells = <0x01>; + bus-frequency = <0x00 0x1bce39a0>; + compatible = "ibm,xscom\0ibm,power9-xscom"; + ibm,chip-id = <0x08>; + ibm,proc-chip-id = <0x01>; + reg = <0x623fc 0x00 0x08 0x00>; + scom-controller; + + chiptod@40000 { + reg = <0x40000 0x34>; + compatible = "ibm,power-chiptod\0ibm,power9-chiptod"; + secondary; + }; + + i2cm@a1000 { + #address-cells = <0x01>; + #size-cells = <0x00>; + chip-engine# = <0x01>; + clock-frequency = <0x6f38e68>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + reg = <0xa1000 0x1000>; + + i2c-bus@0 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0xf4240>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x00>; + + eeprom@50 { + compatible = "atmel,24c512"; + label = "module-vpd"; + link-id = <0x10000>; + reg = <0x50>; + }; + }; + + i2c-bus@2 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0xf4240>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x02>; + + eeprom@50 { + compatible = "atmel,24c128"; + label = "module-vpd"; + link-id = <0x1000b>; + reg = <0x50>; + }; + }; + }; + + i2cm@a3000 { + #address-cells = <0x01>; + #size-cells = <0x00>; + chip-engine# = <0x03>; + clock-frequency = <0x6f38e68>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + reg = <0xa3000 0x1000>; + + i2c-bus@0 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0x61a80>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x00>; + + eeprom@50 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10011>; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10009>; + reg = <0x51>; + }; + + eeprom@52 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10001>; + reg = <0x52>; + }; + + eeprom@53 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10002>; + reg = <0x53>; + }; + }; + + i2c-bus@1 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0x61a80>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x01>; + + eeprom@54 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000a>; + reg = <0x54>; + }; + + eeprom@55 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000c>; + reg = <0x55>; + }; + + eeprom@56 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000e>; + reg = <0x56>; + }; + + eeprom@57 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000f>; + reg = <0x57>; + }; + }; + }; + + nmmu@5012c40 { + compatible = "ibm,power9-nest-mmu"; + reg = <0x5012c40 0x20>; + }; + + nx@2010000 { + compatible = "ibm,power9-nx"; + reg = <0x2010000 0x4000>; + }; + + pbcq@4010c00 { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,power9-pbcq"; + ibm,hub-id = <0x01>; + ibm,pec-index = <0x00>; + reg = <0x4010c00 0x100 0xd010800 0x200>; + + stack@0 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x00>; + reg = <0x00>; + status = "okay"; + }; + }; + + pbcq@4011000 { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,power9-pbcq"; + ibm,hub-id = <0x01>; + ibm,pec-index = <0x01>; + reg = <0x4011000 0x100 0xe010800 0x200>; + + stack@0 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 
0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x01>; + reg = <0x00>; + status = "okay"; + }; + + stack@1 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x02>; + reg = <0x01>; + status = "okay"; + }; + }; + + pbcq@4011400 { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,power9-pbcq"; + ibm,hub-id = <0x01>; + ibm,pec-index = <0x02>; + reg = <0x4011400 0x100 0xf010800 0x200>; + + stack@0 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x03>; + reg = <0x00>; + status = "okay"; + }; + + stack@1 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x04>; + reg = <0x01>; + status = "disabled"; + }; + + stack@2 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x05>; + reg = <0x02>; + status = "disabled"; + }; + }; + + psihb@5012900 { + reg = <0x5012900 0x100>; + compatible = "ibm,power9-psihb-x\0ibm,psihb-x"; + }; + + vas@3011800 { + reg = <0x3011800 0x300>; + compatible = "ibm,power9-vas-x"; + ibm,vas-id = <0x01>; + }; + + xive@5013000 { + reg = <0x5013000 0x300>; + compatible = "ibm,power9-xive-x"; + force-assign-bars; + }; + }; +}; diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 512fe40e93b..6a544e4fd2a 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -64,12 +64,19 @@ ramstage-y += fsi.c MB_DIR = src/mainboard/$(MAINBOARDDIR) ONECPU_DTB = 1-cpu.dtb +TWOCPU_DTB = 2-cpus.dtb $(obj)/%.dtb: $(MB_DIR)/%.dts dtc -I dts -O dtb -o $@ -i $(MB_DIR) $< +$(obj)/$(TWOCPU_DTB): $(obj)/$(ONECPU_DTB) + cbfs-files-y += $(ONECPU_DTB) $(ONECPU_DTB)-file := $(obj)/$(ONECPU_DTB) $(ONECPU_DTB)-type := raw +cbfs-files-y += $(TWOCPU_DTB) +$(TWOCPU_DTB)-file := $(obj)/$(TWOCPU_DTB) +$(TWOCPU_DTB)-type := raw + endif From 1189899420876b1c488158c8e89833d66df7fd5f Mon Sep 17 00:00:00 2001 From: Igor Bagnucki Date: Tue, 8 Jun 2021 12:52:40 +0200 Subject: [PATCH 179/213] mb/raptor-cs/talos-2: allow using skiboot payload Change-Id: Id9d85981eb59cb8e69e1e044891a6b20a6f98a00 Signed-off-by: Igor Bagnucki --- payloads/external/skiboot/Kconfig | 4 +++- src/mainboard/raptor-cs/talos-2/Kconfig | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/payloads/external/skiboot/Kconfig b/payloads/external/skiboot/Kconfig index 3198358ecba..22024cd5fa1 100644 --- a/payloads/external/skiboot/Kconfig +++ b/payloads/external/skiboot/Kconfig @@ -7,13 +7,15 @@ config PAYLOAD_FILE config SKIBOOT_GIT_REPO string "Git repository of skiboot payload" - default "https://github.com/open-power/skiboot" + default "https://github.com/open-power/skiboot" if !BOARD_RAPTOR_CS_TALOS_2 + default "https://git.raptorcs.com/git/talos-skiboot" if BOARD_RAPTOR_CS_TALOS_2 help Git repository which will be used to clone skiboot. 
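+# Talos II builds skiboot from Raptor Computing Systems' tree (with a matching
+# revision pinned below) rather than from the upstream open-power repository.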
config SKIBOOT_REVISION string "Revision of skiboot payload" default "d93ddbd39b4eeac0bc11dacbdadea76df2996c13" if BOARD_EMULATION_QEMU_POWER9 + default "9858186353f2203fe477f316964e03609d12fd1d" if BOARD_RAPTOR_CS_TALOS_2 help Revision, that skiboot repository will be checked out to, before building an image. diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index 62148ff0cd4..0b6ace4c307 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -14,7 +14,7 @@ config BOARD_SPECIFIC_OPTIONS def_bool y select CPU_POWER9 select CPU_IBM_POWER9 - select BOARD_ROMSIZE_KB_512 + select BOARD_ROMSIZE_KB_1024 select ARCH_BOOTBLOCK_PPC64 select ARCH_VERSTAGE_PPC64 select ARCH_ROMSTAGE_PPC64 From a76eecdaf0a2e98dc363932a79ca479159245669 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 23 Feb 2022 01:34:28 +0200 Subject: [PATCH 180/213] soc/power9/: fully prepare and pass FDT to payload Change-Id: Id3380fb706d427ec8bb8083515e11ce93600c6f9 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 152 ++++++++++++++++++++++++++++++++++---- 1 file changed, 136 insertions(+), 16 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index cf2637f9fd8..96ccfca0497 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -28,6 +28,24 @@ /* Copy of data put together by the romstage */ mcbist_data_t mem_data[MAX_CHIPS]; +#define SIZE_MASK PPC_BITMASK(13,23) +#define SIZE_SHIFT (63 - 23) +#define BASE_MASK PPC_BITMASK(24,47) +#define BASE_SHIFT (63 - 47) + +/* Values in registers are in 4GB units, ram_resource_kb() expects kilobytes. */ +#define CONVERT_4GB_TO_KB(x) ((x) << 22) + +static inline unsigned long base_k(uint64_t reg) +{ + return CONVERT_4GB_TO_KB((reg & BASE_MASK) >> BASE_SHIFT); +} + +static inline unsigned long size_k(uint64_t reg) +{ + return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); +} + static uint64_t nominal_freq[MAX_CHIPS]; /* @@ -255,22 +273,74 @@ static void fill_cpu_node(struct device_tree *tree, dt_add_u32_prop(node, "tlb-sets", 4); } -#define SIZE_MASK PPC_BITMASK(13,23) -#define SIZE_SHIFT (63 - 23) -#define BASE_MASK PPC_BITMASK(24,47) -#define BASE_SHIFT (63 - 47) +static void add_memory_node(struct device_tree *tree, uint8_t chip, uint64_t reg) +{ + struct device_tree_node *node; + /* /memory@0123456789abcdef - 24 characters + null byte */ + char path[26] = {}; -/* Values in registers are in 4GB units, ram_resource_kb() expects kilobytes. */ -#define CONVERT_4GB_TO_KB(x) ((x) << 22) + union {uint32_t u32[2]; uint64_t u64;} addr = { .u64 = base_k(reg) * KiB }; + union {uint32_t u32[2]; uint64_t u64;} size = { .u64 = size_k(reg) * KiB }; -static inline unsigned long base_k(uint64_t reg) + snprintf(path, sizeof(path), "/memory@%llx", addr.u64); + node = dt_find_node_by_path(tree, path, NULL, NULL, 1); + + dt_add_string_prop(node, "device_type", (char *)"memory"); + /* Use 2 cells each for address and size. This assumes BE. */ + dt_add_reg_prop(node, &addr.u64, &size.u64, 1, 2, 2); + + /* Don't know why the value needs to be shifted (group id + chip id?) 
*/ + dt_add_u32_prop(node, "ibm,chip-id", chip << 3); +} + +static bool add_mem_reserve_node(const struct range_entry *r, void *arg) { - return CONVERT_4GB_TO_KB((reg & BASE_MASK) >> BASE_SHIFT); + struct device_tree *tree = arg; + + if (range_entry_tag(r) != BM_MEM_RAM) { + struct device_tree_reserve_map_entry *entry = xzalloc(sizeof(*entry)); + entry->start = range_entry_base(r); + entry->size = range_entry_size(r); + + list_insert_after(&entry->list_node, &tree->reserve_map); + } + + return true; } -static inline unsigned long size_k(uint64_t reg) +static void add_memory_nodes(struct device_tree *tree) { - return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); + uint8_t chip; + uint8_t chips = fsi_get_present_chips(); + + /* + * Not using bootmem_walk_os_mem() to be consistent with Hostboot, + * whose "memory" nodes include reserved regions. + */ + for (chip = 0; chip < MAX_CHIPS; chip++) { + int mcs_i; + + if (!(chips & (1 << chip))) + continue; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + uint64_t reg; + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + /* These registers are undocumented, see istep 14.5. */ + /* MCS_MCFGP */ + reg = read_rscom_for_chiplet(chip, nest, 0x0501080A); + if (reg & PPC_BIT(0)) + add_memory_node(tree, chip, reg); + + /* MCS_MCFGPM */ + reg = read_rscom_for_chiplet(chip, nest, 0x0501080C); + if (reg & PPC_BIT(0)) + add_memory_node(tree, chip, reg); + } + } + + bootmem_walk_os_mem(add_mem_reserve_node, tree); } /* Finds first root complex for a given chip that's present in DT else returns NULL */ @@ -373,20 +443,66 @@ static void add_dimm_sensor_nodes(struct device_tree *tree, uint8_t chips) } } +/* Add coreboot tables and CBMEM information to the device tree */ +static void add_cb_fdt_data(struct device_tree *tree) +{ + u32 addr_cells = 1, size_cells = 1; + u64 reg_addrs[2], reg_sizes[2]; + void *baseptr = NULL; + size_t size = 0; + + static const char *firmware_path[] = {"firmware", NULL}; + struct device_tree_node *firmware_node = dt_find_node(tree->root, + firmware_path, &addr_cells, &size_cells, 1); + + /* Need to add 'ranges' to the intermediate node to make 'reg' work. */ + dt_add_bin_prop(firmware_node, "ranges", NULL, 0); + + static const char *coreboot_path[] = {"coreboot", NULL}; + struct device_tree_node *coreboot_node = dt_find_node(firmware_node, + coreboot_path, &addr_cells, &size_cells, 1); + + dt_add_string_prop(coreboot_node, "compatible", "coreboot"); + + /* Fetch CB tables from cbmem */ + void *cbtable = cbmem_find(CBMEM_ID_CBTABLE); + if (!cbtable) { + printk(BIOS_WARNING, "FIT: No coreboot table found!\n"); + return; + } + + /* First 'reg' address range is the coreboot table. */ + const struct lb_header *header = cbtable; + reg_addrs[0] = (uintptr_t)header; + reg_sizes[0] = header->header_bytes + header->table_bytes; + + /* Second is the CBMEM area (which usually includes the coreboot table). */ + cbmem_get_region(&baseptr, &size); + if (!baseptr || size == 0) { + printk(BIOS_WARNING, "FIT: CBMEM pointer/size not found!\n"); + return; + } + + reg_addrs[1] = (uintptr_t)baseptr; + reg_sizes[1] = size; + + dt_add_reg_prop(coreboot_node, reg_addrs, reg_sizes, 2, addr_cells, size_cells); +} + /* * Device tree passed to Skiboot has to have phandles set either for all nodes * or none at all. Because relative phandles are set for cpu->l2_cache->l3_cache * chain, only first option is possible. 
*/ -static int dt_platform_update(struct device_tree *tree) +static int dt_platform_update(struct device_tree *tree, uint8_t chips) { - uint8_t chips = fsi_get_present_chips(); - struct device_tree_node *cpus; validate_dt(tree, chips); + add_memory_nodes(tree); add_dimm_sensor_nodes(tree, chips); + add_cb_fdt_data(tree); /* Find "cpus" node */ cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0); @@ -634,7 +750,7 @@ static void activate_slave_cores(uint8_t chip) } } -static void *load_fdt(const char *dtb_file) +static void *load_fdt(const char *dtb_file, uint8_t chips) { void *fdt; void *fdt_rom; @@ -646,7 +762,7 @@ static void *load_fdt(const char *dtb_file) tree = fdt_unflatten(fdt_rom); - dt_platform_update(tree); + dt_platform_update(tree, chips); fdt = malloc(dt_flat_size(tree)); if (fdt == NULL) @@ -661,8 +777,12 @@ void platform_prog_run(struct prog *prog) uint8_t chips = fsi_get_present_chips(); void *fdt; + const char *dtb_file; + + assert(chips == 0x01 || chips == 0x03); - fdt = load_fdt("1-cpu.dtb"); + dtb_file = (chips == 0x01 ? "1-cpu.dtb" : "2-cpus.dtb"); + fdt = load_fdt(dtb_file, chips); /* See asm/head.S in skiboot where fdt_entry starts at offset 0x10 */ prog_set_entry(prog, prog_start(prog) + 0x10, fdt); From 09263d1b56b36bb3a408a32e688528207bacfb9e Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 4 Mar 2022 02:01:38 +0200 Subject: [PATCH 181/213] soc/power9/mcbist.c: make globals chip-specific This is to allow running MCBISTs of multiple chips in parallel. Change-Id: I66301c47becd83042957872da8d8267670285188 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/mcbist.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c index 406f0714f70..155098dc5d7 100644 --- a/src/soc/ibm/power9/mcbist.c +++ b/src/soc/ibm/power9/mcbist.c @@ -19,8 +19,8 @@ * TODO: if we were to run both MCBISTs in parallel, we would need separate * instances of those... */ -static uint64_t mcbist_memreg_cache; -static unsigned tests; +static uint64_t mcbist_memreg_cache[MAX_CHIPS]; +static unsigned tests[MAX_CHIPS]; #define ECC_MODE 0x0008 #define DONE 0x0004 @@ -62,7 +62,7 @@ enum op_type static void commit_mcbist_memreg_cache(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; - int reg = (tests - 1) / MCBIST_TESTS_PER_REG; + int reg = (tests[chip] - 1) / MCBIST_TESTS_PER_REG; if (reg < 0) die("commit_mcbist_memreg_cache() called without adding tests first!\n"); @@ -71,19 +71,19 @@ static void commit_mcbist_memreg_cache(uint8_t chip, int mcs_i) die("Too many MCBIST instructions added\n"); /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ - write_rscom_for_chiplet(chip, id, MCBMR0Q + reg, mcbist_memreg_cache); - mcbist_memreg_cache = 0; + write_rscom_for_chiplet(chip, id, MCBMR0Q + reg, mcbist_memreg_cache[chip]); + mcbist_memreg_cache[chip] = 0; } static void add_mcbist_test(uint8_t chip, int mcs_i, uint16_t test) { - int test_i = tests % MCBIST_TESTS_PER_REG; - if (test_i == 0 && tests != 0) + int test_i = tests[chip] % MCBIST_TESTS_PER_REG; + if (test_i == 0 && tests[chip] != 0) commit_mcbist_memreg_cache(chip, mcs_i); /* This assumes cache is properly cleared. */ - mcbist_memreg_cache |= PPC_PLACE(test, test_i*16, 16); - tests++; + mcbist_memreg_cache[chip] |= PPC_PLACE(test, test_i*16, 16); + tests[chip]++; } /* @@ -148,7 +148,7 @@ void mcbist_execute(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; /* This is index of last instruction, not the new one. 
*/ - int test_i = (tests - 1) % MCBIST_TESTS_PER_REG; + int test_i = (tests[chip] - 1) % MCBIST_TESTS_PER_REG; uint64_t val; /* @@ -159,7 +159,7 @@ void mcbist_execute(uint8_t chip, int mcs_i) * Another possibility would be to start MCBIST with single no-op test (goto * with DONE bit set), but this may unnecessarily make things slower. */ - if (tests == 0) + if (tests[chip] == 0) return; /* Check if in progress */ @@ -174,7 +174,7 @@ void mcbist_execute(uint8_t chip, int mcs_i) * is tells that MCBIST should stop after this test, but this is how it is * named in the documentation. */ - mcbist_memreg_cache |= PPC_BIT(13 + test_i*16); + mcbist_memreg_cache[chip] |= PPC_BIT(13 + test_i*16); commit_mcbist_memreg_cache(chip, mcs_i); /* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ @@ -198,7 +198,7 @@ void mcbist_execute(uint8_t chip, int mcs_i) printk(BIOS_INFO, "MCBIST started after delay\n"); } - tests = 0; + tests[chip] = 0; } /* From d0e2ab9be100d56b48dfe6fa75e6a8d170b99027 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 4 Mar 2022 02:07:13 +0200 Subject: [PATCH 182/213] soc/power9/istep_14_1.c: run memdiag in parallel That is start it on all MCs of all chips and then wait for all of them. Change-Id: Icff125bfec563c2a7da74b16b9563ff993ae9e19 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/istep_14_1.c | 183 +++++++++++++++----------------- 1 file changed, 88 insertions(+), 95 deletions(-) diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index 62e6dffa47a..f00168b31b8 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -433,123 +433,116 @@ static void init_mcbist(uint8_t chip, int mcs_i) PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); } -static void mss_memdiag(uint8_t chip) +static void mss_memdiag(uint8_t chips) { + uint8_t chip; int mcs_i, mca_i; - for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data[chip].mcs[mcs_i].functional) - continue; - - /* - * FIXME: add testing for chipkill - * - * Testing touches bad DQ registers. This step also configures MC to - * deal with bad nibbles/DQs - see can_recover() in 13.11. It repeats, - * to some extent, training done in 13.12 which is TODO. Following the - * assumptions made in previous isteps, skip this for now. - */ - init_mcbist(chip, mcs_i); - - /* - * Add subtests. - * - * At the very minimum one pattern write is required, otherwise RAM will - * have random data, which most likely will throw unrecoverable errors - * because ECC is also random. - * - * Scrubbing may throw errors when address mapping is wrong even when - * maintenance pattern write can succeed for the same configuration. - */ - for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { - mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; - int dimm; - if (!mca->functional) + for (chip = 0; chip < MAX_CHIPS; chip++) { + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) continue; - for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { - if (!mca->dimm[dimm].present) + /* + * FIXME: add testing for chipkill + * + * Testing touches bad DQ registers. This step also configures MC to + * deal with bad nibbles/DQs - see can_recover() in 13.11. It repeats, + * to some extent, training done in 13.12 which is TODO. Following the + * assumptions made in previous isteps, skip this for now. + */ + init_mcbist(chip, mcs_i); + + /* + * Add subtests. 
+ * + * At the very minimum one pattern write is required, otherwise RAM will + * have random data, which most likely will throw unrecoverable errors + * because ECC is also random. + * + * Scrubbing may throw errors when address mapping is wrong even when + * maintenance pattern write can succeed for the same configuration. + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int dimm; + if (!mca->functional) continue; - add_fixed_pattern_write(chip, mcs_i, mca_i*2 + dimm); - /* - * Hostboot uses separate program for scrub due to different - * pausing conditions. Having it in the same program seems to - * be working. - */ - if (!CONFIG(SKIP_INITIAL_ECC_SCRUB)) - add_scrub(chip, mcs_i, mca_i*2 + dimm); + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + add_fixed_pattern_write(chip, mcs_i, mca_i*2 + dimm); + /* + * Hostboot uses separate program for scrub due to + * different pausing conditions. Having it in the same + * program seems to be working. + */ + if (!CONFIG(SKIP_INITIAL_ECC_SCRUB)) + add_scrub(chip, mcs_i, mca_i*2 + dimm); + } } - } - /* - * TODO: it writes whole RAM, this will take loooooong time. We can - * easily start second MCBIST while this is running. This would get more - * complicated for more patterns, but it still should be doable without - * interrupts reporting completion. - * - * Also, under right circumstances*, it should be possible to use - * broadcast mode for writing to all DIMMs simultaneously. - * - * *) Proper circumstances are: - * - every port has the same number of DIMMs (or no DIMMs at all) - * - every DIMM has the same: - * - rank configuration - * - number of row and column bits - * - width (and density, but this is implied by previous - * requirements) - * - module family (but we don't support anything but RDIMM anyway) - */ - mcbist_execute(chip, mcs_i); + /* + * TODO: it writes whole RAM, this will take loooooong time. We can + * easily start second MCBIST while this is running. This would get more + * complicated for more patterns, but it still should be doable without + * interrupts reporting completion. + * + * Also, under right circumstances*, it should be possible to use + * broadcast mode for writing to all DIMMs simultaneously. + * + * *) Proper circumstances are: + * - every port has the same number of DIMMs (or no DIMMs at all) + * - every DIMM has the same: + * - rank configuration + * - number of row and column bits + * - width (and density, but this is implied by previous + * requirements) + * - module family (but we don't support anything but RDIMM anyway) + */ + mcbist_execute(chip, mcs_i); + } } - long total_time = 0; - - for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - if (!mem_data[chip].mcs[mcs_i].functional) - continue; - - /* - * When there is no other activity on the bus, this should take roughly - * (total RAM size under MCS / transfer rate) * number of subtests. - * - * TODO: for the second MCS we should account for the time the first MCS - * took to finish it's tasks. If the second MCS has less RAM (counted in - * address bits used) it is possible that it finishes before the first - * one does. In that case the amount of time required for second MCS - * would be lost. Maybe we could get fancy and in wait_us() check for - * (mcbist_is_done(0) || mcbist_is_done(1)) instead? Maybe even unmask - * FIRs and set FIFO mode off inside mcbist_is_done()? 
- */ - long time = wait_us(1000*1000*60, (udelay(1), mcbist_is_done(chip, mcs_i))); - - /* TODO: dump error/status registers on failure */ - if (!time) - die("MCBIST%d times out (%#16.16llx)\n", mcs_i, - read_rscom_for_chiplet(chip, mcs_ids[mcs_i], MCB_CNTLSTATQ)); + for (chip = 0; chip < MAX_CHIPS; chip++) { + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; - total_time += time; - printk(BIOS_ERR, "MCBIST%d took %ld us\n", mcs_i, total_time); + /* + * When there is no other activity on the bus, this should take roughly + * (total RAM size under MCS / transfer rate) * number of subtests. + * + * Not measuring time it takes individual MCBISTs to complete as they + * all work in parallel. + */ + long time = wait_us(1000*1000*60, + (udelay(1), mcbist_is_done(chip, mcs_i))); + + /* TODO: dump error/status registers on failure */ + if (!time) { + die("MCBIST%d of chip %d times out (%#16.16llx)\n", mcs_i, chip, + read_rscom_for_chiplet(chip, mcs_ids[mcs_i], + MCB_CNTLSTATQ)); + } - /* Unmask mainline FIRs. */ - fir_unmask(chip, mcs_i); + /* Unmask mainline FIRs. */ + fir_unmask(chip, mcs_i); - /* Turn off FIFO mode to improve performance. */ - set_fifo_mode(chip, mcs_i, 0); + /* Turn off FIFO mode to improve performance. */ + set_fifo_mode(chip, mcs_i, 0); + } } } void istep_14_1(uint8_t chips) { - uint8_t chip; - printk(BIOS_EMERG, "starting istep 14.1\n"); report_istep(14, 1); - for (chip = 0; chip < MAX_CHIPS; chip++) { - if (chips & (1 << chip)) - mss_memdiag(chip); - } + mss_memdiag(chips); printk(BIOS_EMERG, "ending istep 14.1\n"); } From 5014bca88263a31fc97752c3f4ccabd4c08744a1 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 4 Mar 2022 02:12:48 +0200 Subject: [PATCH 183/213] soc/power9/romstage.c: prebuild MVPDs explicitly This makes delays in console output easier to understand. 
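MVPD is read over FSI and cached on its first use, so without this change
whichever later step happens to touch MVPD first appears to stall with no
console output. Building the cache up front, behind its own
"Building MVPDs..." message, makes the delay show up where it actually occurs.
A minimal sketch of the idiom (the real helper added below is build_mvpds();
warm_mvpd_cache() is a hypothetical name used only for illustration, and it
assumes the declarations romstage.c already pulls in for MAX_CHIPS and
mvpd_get_available_cores()):

	static void warm_mvpd_cache(uint8_t chips)
	{
		uint8_t chip;

		/* Touch MVPD of every present chip once, purely for the caching
		   side effect, so the slow FSI reads happen here rather than
		   inside whatever step consumes MVPD first. */
		for (chip = 0; chip < MAX_CHIPS; ++chip) {
			if (chips & (1 << chip))
				(void)mvpd_get_available_cores(chip);
		}
	}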
Change-Id: I7c3c14e2f8ec7446708211aad7e7a8b04519e599 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/romstage.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index bc61c05ec0e..e690288d3e8 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include @@ -357,6 +358,19 @@ static void prepare_dimm_data(uint8_t chips) die("No DIMMs detected, aborting\n"); } +static void build_mvpds(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_EMERG, "Building MVPDs...\n"); + + /* Calling mvpd_get_available_cores() triggers building and caching of MVPD */ + for (chip = 0; chip < MAX_CHIPS; ++chip) { + if (chips & (1 << chip)) + (void)mvpd_get_available_cores(chip); + } +} + void main(void) { uint8_t chips; @@ -384,6 +398,8 @@ void main(void) chips = fsi_get_present_chips(); printk(BIOS_EMERG, "Initialized FSI (chips mask: 0x%02X)\n", chips); + build_mvpds(chips); + istep_8_1(chips); istep_8_2(chips); istep_8_3(chips); From 2a3cd67af9842e4cd5534cdbb72fd59e7a56f5d1 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Fri, 4 Feb 2022 19:05:26 +0100 Subject: [PATCH 184/213] soc/power9/: hide different SCOM implementations behind common API Signed-off-by: Krystian Hebel Change-Id: I22723ee17f52c821760ea427c428408a883a78e5 --- src/arch/ppc64/include/arch/io.h | 2 - src/cpu/power9/Makefile.inc | 6 +- src/cpu/power9/scom.c | 181 ++++------------------------- src/include/cpu/power/scom.h | 30 ++++- src/soc/ibm/power9/Makefile.inc | 3 + src/soc/ibm/power9/fsi.c | 3 + src/soc/ibm/power9/istep_10_1.c | 2 + src/soc/ibm/power9/sbeio.c | 6 +- src/soc/ibm/power9/sbeio.h | 11 -- src/soc/ibm/power9/xbus.c | 12 +- src/soc/ibm/power9/xscom.c | 188 +++++++++++++++++++++++++++++++ 11 files changed, 250 insertions(+), 194 deletions(-) delete mode 100644 src/soc/ibm/power9/sbeio.h create mode 100644 src/soc/ibm/power9/xscom.c diff --git a/src/arch/ppc64/include/arch/io.h b/src/arch/ppc64/include/arch/io.h index 69efc4c1ada..5c65d75e67b 100644 --- a/src/arch/ppc64/include/arch/io.h +++ b/src/arch/ppc64/include/arch/io.h @@ -13,8 +13,6 @@ #define FW_SPACE_SIZE 0x10000000 #define LPC_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_IO_SPACE) #define FLASH_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + FLASH_IO_SPACE) -#define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000 -#define MMIO_GROUP_SIZE 0x0000200000000000 /* Enforce In-order Execution of I/O */ static inline void eieio(void) diff --git a/src/cpu/power9/Makefile.inc b/src/cpu/power9/Makefile.inc index 3585199ba89..bcab6df03e7 100644 --- a/src/cpu/power9/Makefile.inc +++ b/src/cpu/power9/Makefile.inc @@ -1,7 +1,5 @@ ## SPDX-License-Identifier: GPL-2.0-or-later -ramstage-y += power9.c - -bootblock-y += scom.c romstage-y += scom.c -ramstage-y += scom.c + +ramstage-y += power9.c diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c index a723620c3fb..f73eae436a8 100644 --- a/src/cpu/power9/scom.c +++ b/src/cpu/power9/scom.c @@ -1,178 +1,39 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Avoids defining read/write_rscom as a macro */ +#define SKIP_SCOM_DEBUG + #include -#include // HMER #include -#define XSCOM_ADDR_IND_FLAG PPC_BIT(0) -#define XSCOM_DATA_IND_FORM1 PPC_BIT(3) -#define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) -#define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) - -#define XSCOM_DATA_IND_READ PPC_BIT(0) -#define XSCOM_DATA_IND_COMPLETE 
PPC_BIT(32) -#define XSCOM_DATA_IND_ERR PPC_BITMASK(33, 35) -#define XSCOM_DATA_IND_DATA PPC_BITMASK(48, 63) -#define XSCOM_DATA_IND_FORM1_DATA PPC_BITMASK(12, 63) -#define XSCOM_IND_MAX_RETRIES 10 - -#define XSCOM_IND_FORM1_ADDR PPC_BITMASK(32, 63) -#define XSCOM_IND_FORM1_DATA_FROM_ADDR PPC_BITMASK(0, 11) -#define XSCOM_IND_FORM1_DATA_IN_ADDR PPC_BITMASK(20, 31) +extern uint64_t read_xscom(uint8_t chip, uint64_t addr); +extern void write_xscom(uint8_t chip, uint64_t addr, uint64_t data); -#define XSCOM_RCVED_STAT_REG 0x00090018 -#define XSCOM_LOG_REG 0x00090012 -#define XSCOM_ERR_REG 0x00090013 +extern uint64_t read_sbe_scom(uint8_t chip, uint64_t addr); +extern void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data); -static void reset_scom_engine(uint8_t chip) -{ - /* - * With cross-CPU SCOM accesses, first register should be cleared on the - * executing CPU, the other two on target CPU. In that case it may be - * necessary to do the remote writes in assembly directly to skip checking - * HMER and possibly end in a loop. - */ - write_rscom(0, XSCOM_RCVED_STAT_REG, 0); - write_rscom(chip, XSCOM_LOG_REG, 0); - write_rscom(chip, XSCOM_ERR_REG, 0); - clear_hmer(); - eieio(); -} +/* Start with SBEIO. Change this to FSI if needed. */ +static uint64_t (*read_scom_secondary)(uint8_t, uint64_t) = read_sbe_scom; +static void (*write_scom_secondary)(uint8_t, uint64_t, uint64_t) = write_sbe_scom; -static uint64_t read_scom_direct(uint8_t chip, uint64_t reg_address) +void switch_secondary_scom_to_xscom(void) { - uint64_t val; - uint64_t hmer = 0; - do { - /* - * Clearing HMER on every SCOM access seems to slow down CCS up - * to a point where it starts hitting timeout on "less ideal" - * DIMMs for write centering. Clear it only if this do...while - * executes more than once. - */ - if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) - clear_hmer(); - - eieio(); - asm volatile( - "ldcix %0, %1, %2" : - "=r"(val) : - "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE), - "r"(reg_address << 3)); - eieio(); - hmer = read_hmer(); - } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); - - if (hmer & SPR_HMER_XSCOM_STATUS) { - reset_scom_engine(chip); - /* - * All F's are returned in case of error, but code polls for a set bit - * after changes that can make such error appear (e.g. clock settings). - * Return 0 so caller won't have to test for all F's in that case. 
- */ - return 0; - } - return val; -} - -static void write_scom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) -{ - uint64_t hmer = 0; - do { - /* See comment in read_scom_direct() */ - if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) - clear_hmer(); - - eieio(); - asm volatile( - "stdcix %0, %1, %2":: - "r"(data), - "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE ), - "r"(reg_address << 3)); - eieio(); - hmer = read_hmer(); - } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); - - if (hmer & SPR_HMER_XSCOM_STATUS) - reset_scom_engine(chip); -} - -static void write_scom_indirect_form0(uint8_t chip, uint64_t reg_address, uint64_t value) -{ - uint64_t addr; - uint64_t data; - addr = reg_address & 0x7FFFFFFF; - data = reg_address & XSCOM_ADDR_IND_ADDR; - data |= value & XSCOM_ADDR_IND_DATA; - - write_scom_direct(chip, addr, data); - - for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { - data = read_scom_direct(chip, addr); - if((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { - return; - } - else if(data & XSCOM_DATA_IND_COMPLETE) { - printk(BIOS_EMERG, "SCOM WR error %16.16llx = %16.16llx : %16.16llx\n", - reg_address, value, data); - } - // TODO: delay? - } + read_scom_secondary = read_xscom; + write_scom_secondary = write_xscom; } -static void write_scom_indirect_form1(uint8_t chip, uint64_t reg_address, uint64_t value) -{ - uint64_t addr; - uint64_t data; - - if (value & XSCOM_IND_FORM1_DATA_FROM_ADDR) - die("Value for form 1 indirect SCOM must have bits 0-11 zeroed!"); - - data = value | ((reg_address & XSCOM_IND_FORM1_DATA_IN_ADDR) << 20); - addr = reg_address & XSCOM_IND_FORM1_ADDR; - - write_scom_direct(chip, addr, data); -} - -static uint64_t read_scom_indirect_form0(uint8_t chip, uint64_t reg_address) -{ - uint64_t addr; - uint64_t data; - addr = reg_address & 0x7FFFFFFF; - data = XSCOM_DATA_IND_READ | (reg_address & XSCOM_ADDR_IND_ADDR); - - write_scom_direct(chip, addr, data); - - for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { - data = read_scom_direct(chip, addr); - if((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { - break; - } else if(data & XSCOM_DATA_IND_COMPLETE) { - printk(BIOS_EMERG, "SCOM RD error %16.16llx : %16.16llx\n", - reg_address, data); - } - // TODO: delay? 
- } - - return data & XSCOM_DATA_IND_DATA; -} - -void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) +uint64_t read_rscom(uint8_t chip, uint64_t addr) { - if (!(addr & XSCOM_ADDR_IND_FLAG)) - write_scom_direct(chip, addr, data); - else if (!(addr & XSCOM_DATA_IND_FORM1)) - write_scom_indirect_form0(chip, addr, data); + if (chip == 0) + return read_xscom(chip, addr); else - write_scom_indirect_form1(chip, addr, data); + return read_scom_secondary(chip, addr); } -uint64_t read_rscom(uint8_t chip, uint64_t addr) +void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) { - if (!(addr & XSCOM_ADDR_IND_FLAG)) - return read_scom_direct(chip, addr); - else if (!(addr & XSCOM_DATA_IND_FORM1)) - return read_scom_indirect_form0(chip, addr); + if (chip == 0) + write_xscom(chip, addr, data); else - die("Form 1 indirect SCOM does not have a read operation!"); + write_scom_secondary(chip, addr, data); } diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index e6a84110636..2b5025888d6 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -89,11 +89,34 @@ static const chiplet_id_t mcs_to_nest[] = [MC23_CHIPLET_ID] = N1_CHIPLET_ID, }; -/* "rscom" are generic ("r" is for remote) XSCOM functions, other functions are - * equivalent to rscom calls for chip #0 */ +/* + * Usage of SCOM engines: + * - CPU0: + * - always XSCOM + * - CPU1++: + * - FSI SCOM before and during 8.4 (not needed, not implemented) + * - SBEIO SCOM after 8.4, before XSCOM is enabled in 10.1 + * - XSCOM after 10.1 + * + * Only romstage has to ever use anything else than XSCOM - bootblock doesn't + * access secondary CPUs at all and ramstage can use XSCOM from the beginning. + * SCOM dispatcher code is thus not compiled for stages other than romstage and + * assembly label is used to alias high-level functions directly to XSCOM engine + * implementation. 
+ */ + +#if ENV_ROMSTAGE +void switch_secondary_scom_to_xscom(void); void write_rscom(uint8_t chip, uint64_t addr, uint64_t data); uint64_t read_rscom(uint8_t chip, uint64_t addr); +#else +void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) asm("write_xscom"); +uint64_t read_rscom(uint8_t chip, uint64_t addr) asm("read_xscom"); +#endif + +/* "rscom" are generic ("r" is for remote) SCOM functions, other functions are + * equivalent to rscom calls for chip #0 */ static inline void rscom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) { @@ -158,9 +181,6 @@ static inline uint64_t read_scom(uint64_t addr) #if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) #include -#define write_scom(x, y) write_rscom(0, x, y) -#define read_scom(x) read_rscom(0, x) - #define write_rscom(c, x, y) \ ({ \ uint8_t __cw = (c); \ diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 6a544e4fd2a..50ac8da7dd0 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -4,6 +4,7 @@ ifeq ($(CONFIG_CPU_IBM_POWER9),y) bootblock-y += bootblock.c bootblock-y += rom_media.c +bootblock-y += xscom.c romstage-y += rom_media.c romstage-y += romstage.c romstage-y += mvpd.c @@ -45,6 +46,7 @@ romstage-y += timer.c romstage-y += fsi.c romstage-y += sbeio.c romstage-y += xbus.c +romstage-y += xscom.c ramstage-y += chip.c ramstage-y += homer.c ramstage-y += rom_media.c @@ -61,6 +63,7 @@ ramstage-y += int_vectors.S ramstage-y += i2c.c ramstage-y += occ.c ramstage-y += fsi.c +ramstage-y += xscom.c MB_DIR = src/mainboard/$(MAINBOARDDIR) ONECPU_DTB = 1-cpu.dtb diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c index 7bc44612ac1..826ab7e227b 100644 --- a/src/soc/ibm/power9/fsi.c +++ b/src/soc/ibm/power9/fsi.c @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +/* FSI is used to read MVPD, logging which takes too much time */ +#define SKIP_SCOM_DEBUG + #include "fsi.h" #include diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c index 335714c576c..d39b6fa05a4 100644 --- a/src/soc/ibm/power9/istep_10_1.c +++ b/src/soc/ibm/power9/istep_10_1.c @@ -767,6 +767,8 @@ void istep_10_1(uint8_t chips) p9_build_smp(chips); if (chips & 0x02) { + switch_secondary_scom_to_xscom(); + /* Sanity check that XSCOM works for the second CPU */ if (read_rscom(1, 0xF000F) == 0xFFFFFFFFFFFFFFFF) die("XSCOM doesn't work for the second CPU\n"); diff --git a/src/soc/ibm/power9/sbeio.c b/src/soc/ibm/power9/sbeio.c index 2c049ce68db..c9400e9429f 100644 --- a/src/soc/ibm/power9/sbeio.c +++ b/src/soc/ibm/power9/sbeio.c @@ -1,7 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-only */ -#include "sbeio.h" - #include #include #include @@ -203,6 +201,10 @@ static void read_response(uint8_t chip, void *response, uint32_t word_count) die("Invalid status in SBE IO response\n"); } +/* Private API used only by SCOM dispatcher, no need to expose it */ +void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_sbe_scom(uint8_t chip, uint64_t addr); + void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data) { struct put_scom_request_t request = { diff --git a/src/soc/ibm/power9/sbeio.h b/src/soc/ibm/power9/sbeio.h deleted file mode 100644 index b2b3ae2ae01..00000000000 --- a/src/soc/ibm/power9/sbeio.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef __SOC_IBM_POWER9_SBEIO_H -#define __SOC_IBM_POWER9_SBEIO_H - -#include - -void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data); -uint64_t 
read_sbe_scom(uint8_t chip, uint64_t addr); - -#endif /* __SOC_IBM_POWER9_SBEIO_H */ diff --git a/src/soc/ibm/power9/xbus.c b/src/soc/ibm/power9/xbus.c index 22aa182a3aa..8d6bd7af3c8 100644 --- a/src/soc/ibm/power9/xbus.c +++ b/src/soc/ibm/power9/xbus.c @@ -5,8 +5,6 @@ #include #include -#include "sbeio.h" - /* Updates address that targets XBus chiplet to use specified XBus link number. * Does nothing to non-XBus addresses. */ static uint64_t xbus_addr(uint8_t xbus, uint64_t addr) @@ -38,18 +36,12 @@ void put_scom(uint8_t chip, uint64_t addr, uint64_t data) { addr = xbus_addr(/*xbus=*/1, addr); - if (chip == 0) - write_scom(addr, data); - else - write_sbe_scom(chip, addr, data); + write_rscom(chip, addr, data); } uint64_t get_scom(uint8_t chip, uint64_t addr) { addr = xbus_addr(/*xbus=*/1, addr); - if (chip == 0) - return read_scom(addr); - else - return read_sbe_scom(chip, addr); + return read_rscom(chip, addr); } diff --git a/src/soc/ibm/power9/xscom.c b/src/soc/ibm/power9/xscom.c new file mode 100644 index 00000000000..0358abc487d --- /dev/null +++ b/src/soc/ibm/power9/xscom.c @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include +#include // HMER +#include + +#define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000 +#define MMIO_GROUP_SIZE 0x0000200000000000 + +#define XSCOM_ADDR_IND_FLAG PPC_BIT(0) +#define XSCOM_DATA_IND_FORM1 PPC_BIT(3) +#define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) +#define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) + +#define XSCOM_DATA_IND_READ PPC_BIT(0) +#define XSCOM_DATA_IND_COMPLETE PPC_BIT(32) +#define XSCOM_DATA_IND_ERR PPC_BITMASK(33, 35) +#define XSCOM_DATA_IND_DATA PPC_BITMASK(48, 63) +#define XSCOM_DATA_IND_FORM1_DATA PPC_BITMASK(12, 63) +#define XSCOM_IND_MAX_RETRIES 10 + +#define XSCOM_IND_FORM1_ADDR PPC_BITMASK(32, 63) +#define XSCOM_IND_FORM1_DATA_FROM_ADDR PPC_BITMASK(0, 11) +#define XSCOM_IND_FORM1_DATA_IN_ADDR PPC_BITMASK(20, 31) + +#define XSCOM_RCVED_STAT_REG 0x00090018 +#define XSCOM_LOG_REG 0x00090012 +#define XSCOM_ERR_REG 0x00090013 + +static void write_xscom_direct(uint8_t chip, uint64_t reg_address, uint64_t data); + +static void reset_xscom_engine(uint8_t chip) +{ + /* + * With cross-CPU SCOM accesses, first register should be cleared on the + * executing CPU, the other two on target CPU. In that case it may be + * necessary to do the remote writes in assembly directly to skip checking + * HMER and possibly end in a loop. + */ + write_xscom_direct(0, XSCOM_RCVED_STAT_REG, 0); + write_xscom_direct(chip, XSCOM_LOG_REG, 0); + write_xscom_direct(chip, XSCOM_ERR_REG, 0); + clear_hmer(); + eieio(); +} + +static uint64_t read_xscom_direct(uint8_t chip, uint64_t reg_address) +{ + uint64_t val; + uint64_t hmer = 0; + do { + /* + * Clearing HMER on every SCOM access seems to slow down CCS up + * to a point where it starts hitting timeout on "less ideal" + * DIMMs for write centering. Clear it only if this do...while + * executes more than once. + */ + if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) + clear_hmer(); + + eieio(); + asm volatile( + "ldcix %0, %1, %2" : + "=r"(val) : + "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE), + "r"(reg_address << 3)); + eieio(); + hmer = read_hmer(); + } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); + + if (hmer & SPR_HMER_XSCOM_STATUS) { + reset_xscom_engine(chip); + /* + * All F's are returned in case of error, but code polls for a set bit + * after changes that can make such error appear (e.g. clock settings). 
+ * Return 0 so caller won't have to test for all F's in that case. + */ + return 0; + } + return val; +} + +static void write_xscom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) +{ + uint64_t hmer = 0; + do { + /* See comment in read_xscom_direct() */ + if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) + clear_hmer(); + + eieio(); + asm volatile( + "stdcix %0, %1, %2":: + "r"(data), + "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE ), + "r"(reg_address << 3)); + eieio(); + hmer = read_hmer(); + } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); + + if (hmer & SPR_HMER_XSCOM_STATUS) + reset_xscom_engine(chip); +} + +static void write_xscom_indirect_form0(uint8_t chip, uint64_t reg_address, uint64_t value) +{ + uint64_t addr; + uint64_t data; + addr = reg_address & 0x7FFFFFFF; + data = reg_address & XSCOM_ADDR_IND_ADDR; + data |= value & XSCOM_ADDR_IND_DATA; + + write_xscom_direct(chip, addr, data); + + for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { + data = read_xscom_direct(chip, addr); + if((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { + return; + } + else if(data & XSCOM_DATA_IND_COMPLETE) { + printk(BIOS_EMERG, "SCOM WR error %16.16llx = %16.16llx : %16.16llx\n", + reg_address, value, data); + } + // TODO: delay? + } +} + +static void write_xscom_indirect_form1(uint8_t chip, uint64_t reg_address, uint64_t value) +{ + uint64_t addr; + uint64_t data; + + if (value & XSCOM_IND_FORM1_DATA_FROM_ADDR) + die("Value for form 1 indirect SCOM must have bits 0-11 zeroed!"); + + data = value | ((reg_address & XSCOM_IND_FORM1_DATA_IN_ADDR) << 20); + addr = reg_address & XSCOM_IND_FORM1_ADDR; + + write_xscom_direct(chip, addr, data); +} + +static uint64_t read_xscom_indirect_form0(uint8_t chip, uint64_t reg_address) +{ + uint64_t addr; + uint64_t data; + addr = reg_address & 0x7FFFFFFF; + data = XSCOM_DATA_IND_READ | (reg_address & XSCOM_ADDR_IND_ADDR); + + write_xscom_direct(chip, addr, data); + + for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { + data = read_xscom_direct(chip, addr); + if((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { + break; + } + else if(data & XSCOM_DATA_IND_COMPLETE) { + printk(BIOS_EMERG, "SCOM RD error %16.16llx : %16.16llx\n", + reg_address, data); + } + // TODO: delay? 
+ } + + return data & XSCOM_DATA_IND_DATA; +} + +/* Private API used only by SCOM dispatcher, no need to expose it */ +void write_xscom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_xscom(uint8_t chip, uint64_t addr); + +void write_xscom(uint8_t chip, uint64_t addr, uint64_t data) +{ + if (!(addr & XSCOM_ADDR_IND_FLAG)) + write_xscom_direct(chip, addr, data); + else if (!(addr & XSCOM_DATA_IND_FORM1)) + write_xscom_indirect_form0(chip, addr, data); + else + write_xscom_indirect_form1(chip, addr, data); +} + +uint64_t read_xscom(uint8_t chip, uint64_t addr) +{ + if (!(addr & XSCOM_ADDR_IND_FLAG)) + return read_xscom_direct(chip, addr); + else if (!(addr & XSCOM_DATA_IND_FORM1)) + return read_xscom_indirect_form0(chip, addr); + else + die("Form 1 indirect SCOM does not have a read operation!"); +} From f0b4fae6febed70ae65137ab6114c8451b45356d Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 14 Apr 2022 19:49:17 +0200 Subject: [PATCH 185/213] soc/power9: change local SCOM accesses to remote ones, remove local accessors Signed-off-by: Krystian Hebel Change-Id: I5e1d9ea393800cdcbf435262ae14e11cc7fea496 --- src/include/cpu/power/scom.h | 58 +-------------------------------- src/soc/ibm/power9/Makefile.inc | 1 - src/soc/ibm/power9/fsi.c | 8 ++--- src/soc/ibm/power9/homer.c | 26 +++++++-------- src/soc/ibm/power9/istep_8_1.c | 4 +-- src/soc/ibm/power9/powerbus.c | 2 +- src/soc/ibm/power9/romstage.c | 4 +-- src/soc/ibm/power9/xive.c | 20 ++++++------ 8 files changed, 33 insertions(+), 90 deletions(-) diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index 2b5025888d6..e6d446a1d47 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -115,9 +115,6 @@ void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) asm("write_xscom"); uint64_t read_rscom(uint8_t chip, uint64_t addr) asm("read_xscom"); #endif -/* "rscom" are generic ("r" is for remote) SCOM functions, other functions are - * equivalent to rscom calls for chip #0 */ - static inline void rscom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) { uint64_t data = read_rscom(chip, addr); @@ -166,18 +163,6 @@ static inline void rscom_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint rscom_and_or_for_chiplet(chip, chiplet, addr, ~0, or); } -/* "scom" are functions with chip number being fixed at 0 */ - -static inline void write_scom(uint64_t addr, uint64_t data) -{ - return write_rscom(0, addr, data); -} - -static inline uint64_t read_scom(uint64_t addr) -{ - return read_rscom(0, addr); -} - #if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) #include @@ -201,50 +186,9 @@ static inline uint64_t read_scom(uint64_t addr) #endif -static inline void scom_and_or(uint64_t addr, uint64_t and, uint64_t or) -{ - rscom_and_or(0, addr, and, or); -} - -static inline void scom_and(uint64_t addr, uint64_t and) -{ - scom_and_or(addr, and, 0); -} - -static inline void scom_or(uint64_t addr, uint64_t or) -{ - scom_and_or(addr, ~0, or); -} - -static inline void write_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t data) -{ - write_rscom_for_chiplet(0, chiplet, addr, data); -} - -static inline uint64_t read_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr) -{ - return read_rscom_for_chiplet(0, chiplet, addr); -} - -static inline void scom_and_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, - uint64_t and, uint64_t or) -{ - rscom_and_or_for_chiplet(0, chiplet, addr, and, or); -} - -static inline void scom_and_for_chiplet(chiplet_id_t chiplet, uint64_t 
addr, uint64_t and) -{ - scom_and_or_for_chiplet(chiplet, addr, and, 0); -} - -static inline void scom_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t or) -{ - scom_and_or_for_chiplet(chiplet, addr, ~0, or); -} - static inline uint8_t get_dd(void) { - uint64_t val = read_scom(0xF000F); + uint64_t val = read_rscom(0, 0xF000F); val = ((val >> 52) & 0x0F) | ((val >> 56) & 0xF0); return (uint8_t) val; } diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 50ac8da7dd0..2ef412b044f 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -4,7 +4,6 @@ ifeq ($(CONFIG_CPU_IBM_POWER9),y) bootblock-y += bootblock.c bootblock-y += rom_media.c -bootblock-y += xscom.c romstage-y += rom_media.c romstage-y += romstage.c romstage-y += mvpd.c diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c index 826ab7e227b..55eee158a18 100644 --- a/src/soc/ibm/power9/fsi.c +++ b/src/soc/ibm/power9/fsi.c @@ -175,7 +175,7 @@ static void basic_master_init(void) fsi_reset_pib2opb(chip); /* Ensure we don't have any errors before we even start */ - tmp = read_scom(FSI2OPB_OFFSET_0 | OPB_REG_STAT); + tmp = read_rscom(0, FSI2OPB_OFFSET_0 | OPB_REG_STAT); if (tmp & OPB_STAT_NON_MFSI_ERR) die("Unclearable errors on MFSI initialization: 0x%016llx\n", tmp); @@ -341,10 +341,10 @@ static inline uint64_t poll_opb(uint8_t chip) /* Timeout after 10ms, check every 10us, supposedly there is hardware * timeout after 1ms */ - tmp = read_scom(stat_addr); + tmp = read_rscom(0, stat_addr); for (i = 0; (tmp & OPB_STAT_BUSY) && !(tmp & err_mask) && i < MAX_WAIT_LOOPS; i++) { udelay(TIMEOUT_STEP_US); - tmp = read_scom(stat_addr); + tmp = read_rscom(0, stat_addr); } if (tmp & err_mask) @@ -393,7 +393,7 @@ uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_ if (!is_read) cmd |= WRITE_NOT_READ; - write_scom(FSI2OPB_OFFSET_0 | OPB_REG_CMD, cmd); + write_rscom(0, FSI2OPB_OFFSET_0 | OPB_REG_CMD, cmd); /* Poll for complete and get the data back. 
*/ response = poll_opb(chip); diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 94b1c236514..eaced074efa 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -921,36 +921,36 @@ static uint64_t get_available_cores(uint8_t chip, int *me) static void psu_command(uint8_t flags, long time) { /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG */ - if (read_scom(0x000D0060) & PPC_BIT(0)) + if (read_rscom(0, 0x000D0060) & PPC_BIT(0)) die("MBOX to SBE busy, this should not happen\n"); - if (read_scom(0x000D0063) & PPC_BIT(0)) { + if (read_rscom(0, 0x000D0063) & PPC_BIT(0)) { printk(BIOS_ERR, "SBE to Host doorbell already active, clearing it\n"); - write_scom(0x000D0064, ~PPC_BIT(0)); + write_rscom(0, 0x000D0064, ~PPC_BIT(0)); } /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ /* REQUIRE_RESPONSE, CLASS_CORE_STATE, CMD_CONTROL_DEADMAN_LOOP, flags */ - write_scom(0x000D0050, 0x000001000000D101 | PPC_PLACE(flags, 24, 8)); + write_rscom(0, 0x000D0050, 0x000001000000D101 | PPC_PLACE(flags, 24, 8)); /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ - write_scom(0x000D0051, time); + write_rscom(0, 0x000D0051, time); /* Ring the host->SBE doorbell */ /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR */ - write_scom(0x000D0062, PPC_BIT(0)); + write_rscom(0, 0x000D0062, PPC_BIT(0)); /* Wait for response */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG */ - time = wait_ms(time, read_scom(0x000D0063) & PPC_BIT(0)); + time = wait_ms(time, read_rscom(0, 0x000D0063) & PPC_BIT(0)); if (!time) die("Timed out while waiting for SBE response\n"); /* Clear SBE->host doorbell */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND */ - write_scom(0x000D0064, ~PPC_BIT(0)); + write_rscom(0, 0x000D0064, ~PPC_BIT(0)); } #define DEADMAN_LOOP_START 0x0001 @@ -962,11 +962,11 @@ static void block_wakeup_int(int core, int state) /* Depending on requested state we write to SCOM1 (CLEAR) or SCOM2 (OR). */ uint64_t scom = state ? 0x200F0102 : 0x200F0101; - write_scom_for_chiplet(EC00_CHIPLET_ID + core, 0x200F0108, PPC_BIT(1)); + write_rscom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0108, PPC_BIT(1)); /* Register is documented, but its bits are reserved... */ - write_scom_for_chiplet(EC00_CHIPLET_ID + core, scom, PPC_BIT(6)); + write_rscom_for_chiplet(0, EC00_CHIPLET_ID + core, scom, PPC_BIT(6)); - write_scom_for_chiplet(EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); + write_rscom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); } /* @@ -1087,9 +1087,9 @@ static void istep_16_1(int this_core) * No need to handle the timeout, if it happens, SBE will checkstop the * system anyway. */ - wait_us(time, read_scom(0x000D0063) & PPC_BIT(2)); + wait_us(time, read_rscom(0, 0x000D0063) & PPC_BIT(2)); - write_scom(0x000D0064, ~PPC_BIT(2)); + write_rscom(0, 0x000D0064, ~PPC_BIT(2)); /* * This tells SBE that we were properly awoken. Hostboot uses default diff --git a/src/soc/ibm/power9/istep_8_1.c b/src/soc/ibm/power9/istep_8_1.c index f0ea338e316..1dc46497ce1 100644 --- a/src/soc/ibm/power9/istep_8_1.c +++ b/src/soc/ibm/power9/istep_8_1.c @@ -173,7 +173,7 @@ static void setup_sbe_config(uint8_t chip) scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 2), 31); - boot_flags = (read_scom(MBOX_SCRATCH_REG1 + 2) >> 32); + boot_flags = (read_rscom(0, MBOX_SCRATCH_REG1 + 2) >> 32); risk_level = (get_dd() < 0x23 ? 
0 : 4); /* Note that the two fields overlap (boot flags include risk level), so @@ -280,7 +280,7 @@ static void setup_sbe_config(uint8_t chip) static int get_master_sbe_boot_seeprom(void) { enum { PERV_SB_CS_SCOM = 0x00050008 }; - return (read_scom(PERV_SB_CS_SCOM) & SBE_BOOT_SELECT_MASK) ? 1 : 0; + return (read_rscom(0, PERV_SB_CS_SCOM) & SBE_BOOT_SELECT_MASK) ? 1 : 0; } static void set_sbe_boot_seeprom(uint8_t chip, int seeprom_side) diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c index c8c863a37f2..df7f7228829 100644 --- a/src/soc/ibm/power9/powerbus.c +++ b/src/soc/ibm/power9/powerbus.c @@ -167,7 +167,7 @@ static void calculate_epsilons(struct powerbus_cfg *cfg) uint32_t i; - uint64_t scratch_reg6 = read_scom(MBOX_SCRATCH_REG1 + 5); + uint64_t scratch_reg6 = read_rscom(0, MBOX_SCRATCH_REG1 + 5); /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ bool node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index e690288d3e8..df0e6ed77f6 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -447,13 +447,13 @@ void main(void) timestamp_add_now(TS_INITRAM_END); /* Test if SCOM still works. Maybe should check also indirect access? */ - printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0xF000F)); + printk(BIOS_DEBUG, "0xF000F = %llx\n", read_rscom(0, 0xF000F)); /* * Halt to give a chance to inspect FIRs, otherwise checkstops from * ramstage may cover up the failure in romstage. */ - if (read_scom(0xF000F) == 0xFFFFFFFFFFFFFFFF) + if (read_rscom(0, 0xF000F) == 0xFFFFFFFFFFFFFFFF) die("SCOM stopped working, check FIRs, halting now\n"); cbmem_initialize_empty(); diff --git a/src/soc/ibm/power9/xive.c b/src/soc/ibm/power9/xive.c index f31e96b8525..6cc631ae072 100644 --- a/src/soc/ibm/power9/xive.c +++ b/src/soc/ibm/power9/xive.c @@ -41,32 +41,32 @@ void configure_xive(int core) memcpy((void *)0xEA0, hyp_virt_int, CODE_SIZE(hyp_virt_int)); /* IVPE BAR + enable bit */ - write_scom(0x05013012, IVPE_BAR | PPC_BIT(0)); + write_rscom(0, 0x05013012, IVPE_BAR | PPC_BIT(0)); /* FSP BAR */ - write_scom(0x0501290B, FSP_BAR); + write_rscom(0, 0x0501290B, FSP_BAR); /* PSI HB BAR + enable bit */ /* TODO: check if 2 separate writes are required */ - write_scom(0x0501290A, PSI_HB_BAR); - write_scom(0x0501290A, PSI_HB_BAR | PPC_BIT(63)); + write_rscom(0, 0x0501290A, PSI_HB_BAR); + write_rscom(0, 0x0501290A, PSI_HB_BAR | PPC_BIT(63)); /* Disable VPC Pull error */ - scom_and(0x05013179, ~PPC_BIT(30)); + rscom_and(0, 0x05013179, ~PPC_BIT(30)); /* PSI HB ESB BAR + enable bit */ /* TODO: check if 2 separate writes are required */ - write_scom(0x05012916, PSI_HB_ESB_BAR); - write_scom(0x05012916, PSI_HB_ESB_BAR | PPC_BIT(63)); + write_rscom(0, 0x05012916, PSI_HB_ESB_BAR); + write_rscom(0, 0x05012916, PSI_HB_ESB_BAR | PPC_BIT(63)); /* XIVE IC BAR + enable bit */ - write_scom(0x05013010, XIVE_IC_BAR | PPC_BIT(0)); + write_rscom(0, 0x05013010, XIVE_IC_BAR | PPC_BIT(0)); /* Set HB mode on P3PC register */ - scom_or(0x05013110, PPC_BIT(33)); + rscom_or(0, 0x05013110, PPC_BIT(33)); /* Disable PSI interrupts */ - write_scom(0x05012913, PPC_BIT(3)); + write_rscom(0, 0x05012913, PPC_BIT(3)); void *esb_bar = (void *)PSI_HB_ESB_BAR; /* Mask all interrupt sources */ From 22e6c09ecf740e93deb407b9b47442736088ac89 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 15 Apr 2022 22:07:46 +0300 Subject: [PATCH 186/213] soc/power9: drop "r" from all 
rscom-functions List of renamed functions: * write_rscom() * read_rscom() * rscom_and_or() * rscom_and() * rscom_or() * write_rscom_for_chiplet() * read_rscom_for_chiplet() * rscom_and_or_for_chiplet() * rscom_and_for_chiplet() * rscom_or_for_chiplet() Change-Id: I28cec407798433c4d1a5d82ab10edbd4565c42f4 Signed-off-by: Sergii Dmytruk --- src/cpu/power9/scom.c | 6 +- src/include/cpu/power/istep_13.h | 6 +- src/include/cpu/power/scom.h | 98 +++++----- src/soc/ibm/power9/ccs.c | 92 ++++----- src/soc/ibm/power9/chip.c | 20 +- src/soc/ibm/power9/fsi.c | 12 +- src/soc/ibm/power9/homer.c | 314 +++++++++++++++---------------- src/soc/ibm/power9/i2c.c | 8 +- src/soc/ibm/power9/istep_10_1.c | 2 +- src/soc/ibm/power9/istep_10_10.c | 136 ++++++------- src/soc/ibm/power9/istep_10_12.c | 4 +- src/soc/ibm/power9/istep_10_13.c | 40 ++-- src/soc/ibm/power9/istep_10_6.c | 190 +++++++++---------- src/soc/ibm/power9/istep_13_10.c | 22 +-- src/soc/ibm/power9/istep_13_11.c | 26 +-- src/soc/ibm/power9/istep_13_13.c | 37 ++-- src/soc/ibm/power9/istep_13_2.c | 67 ++++--- src/soc/ibm/power9/istep_13_3.c | 16 +- src/soc/ibm/power9/istep_13_4.c | 38 ++-- src/soc/ibm/power9/istep_13_6.c | 102 +++++----- src/soc/ibm/power9/istep_13_8.c | 130 ++++++------- src/soc/ibm/power9/istep_13_9.c | 30 +-- src/soc/ibm/power9/istep_14_1.c | 56 +++--- src/soc/ibm/power9/istep_14_2.c | 20 +- src/soc/ibm/power9/istep_14_3.c | 30 +-- src/soc/ibm/power9/istep_14_5.c | 44 ++--- src/soc/ibm/power9/istep_18_11.c | 148 +++++++-------- src/soc/ibm/power9/istep_18_12.c | 46 ++--- src/soc/ibm/power9/istep_8_1.c | 4 +- src/soc/ibm/power9/mcbist.c | 12 +- src/soc/ibm/power9/occ.c | 70 +++---- src/soc/ibm/power9/powerbus.c | 2 +- src/soc/ibm/power9/romstage.c | 4 +- src/soc/ibm/power9/xbus.c | 4 +- src/soc/ibm/power9/xive.c | 20 +- 35 files changed, 928 insertions(+), 928 deletions(-) diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c index f73eae436a8..06bc89da42b 100644 --- a/src/cpu/power9/scom.c +++ b/src/cpu/power9/scom.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -/* Avoids defining read/write_rscom as a macro */ +/* Avoids defining read/write_scom as a macro */ #define SKIP_SCOM_DEBUG #include @@ -22,7 +22,7 @@ void switch_secondary_scom_to_xscom(void) write_scom_secondary = write_xscom; } -uint64_t read_rscom(uint8_t chip, uint64_t addr) +uint64_t read_scom(uint8_t chip, uint64_t addr) { if (chip == 0) return read_xscom(chip, addr); @@ -30,7 +30,7 @@ uint64_t read_rscom(uint8_t chip, uint64_t addr) return read_scom_secondary(chip, addr); } -void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) +void write_scom(uint8_t chip, uint64_t addr, uint64_t data) { if (chip == 0) write_xscom(chip, addr, data); diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h index 8437cffdce8..f321a8d6cb1 100644 --- a/src/include/cpu/power/istep_13.h +++ b/src/include/cpu/power/istep_13.h @@ -167,7 +167,7 @@ static inline void mca_and_or(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t */ unsigned mul = (scom & PPC_BIT(0) || (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; - rscom_and_or_for_chiplet(chip, mcs, scom + mca * mul, and, or); + scom_and_or_for_chiplet(chip, mcs, scom + mca * mul, and, or); } static inline void dp_mca_and_or(uint8_t chip, chiplet_id_t mcs, int dp, int mca, @@ -182,7 +182,7 @@ static inline uint64_t mca_read(uint8_t chip, chiplet_id_t mcs, int mca, uint64_ * general, except for (only?) direct PHY registers. 
*/ unsigned mul = (scom & PPC_BIT(0) || (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; - return read_rscom_for_chiplet(chip, mcs, scom + mca * mul); + return read_scom_for_chiplet(chip, mcs, scom + mca * mul); } static inline void mca_write(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom, @@ -192,7 +192,7 @@ static inline void mca_write(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t s * general, except for (only?) direct PHY registers. */ unsigned mul = (scom & PPC_BIT(0) || (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; - write_rscom_for_chiplet(chip, mcs, scom + mca * mul, val); + write_scom_for_chiplet(chip, mcs, scom + mca * mul, val); } static inline uint64_t dp_mca_read(uint8_t chip, chiplet_id_t mcs, int dp, int mca, uint64_t scom) diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index e6d446a1d47..19594782342 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -108,87 +108,89 @@ static const chiplet_id_t mcs_to_nest[] = #if ENV_ROMSTAGE void switch_secondary_scom_to_xscom(void); -void write_rscom(uint8_t chip, uint64_t addr, uint64_t data); -uint64_t read_rscom(uint8_t chip, uint64_t addr); +void write_scom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_scom(uint8_t chip, uint64_t addr); #else -void write_rscom(uint8_t chip, uint64_t addr, uint64_t data) asm("write_xscom"); -uint64_t read_rscom(uint8_t chip, uint64_t addr) asm("read_xscom"); +void write_scom(uint8_t chip, uint64_t addr, uint64_t data) asm("write_xscom"); +uint64_t read_scom(uint8_t chip, uint64_t addr) asm("read_xscom"); #endif -static inline void rscom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) +#if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) +#include + +#define write_scom(c, x, y) \ +({ \ + uint8_t __cw = (c); \ + uint64_t __xw = (x); \ + uint64_t __yw = (y); \ + printk(BIOS_SPEW, "SCOM W P%d %016llX %016llX\n", __cw, __xw, __yw); \ + write_scom(__cw, __xw, __yw); \ +}) + +#define read_scom(c, x) \ +({ \ + uint8_t __cr = (c); \ + uint64_t __xr = (x); \ + uint64_t __yr = read_scom(__cr, __xr); \ + printk(BIOS_SPEW, "SCOM R P%d %016llX %016llX\n", __cr, __xr, __yr); \ + __yr; \ +}) + +#endif + +static inline void scom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) { - uint64_t data = read_rscom(chip, addr); - write_rscom(chip, addr, (data & and) | or); + uint64_t data = read_scom(chip, addr); + write_scom(chip, addr, (data & and) | or); } -static inline void rscom_and(uint8_t chip, int64_t addr, uint64_t and) +static inline void scom_and(uint8_t chip, int64_t addr, uint64_t and) { - rscom_and_or(chip, addr, and, 0); + scom_and_or(chip, addr, and, 0); } -static inline void rscom_or(uint8_t chip, uint64_t addr, uint64_t or) +static inline void scom_or(uint8_t chip, uint64_t addr, uint64_t or) { - rscom_and_or(chip, addr, ~0, or); + scom_and_or(chip, addr, ~0, or); } -static inline void write_rscom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, - uint64_t addr, uint64_t data) +static inline void write_scom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t data) { addr &= ~PPC_BITMASK(34,39); addr |= ((chiplet & 0x3F) << 24); - write_rscom(chip, addr, data); + write_scom(chip, addr, data); } -static inline uint64_t read_rscom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr) +static inline uint64_t read_scom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr) { addr &= ~PPC_BITMASK(34,39); addr |= ((chiplet & 0x3F) << 24); - return read_rscom(chip, 
addr); + return read_scom(chip, addr); } -static inline void rscom_and_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, - uint64_t addr, uint64_t and, uint64_t or) +static inline void scom_and_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t and, uint64_t or) { - uint64_t data = read_rscom_for_chiplet(chip, chiplet, addr); - write_rscom_for_chiplet(chip, chiplet, addr, (data & and) | or); + uint64_t data = read_scom_for_chiplet(chip, chiplet, addr); + write_scom_for_chiplet(chip, chiplet, addr, (data & and) | or); } -static inline void rscom_and_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr, uint64_t and) +static inline void scom_and_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t and) { - rscom_and_or_for_chiplet(chip, chiplet, addr, and, 0); + scom_and_or_for_chiplet(chip, chiplet, addr, and, 0); } -static inline void rscom_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr, uint64_t or) +static inline void scom_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t or) { - rscom_and_or_for_chiplet(chip, chiplet, addr, ~0, or); + scom_and_or_for_chiplet(chip, chiplet, addr, ~0, or); } -#if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) -#include - -#define write_rscom(c, x, y) \ -({ \ - uint8_t __cw = (c); \ - uint64_t __xw = (x); \ - uint64_t __yw = (y); \ - printk(BIOS_SPEW, "SCOM W P%d %016llX %016llX\n", __cw, __xw, __yw); \ - write_rscom(__cw, __xw, __yw); \ -}) - -#define read_rscom(c, x) \ -({ \ - uint8_t __cr = (c); \ - uint64_t __xr = (x); \ - uint64_t __yr = read_rscom(__cr, __xr); \ - printk(BIOS_SPEW, "SCOM R P%d %016llX %016llX\n", __cr, __xr, __yr); \ - __yr; \ -}) - -#endif - static inline uint8_t get_dd(void) { - uint64_t val = read_rscom(0, 0xF000F); + uint64_t val = read_scom(0, 0xF000F); val = ((val >> 52) & 0x0F) | ((val >> 56) & 0xF0); return (uint8_t) val; } diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index 28574135ed1..36612c70267 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -64,25 +64,25 @@ void ccs_add_instruction(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, uint8_t c [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = csn[0:1] [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = csn[2:3] */ - write_rscom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, - mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | - PPC_PLACE(cke, CCS_INST_ARR0_00_CCS_DDR_CKE, - CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | - PPC_PLACE(csn >> 2, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, - CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | - PPC_PLACE(csn, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, - CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); + write_scom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | + PPC_PLACE(cke, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(csn >> 2, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(csn, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); /* MC01.MCBIST.CCS.CCS_INST_ARR1_n [all] 0 [0-15] CCS_INST_ARR1_00_IDLES = idles [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 */ - write_rscom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, - PPC_PLACE(idles, CCS_INST_ARR1_00_IDLES, - CCS_INST_ARR1_00_IDLES_LEN) | - PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, - CCS_INST_ARR1_00_GOTO_CMD_LEN)); + write_scom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_PLACE(idles, CCS_INST_ARR1_00_IDLES, + 
CCS_INST_ARR1_00_IDLES_LEN) | + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); /* * For the last instruction in the stream we could decrease it by one (final @@ -102,7 +102,7 @@ void ccs_add_instruction(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, uint8_t c static void dump_cal_errors(uint8_t chip, chiplet_id_t id, int mca_i) { /* Stop CCS so it won't mess up with the values */ - write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); #if CONFIG(DEBUG_RAM_SETUP) int dp; @@ -171,8 +171,8 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) total_cycles = 8; poll_timeout = nck_to_us(chip, (total_cycles * 7 * 4) / 8); - write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); - time = wait_us(1, !(read_rscom_for_chiplet(chip, id, CCS_STATQ) & + write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + time = wait_us(1, !(read_scom_for_chiplet(chip, id, CCS_STATQ) & PPC_BIT(CCS_STATQ_CCS_IP))); /* Is it always as described below (CKE, CSN) or is it a copy of last instr? */ @@ -189,26 +189,26 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) [all] 0 [58] CCS_INST_ARR1_00_CCS_END = 1 */ - write_rscom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, - PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | - PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, - CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, - CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, - CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); - write_rscom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, - PPC_BIT(CCS_INST_ARR1_00_CCS_END)); + write_scom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) | + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN)); + write_scom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_BIT(CCS_INST_ARR1_00_CCS_END)); /* Select ports MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ // Broadcast mode is not supported, set only one bit at a time [2-5] MCB_CNTLQ_MCBCNTL_PORT_SEL = bitmap with MCA index */ - rscom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~PPC_BITMASK(2, 5), PPC_BIT(2 + mca_i)); + scom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~PPC_BITMASK(2, 5), PPC_BIT(2 + mca_i)); /* Lets go */ - write_rscom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START)); + write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START)); /* With microsecond resolution we are probably wasting a lot of time here. */ delay_nck(chip, total_cycles / 8); @@ -218,7 +218,7 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) delay(10ns) if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ != 0x40..00: report failure // only [1] set, others 0 */ - time = wait_us(poll_timeout, (udelay(1), !(read_rscom_for_chiplet(chip, id, CCS_STATQ) & + time = wait_us(poll_timeout, (udelay(1), !(read_scom_for_chiplet(chip, id, CCS_STATQ) & PPC_BIT(CCS_STATQ_CCS_IP)))); /* This isn't useful for anything but calibration steps, do we want it? 
*/ @@ -229,9 +229,9 @@ void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) time + nck_to_us(chip, total_cycles / 8), poll_timeout + nck_to_us(chip, total_cycles / 8), instr); - if (read_rscom_for_chiplet(chip, id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE)) + if (read_scom_for_chiplet(chip, id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE)) die("(%#16.16llx) CCS execution error\n", - read_rscom_for_chiplet(chip, id, CCS_STATQ)); + read_scom_for_chiplet(chip, id, CCS_STATQ)); instr = 0; total_cycles = 0; @@ -324,15 +324,15 @@ void ccs_phy_hw_step(uint8_t chip, chiplet_id_t id, int mca_i, int rp, enum cal_ [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = 3 // Not used by the engine for calibration? [56-59] CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE = 0xc */ - write_rscom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, - PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, - CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, - CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | - PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, - CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN) | - PPC_PLACE(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE, - CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN)); + write_scom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN) | + PPC_PLACE(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE, + CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN)); /* MC01.MCBIST.CCS.CCS_INST_ARR1_n [all] 0 @@ -340,12 +340,12 @@ void ccs_phy_hw_step(uint8_t chip, chiplet_id_t id, int mca_i, int rp, enum cal_ [57] CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE = 1 [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 */ - write_rscom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, - PPC_PLACE(rp, CCS_INST_ARR1_00_DDR_CAL_RANK, - CCS_INST_ARR1_00_DDR_CAL_RANK_LEN) | - PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | - PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, - CCS_INST_ARR1_00_GOTO_CMD_LEN)); + write_scom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_PLACE(rp, CCS_INST_ARR1_00_DDR_CAL_RANK, + CCS_INST_ARR1_00_DDR_CAL_RANK_LEN) | + PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); total_cycles += step_cycles; instr++; diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 96ccfca0497..c216fe68fb8 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -329,12 +329,12 @@ static void add_memory_nodes(struct device_tree *tree) /* These registers are undocumented, see istep 14.5. 
*/ /* MCS_MCFGP */ - reg = read_rscom_for_chiplet(chip, nest, 0x0501080A); + reg = read_scom_for_chiplet(chip, nest, 0x0501080A); if (reg & PPC_BIT(0)) add_memory_node(tree, chip, reg); /* MCS_MCFGPM */ - reg = read_rscom_for_chiplet(chip, nest, 0x0501080C); + reg = read_scom_for_chiplet(chip, nest, 0x0501080C); if (reg & PPC_BIT(0)) add_memory_node(tree, chip, reg); } @@ -515,7 +515,7 @@ static int dt_platform_update(struct device_tree *tree, uint8_t chips) if (!(chips & (1 << chip))) continue; - cores = read_rscom(chip, 0x0006C090); + cores = read_scom(chip, 0x0006C090); assert(cores != 0); for (int core_id = 0; core_id < MAX_CORES_PER_CHIP; core_id++) { @@ -614,9 +614,9 @@ static void rng_init(uint8_t chips) * [0-9] FAIL_REG - abort if any of these bits is set * [17] BIST_COMPLETE - should be 1 at this point */ - uint64_t rng_status = read_rscom(chip, 0x020110E0); + uint64_t rng_status = read_scom(chip, 0x020110E0); assert(rng_status & PPC_BIT(17)); - while (!((rng_status = read_rscom(chip, 0x020110E0)) & PPC_BIT(17))); + while (!((rng_status = read_scom(chip, 0x020110E0)) & PPC_BIT(17))); if (rng_status & PPC_BITMASK(0, 9)) die("RNG initialization failed, NX_RNG_CFG = %#16.16llx\n", rng_status); @@ -625,7 +625,7 @@ static void rng_init(uint8_t chips) * Hostboot sets 'enable' bit again even though it was already set. * Following that behavior just in case. */ - write_rscom(chip, 0x020110E0, rng_status | PPC_BIT(63)); + write_scom(chip, 0x020110E0, rng_status | PPC_BIT(63)); /* * This would be the place to set BARs, but it is done as part of quad SCOM @@ -633,7 +633,7 @@ static void rng_init(uint8_t chips) */ /* Lock NX RNG configuration */ - rscom_or(chip, 0x00010005, PPC_BIT(9)); + scom_or(chip, 0x00010005, PPC_BIT(9)); } } @@ -656,7 +656,7 @@ static void enable_soc_dev(struct device *dev) /* These registers are undocumented, see istep 14.5. 
*/ /* MCS_MCFGP */ - reg = read_rscom_for_chiplet(chip, nest, 0x0501080A); + reg = read_scom_for_chiplet(chip, nest, 0x0501080A); if (reg & PPC_BIT(0)) { uint64_t end = base_k(reg) + size_k(reg); ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); @@ -665,7 +665,7 @@ static void enable_soc_dev(struct device *dev) } /* MCS_MCFGPM */ - reg = read_rscom_for_chiplet(chip, nest, 0x0501080C); + reg = read_scom_for_chiplet(chip, nest, 0x0501080C); if (reg & PPC_BIT(0)) { uint64_t end = base_k(reg) + size_k(reg); ram_resource_kb(dev, idx++, base_k(reg), size_k(reg)); @@ -704,7 +704,7 @@ static void activate_slave_cores(uint8_t chip) uint8_t i; /* Read OCC CCSR written by the code earlier */ - const uint64_t functional_cores = read_rscom(chip, 0x0006C090); + const uint64_t functional_cores = read_scom(chip, 0x0006C090); /* * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c index 55eee158a18..b19ec0850be 100644 --- a/src/soc/ibm/power9/fsi.c +++ b/src/soc/ibm/power9/fsi.c @@ -77,8 +77,8 @@ enum { void fsi_reset_pib2opb(uint8_t chip) { - write_rscom(chip, FSI2OPB_OFFSET_0 | OPB_REG_RES, 0x8000000000000000); - write_rscom(chip, FSI2OPB_OFFSET_0 | OPB_REG_STAT, 0x8000000000000000); + write_scom(chip, FSI2OPB_OFFSET_0 | OPB_REG_RES, 0x8000000000000000); + write_scom(chip, FSI2OPB_OFFSET_0 | OPB_REG_STAT, 0x8000000000000000); } static void cleanup_port_maeb_error(uint8_t port) @@ -175,7 +175,7 @@ static void basic_master_init(void) fsi_reset_pib2opb(chip); /* Ensure we don't have any errors before we even start */ - tmp = read_rscom(0, FSI2OPB_OFFSET_0 | OPB_REG_STAT); + tmp = read_scom(0, FSI2OPB_OFFSET_0 | OPB_REG_STAT); if (tmp & OPB_STAT_NON_MFSI_ERR) die("Unclearable errors on MFSI initialization: 0x%016llx\n", tmp); @@ -341,10 +341,10 @@ static inline uint64_t poll_opb(uint8_t chip) /* Timeout after 10ms, check every 10us, supposedly there is hardware * timeout after 1ms */ - tmp = read_rscom(0, stat_addr); + tmp = read_scom(0, stat_addr); for (i = 0; (tmp & OPB_STAT_BUSY) && !(tmp & err_mask) && i < MAX_WAIT_LOOPS; i++) { udelay(TIMEOUT_STEP_US); - tmp = read_rscom(0, stat_addr); + tmp = read_scom(0, stat_addr); } if (tmp & err_mask) @@ -393,7 +393,7 @@ uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_ if (!is_read) cmd |= WRITE_NOT_READ; - write_rscom(0, FSI2OPB_OFFSET_0 | OPB_REG_CMD, cmd); + write_scom(0, FSI2OPB_OFFSET_0 | OPB_REG_CMD, cmd); /* Poll for complete and get the data back. */ response = poll_opb(chip); diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index eaced074efa..299bcbb054e 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -669,7 +669,7 @@ static void pba_slave_setup_runtime_phase(uint8_t chip) data |= PPC_BIT(27); // en_slv_fairness data |= PPC_BIT(10); // en_second_wrbuf - write_rscom(chip, PU_PBAMODE_SCOM, data); + write_scom(chip, PU_PBAMODE_SCOM, data); /* * Slave 0 (SGPE STOP). This is a read/write slave in the event that @@ -688,7 +688,7 @@ static void pba_slave_setup_runtime_phase(uint8_t chip) data |= PPC_BIT(22); // buf_alloc_c data |= PPC_BIT(19); // buf_alloc_w - write_rscom(chip, PU_PBASLVCTL0_SCOM, data); + write_scom(chip, PU_PBASLVCTL0_SCOM, data); /* * Slave 1 (GPE 1, PPC405 booting). This is a read/write slave. 
Write gathering is @@ -708,7 +708,7 @@ static void pba_slave_setup_runtime_phase(uint8_t chip) data |= PPC_BIT(22); // buf_alloc_c data |= PPC_BIT(19); // buf_alloc_w - write_rscom(chip, PU_PBASLVCTL1_SCOM, data); + write_scom(chip, PU_PBASLVCTL1_SCOM, data); /* * Slave 2 (PGPE Boot, Pstates/WOF). This is a read/write slave. Write gethering is @@ -729,7 +729,7 @@ static void pba_slave_setup_runtime_phase(uint8_t chip) data |= PPC_BIT(22); // buf_alloc_c data |= PPC_BIT(19); // buf_alloc_w - write_rscom(chip, PU_PBASLVCTL2_SCOM, data); + write_scom(chip, PU_PBASLVCTL2_SCOM, data); /* Slave 3 is not modified by this function, because it is owned by SBE */ } @@ -742,14 +742,14 @@ static void pba_reset(uint8_t chip) [all] 0 [0] 1 */ - write_rscom(chip, 0x00068010, PPC_BIT(0)); + write_scom(chip, 0x00068010, PPC_BIT(0)); /* Stopping Block Copy Upload Engine *0x00068015 // undocumented, PU_BCUE_CTL_SCOM [all] 0 [0] 1 */ - write_rscom(chip, 0x00068015, PPC_BIT(0)); + write_scom(chip, 0x00068015, PPC_BIT(0)); /* Polling on, to verify that BCDE & BCUE are indeed stopped timeout(256*256us): @@ -760,15 +760,15 @@ static void pba_reset(uint8_t chip) if both bits are clear: break */ time = wait_us(256*256, - (((read_rscom(chip, 0x00068012) & PPC_BIT(0)) == 0) && - ((read_rscom(chip, 0x00068017) & PPC_BIT(0)) == 0))); + (((read_scom(chip, 0x00068012) & PPC_BIT(0)) == 0) && + ((read_scom(chip, 0x00068017) & PPC_BIT(0)) == 0))); if (!time) die("Timed out waiting for stopping of BCDE/BCUE\n"); /* Clear the BCDE and BCUE stop bits */ - write_rscom(chip, 0x00068010, 0); - write_rscom(chip, 0x00068015, 0); + write_scom(chip, 0x00068010, 0); + write_scom(chip, 0x00068015, 0); /* Reset each slave and wait for completion timeout(16*1us): @@ -783,10 +783,10 @@ static void pba_reset(uint8_t chip) */ for (int sl = 0; sl < 3; sl++) { // Fourth is owned by SBE, do not reset time = wait_us(16, - (write_rscom(chip, 0x00068001, PPC_BIT(0) | PPC_PLACE(sl, 1, 2)), - (read_rscom(chip, 0x00068001) & PPC_BIT(4 + sl)) == 0)); + (write_scom(chip, 0x00068001, PPC_BIT(0) | PPC_PLACE(sl, 1, 2)), + (read_scom(chip, 0x00068001) & PPC_BIT(4 + sl)) == 0)); - if (!time || read_rscom(chip, 0x00068001) & PPC_BIT(8 + sl)) + if (!time || read_scom(chip, 0x00068001) & PPC_BIT(8 + sl)) die("Timed out waiting for slave %d reset\n", sl); } @@ -808,28 +808,28 @@ static void pba_reset(uint8_t chip) BRIDGE.PBA.PBAERRRPT0 // 0x0501284C [all] 0 */ - write_rscom(chip, 0x00068013, 0); - write_rscom(chip, 0x00068014, 0); - write_rscom(chip, 0x00068015, 0); - write_rscom(chip, 0x00068016, 0); - write_rscom(chip, 0x00068018, 0); - write_rscom(chip, 0x00068019, 0); - write_rscom(chip, 0x00068026, 0); - write_rscom(chip, 0x0006802A, 0); - write_rscom(chip, 0x00068027, 0); - write_rscom(chip, 0x0006802B, 0); - write_rscom(chip, 0x00068004, 0); - write_rscom(chip, 0x00068005, 0); - write_rscom(chip, 0x00068006, 0); - write_rscom(chip, 0x05012840, 0); - write_rscom(chip, 0x0501284C, 0); + write_scom(chip, 0x00068013, 0); + write_scom(chip, 0x00068014, 0); + write_scom(chip, 0x00068015, 0); + write_scom(chip, 0x00068016, 0); + write_scom(chip, 0x00068018, 0); + write_scom(chip, 0x00068019, 0); + write_scom(chip, 0x00068026, 0); + write_scom(chip, 0x0006802A, 0); + write_scom(chip, 0x00068027, 0); + write_scom(chip, 0x0006802B, 0); + write_scom(chip, 0x00068004, 0); + write_scom(chip, 0x00068005, 0); + write_scom(chip, 0x00068006, 0); + write_scom(chip, 0x05012840, 0); + write_scom(chip, 0x0501284C, 0); /* Perform non-zero reset operations BRIDGE.PBA.PBACFG 
// 0x0501284B [all] 0 [38] PBACFG_CHSW_DIS_GROUP_SCOPE = 1 */ - write_rscom(chip, 0x0501284B, PPC_BIT(38)); + write_scom(chip, 0x0501284B, PPC_BIT(38)); /* *0x00068021 // Undocumented, PU_PBAXCFG_SCOM @@ -837,7 +837,7 @@ static void pba_reset(uint8_t chip) [2] 1 // PBAXCFG_SND_RESET? [3] 1 // PBAXCFG_RCV_RESET? */ - write_rscom(chip, PU_PBAXCFG_SCOM, PPC_BIT(2) | PPC_BIT(3)); + write_scom(chip, PU_PBAXCFG_SCOM, PPC_BIT(2) | PPC_BIT(3)); pba_slave_setup_runtime_phase(chip); } @@ -850,9 +850,9 @@ static void stop_gpe_init(uint8_t chip, struct homer_st *homer) [all] 0 [8] 1 // SGPE_ACTIVE, bits in this register are defined by OCC firmware */ - if (read_rscom(chip, 0x0006C08A) & PPC_BIT(8)) { + if (read_scom(chip, 0x0006C08A) & PPC_BIT(8)) { printk(BIOS_WARNING, "SGPE_ACTIVE is set in OCCFLAG register, clearing it\n"); - write_rscom(chip, 0x0006C08B, PPC_BIT(8)); + write_scom(chip, 0x0006C08B, PPC_BIT(8)); } /* @@ -865,7 +865,7 @@ static void stop_gpe_init(uint8_t chip, struct homer_st *homer) */ uint32_t ivpr = 0x80000000 + homer->qpmr.sgpe.header.l1_offset + offsetof(struct homer_st, qpmr); - write_rscom(chip, 0x00066001, PPC_PLACE(ivpr, 0, 32)); + write_scom(chip, 0x00066001, PPC_PLACE(ivpr, 0, 32)); /* Program XCR to ACTIVATE SGPE TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 @@ -878,9 +878,9 @@ static void stop_gpe_init(uint8_t chip, struct homer_st *homer) [all] 0 [1-3] PPE_XIXCR_XCR = 2 // resume */ - write_rscom(chip, 0x00066010, PPC_PLACE(6, 1, 3)); - write_rscom(chip, 0x00066010, PPC_PLACE(4, 1, 3)); - write_rscom(chip, 0x00066010, PPC_PLACE(2, 1, 3)); + write_scom(chip, 0x00066010, PPC_PLACE(6, 1, 3)); + write_scom(chip, 0x00066010, PPC_PLACE(4, 1, 3)); + write_scom(chip, 0x00066010, PPC_PLACE(2, 1, 3)); /* * Now wait for SGPE to not be halted and for the HCode to indicate to be @@ -893,8 +893,8 @@ static void stop_gpe_init(uint8_t chip, struct homer_st *homer) if ((TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1) && // 0x0006C08A (TP.TPCHIP.OCC.OCI.GPE3.GPEXIXSR[0] == 0)): break // 0x00066021 */ - long time = wait_us(125*20, ((read_rscom(chip, 0x0006C08A) & PPC_BIT(8)) && - !(read_rscom(chip, 0x00066021) & PPC_BIT(0)))); + long time = wait_us(125*20, ((read_scom(chip, 0x0006C08A) & PPC_BIT(8)) && + !(read_scom(chip, 0x00066021) & PPC_BIT(0)))); if (!time) die("Timeout while waiting for SGPE activation\n"); @@ -904,7 +904,7 @@ static uint64_t get_available_cores(uint8_t chip, int *me) { uint64_t ret = 0; for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { - uint64_t val = read_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0xF0040); + uint64_t val = read_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0xF0040); if (val & PPC_BIT(0)) { printk(BIOS_SPEW, "Core %d is functional%s\n", i, (val & PPC_BIT(1)) ? 
"" : " and running"); @@ -921,36 +921,36 @@ static uint64_t get_available_cores(uint8_t chip, int *me) static void psu_command(uint8_t flags, long time) { /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG */ - if (read_rscom(0, 0x000D0060) & PPC_BIT(0)) + if (read_scom(0, 0x000D0060) & PPC_BIT(0)) die("MBOX to SBE busy, this should not happen\n"); - if (read_rscom(0, 0x000D0063) & PPC_BIT(0)) { + if (read_scom(0, 0x000D0063) & PPC_BIT(0)) { printk(BIOS_ERR, "SBE to Host doorbell already active, clearing it\n"); - write_rscom(0, 0x000D0064, ~PPC_BIT(0)); + write_scom(0, 0x000D0064, ~PPC_BIT(0)); } /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ /* REQUIRE_RESPONSE, CLASS_CORE_STATE, CMD_CONTROL_DEADMAN_LOOP, flags */ - write_rscom(0, 0x000D0050, 0x000001000000D101 | PPC_PLACE(flags, 24, 8)); + write_scom(0, 0x000D0050, 0x000001000000D101 | PPC_PLACE(flags, 24, 8)); /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ - write_rscom(0, 0x000D0051, time); + write_scom(0, 0x000D0051, time); /* Ring the host->SBE doorbell */ /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR */ - write_rscom(0, 0x000D0062, PPC_BIT(0)); + write_scom(0, 0x000D0062, PPC_BIT(0)); /* Wait for response */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG */ - time = wait_ms(time, read_rscom(0, 0x000D0063) & PPC_BIT(0)); + time = wait_ms(time, read_scom(0, 0x000D0063) & PPC_BIT(0)); if (!time) die("Timed out while waiting for SBE response\n"); /* Clear SBE->host doorbell */ /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND */ - write_rscom(0, 0x000D0064, ~PPC_BIT(0)); + write_scom(0, 0x000D0064, ~PPC_BIT(0)); } #define DEADMAN_LOOP_START 0x0001 @@ -962,11 +962,11 @@ static void block_wakeup_int(int core, int state) /* Depending on requested state we write to SCOM1 (CLEAR) or SCOM2 (OR). */ uint64_t scom = state ? 0x200F0102 : 0x200F0101; - write_rscom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0108, PPC_BIT(1)); + write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0108, PPC_BIT(1)); /* Register is documented, but its bits are reserved... */ - write_rscom_for_chiplet(0, EC00_CHIPLET_ID + core, scom, PPC_BIT(6)); + write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, scom, PPC_BIT(6)); - write_rscom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); + write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); } /* @@ -1087,9 +1087,9 @@ static void istep_16_1(int this_core) * No need to handle the timeout, if it happens, SBE will checkstop the * system anyway. */ - wait_us(time, read_rscom(0, 0x000D0063) & PPC_BIT(2)); + wait_us(time, read_scom(0, 0x000D0063) & PPC_BIT(2)); - write_rscom(0, 0x000D0064, ~PPC_BIT(2)); + write_scom(0, 0x000D0064, ~PPC_BIT(2)); /* * This tells SBE that we were properly awoken. 
Hostboot uses default @@ -1191,24 +1191,24 @@ static void pm_corequad_init(uint8_t chip, uint64_t cores) * 18 - 19 : PCB interrupt * 20,22,24,26: InterPPM Ivrm/Aclk/Vdata/Dpll enable */ - write_rscom_for_chiplet(chip, quad_chiplet, EQ_QPPM_QPMMR_CLEAR, - PPC_BIT(0) | - PPC_BITMASK(1, 11) | - PPC_BIT(12) | - PPC_BIT(13) | - PPC_BIT(14) | - PPC_BITMASK(18, 19) | - PPC_BIT(20) | - PPC_BIT(22) | - PPC_BIT(24) | - PPC_BIT(26)); + write_scom_for_chiplet(chip, quad_chiplet, EQ_QPPM_QPMMR_CLEAR, + PPC_BIT(0) | + PPC_BITMASK(1, 11) | + PPC_BIT(12) | + PPC_BIT(13) | + PPC_BIT(14) | + PPC_BITMASK(18, 19) | + PPC_BIT(20) | + PPC_BIT(22) | + PPC_BIT(24) | + PPC_BIT(26)); /* Clear QUAD PPM ERROR Register */ - write_rscom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERR, 0); + write_scom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERR, 0); /* Restore Quad PPM Error Mask */ err_mask = 0xFFFFFF00; // from Hostboot's log - write_rscom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERRMSK, + write_scom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERRMSK, PPC_PLACE(err_mask, 0, 32)); for (int core = quad * 4; core < (quad + 1) * 4; ++core) { @@ -1216,9 +1216,9 @@ static void pm_corequad_init(uint8_t chip, uint64_t cores) /* Clear the Core PPM CME DoorBells */ for (int i = 0; i < DOORBELLS_COUNT; ++i) { - write_rscom_for_chiplet(chip, core_chiplet, - CME_DOORBELL_CLEAR[i], - PPC_BITMASK(0, 63)); + write_scom_for_chiplet(chip, core_chiplet, + CME_DOORBELL_CLEAR[i], + PPC_BITMASK(0, 63)); } /* @@ -1238,15 +1238,15 @@ static void pm_corequad_init(uint8_t chip, uint64_t cores) * 10 : STOP_EXIT_TYPE_SEL * 13 : WKUP_NOTIFY_SELECT */ - write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_CPMMR_CLEAR, - PPC_BIT(1) | - PPC_BIT(11) | - PPC_BIT(12) | - PPC_BIT(14) | - PPC_BIT(15)); + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_CPMMR_CLEAR, + PPC_BIT(1) | + PPC_BIT(11) | + PPC_BIT(12) | + PPC_BIT(14) | + PPC_BIT(15)); /* Clear Core PPM Errors */ - write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_ERR, 0); + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_ERR, 0); /* * Clear Hcode Error Injection and other CSAR settings: @@ -1259,16 +1259,16 @@ static void pm_corequad_init(uint8_t chip, uint64_t cores) * DISABLE_CME_NACK_ON_PROLONGED_DROOP is NOT cleared * as this is a persistent, characterization setting. 
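/*
 * The doorbell and reset polling loops above (psu_command(), istep_16_1(),
 * pba_reset()) lean on the wait_us()/wait_ms() helpers: the condition is
 * re-evaluated until it holds or the timeout runs out, and the expression
 * yields the non-zero time left on success or 0 on timeout. A hedged sketch
 * of that idea, in a hypothetical form rather than the tree's actual
 * implementation:
 *
 *	#define wait_us(timeout_us, cond)		\
 *	({						\
 *		long __left = (timeout_us);		\
 *		while (__left && !(cond)) {		\
 *			udelay(1);			\
 *			__left--;			\
 *		}					\
 *		__left;	// 0 means timeout		\
 *	})
 *
 * That is why callers write `time = wait_us(...); if (!time) die(...);`, and
 * why pba_reset() can fold a write_scom() into the condition with the comma
 * operator: the whole condition is re-evaluated on every polling iteration.
 */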
*/ - write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_CSAR_CLEAR, - PPC_BIT(27) | - PPC_BIT(28) | - PPC_BIT(30) | - PPC_BIT(31)); + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_CSAR_CLEAR, + PPC_BIT(27) | + PPC_BIT(28) | + PPC_BIT(30) | + PPC_BIT(31)); /* Restore CORE PPM Error Mask */ err_mask = 0xFFF00000; // from Hostboot's log - write_rscom_for_chiplet(chip, core_chiplet, C_CPPM_ERRMSK, - PPC_PLACE(err_mask, 0, 32)); + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_ERRMSK, + PPC_PLACE(err_mask, 0, 32)); } } } @@ -1312,32 +1312,32 @@ static void pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores uint8_t avsbus_rail = 0; uint64_t ivpr = 0x80000000 + offsetof(struct homer_st, ppmr.l1_bootloader); - write_rscom(chip, PU_GPE2_GPEIVPR_SCOM, ivpr << 32); + write_scom(chip, PU_GPE2_GPEIVPR_SCOM, ivpr << 32); /* Set up the OCC Scratch 2 register before PGPE boot */ - occ_scratch = read_rscom(chip, PU_OCB_OCI_OCCS2_SCOM); + occ_scratch = read_scom(chip, PU_OCB_OCI_OCCS2_SCOM); occ_scratch &= ~PPC_BIT(PGPE_ACTIVE); occ_scratch &= ~PPC_BITMASK(27, 31); occ_scratch |= PPC_PLACE(avsbus_number, 27, 1); occ_scratch |= PPC_PLACE(avsbus_rail, 28, 4); - write_rscom(chip, PU_OCB_OCI_OCCS2_SCOM, occ_scratch); + write_scom(chip, PU_OCB_OCI_OCCS2_SCOM, occ_scratch); - write_rscom(chip, PU_GPE2_GPETSEL_SCOM, 0x1A00000000000000); + write_scom(chip, PU_GPE2_GPETSEL_SCOM, 0x1A00000000000000); /* OCCFLG2_PGPE_HCODE_FIT_ERR_INJ | OCCFLG2_PGPE_HCODE_PSTATE_REQ_ERR_INJ */ - write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); + write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); printk(BIOS_ERR, "Attempting PGPE activation...\n"); - write_rscom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); - write_rscom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); - write_rscom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(RESUME, 1, 3)); + write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); + write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); + write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(RESUME, 1, 3)); wait_ms(PGPE_POLLTIME_MS * TIMEOUT_COUNT, - (read_rscom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) || - (read_rscom(chip, PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); + (read_scom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) || + (read_scom(chip, PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); - if (read_rscom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) + if (read_scom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) printk(BIOS_ERR, "PGPE was activated successfully\n"); else die("Failed to activate PGPE\n"); @@ -1352,9 +1352,9 @@ static void pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores if (!IS_EQ_FUNCTIONAL(quad, cores)) continue; - rscom_and_or_for_chiplet(chip, EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, - ~PPC_BITMASK(1, 11), - PPC_PLACE(safe_mode_freq, 1, 11)); + scom_and_or_for_chiplet(chip, EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, + ~PPC_BITMASK(1, 11), + PPC_PLACE(safe_mode_freq, 1, 11)); } } @@ -1380,9 +1380,9 @@ static void pm_pba_init(uint8_t chip) uint8_t attr_pbax_broadcast_vector = 0; /* Assuming ATTR_CHIP_EC_FEATURE_HW423589_OPTION1 == true */ - write_rscom(chip, PU_PBACFG, PPC_BIT(PU_PBACFG_CHSW_DIS_GROUP_SCOPE)); + write_scom(chip, PU_PBACFG, PPC_BIT(PU_PBACFG_CHSW_DIS_GROUP_SCOPE)); - write_rscom(chip, PU_PBAFIR, 0); + write_scom(chip, PU_PBAFIR, 0); data |= PPC_PLACE(attr_pbax_groupid, 4, 4); data |= PPC_PLACE(attr_pbax_chipid, 8, 3); @@ -1391,7 +1391,7 @@ static void 
pm_pba_init(uint8_t chip) data |= PPC_PLACE(PBAX_SND_RETRY_COMMIT_OVERCOMMIT, 27, 1); data |= PPC_PLACE(PBAX_SND_RETRY_THRESHOLD, 28, 8); data |= PPC_PLACE(PBAX_SND_TIMEOUT, 36, 5); - write_rscom(chip, PU_PBAXCFG_SCOM, data); + write_scom(chip, PU_PBAXCFG_SCOM, data); } static void pm_pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores) @@ -1412,8 +1412,8 @@ static void check_proc_config(uint8_t chip, struct homer_st *homer) chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; /* MCS_MCFGP and MCS_MCFGPM registers are undocumented, see istep 14.5. */ - if ((read_rscom_for_chiplet(chip, nest, 0x0501080A) & PPC_BIT(0)) || - (read_rscom_for_chiplet(chip, nest, 0x0501080C) & PPC_BIT(0))) { + if ((read_scom_for_chiplet(chip, nest, 0x0501080A) & PPC_BIT(0)) || + (read_scom_for_chiplet(chip, nest, 0x0501080C) & PPC_BIT(0))) { uint8_t pos = MCS_POS + mcs_i; *conf_vector |= PPC_BIT(pos); @@ -1442,9 +1442,9 @@ static void pm_pss_init(uint8_t chip) * 0-5 frame size * 12-17 in delay */ - rscom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0, - ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), - PPC_PLACE(0x20, 0, 6)); + scom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0, + ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), + PPC_PLACE(0x20, 0, 6)); /* * 0 adc_fsm_enable = 1 @@ -1456,23 +1456,23 @@ static void pm_pss_init(uint8_t chip) * * Truncating last value to 4 bits gives 0. */ - rscom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17), - PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_PLACE(0, 14, 4)); + scom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17), + PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_PLACE(0, 14, 4)); /* * 0-16 inter frame delay */ - rscom_and(chip, PU_SPIPSS_ADC_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); + scom_and(chip, PU_SPIPSS_ADC_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); - write_rscom(chip, PU_SPIPSS_ADC_WDATA_REG, 0); + write_scom(chip, PU_SPIPSS_ADC_WDATA_REG, 0); /* * 0-5 frame size * 12-17 in delay */ - rscom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0, - ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), - PPC_PLACE(0x20, 0, 6)); + scom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0, + ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17), + PPC_PLACE(0x20, 0, 6)); /* * 0 p2s_fsm_enable = 1 @@ -1482,23 +1482,23 @@ static void pm_pss_init(uint8_t chip) * 4-13 p2s_clock_divider = set to 10Mhz * 17 p2s_nr_of_frames = 1 (for auto 2 mode) */ - rscom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0 + 1, - ~(PPC_BITMASK(0, 13) | PPC_BIT(17)), - PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_BIT(17)); + scom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0 + 1, + ~(PPC_BITMASK(0, 13) | PPC_BIT(17)), + PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_BIT(17)); /* * 0-16 inter frame delay */ - rscom_and(chip, PU_SPIPSS_P2S_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); + scom_and(chip, PU_SPIPSS_P2S_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16)); - write_rscom(chip, PU_SPIPSS_P2S_WDATA_REG, 0); + write_scom(chip, PU_SPIPSS_P2S_WDATA_REG, 0); /* * 0-31 100ns value */ - rscom_and_or(chip, PU_SPIPSS_100NS_REG, - PPC_BITMASK(0, 31), - PPC_PLACE(powerbus_cfg(chip)->fabric_freq / 40, 0, 32)); + scom_and_or(chip, PU_SPIPSS_100NS_REG, + PPC_BITMASK(0, 31), + PPC_PLACE(powerbus_cfg(chip)->fabric_freq / 40, 0, 32)); } /* Initializes power-management and starts OCC */ @@ -1518,7 +1518,7 @@ static void start_pm_complex(uint8_t chip, struct homer_st *homer, uint64_t core special_occ_wakeup_disable(chip, cores); occ_start_from_mem(chip); - write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); + write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE)); 
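/*
 * The register values built in pm_pss_init() and throughout this file use
 * IBM's MSB-0 bit numbering. A minimal sketch of the helpers, assuming
 * definitions equivalent to the usual PowerPC MSB-0 macros (the exact
 * coreboot definitions may differ in form), given here only to make the
 * field arithmetic above easier to follow:
 *
 *	#define PPC_BIT(bit)		(0x8000000000000000ULL >> (bit))
 *	#define PPC_BITMASK(bs, be)	((PPC_BIT(bs) - PPC_BIT(be)) + PPC_BIT(bs))
 *	// Place `val` into a `len`-bit field whose most significant bit is
 *	// MSB-0 bit `pos` (assumes val fits in the field and len < 64):
 *	#define PPC_PLACE(val, pos, len) \
 *		(((uint64_t)(val) & ((1ULL << (len)) - 1)) << (64 - (pos) - (len)))
 *
 * For example, PPC_BIT(0) == 0x8000000000000000ULL, PPC_BITMASK(56, 63)
 * masks the least significant byte, and PPC_PLACE(0x20, 0, 6) ==
 * 0x8000000000000000ULL, i.e. the 6-bit "frame size" field written above
 * occupies the top of the 64-bit SCOM value.
 */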
} static void istep_21_1(uint8_t chips, struct homer_st *homers, const uint64_t *cores) @@ -2376,19 +2376,19 @@ static void setup_wakeup_mode(uint8_t chip, uint64_t cores) [4] CPPM_CPMMR_RESERVED_2_9 = 1 */ /* SCOM2 - OR, 0x200F0108 */ - write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, - PPC_BIT(3) | PPC_BIT(4)); + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(3) | PPC_BIT(4)); } } /* 15.2 set HOMER BAR */ static void istep_15_2(uint8_t chip, struct homer_st *homer, void *common_occ_area) { - write_rscom(chip, 0x05012B00, (uint64_t)homer); - write_rscom(chip, 0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); + write_scom(chip, 0x05012B00, (uint64_t)homer); + write_scom(chip, 0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); - write_rscom(chip, 0x05012B02, (uint64_t)common_occ_area); - write_rscom(chip, 0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); + write_scom(chip, 0x05012B02, (uint64_t)common_occ_area); + write_scom(chip, 0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); } /* 15.3 establish EX chiplet */ @@ -2403,15 +2403,15 @@ static void istep_15_3(uint8_t chip, uint64_t cores) if (!IS_EC_FUNCTIONAL(i, cores)) continue; - if ((read_rscom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) - rscom_and_or_for_chiplet(chip, chiplet, 0xF0001, - ~(group_mask | PPC_BITMASK(16,23)), - PPC_BITMASK(19,21)); + if ((read_scom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) + scom_and_or_for_chiplet(chip, chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); - if ((read_rscom_for_chiplet(chip, chiplet, 0xF0002) & group_mask) == group_mask) - rscom_and_or_for_chiplet(chip, chiplet, 0xF0002, - ~(group_mask | PPC_BITMASK(16,23)), - PPC_BIT(5) | PPC_BITMASK(19,21)); + if ((read_scom_for_chiplet(chip, chiplet, 0xF0002) & group_mask) == group_mask) + scom_and_or_for_chiplet(chip, chiplet, 0xF0002, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BIT(5) | PPC_BITMASK(19,21)); } for (int i = 0; i < MAX_QUADS_PER_CHIP; i++) { @@ -2420,14 +2420,14 @@ static void istep_15_3(uint8_t chip, uint64_t cores) if (!IS_EQ_FUNCTIONAL(i, cores)) continue; - if ((read_rscom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) - rscom_and_or_for_chiplet(chip, chiplet, 0xF0001, - ~(group_mask | PPC_BITMASK(16,23)), - PPC_BITMASK(19,21)); + if ((read_scom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) + scom_and_or_for_chiplet(chip, chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); } /* Writing OCC CCSR */ - write_rscom(chip, 0x0006C090, cores); + write_scom(chip, 0x0006C090, cores); /* Writing OCC QCSR */ uint64_t qcsr = 0; @@ -2435,15 +2435,15 @@ static void istep_15_3(uint8_t chip, uint64_t cores) if (IS_EX_FUNCTIONAL(i, cores)) qcsr |= PPC_BIT(i); } - write_rscom(chip, 0x0006C094, qcsr); + write_scom(chip, 0x0006C094, qcsr); if (chip != 0) { /* * PU_OCB_OCI_QSSR_SCOM2 (OR) * Start no CMEs on slave CPUs (set bit implies stop state). 
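/*
 * For reference, the HOMER and OCC-common BAR masks programmed in
 * istep_15_2() above reduce to plain MiB-granular values (assuming
 * MiB == 1ULL << 20):
 *
 *	(4 * MiB - 1) & ~((uint64_t)MiB - 1) == 0x003FFFFF & ~0x000FFFFF == 0x00300000
 *	(8 * MiB - 1) & ~((uint64_t)MiB - 1) == 0x007FFFFF & ~0x000FFFFF == 0x00700000
 *
 * which matches the 4 MiB HOMER image and the 8 MiB OCC common area.
 */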
*/ - write_rscom(chip, 0x0006C09A, PPC_BITMASK(0, 11) | /* CMEs */ - PPC_BITMASK(14, 19) /* EQs */); + write_scom(chip, 0x0006C09A, PPC_BITMASK(0, 11) | /* CMEs */ + PPC_BITMASK(14, 19) /* EQs */); } } @@ -2464,8 +2464,8 @@ static void istep_15_4(uint8_t chip, uint64_t cores) [all] 0 [2] CPPM_CPMMR_RESERVED_2 = 1 */ - write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, - PPC_BIT(2)); + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(2)); /* TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFDLY // 0x200F011B @@ -2473,16 +2473,16 @@ static void istep_15_4(uint8_t chip, uint64_t cores) [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded [4-7] PPM_PFDLY_POWUP_DLY = 0x9 */ - write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011B, - PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011B, + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); /* TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFOF // 0x200F011D [all] 0 [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ - write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011D, - PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011D, + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); } if ((i % 4) == 0 && IS_EQ_FUNCTIONAL(i/4, cores)) { @@ -2492,16 +2492,16 @@ static void istep_15_4(uint8_t chip, uint64_t cores) [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded [4-7] PPM_PFDLY_POWUP_DLY = 0x9 */ - write_rscom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011B, - PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); + write_scom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011B, + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); /* TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFOF // 0x100F011D [all] 0 [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 */ - write_rscom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011D, - PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); + write_scom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011D, + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); } } @@ -2516,7 +2516,7 @@ static void istep_15_4(uint8_t chip, uint64_t cores) /* TP.TPCHIP.TPC.ITR.FMU.KVREF_AND_VMEAS_MODE_STATUS_REG // 0x01020007 if ([16] == 0): die() */ - if (!(read_rscom(chip, 0x01020007) & PPC_BIT(16))) + if (!(read_scom(chip, 0x01020007) & PPC_BIT(16))) die("VDMs/IVRM are enabled but necessary VREF calibration failed\n"); /* First mask bit 7 in OIMR and then clear bit 7 in OISR @@ -2527,8 +2527,8 @@ static void istep_15_4(uint8_t chip, uint64_t cores) [all] 0 [7] OCB_OCI_OISR0_GPE2_ERROR = 1 */ - write_rscom(chip, 0x0006C006, PPC_BIT(7)); - write_rscom(chip, 0x0006C001, PPC_BIT(7)); + write_scom(chip, 0x0006C006, PPC_BIT(7)); + write_scom(chip, 0x0006C001, PPC_BIT(7)); /* * Setup the SGPE Timer Selects @@ -2539,14 +2539,14 @@ static void istep_15_4(uint8_t chip, uint64_t cores) [0-3] GPETSEL_FIT_SEL = 0x1 // FIT - fixed interval timer [4-7] GPETSEL_WATCHDOG_SEL = 0xA */ - write_rscom(chip, 0x00066000, PPC_PLACE(0x1, 0, 4) | PPC_PLACE(0xA, 4, 4)); + write_scom(chip, 0x00066000, PPC_PLACE(0x1, 0, 4) | PPC_PLACE(0xA, 4, 4)); /* Clear error injection bits *0x0006C18B // Undocumented, PU_OCB_OCI_OCCFLG2_CLEAR [all] 0 [30] 1 // OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ */ - write_rscom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); + write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); } /* diff --git a/src/soc/ibm/power9/i2c.c 
b/src/soc/ibm/power9/i2c.c index 5c451c0459e..af214fb273b 100644 --- a/src/soc/ibm/power9/i2c.c +++ b/src/soc/ibm/power9/i2c.c @@ -102,7 +102,7 @@ void get_spd_i2c(uint8_t bus, struct spd_block *blk) static void write_i2c(enum i2c_type type, uint64_t addr, uint64_t data) { if (type != FSI_I2C) - write_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr, data); + write_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr, data); else write_fsi_i2c(/*chip=*/1, addr, data >> 32, /*size=*/4); } @@ -110,7 +110,7 @@ static void write_i2c(enum i2c_type type, uint64_t addr, uint64_t data) static uint64_t read_i2c(enum i2c_type type, uint64_t addr) { if (type != FSI_I2C) - return read_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr); + return read_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr); else return (uint64_t)read_fsi_i2c(/*chip=*/1, addr, /*size=*/4) << 32; } @@ -118,7 +118,7 @@ static uint64_t read_i2c(enum i2c_type type, uint64_t addr) static void write_i2c_byte(enum i2c_type type, uint64_t addr, uint8_t data) { if (type != FSI_I2C) - write_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr, (uint64_t)data << 56); + write_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr, (uint64_t)data << 56); else write_fsi_i2c(/*chip=*/1, addr, (uint32_t)data << 24, /*size=*/1); } @@ -126,7 +126,7 @@ static void write_i2c_byte(enum i2c_type type, uint64_t addr, uint8_t data) static uint8_t read_i2c_byte(enum i2c_type type, uint64_t addr) { if (type != FSI_I2C) - return read_rscom(type == HOST_I2C_CPU0 ? 0 : 1, addr) >> 56; + return read_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr) >> 56; else return read_fsi_i2c(/*chip=*/1, addr, /*size=*/1) >> 24; } diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c index d39b6fa05a4..5ecedefe1e1 100644 --- a/src/soc/ibm/power9/istep_10_1.c +++ b/src/soc/ibm/power9/istep_10_1.c @@ -770,7 +770,7 @@ void istep_10_1(uint8_t chips) switch_secondary_scom_to_xscom(); /* Sanity check that XSCOM works for the second CPU */ - if (read_rscom(1, 0xF000F) == 0xFFFFFFFFFFFFFFFF) + if (read_scom(1, 0xF000F) == 0xFFFFFFFFFFFFFFFF) die("XSCOM doesn't work for the second CPU\n"); fsi_reset_pib2opb(/*chip=*/1); diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c index db696048437..d671bff94bb 100644 --- a/src/soc/ibm/power9/istep_10_10.c +++ b/src/soc/ibm/power9/istep_10_10.c @@ -398,7 +398,7 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, PEC0_IOP_CONFIG_START_BIT, PEC0_IOP_BIT_COUNT * 2, PEC1_IOP_CONFIG_START_BIT, PEC1_IOP_BIT_COUNT * 2, PEC2_IOP_CONFIG_START_BIT, PEC2_IOP_BIT_COUNT * 2); - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); /* Phase1 init step 2b */ @@ -406,7 +406,7 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, PEC0_IOP_SWAP_START_BIT, PEC0_IOP_BIT_COUNT, PEC1_IOP_SWAP_START_BIT, PEC1_IOP_BIT_COUNT, PEC2_IOP_SWAP_START_BIT, PEC2_IOP_BIT_COUNT); - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); /* Phase1 init step 3a */ @@ -422,23 +422,23 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, val |= PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK1_BIT); } - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); /* Phase1 init step 3b (enable clock) */ /* ATTR_PROC_PCIE_REFCLOCK_ENABLE, all PECs are enabled. 
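/*
 * The phase1() PCIe PHY programming in this file follows one read-modify-write
 * convention: scom_and_or_for_chiplet() clears a field by ANDing with the
 * complement of its PPC_BITMASK() and then ORs in the new value. As a worked
 * example (values taken from the PCLCK control steps further below):
 *
 *	scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG,
 *				~PPC_BITMASK(56, 63), 0xF8);
 *
 * behaves like:
 *
 *	uint64_t v = read_scom_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG);
 *	v = (v & ~PPC_BITMASK(56, 63)) | 0xF8;
 *	write_scom_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, v);
 *
 * i.e. only MSB-0 bits 56-63 (the low byte) change; everything else is kept.
 */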
*/ - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CTRL0_OR, - PPC_BIT(PEC_IOP_REFCLOCK_ENABLE_START_BIT)); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CTRL0_OR, + PPC_BIT(PEC_IOP_REFCLOCK_ENABLE_START_BIT)); /* Phase1 init step 4 (PMA reset) */ - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, - PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); udelay(1); /* at least 400ns */ - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, - PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); udelay(1); /* at least 400ns */ - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, - PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); /* * Poll for PRTREADY status on PLLA and PLLB: @@ -447,22 +447,22 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, * PEC_IOP_HSS_PORT_READY_START_BIT = 58 */ time = wait_us(40, - (read_rscom_for_chiplet(chip, chiplet, 0x800005010D010C3F) & PPC_BIT(58)) || - (read_rscom_for_chiplet(chip, chiplet, 0x800005410D010C3F) & PPC_BIT(58))); + (read_scom_for_chiplet(chip, chiplet, 0x800005010D010C3F) & PPC_BIT(58)) || + (read_scom_for_chiplet(chip, chiplet, 0x800005410D010C3F) & PPC_BIT(58))); if (!time) die("IOP HSS Port Ready status is not set!"); /* Phase1 init step 5 (Set IOP FIR action0) */ - write_rscom_for_chiplet(chip, chiplet, PEC_FIR_ACTION0_REG, - PCI_IOP_FIR_ACTION0_REG); + write_scom_for_chiplet(chip, chiplet, PEC_FIR_ACTION0_REG, + PCI_IOP_FIR_ACTION0_REG); /* Phase1 init step 6 (Set IOP FIR action1) */ - write_rscom_for_chiplet(chip, chiplet, PEC_FIR_ACTION1_REG, - PCI_IOP_FIR_ACTION1_REG); + write_scom_for_chiplet(chip, chiplet, PEC_FIR_ACTION1_REG, + PCI_IOP_FIR_ACTION1_REG); /* Phase1 init step 7 (Set IOP FIR mask) */ - write_rscom_for_chiplet(chip, chiplet, PEC_FIR_MASK_REG, - PCI_IOP_FIR_MASK_REG); + write_scom_for_chiplet(chip, chiplet, PEC_FIR_MASK_REG, + PCI_IOP_FIR_MASK_REG); /* Phase1 init step 8-11 (Config 0 - 3) */ @@ -470,32 +470,32 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, uint8_t lane; /* RX Config Mode */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, - pcs_config_mode[i]); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, + pcs_config_mode[i]); /* RX CDR GAIN */ - rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_CDR_GAIN_REG, - ~PPC_BITMASK(56, 63), - pcs_cdr_gain[i]); + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_CDR_GAIN_REG, + ~PPC_BITMASK(56, 63), + pcs_cdr_gain[i]); for (lane = 0; lane < NUM_PCIE_LANES; ++lane) { /* RX INITGAIN */ - rscom_and_or_for_chiplet(chip, chiplet, - RX_VGA_CTRL3_REGISTER[lane], - ~PPC_BITMASK(48, 52), - PPC_PLACE(pcs_init_gain, 48, 5)); + scom_and_or_for_chiplet(chip, chiplet, + RX_VGA_CTRL3_REGISTER[lane], + ~PPC_BITMASK(48, 52), + PPC_PLACE(pcs_init_gain, 48, 5)); /* RX PKINIT */ - rscom_and_or_for_chiplet(chip, chiplet, - RX_LOFF_CNTL_REGISTER[lane], - ~PPC_BITMASK(58, 63), - pcs_pk_init); + scom_and_or_for_chiplet(chip, chiplet, + RX_LOFF_CNTL_REGISTER[lane], + ~PPC_BITMASK(58, 63), + pcs_pk_init); } /* RX SIGDET LVL */ - rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_SIGDET_CONTROL_REG, - ~PPC_BITMASK(59, 63), - pcs_sigdet_lvl); + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_SIGDET_CONTROL_REG, + 
~PPC_BITMASK(59, 63), + pcs_sigdet_lvl); } /* @@ -507,43 +507,43 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, * - ATTR_PROC_PCIE_PCS_RX_ROT_EXTEL (59) * - ATTR_PROC_PCIE_PCS_RX_ROT_RST_FW (62) */ - rscom_and_for_chiplet(chip, chiplet, PEC_PCS_RX_ROT_CNTL_REG, - ~(PPC_BIT(55) | PPC_BIT(63) | PPC_BIT(59) | PPC_BIT(62))); + scom_and_for_chiplet(chip, chiplet, PEC_PCS_RX_ROT_CNTL_REG, + ~(PPC_BIT(55) | PPC_BIT(63) | PPC_BIT(59) | PPC_BIT(62))); /* Phase1 init step 13 (RX Config Mode Enable External Config Control) */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, 0x8600); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, 0x8600); /* Phase1 init step 14 (PCLCK Control Register - PLLA) */ /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLA = 0xF8 */ - rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, - ~PPC_BITMASK(56, 63), - 0xF8); + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, + ~PPC_BITMASK(56, 63), + 0xF8); /* Phase1 init step 15 (PCLCK Control Register - PLLB) */ /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLB = 0xF8 */ - rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLB_REG, - ~PPC_BITMASK(56, 63), - 0xF8); + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLB_REG, + ~PPC_BITMASK(56, 63), + 0xF8); /* Phase1 init step 16 (TX DCLCK Rotator Override) */ /* ATTR_PROC_PCIE_PCS_TX_DCLCK_ROT = 0x0022 */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_TX_DCLCK_ROTATOR_REG, 0x0022); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_TX_DCLCK_ROTATOR_REG, 0x0022); /* Phase1 init step 17 (TX PCIe Receiver Detect Control Register 1) */ /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG1 = 0xAA7A */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG, - 0xAA7A); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG, + 0xAA7A); /* Phase1 init step 18 (TX PCIe Receiver Detect Control Register 2) */ /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG2 = 0x2000 */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG, - 0x2000); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG, + 0x2000); /* Phase1 init step 19 (TX Power Sequence Enable) */ /* ATTR_PROC_PCIE_PCS_TX_POWER_SEQ_ENABLE = 0xFF, but field is 7 bits */ - rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, - ~PPC_BITMASK(56, 62), - PPC_PLACE(0x7F, 56, 7)); + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, + ~PPC_BITMASK(56, 62), + PPC_PLACE(0x7F, 56, 7)); /* Phase1 init step 20 (RX VGA Control Register 1) */ @@ -552,21 +552,21 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, /* ATTR_CHIP_EC_FEATURE_HW414759 = 0, so not setting PEC_SCOM0X0B_EDMOD */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL1_REG, val); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL1_REG, val); /* Phase1 init step 21 (RX VGA Control Register 2) */ /* ATTR_PROC_PCIE_PCS_RX_VGA_CNTL_REG2 = 0 */ - write_rscom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL2_REG, 0); + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL2_REG, 0); /* Phase1 init step 22 (RX DFE Func Control Register 1) */ /* ATTR_PROC_PCIE_PCS_RX_DFE_FDDC = 1 */ - rscom_or_for_chiplet(chip, chiplet, PEC_IOP_RX_DFE_FUNC_REGISTER1, PPC_BIT(50)); + scom_or_for_chiplet(chip, chiplet, PEC_IOP_RX_DFE_FUNC_REGISTER1, PPC_BIT(50)); /* Phase1 init step 23 (PCS System Control) */ /* 
ATTR_PROC_PCIE_PCS_SYSTEM_CNTL computed above */ - rscom_and_or_for_chiplet(chip, chiplet, PEC_PCS_SYS_CONTROL_REG, - ~PPC_BITMASK(55, 63), - pec_cfgs[pec]->phb_to_pcie_mac); + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_SYS_CONTROL_REG, + ~PPC_BITMASK(55, 63), + pec_cfgs[pec]->phb_to_pcie_mac); /* * All values in ATTR_PROC_PCIE_PCS_M_CNTL are 0. @@ -575,24 +575,24 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, */ /* Phase1 init step 24 (PCS M1 Control) */ - rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M1_CONTROL_REG, - ~PPC_BITMASK(55, 63)); + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M1_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Phase1 init step 25 (PCS M2 Control) */ - rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M2_CONTROL_REG, - ~PPC_BITMASK(55, 63)); + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M2_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Phase1 init step 26 (PCS M3 Control) */ - rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M3_CONTROL_REG, - ~PPC_BITMASK(55, 63)); + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M3_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Phase1 init step 27 (PCS M4 Control) */ - rscom_and_for_chiplet(chip, chiplet, PEC_PCS_M4_CONTROL_REG, - ~PPC_BITMASK(55, 63)); + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M4_CONTROL_REG, + ~PPC_BITMASK(55, 63)); /* Delay a minimum of 200ns to allow prior SCOM programming to take effect */ udelay(1); /* Phase1 init step 28 */ - write_rscom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, - PPC_BIT(PEC_IOP_PIPE_RESET_START_BIT)); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PIPE_RESET_START_BIT)); /* * Delay a minimum of 300ns for reset to complete. diff --git a/src/soc/ibm/power9/istep_10_12.c b/src/soc/ibm/power9/istep_10_12.c index 84f0f49e2ea..dfdae731e04 100644 --- a/src/soc/ibm/power9/istep_10_12.c +++ b/src/soc/ibm/power9/istep_10_12.c @@ -22,13 +22,13 @@ static void enable_ridi(uint8_t chip) chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ - if (read_rscom_for_chiplet(chip, chiplet, PERV_NET_CTRL0) & PPC_BIT(0)) { + if (read_scom_for_chiplet(chip, chiplet, PERV_NET_CTRL0) & PPC_BIT(0)) { /* Enable Receivers, Drivers DI1 & DI2 */ uint64_t val = 0; val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 - write_rscom_for_chiplet(chip, chiplet, PERV_NET_CTRL0_WOR, val); + write_scom_for_chiplet(chip, chiplet, PERV_NET_CTRL0_WOR, val); } } } diff --git a/src/soc/ibm/power9/istep_10_13.c b/src/soc/ibm/power9/istep_10_13.c index 7a923888e5d..d8909a3bcd1 100644 --- a/src/soc/ibm/power9/istep_10_13.c +++ b/src/soc/ibm/power9/istep_10_13.c @@ -22,7 +22,7 @@ static void host_rng_bist(uint8_t chip) /* PU_NX_RNG_CFG [44] COND_STARTUP_TEST_FAIL */ - if (read_rscom_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0) & PPC_BIT(44)) + if (read_scom_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0) & PPC_BIT(44)) die("RNG Conditioner startup test failed\n"); /* PU_NX_RNG_ST0 @@ -34,20 +34,20 @@ static void host_rng_bist(uint8_t chip) [36-47] ADAPTEST_CRN_RNG0_MATCH_TH = 0x32 (50; Assuming H = 5) [48-59] ADAPTEST_CRN_RNG1_MATCH_TH = 0x32 (50; Assuming H = 5) */ - rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E1, - ~(PPC_BITMASK(0, 1) | PPC_BITMASK(7, 63)), - PPC_PLACE(1, 0, 2) | PPC_PLACE(2, 7, 2) | PPC_PLACE(1, 9, 3) - | PPC_PLACE(0x32, 12, 12) | PPC_PLACE(0x32, 24, 12) - | PPC_PLACE(0x32, 36, 12) | PPC_PLACE(0x32, 48, 12)); + 
scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E1, + ~(PPC_BITMASK(0, 1) | PPC_BITMASK(7, 63)), + PPC_PLACE(1, 0, 2) | PPC_PLACE(2, 7, 2) | PPC_PLACE(1, 9, 3) + | PPC_PLACE(0x32, 12, 12) | PPC_PLACE(0x32, 24, 12) + | PPC_PLACE(0x32, 36, 12) | PPC_PLACE(0x32, 48, 12)); /* PU_NX_RNG_ST1 [0-6] ADAPTEST_SOFT_FAIL_TH = 2 [7-22] ADAPTEST_1BIT_MATCH_TH_MIN = 100 [23-38] ADAPTEST_1BIT_MATCH_TH_MAX = 415 */ - rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), - PPC_PLACE(2, 0, 7) | PPC_PLACE(100, 7, 16) - | PPC_PLACE(415, 23, 16)); + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), + PPC_PLACE(2, 0, 7) | PPC_PLACE(100, 7, 16) + | PPC_PLACE(415, 23, 16)); /* PU_NX_RNG_ST3 [0] SAMPTEST_RRN_ENABLE = 1 @@ -55,16 +55,16 @@ static void host_rng_bist(uint8_t chip) [4-19] SAMPTEST_MATCH_TH_MIN = 0x6D60 (28,000) [20-35] SAMPTEST_MATCH_TH_MAX = 0x988A (39,050) */ - rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), - PPC_BIT(0) | PPC_PLACE(7, 1, 3) | PPC_PLACE(0x6D60, 4, 16) - | PPC_PLACE(0x988A, 20, 16)); + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), + PPC_BIT(0) | PPC_PLACE(7, 1, 3) | PPC_PLACE(0x6D60, 4, 16) + | PPC_PLACE(0x988A, 20, 16)); /* PU_NX_RNG_RDELAY [6] LFSR_RESEED_EN = 1 [7-11] READ_RTY_RATIO = 0x1D (1/16) */ - rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), - PPC_BIT(6) | PPC_PLACE(0x1D, 7, 5)); + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), + PPC_BIT(6) | PPC_PLACE(0x1D, 7, 5)); /* PU_NX_RNG_CFG [30-37] ST2_RESET_PERIOD = 0x1B @@ -76,12 +76,12 @@ static void host_rng_bist(uint8_t chip) [46-61] PACE_RATE = 0x07D0 (2000) [63] ENABLE = 1 */ - rscom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0, - ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) - | PPC_BITMASK(46, 61) | PPC_BIT(63)), - PPC_PLACE(0x1B, 30, 8) | PPC_BIT(40) | PPC_BIT(41) - | PPC_BIT(42) | PPC_BIT(43) | PPC_PLACE(0x07D0, 46, 16) - | PPC_BIT(63)); + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0, + ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) + | PPC_BITMASK(46, 61) | PPC_BIT(63)), + PPC_PLACE(0x1B, 30, 8) | PPC_BIT(40) | PPC_BIT(41) + | PPC_BIT(42) | PPC_BIT(43) | PPC_PLACE(0x07D0, 46, 16) + | PPC_BIT(63)); } void istep_10_13(uint8_t chips) diff --git a/src/soc/ibm/power9/istep_10_6.c b/src/soc/ibm/power9/istep_10_6.c index 9c43f1dddb6..4458e8faf40 100644 --- a/src/soc/ibm/power9/istep_10_6.c +++ b/src/soc/ibm/power9/istep_10_6.c @@ -13,7 +13,7 @@ static void mcs_scom(uint8_t chip, chiplet_id_t chiplet) uint64_t data; { - data = read_rscom_for_chiplet(chip, chiplet, 0x5010810); + data = read_scom_for_chiplet(chip, chiplet, 0x5010810); PPC_INSERT(data, 25, 32, 7); PPC_INSERT(data, 0x7, 46, 4); @@ -24,11 +24,11 @@ static void mcs_scom(uint8_t chip, chiplet_id_t chiplet) /* MC01_PBI01_SCOMFIR_MCPERF1_ENABLE_PREFETCH_PROMOTE_ON */ data |= PPC_BIT(63); - write_rscom_for_chiplet(chip, chiplet, 0x5010810, data); + write_scom_for_chiplet(chip, chiplet, 0x5010810, data); } { - data = read_rscom_for_chiplet(chip, chiplet, 0x5010811); + data = read_scom_for_chiplet(chip, chiplet, 0x5010811); /* MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_CENTAUR_SYNC_ON */ data |= PPC_BIT(20); @@ -45,24 +45,24 @@ static void mcs_scom(uint8_t chip, chiplet_id_t chiplet) /* MC01_PBI01_SCOMFIR_MCMODE0_FORCE_COMMANDLIST_VALID_ON */ data |= PPC_BIT(17); - write_rscom_for_chiplet(chip, chiplet, 0x5010811, data); + write_scom_for_chiplet(chip, chiplet, 0x5010811, data); } { - data = 
read_rscom_for_chiplet(chip, chiplet, 0x5010812); + data = read_scom_for_chiplet(chip, chiplet, 0x5010812); /* MC01_PBI01_SCOMFIR_MCMODE1_DISABLE_FP_M_BIT_ON */ data |= PPC_BIT(10); PPC_INSERT(data, 0x40, 33, 19); - write_rscom_for_chiplet(chip, chiplet, 0x5010812, data); + write_scom_for_chiplet(chip, chiplet, 0x5010812, data); } { - data = read_rscom_for_chiplet(chip, chiplet, 0x5010813); + data = read_scom_for_chiplet(chip, chiplet, 0x5010813); PPC_INSERT(data, 0x8, 24, 16); - write_rscom_for_chiplet(chip, chiplet, 0x5010813, data); + write_scom_for_chiplet(chip, chiplet, 0x5010813, data); } { - data = read_rscom_for_chiplet(chip, chiplet, 0x501081B); + data = read_scom_for_chiplet(chip, chiplet, 0x501081B); /* MC01_PBI01_SCOMFIR_MCTO_SELECT_PB_HANG_PULSE_ON */ data |= PPC_BIT(0); @@ -76,7 +76,7 @@ static void mcs_scom(uint8_t chip, chiplet_id_t chiplet) PPC_INSERT(data, 0x1, 24, 8); PPC_INSERT(data, 0x7, 5, 3); - write_rscom_for_chiplet(chip, chiplet, 0x501081B, data); + write_scom_for_chiplet(chip, chiplet, 0x501081B, data); } } @@ -85,20 +85,20 @@ static void fbc_ioo_tl_scom(uint8_t chip) uint64_t data; /* PB_IOO_SCOM_A0_MODE_BLOCKED */ - rscom_or(chip, 0x501380A, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + scom_or(chip, 0x501380A, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); /* PB_IOO_SCOM_A1_MODE_BLOCKED */ - rscom_or(chip, 0x501380B, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + scom_or(chip, 0x501380B, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); /* PB_IOO_SCOM_A2_MODE_BLOCKED */ - rscom_or(chip, 0x501380C, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + scom_or(chip, 0x501380C, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); /* PB_IOO_SCOM_A3_MODE_BLOCKED */ - rscom_or(chip, 0x501380D, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + scom_or(chip, 0x501380D, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); /* 0x5013810, 0x5013811, 0x5013812 and 0x5013813 are not modified */ - data = read_rscom(chip, 0x5013823); + data = read_scom(chip, 0x5013823); data &= ~PPC_BIT(0); // PB_IOO_SCOM_PB_CFG_IOO01_IS_LOGICAL_PAIR_OFF data &= ~PPC_BIT(1); // PB_IOO_SCOM_PB_CFG_IOO23_IS_LOGICAL_PAIR_OFF @@ -109,7 +109,7 @@ static void fbc_ioo_tl_scom(uint8_t chip) data &= ~PPC_BIT(10); // PB_IOO_SCOM_LINKS45_TOD_ENABLE_OFF data &= ~PPC_BIT(11); // PB_IOO_SCOM_LINKS67_TOD_ENABLE_OFF - write_rscom(chip, 0x5013823, data); + write_scom(chip, 0x5013823, data); /* 0x5013824 is not modified */ } @@ -119,7 +119,7 @@ static void nx_scom(uint8_t chip, uint8_t dd) uint64_t data; { - data = read_rscom(chip, 0x2011041); + data = read_scom(chip, 0x2011041); data |= PPC_BIT(63); // NX_DMA_CH0_EFT_ENABLE_ON data |= PPC_BIT(62); // NX_DMA_CH1_EFT_ENABLE_ON @@ -127,10 +127,10 @@ static void nx_scom(uint8_t chip, uint8_t dd) data |= PPC_BIT(57); // NX_DMA_CH3_SYM_ENABLE_ON data |= PPC_BIT(61); // NX_DMA_CH4_GZIP_ENABLE_ON - write_rscom(chip, 0x2011041, data); + write_scom(chip, 0x2011041, data); } { - data = read_rscom(chip, 0x2011042); + data = read_scom(chip, 0x2011042); PPC_INSERT(data, 0xF, 8, 4); // NX_DMA_GZIPCOMP_MAX_INRD_MAX_15_INRD PPC_INSERT(data, 0xF, 12, 4); // NX_DMA_GZIPDECOMP_MAX_INRD_MAX_15_INRD @@ -144,10 +144,10 @@ static void nx_scom(uint8_t chip, uint8_t dd) data |= PPC_BIT(17); // NX_DMA_GZIP_DECOMP_PREFETCH_ENABLE_ON data &= ~PPC_BIT(56); // NX_DMA_EFT_SPBC_WRITE_ENABLE_OFF - write_rscom(chip, 0x2011042, data); + write_scom(chip, 0x2011042, data); } { - data = read_rscom(chip, 0x201105C); + data = read_scom(chip, 
0x201105C); PPC_INSERT(data, 0x9, 1, 4); // NX_DMA_CH0_WATCHDOG_REF_DIV_DIVIDE_BY_512 PPC_INSERT(data, 0x9, 6, 4); // NX_DMA_CH1_WATCHDOG_REF_DIV_DIVIDE_BY_512 @@ -163,10 +163,10 @@ static void nx_scom(uint8_t chip, uint8_t dd) data |= PPC_BIT(20); // NX_DMA_CH4_WATCHDOG_TIMER_ENBL_ON data |= PPC_BIT(25); // NX_DMA_DMA_HANG_TIMER_ENBL_ON - write_rscom(chip, 0x201105C, data); + write_scom(chip, 0x201105C, data); } { - data = read_rscom(chip, 0x2011087); + data = read_scom(chip, 0x2011087); data &= ~0x93EFDFFF3FF00000; data |= 0x48102000C0000000; @@ -176,10 +176,10 @@ static void nx_scom(uint8_t chip, uint8_t dd) else data |= 0x2400000000000000; - write_rscom(chip, 0x2011087, data); + write_scom(chip, 0x2011087, data); } { - data = read_rscom(chip, 0x2011095); + data = read_scom(chip, 0x2011095); data |= PPC_BIT(24); // NX_PBI_CQ_WRAP_NXCQ_SCOM_SKIP_G_ON data |= PPC_BIT(1); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_WR_DISABLE_GROUP_ON @@ -199,18 +199,18 @@ static void nx_scom(uint8_t chip, uint8_t dd) PPC_INSERT(data, 0xFC, 40, 8); PPC_INSERT(data, 0xFC, 48, 8); - write_rscom(chip, 0x2011095, data); + write_scom(chip, 0x2011095, data); } { - data = read_rscom(chip, 0x20110D6); + data = read_scom(chip, 0x20110D6); PPC_INSERT(data, 0x2, 9, 3); data |= PPC_BIT(6); // NX_PBI_DISABLE_PROMOTE_ON - write_rscom(chip, 0x20110D6, data); + write_scom(chip, 0x20110D6, data); } { - data = read_rscom(chip, 0x2011107); + data = read_scom(chip, 0x2011107); data &= ~0xF0839FFFC2FFC000; data |= 0x0A7400003D000000; @@ -220,44 +220,44 @@ static void nx_scom(uint8_t chip, uint8_t dd) else data |= 0x0508600000000000; - write_rscom(chip, 0x2011107, data); + write_scom(chip, 0x2011107, data); } - rscom_and_or(chip, 0x2011083, ~0xEEF8FF9CFD000000, 0x1107006302F00000); - rscom_and(chip, 0x2011086, ~0xFFFFFFFFFFF00000); - rscom_and_or(chip, 0x20110A8, ~0x0FFFF00000000000, 0x0888800000000000); - rscom_and_or(chip, 0x20110C3, ~0x0000001F00000000, 0x0000000080000000); - rscom_and_or(chip, 0x20110C4, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); - rscom_and_or(chip, 0x20110C5, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); - rscom_or(chip, 0x20110D5, PPC_BIT(1)); // NX_PBI_PBI_UMAC_CRB_READS_ENBL_ON - rscom_and_or(chip, 0x2011103, ~0xCF7DEF81BF003000, 0x3082107E40FFC000); - rscom_and(chip, 0x2011106, ~0xFFFFFFFFFFFFC000); + scom_and_or(chip, 0x2011083, ~0xEEF8FF9CFD000000, 0x1107006302F00000); + scom_and(chip, 0x2011086, ~0xFFFFFFFFFFF00000); + scom_and_or(chip, 0x20110A8, ~0x0FFFF00000000000, 0x0888800000000000); + scom_and_or(chip, 0x20110C3, ~0x0000001F00000000, 0x0000000080000000); + scom_and_or(chip, 0x20110C4, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); + scom_and_or(chip, 0x20110C5, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); + scom_or(chip, 0x20110D5, PPC_BIT(1)); // NX_PBI_PBI_UMAC_CRB_READS_ENBL_ON + scom_and_or(chip, 0x2011103, ~0xCF7DEF81BF003000, 0x3082107E40FFC000); + scom_and(chip, 0x2011106, ~0xFFFFFFFFFFFFC000); } static void cxa_scom(uint8_t chip, uint8_t dd) { uint64_t data; - data = read_rscom(chip, 0x2010803); + data = read_scom(chip, 0x2010803); data &= ~PPC_BITMASK(0, 52); data |= (dd == 0x20 ? 
0x801B1F98C8717000 : 0x801B1F98D8717000); - write_rscom(chip, 0x2010803, data); + write_scom(chip, 0x2010803, data); - data = read_rscom(chip, 0x2010818); + data = read_scom(chip, 0x2010818); data &= ~PPC_BIT(1); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_ADR_BAR_MODE_OFF data |= PPC_BIT(6); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_SKIP_G_ON data &= ~PPC_BITMASK(21, 24); // ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID data &= ~PPC_BITMASK(25, 27); // ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID data |= PPC_BIT(4); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_DISABLE_G_ON data |= PPC_BIT(3); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_DISABLE_VG_NOT_SYS_ON - write_rscom(chip, 0x2010818, data); - - rscom_and(chip, 0x2010806, ~PPC_BITMASK(0, 52)); - rscom_or(chip, 0x2010807, PPC_BIT(2) | PPC_BIT(8) | PPC_BIT(34) | PPC_BIT(44)); - rscom_and(chip, 0x2010819, ~PPC_BITMASK(4, 7)); - rscom_and_or(chip, 0x201081B, - ~PPC_BITMASK(45, 51), PPC_PLACE(0x7, 45, 3) | PPC_PLACE(0x2, 48, 4)); - rscom_and_or(chip, 0x201081C, ~PPC_BITMASK(18, 21), PPC_PLACE(0x1, 18, 4)); + write_scom(chip, 0x2010818, data); + + scom_and(chip, 0x2010806, ~PPC_BITMASK(0, 52)); + scom_or(chip, 0x2010807, PPC_BIT(2) | PPC_BIT(8) | PPC_BIT(34) | PPC_BIT(44)); + scom_and(chip, 0x2010819, ~PPC_BITMASK(4, 7)); + scom_and_or(chip, 0x201081B, + ~PPC_BITMASK(45, 51), PPC_PLACE(0x7, 45, 3) | PPC_PLACE(0x2, 48, 4)); + scom_and_or(chip, 0x201081C, ~PPC_BITMASK(18, 21), PPC_PLACE(0x1, 18, 4)); } static void int_scom(uint8_t chip, uint8_t dd) @@ -268,38 +268,38 @@ static void int_scom(uint8_t chip, uint8_t dd) * [5-8] ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID * [9-11] ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID */ - rscom_and_or(chip, 0x501300A, ~(PPC_BITMASK(0, 1) | PPC_BITMASK(5, 11)), PPC_BIT(1)); + scom_and_or(chip, 0x501300A, ~(PPC_BITMASK(0, 1) | PPC_BITMASK(5, 11)), PPC_BIT(1)); - rscom_or(chip, 0x5013021, - PPC_BIT(46) | // INT_CQ_PBO_CTL_DISABLE_VG_NOT_SYS_ON - PPC_BIT(47) | // INT_CQ_PBO_CTL_DISABLE_G_ON - PPC_BIT(49)); + scom_or(chip, 0x5013021, + PPC_BIT(46) | // INT_CQ_PBO_CTL_DISABLE_VG_NOT_SYS_ON + PPC_BIT(47) | // INT_CQ_PBO_CTL_DISABLE_G_ON + PPC_BIT(49)); if (dd <= 0x20) - write_rscom(chip, 0x5013033, 0x2000005C040281C3); + write_scom(chip, 0x5013033, 0x2000005C040281C3); else - write_rscom(chip, 0x5013033, 0x0000005C040081C3); + write_scom(chip, 0x5013033, 0x0000005C040081C3); - write_rscom(chip, 0x5013036, 0); - write_rscom(chip, 0x5013037, 0x9554021F80110E0C); + write_scom(chip, 0x5013036, 0); + write_scom(chip, 0x5013037, 0x9554021F80110E0C); - rscom_and_or(chip, 0x5013130, - ~(PPC_BITMASK(2, 7) | PPC_BITMASK(10, 15)), - PPC_PLACE(0x18, 2, 6) | PPC_PLACE(0x18, 10, 6)); + scom_and_or(chip, 0x5013130, + ~(PPC_BITMASK(2, 7) | PPC_BITMASK(10, 15)), + PPC_PLACE(0x18, 2, 6) | PPC_PLACE(0x18, 10, 6)); - write_rscom(chip, 0x5013140, 0x050043EF00100020); - write_rscom(chip, 0x5013141, 0xFADFBB8CFFAFFFD7); - write_rscom(chip, 0x5013178, 0x0002000610000000); + write_scom(chip, 0x5013140, 0x050043EF00100020); + write_scom(chip, 0x5013141, 0xFADFBB8CFFAFFFD7); + write_scom(chip, 0x5013178, 0x0002000610000000); - rscom_and_or(chip, 0x501320E, ~PPC_BITMASK(0, 47), PPC_PLACE(0x626222024216, 0, 48)); - rscom_and_or(chip, 0x5013214, ~PPC_BITMASK(16, 31), PPC_PLACE(0x5BBF, 16, 16)); - rscom_and_or(chip, 0x501322B, ~PPC_BITMASK(58, 63), PPC_PLACE(0x18, 58, 6)); + scom_and_or(chip, 0x501320E, ~PPC_BITMASK(0, 47), PPC_PLACE(0x626222024216, 0, 48)); + scom_and_or(chip, 0x5013214, ~PPC_BITMASK(16, 31), PPC_PLACE(0x5BBF, 16, 16)); + scom_and_or(chip, 0x501322B, ~PPC_BITMASK(58, 63), PPC_PLACE(0x18, 58, 6)); if (dd == 
0x20) { - rscom_and_or(chip, 0x5013272, - ~PPC_BITMASK(0, 43), PPC_PLACE(0x0002C018006, 0, 44)); - rscom_and_or(chip, 0x5013273, - ~PPC_BITMASK(0, 43), PPC_PLACE(0xFFFCFFEFFFA, 0, 44)); + scom_and_or(chip, 0x5013272, + ~PPC_BITMASK(0, 43), PPC_PLACE(0x0002C018006, 0, 44)); + scom_and_or(chip, 0x5013273, + ~PPC_BITMASK(0, 43), PPC_PLACE(0xFFFCFFEFFFA, 0, 44)); } } @@ -307,21 +307,21 @@ static void vas_scom(uint8_t chip, uint8_t dd) { uint64_t data; - rscom_and_or(chip, 0x3011803, ~PPC_BITMASK(0, 53), 0x00210102540D7C00); - rscom_and(chip, 0x3011806, ~PPC_BITMASK(0, 53)); + scom_and_or(chip, 0x3011803, ~PPC_BITMASK(0, 53), 0x00210102540D7C00); + scom_and(chip, 0x3011806, ~PPC_BITMASK(0, 53)); - data = read_rscom(chip, 0x3011807); + data = read_scom(chip, 0x3011807); data &= ~PPC_BITMASK(0, 53); data |= (dd == 0x20 ? 0x00DD020180000000 : 0x00DF020180000000); - write_rscom(chip, 0x3011807, data); + write_scom(chip, 0x3011807, data); /* * [0-3] ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID * [4-6] ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID */ - rscom_and(chip, 0x301184D, ~PPC_BITMASK(0, 6)); + scom_and(chip, 0x301184D, ~PPC_BITMASK(0, 6)); - data = read_rscom(chip, 0x301184E); + data = read_scom(chip, 0x301184E); data &= ~PPC_BIT(13); // SOUTH_VA_EG_SCF_ADDR_BAR_MODE_OFF data |= PPC_BIT(14); // SOUTH_VA_EG_SCF_SKIP_G_ON data |= PPC_BIT(1); // SOUTH_VA_EG_SCF_DISABLE_G_WR_ON @@ -330,10 +330,10 @@ static void vas_scom(uint8_t chip, uint8_t dd) data |= PPC_BIT(6); // SOUTH_VA_EG_SCF_DISABLE_VG_RD_ON PPC_INSERT(data, 0xFC, 20, 8); PPC_INSERT(data, 0xFC, 28, 8); - write_rscom(chip, 0x301184E, data); + write_scom(chip, 0x301184E, data); if (dd == 0x20) - rscom_or(chip, 0x301184F, PPC_BIT(0)); + scom_or(chip, 0x301184F, PPC_BIT(0)); } static void chiplet_scominit(uint8_t chip, uint8_t dd) @@ -366,19 +366,19 @@ static void chiplet_scominit(uint8_t chip, uint8_t dd) * present units, and code here will be run to mask resources associated with * non-functional units. 
*/ - if (read_rscom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & + if (read_scom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & PPC_BIT(PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13)) { /* Masking XBUS FIR resources for unused links */ /* XBUS0 FBC TL */ - write_rscom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X0_NF); + write_scom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X0_NF); /* XBUS0 EXTFIR */ - write_rscom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X0_NF); + write_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X0_NF); /* XBUS2 FBC TL */ - write_rscom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X2_NF); + write_scom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X2_NF); /* XBUS2 EXTFIR */ - write_rscom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X2_NF); + write_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X2_NF); } fbc_ioo_tl_scom(chip); @@ -388,22 +388,22 @@ static void chiplet_scominit(uint8_t chip, uint8_t dd) vas_scom(chip, dd); /* Setup NMMU epsilon write cycles */ - rscom_and_or(chip, PU_NMMU_MM_EPSILON_COUNTER_VALUE, - ~(PPC_BITMASK(0, 11) | PPC_BITMASK(16, 27)), - PPC_PLACE(pb_cfg->eps_w[0], 0, 12) | PPC_PLACE(pb_cfg->eps_w[1], 16, 12)); + scom_and_or(chip, PU_NMMU_MM_EPSILON_COUNTER_VALUE, + ~(PPC_BITMASK(0, 11) | PPC_BITMASK(16, 27)), + PPC_PLACE(pb_cfg->eps_w[0], 0, 12) | PPC_PLACE(pb_cfg->eps_w[1], 16, 12)); } static void psi_scom(uint8_t chip) { - rscom_or(chip, 0x4011803, PPC_BITMASK(0, 6)); - rscom_and(chip, 0x4011806, ~PPC_BITMASK(0, 6)); - rscom_and(chip, 0x4011807, ~PPC_BITMASK(0, 6)); + scom_or(chip, 0x4011803, PPC_BITMASK(0, 6)); + scom_and(chip, 0x4011806, ~PPC_BITMASK(0, 6)); + scom_and(chip, 0x4011807, ~PPC_BITMASK(0, 6)); - rscom_and_or(chip, 0x5012903, ~PPC_BITMASK(0, 28), PPC_PLACE(0x7E040DF, 0, 29)); - rscom_and_or(chip, 0x5012906, ~PPC_BITMASK(0, 28), PPC_PLACE(0x0, 0, 29)); - rscom_and_or(chip, 0x5012907, ~PPC_BITMASK(0, 28), PPC_PLACE(0x18050020, 0, 29)); + scom_and_or(chip, 0x5012903, ~PPC_BITMASK(0, 28), PPC_PLACE(0x7E040DF, 0, 29)); + scom_and_or(chip, 0x5012906, ~PPC_BITMASK(0, 28), PPC_PLACE(0x0, 0, 29)); + scom_and_or(chip, 0x5012907, ~PPC_BITMASK(0, 28), PPC_PLACE(0x18050020, 0, 29)); - rscom_and(chip, 0x501290F, ~(PPC_BITMASK(16, 27) | PPC_BITMASK(48, 52))); + scom_and(chip, 0x501290F, ~(PPC_BITMASK(16, 27) | PPC_BITMASK(48, 52))); } void istep_10_6(uint8_t chips) diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index 208a605a353..85bee5f715a 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -420,17 +420,17 @@ static void mss_draminit(uint8_t chip) [8-23] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT = 0xffff [30-31] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT = 3 */ - rscom_and_or_for_chiplet(chip, mcs_ids[mcs_i], CCS_MODEQ, - ~(PPC_BIT(CCS_MODEQ_CCS_STOP_ON_ERR) | - PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), - PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | - PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | - PPC_PLACE(0xFFFF, - CCS_MODEQ_DDR_CAL_TIMEOUT_CNT, - CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN) | - PPC_PLACE(0x3, - CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT, - CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); + scom_and_or_for_chiplet(chip, mcs_ids[mcs_i], CCS_MODEQ, + ~(PPC_BIT(CCS_MODEQ_CCS_STOP_ON_ERR) | + PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), + PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | + PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | + PPC_PLACE(0xFFFF, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN) | + PPC_PLACE(0x3, + 
CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 0976c721654..020b9a83def 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -1027,15 +1027,15 @@ static int process_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) * calibration engine itself. Check for latter. */ /* IOM0.IOM_PHY0_DDRPHY_FIR_REG */ - if (read_rscom_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG) & + if (read_scom_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG) & PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2)) { /* * "Clear the PHY FIR ERROR 2 bit so we don't keep failing training and * training advance on this port" */ - rscom_and_or_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG, - ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), - 0); + scom_and_or_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG, + ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), + 0); return 1; } @@ -1163,15 +1163,15 @@ static void fir_unmask(uint8_t chip, int mcs_i) MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 0 //recoverable_error (0,1,0) */ - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, - ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), - 0); - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, - ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), - PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT)); - rscom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, - ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), - 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index 602e41d4de3..7a70be8421a 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -404,17 +404,16 @@ static void setup_xlate_map(uint8_t chip, int mcs_i, int mca_i) } /* MCS_PORT02_MCP0XLT0 (?) */ - write_rscom_for_chiplet(chip, nest, 0x05010820 + mca_i * mca_mul, - dimms_rank_config(mca, xlt_tables[cfg][0], update_d)); + write_scom_for_chiplet(chip, nest, 0x05010820 + mca_i * mca_mul, + dimms_rank_config(mca, xlt_tables[cfg][0], update_d)); /* MCS_PORT02_MCP0XLT1 (?) */ - write_rscom_for_chiplet(chip, nest, 0x05010821 + mca_i * mca_mul, - xlt_tables[cfg][1]); + write_scom_for_chiplet(chip, nest, 0x05010821 + mca_i * mca_mul, + xlt_tables[cfg][1]); /* MCS_PORT02_MCP0XLT2 (?) */ - write_rscom_for_chiplet(chip, nest, 0x05010822 + mca_i * mca_mul, - xlt_tables[cfg][2]); - + write_scom_for_chiplet(chip, nest, 0x05010822 + mca_i * mca_mul, + xlt_tables[cfg][2]); } static void enable_pm(uint8_t chip, int mcs_i, int mca_i) @@ -491,18 +490,18 @@ static void fir_unmask(uint8_t chip, int mcs_i) * TODO: check if this works with bootblock in SEEPROM too. We don't have * interrupt handlers set up in that case. 
*/ - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, - ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)); - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, - ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), - PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC)); - rscom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, - ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | - PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), - 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) diff --git a/src/soc/ibm/power9/istep_13_2.c b/src/soc/ibm/power9/istep_13_2.c index 095fcfd3704..8dd8bfdc845 100644 --- a/src/soc/ibm/power9/istep_13_2.c +++ b/src/soc/ibm/power9/istep_13_2.c @@ -32,16 +32,15 @@ static void mem_pll_reset(uint8_t chip) [all] 0 [1] PCB_EP_RESET = 1 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); // Mask PLL unlock error in PCB slave /* TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG [12] (part of) ERROR_MASK = 1 */ - rscom_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, - PPC_BIT(12)); + scom_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, PPC_BIT(12)); // Move MC PLL into reset state (3 separate writes, no delays between them) /* @@ -55,12 +54,12 @@ static void mem_pll_reset(uint8_t chip) [all] 0 [3] PLL_TEST_EN = 1 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); // Assert MEM PLDY and DCC bypass /* @@ -69,9 +68,9 @@ static void mem_pll_reset(uint8_t chip) [1] CLK_DCC_BYPASS_EN = 1 [2] CLK_PDLY_BYPASS_EN = 1 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WOR, - PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN) | - PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN) | + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); // Drop endpoint reset /* @@ -79,16 +78,16 @@ static void mem_pll_reset(uint8_t chip) [all] 1 [1] PCB_EP_RESET = 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], 
PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); // Disable listen to sync pulse to MC chiplet, when MEM is not in sync to nest /* TP.TCMC01.MCSLOW.SYNC_CONFIG [4] LISTEN_TO_SYNC_PULSE_DIS = 1 */ - rscom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_SYNC_CONFIG, - PPC_BIT(MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS)); + scom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_SYNC_CONFIG, + PPC_BIT(MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS)); // Initialize OPCG_ALIGN register /* @@ -99,7 +98,7 @@ static void mem_pll_reset(uint8_t chip) [47-51] SCAN_RATIO = 0 // 1:1 [52-63] OPCG_WAIT_CYCLES = 0x20 */ - write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, PPC_PLACE(5, MCSLOW_OPCG_ALIGN_INOP_ALIGN, MCSLOW_OPCG_ALIGN_INOP_ALIGN_LEN) | PPC_PLACE(0x20, MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES, @@ -123,18 +122,18 @@ static void mem_pll_reset(uint8_t chip) TP.TCMC01.MCSLOW.OPCG_REG0 [2] RUN_SCAN0 = 1 */ - write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, - PPC_BIT(MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY)); - write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, - PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10) | - PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY)); - rscom_and_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, - ~PPC_BIT(MCSLOW_OPCG_RUNN_MODE)); - rscom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, - PPC_BIT(MCSLOW_OPCG_RUN_SCAN0)); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, + PPC_BIT(MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY)); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10) | + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY)); + scom_and_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, + ~PPC_BIT(MCSLOW_OPCG_RUNN_MODE)); + scom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, + PPC_BIT(MCSLOW_OPCG_RUN_SCAN0)); } /* Separate loop so we won't have to wait for timeout twice */ @@ -150,8 +149,8 @@ static void mem_pll_reset(uint8_t chip) delay(16us) */ time_elapsed = wait_us(200 * 16 - time_elapsed, - read_rscom_for_chiplet(chip, mcs_ids[i], - MCSLOW_CPLT_STAT0) & + read_scom_for_chiplet(chip, mcs_ids[i], + MCSLOW_CPLT_STAT0) & PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC)); if (!time_elapsed) @@ -164,8 +163,8 @@ static void mem_pll_reset(uint8_t chip) TP.TCMC01.MCSLOW.SCAN_REGION_TYPE [all] 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, 0); - write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, 0); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, 0); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, 0); } } diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index 5ee7a863dac..eeb6ef20705 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -65,7 +65,7 @@ static void mem_pll_initf(uint8_t chip) * making a function from this. 
*/ // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG - if (read_rscom(chip, PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) + if (read_scom(chip, PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) die("MBOX to SBE busy, this should not happen\n"); @@ -85,17 +85,17 @@ static void mem_pll_initf(uint8_t chip) * variable for it, which probably implies wrapping this into a function and * moving it to separate file. */ - write_rscom(chip, PSU_HOST_SBE_MBOX0_REG, 0x000001000000D301); + write_scom(chip, PSU_HOST_SBE_MBOX0_REG, 0x000001000000D301); // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG /* TARGET_TYPE_PERV, chiplet ID = 0x07, ring ID, RING_MODE_SET_PULSE_NSL */ - write_rscom(chip, PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | - PPC_PLACE(ring_id, 32, 16) | - PPC_PLACE(mcs_ids[mcs_i], 24, 8)); + write_scom(chip, PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | + PPC_PLACE(ring_id, 32, 16) | + PPC_PLACE(mcs_ids[mcs_i], 24, 8)); // Ring the host->SBE doorbell // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR - write_rscom(chip, PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0)); + write_scom(chip, PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0)); // Wait for response /* @@ -108,7 +108,7 @@ static void mem_pll_initf(uint8_t chip) */ // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG time = wait_ms(90 * MSECS_PER_SEC, - read_rscom(chip, PSU_HOST_DOORBELL_REG) & PPC_BIT(0)); + read_scom(chip, PSU_HOST_DOORBELL_REG) & PPC_BIT(0)); if (!time) die("Timed out while waiting for SBE response\n"); @@ -119,7 +119,7 @@ static void mem_pll_initf(uint8_t chip) // Clear SBE->host doorbell // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND - write_rscom(chip, PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0)); + write_scom(chip, PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0)); } } diff --git a/src/soc/ibm/power9/istep_13_4.c b/src/soc/ibm/power9/istep_13_4.c index 0041d09a0ff..191c42e3dd3 100644 --- a/src/soc/ibm/power9/istep_13_4.c +++ b/src/soc/ibm/power9/istep_13_4.c @@ -29,8 +29,8 @@ static void mem_pll_setup(uint8_t chip) [all] 1 [2] CLK_PDLY_BYPASS_EN = 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); // Drop DCC bypass of DCC logic /* @@ -38,8 +38,8 @@ static void mem_pll_setup(uint8_t chip) [all] 1 [1] CLK_DCC_BYPASS_EN = 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN)); // ATTR_NEST_MEM_X_O_PCI_BYPASS is set to 0 in talos.xml. 
// > if (ATTR_NEST_MEM_X_O_PCI_BYPASS == 0) @@ -50,8 +50,8 @@ static void mem_pll_setup(uint8_t chip) [all] 1 [3] PLL_TEST_EN = 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); // Drop PLL reset /* @@ -59,8 +59,8 @@ static void mem_pll_setup(uint8_t chip) [all] 1 [4] PLL_RESET = 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); /* * TODO: This is how Hosboot does it, maybe it would be better to use @@ -75,8 +75,8 @@ static void mem_pll_setup(uint8_t chip) TP.TPCHIP.NET.PCBSLMC01.PLL_LOCK_REG assert([0] (reserved) == 1) */ - if (!(read_rscom_for_chiplet(chip, mcs_ids[i], - PCBSLMC01_PLL_LOCK_REG) & PPC_BIT(0))) + if (!(read_scom_for_chiplet(chip, mcs_ids[i], + PCBSLMC01_PLL_LOCK_REG) & PPC_BIT(0))) die("MCS%d PLL not locked\n", i); // Drop PLL Bypass @@ -85,18 +85,18 @@ static void mem_pll_setup(uint8_t chip) [all] 1 [5] PLL_BYPASS = 0 */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); // Set scan ratio to 4:1 /* TP.TCMC01.MCSLOW.OPCG_ALIGN [47-51] SCAN_RATIO = 3 // 4:1 */ - rscom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, - ~PPC_BITMASK(47,51), - PPC_PLACE(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO, - MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN)); + scom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, + ~PPC_BITMASK(47,51), + PPC_PLACE(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO, + MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN)); // > end if @@ -105,15 +105,15 @@ static void mem_pll_setup(uint8_t chip) TP.TPCHIP.NET.PCBSLMC01.ERROR_REG [all] 1 // Write 1 to clear */ - write_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_ERROR_REG, ~0); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_ERROR_REG, ~0); // Unmask PLL unlock error in PCB slave /* TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG [12] (part of) ERROR_MASK = 0 */ - rscom_and_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, - ~PPC_BIT(12)); + scom_and_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, + ~PPC_BIT(12)); } } diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c index 67174367164..a3dd602560b 100644 --- a/src/soc/ibm/power9/istep_13_6.c +++ b/src/soc/ibm/power9/istep_13_6.c @@ -28,7 +28,7 @@ static inline void p9_mem_startclocks_cplt_ctrl_action_function(uint8_t chip, ch [3] TC_VITL_REGION_FENCE = ~ATTR_PG[3] [4-14] TC_REGION{1-3}_FENCE, UNUSED_{8-14}B = ~ATTR_PG[4-14] */ - write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL1_WCLEAR, ~pg & PPC_BITMASK(3, 14)); + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL1_WCLEAR, ~pg & PPC_BITMASK(3, 14)); // Reset abistclk_muxsel and syncclk_muxsel /* @@ -37,9 +37,9 @@ static inline void p9_mem_startclocks_cplt_ctrl_action_function(uint8_t chip, ch [0] CTRL_CC_ABSTCLK_MUXSEL_DC = 1 [1] TC_UNIT_SYNCCLK_MUXSEL_DC = 1 */ - write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC) | - PPC_BIT(MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC)); + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC) | + 
PPC_BIT(MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC)); } @@ -51,8 +51,8 @@ static inline void p9_sbe_common_align_chiplets(uint8_t chip, chiplet_id_t id) [all] 0 [2] CTRL_CC_FLUSHMODE_INH_DC = 1 */ - write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); // Enable alignement /* @@ -60,24 +60,24 @@ static inline void p9_sbe_common_align_chiplets(uint8_t chip, chiplet_id_t id) [all] 0 [3] CTRL_CC_FORCE_ALIGN_DC = 1 */ - write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); // Clear chiplet is aligned /* TP.TCMC01.MCSLOW.SYNC_CONFIG [7] CLEAR_CHIPLET_IS_ALIGNED = 1 */ - rscom_or_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, - PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); + scom_or_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, + PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); // Unset Clear chiplet is aligned /* TP.TCMC01.MCSLOW.SYNC_CONFIG [7] CLEAR_CHIPLET_IS_ALIGNED = 0 */ - rscom_and_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, - ~PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); + scom_and_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, + ~PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED)); udelay(100); @@ -88,7 +88,7 @@ static inline void p9_sbe_common_align_chiplets(uint8_t chip, chiplet_id_t id) if (([9] CC_CTRL_CHIPLET_IS_ALIGNED_DC) == 1) break delay(100us) */ - if (!wait_us(10 * 100, read_rscom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) & + if (!wait_us(10 * 100, read_scom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) & PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_CHIPLET_IS_ALIGNED_DC))) die("Timeout while waiting for chiplet alignment\n"); @@ -98,8 +98,8 @@ static inline void p9_sbe_common_align_chiplets(uint8_t chip, chiplet_id_t id) [all] 0 [3] CTRL_CC_FORCE_ALIGN_DC = 1 */ - write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); } static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64_t pg) @@ -110,15 +110,15 @@ static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64 [all] 0 [2] CTRL_CC_FLUSHMODE_INH_DC = 1 */ - write_rscom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); // Clear Scan region type register /* TP.TCMC01.MCSLOW.SCAN_REGION_TYPE [all] 0 */ - write_rscom_for_chiplet(chip, id, MCSLOW_SCAN_REGION_TYPE, 0); + write_scom_for_chiplet(chip, id, MCSLOW_SCAN_REGION_TYPE, 0); // Setup all Clock Domains and Clock Types /* @@ -133,14 +133,14 @@ static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64 [49] SEL_THOLD_NSL = 1 [50] SEL_THOLD_ARY = 1 */ - rscom_and_or_for_chiplet(chip, id, MCSLOW_CLK_REGION, - ~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)), - PPC_PLACE(1, MCSLOW_CLK_REGION_CLOCK_CMD, - MCSLOW_CLK_REGION_CLOCK_CMD_LEN) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | - PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | - (~pg & PPC_BITMASK(4, 13))); + scom_and_or_for_chiplet(chip, id, MCSLOW_CLK_REGION, 
+ ~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)), + PPC_PLACE(1, MCSLOW_CLK_REGION_CLOCK_CMD, + MCSLOW_CLK_REGION_CLOCK_CMD_LEN) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) | + (~pg & PPC_BITMASK(4, 13))); // Poll OPCG done bit to check for completeness /* @@ -149,7 +149,7 @@ static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64 if (([8] CC_CTRL_OPCG_DONE_DC) == 1) break delay(100us) */ - if (!wait_us(10 * 100, read_rscom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) & + if (!wait_us(10 * 100, read_scom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) & PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC))) die("Timeout while waiting for OPCG done bit\n"); @@ -167,9 +167,9 @@ static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64 */ uint64_t mask = PPC_BITMASK(4, 13); uint64_t expected = pg & mask; - if ((read_rscom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_SL) & mask) != expected || - (read_rscom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_NSL) & mask) != expected || - (read_rscom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_ARY) & mask) != expected) + if ((read_scom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_SL) & mask) != expected || + (read_scom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_NSL) & mask) != expected || + (read_scom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_ARY) & mask) != expected) die("Unexpected clock status\n"); } @@ -196,8 +196,8 @@ static inline void p9_mem_startclocks_fence_setup_function(uint8_t chip, chiplet [all] 1 [18] FENCE_EN = 0 */ - write_rscom_for_chiplet(chip, id, PCBSLMC01_NET_CTRL0_WAND, - ~PPC_BIT(PCBSLMC01_NET_CTRL0_FENCE_EN)); + write_scom_for_chiplet(chip, id, PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_FENCE_EN)); /* }*/ } @@ -209,7 +209,7 @@ static void p9_sbe_common_configure_chiplet_FIR(uint8_t chip, chiplet_id_t id) TP.TCMC01.MCSLOW.LOCAL_FIR [all] 0 */ - write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR, 0); + write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR, 0); // configure pervasive FIR action/mask /* @@ -222,23 +222,23 @@ static void p9_sbe_common_configure_chiplet_FIR(uint8_t chip, chiplet_id_t id) [all] 0 [4-41] 0x3FFFFFFFFF (every bit set) */ - write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION0, 0); - write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION1, PPC_BITMASK(0, 3)); - write_rscom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_MASK, PPC_BITMASK(4, 41)); + write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION0, 0); + write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION1, PPC_BITMASK(0, 3)); + write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_MASK, PPC_BITMASK(4, 41)); // reset XFIR /* TP.TCMC01.MCSLOW.XFIR [all] 0 */ - write_rscom_for_chiplet(chip, id, MCSLOW_XFIR, 0); + write_scom_for_chiplet(chip, id, MCSLOW_XFIR, 0); // configure XFIR mask /* TP.TCMC01.MCSLOW.FIR_MASK [all] 0 */ - write_rscom_for_chiplet(chip, id, MCSLOW_FIR_MASK, 0); + write_scom_for_chiplet(chip, id, MCSLOW_FIR_MASK, 0); } static void mem_startclocks(uint8_t chip) @@ -273,8 +273,8 @@ static void mem_startclocks(uint8_t chip) [all] 0 [2] CTRL_CC_FLUSHMODE_INH_DC = 1 */ - write_rscom_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CTRL0_WCLEAR, - PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); // Call p9_sbe_common_configure_chiplet_FIR for MC chiplets p9_sbe_common_configure_chiplet_FIR(chip, mcs_ids[i]); @@ 
-291,9 +291,9 @@ static void mem_startclocks(uint8_t chip) * ATTR_FABRIC_GROUP_ID of parent PROC (same for CHIP_ID). Only * SYSTEM_ID is present in talos.xml with full name. */ - rscom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CONF0, - ~(PPC_BITMASK(48, 54) | PPC_BITMASK(56, 60)), - PPC_PLACE(chip, 48, 4)); + scom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CONF0, + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(56, 60)), + PPC_PLACE(chip, 48, 4)); // Add to Multicast Group /* Avoid setting if register is already set, i.e. [3-5] != 7 */ @@ -305,17 +305,17 @@ static void mem_startclocks(uint8_t chip) [3-5] MULTICAST1_GROUP: if 7 then set to 2 [16-23] (not described): if [3-5] == 7 then set to 0x1C */ - if ((read_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1) & + if ((read_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1) & PPC_BITMASK(3, 5)) == PPC_BITMASK(3, 5)) - rscom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1, - ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), - PPC_BITMASK(19, 21)); + scom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1, + ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), + PPC_BITMASK(19, 21)); - if ((read_rscom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2) & + if ((read_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2) & PPC_BITMASK(3, 5)) == PPC_BITMASK(3, 5)) - rscom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2, - ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), - PPC_BIT(4) | PPC_BITMASK(19, 21)); + scom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2, + ~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)), + PPC_BIT(4) | PPC_BITMASK(19, 21)); } } diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index 097a73dd567..b123ae4a5fb 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -59,8 +59,8 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) /* P9N2_MCS_PORT02_MCPERF0 (?) [22-27] = 0x20 // AMO_LIMIT */ - rscom_and_or_for_chiplet(chip, nest, 0x05010823 + mca_i * mca_mul, - ~PPC_BITMASK(22, 27), PPC_PLACE(0x20, 22, 6)); + scom_and_or_for_chiplet(chip, nest, 0x05010823 + mca_i * mca_mul, + ~PPC_BITMASK(22, 27), PPC_PLACE(0x20, 22, 6)); /* P9N2_MCS_PORT02_MCPERF2 (?) [0-2] = 1 // PF_DROP_VALUE0 @@ -92,24 +92,24 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) uint64_t en_ref_blk = (log_ranks <= 1 || log_ranks > 8) ? 0 : (n_dimms == 1 && mranks == 4 && log_ranks == 8) ? 0 : 3; - rscom_and_or_for_chiplet(chip, nest, 0x05010824 + mca_i * mca_mul, - /* and */ - ~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31) - | PPC_BITMASK(28, 31) | PPC_BITMASK(50, 54) | PPC_BIT(61)), - /* or */ - PPC_PLACE(1, 0, 3) | PPC_PLACE(3, 3, 3) | PPC_PLACE(5, 6, 3) - | PPC_PLACE(7, 9, 3) /* PF_DROP_VALUEs */ - | PPC_PLACE(ref_blk_cfg, 13, 3) | PPC_PLACE(en_ref_blk, 16, 2) - | PPC_PLACE(0x4, 28, 4) | PPC_PLACE(0x1C, 50, 5)); + scom_and_or_for_chiplet(chip, nest, 0x05010824 + mca_i * mca_mul, + /* and */ + ~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31) + | PPC_BITMASK(28, 31) | PPC_BITMASK(50, 54) | PPC_BIT(61)), + /* or */ + PPC_PLACE(1, 0, 3) | PPC_PLACE(3, 3, 3) | PPC_PLACE(5, 6, 3) + | PPC_PLACE(7, 9, 3) /* PF_DROP_VALUEs */ + | PPC_PLACE(ref_blk_cfg, 13, 3) | PPC_PLACE(en_ref_blk, 16, 2) + | PPC_PLACE(0x4, 28, 4) | PPC_PLACE(0x1C, 50, 5)); /* P9N2_MCS_PORT02_MCAMOC (?) 
[1] = 0 // FORCE_PF_DROP0 [4-28] = 0x19fffff // WRTO_AMO_COLLISION_RULES [29-31] = 1 // AMO_SIZE_SELECT, 128B_RW_64B_DATA */ - rscom_and_or_for_chiplet(chip, nest, 0x05010825 + mca_i * mca_mul, - ~(PPC_BIT(1) | PPC_BITMASK(4, 31)), - PPC_PLACE(0x19FFFFF, 4, 25) | PPC_PLACE(1, 29, 3)); + scom_and_or_for_chiplet(chip, nest, 0x05010825 + mca_i * mca_mul, + ~(PPC_BIT(1) | PPC_BITMASK(4, 31)), + PPC_PLACE(0x19FFFFF, 4, 25) | PPC_PLACE(1, 29, 3)); /* P9N2_MCS_PORT02_MCEPSQ (?) [0-7] = 1 // JITTER_EPSILON @@ -122,13 +122,13 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) [40-47] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4 // VECTOR_GROUP_EPSILON */ #define F(X) (((X) + 6) / 4) - rscom_and_or_for_chiplet(chip, nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), - PPC_PLACE(1, 0, 8) - | PPC_PLACE(F(pb_cfg->eps_r[0]), 8, 8) - | PPC_PLACE(F(pb_cfg->eps_r[1]), 16, 8) - | PPC_PLACE(F(pb_cfg->eps_r[1]), 24, 8) - | PPC_PLACE(F(pb_cfg->eps_r[2]), 32, 8) - | PPC_PLACE(F(pb_cfg->eps_r[2]), 40, 8)); + scom_and_or_for_chiplet(chip, nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47), + PPC_PLACE(1, 0, 8) + | PPC_PLACE(F(pb_cfg->eps_r[0]), 8, 8) + | PPC_PLACE(F(pb_cfg->eps_r[1]), 16, 8) + | PPC_PLACE(F(pb_cfg->eps_r[1]), 24, 8) + | PPC_PLACE(F(pb_cfg->eps_r[2]), 32, 8) + | PPC_PLACE(F(pb_cfg->eps_r[2]), 40, 8)); #undef F //~ static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; // T0, T1 //~ static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; // T2 @@ -140,9 +140,9 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) [14-23] = 51 // BUSY_COUNTER_THRESHOLD1 [24-33] = 64 // BUSY_COUNTER_THRESHOLD2 */ - rscom_and_or_for_chiplet(chip, nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), - PPC_BIT(0) | PPC_PLACE(1, 1, 3) | PPC_PLACE(38, 4, 10) - | PPC_PLACE(51, 14, 10) | PPC_PLACE(64, 24, 10)); + scom_and_or_for_chiplet(chip, nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), + PPC_BIT(0) | PPC_PLACE(1, 1, 3) | PPC_PLACE(38, 4, 10) + | PPC_PLACE(51, 14, 10) | PPC_PLACE(64, 24, 10)); /* P9N2_MCS_PORT02_MCPERF3 (?) 
[31] = 1 // ENABLE_CL0 @@ -151,9 +151,9 @@ static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i) [44] = 1 // DISABLE_WRTO_IG [45] = 1 // AMO_LIMIT_SEL */ - rscom_or_for_chiplet(chip, nest, 0x0501082B + mca_i * mca_mul, - PPC_BIT(31) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(44) - | PPC_BIT(45)); + scom_or_for_chiplet(chip, nest, 0x0501082B + mca_i * mca_mul, + PPC_BIT(31) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(44) + | PPC_BIT(45)); /* MC01.PORT0.SRQ.MBA_DSM0Q = // These are set per port so all latencies should be calculated from both DIMMs (if present) @@ -812,86 +812,86 @@ static void p9n_mcbist_scom(uint8_t chip, int mcs_i) /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0AQ = [0-47] WATCFG0AQ_CFG_WAT_EVENT_SEL = 0x400000000000 */ - rscom_and_or_for_chiplet(chip, id, WATCFG0AQ, ~PPC_BITMASK(0, 47), - PPC_PLACE(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL, - WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN)); + scom_and_or_for_chiplet(chip, id, WATCFG0AQ, ~PPC_BITMASK(0, 47), + PPC_PLACE(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL, + WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0BQ = [0-43] WATCFG0BQ_CFG_WAT_MSKA = 0x3fbfff [44-60] WATCFG0BQ_CFG_WAT_CNTL = 0x10000 */ - rscom_and_or_for_chiplet(chip, id, WATCFG0BQ, ~PPC_BITMASK(0, 60), - PPC_PLACE(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA, - WATCFG0BQ_CFG_WAT_MSKA_LEN) | - PPC_PLACE(0x10000, WATCFG0BQ_CFG_WAT_CNTL, - WATCFG0BQ_CFG_WAT_CNTL_LEN)); + scom_and_or_for_chiplet(chip, id, WATCFG0BQ, ~PPC_BITMASK(0, 60), + PPC_PLACE(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA, + WATCFG0BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10000, WATCFG0BQ_CFG_WAT_CNTL, + WATCFG0BQ_CFG_WAT_CNTL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0DQ = [0-43] WATCFG0DQ_CFG_WAT_PATA = 0x80200004000 */ - rscom_and_or_for_chiplet(chip, id, WATCFG0DQ, ~PPC_BITMASK(0, 43), - PPC_PLACE(0x80200004000, WATCFG0DQ_CFG_WAT_PATA, - WATCFG0DQ_CFG_WAT_PATA_LEN)); + scom_and_or_for_chiplet(chip, id, WATCFG0DQ, ~PPC_BITMASK(0, 43), + PPC_PLACE(0x80200004000, WATCFG0DQ_CFG_WAT_PATA, + WATCFG0DQ_CFG_WAT_PATA_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3AQ = [0-47] WATCFG3AQ_CFG_WAT_EVENT_SEL = 0x800000000000 */ - rscom_and_or_for_chiplet(chip, id, WATCFG3AQ, ~PPC_BITMASK(0, 47), - PPC_PLACE(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL, - WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN)); + scom_and_or_for_chiplet(chip, id, WATCFG3AQ, ~PPC_BITMASK(0, 47), + PPC_PLACE(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL, + WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3BQ = [0-43] WATCFG3BQ_CFG_WAT_MSKA = 0xfffffffffff [44-60] WATCFG3BQ_CFG_WAT_CNTL = 0x10400 */ - rscom_and_or_for_chiplet(chip, id, WATCFG3BQ, ~PPC_BITMASK(0, 60), - PPC_PLACE(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA, - WATCFG3BQ_CFG_WAT_MSKA_LEN) | - PPC_PLACE(0x10400, WATCFG3BQ_CFG_WAT_CNTL, - WATCFG3BQ_CFG_WAT_CNTL_LEN)); + scom_and_or_for_chiplet(chip, id, WATCFG3BQ, ~PPC_BITMASK(0, 60), + PPC_PLACE(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA, + WATCFG3BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10400, WATCFG3BQ_CFG_WAT_CNTL, + WATCFG3BQ_CFG_WAT_CNTL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ = [36] MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE = 0 */ - rscom_and_for_chiplet(chip, id, MCBCFGQ, ~PPC_BIT(MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE)); + scom_and_for_chiplet(chip, id, MCBCFGQ, ~PPC_BIT(MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG0Q = [0] DBGCFG0Q_CFG_DBG_ENABLE = 1 [23-33] DBGCFG0Q_CFG_DBG_PICK_MCBIST01 = 0x780 */ - rscom_and_or_for_chiplet(chip, id, DBGCFG0Q, ~PPC_BITMASK(23, 33), - PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | - PPC_PLACE(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01, - 
DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN)); + scom_and_or_for_chiplet(chip, id, DBGCFG0Q, ~PPC_BITMASK(23, 33), + PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | + PPC_PLACE(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01, + DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG1Q = [0] DBGCFG1Q_CFG_WAT_ENABLE = 1 */ - rscom_or_for_chiplet(chip, id, DBGCFG1Q, PPC_BIT(DBGCFG1Q_CFG_WAT_ENABLE)); + scom_or_for_chiplet(chip, id, DBGCFG1Q, PPC_BIT(DBGCFG1Q_CFG_WAT_ENABLE)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG2Q = [0-19] DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL = 0x10000 [20-39] DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL = 0x08000 */ - rscom_and_or_for_chiplet(chip, id, DBGCFG2Q, ~PPC_BITMASK(0, 39), - PPC_PLACE(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL, - DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN) | - PPC_PLACE(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL, - DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN)); + scom_and_or_for_chiplet(chip, id, DBGCFG2Q, ~PPC_BITMASK(0, 39), + PPC_PLACE(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN) | + PPC_PLACE(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN)); /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG3Q = [20-22] DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL = 0x4 [23-25] DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL = 0x4 [37-40] DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE = 0x4 */ - rscom_and_or_for_chiplet(chip, id, DBGCFG3Q, - ~(PPC_BITMASK(20, 25) | PPC_BITMASK(37, 40)), - PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL, - DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN) | - PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL, - DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN) | - PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE, - DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN)); + scom_and_or_for_chiplet(chip, id, DBGCFG3Q, + ~(PPC_BITMASK(20, 25) | PPC_BITMASK(37, 40)), + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE, + DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN)); } static void set_rank_pairs(uint8_t chip, int mcs_i, int mca_i) diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c index 9f133f3f4f3..07e9ba3e2a1 100644 --- a/src/soc/ibm/power9/istep_13_9.c +++ b/src/soc/ibm/power9/istep_13_9.c @@ -218,21 +218,21 @@ static void fir_unmask(uint8_t chip, int mcs_i) [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 0 // recoverable_error (0,1,0) [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 // checkstop (0,0,0) */ - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, - ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | - PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), - 0); - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, - ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | - PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE)); - rscom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, - ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | - PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | - PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), - 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 
PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index f00168b31b8..f91538c2c03 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -47,8 +47,8 @@ static void fir_unmask(uint8_t chip, int mcs_i) MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 // checkstop (0,0,0) */ - rscom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, - ~PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC), 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC), 0); for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { uint64_t val; @@ -282,11 +282,11 @@ static void init_mcbist(uint8_t chip, int mcs_i) /* MC01.MCBIST.MBA_SCOMFIR.MCBSA0Q * [0-37] MCBSA0Q_CFG_START_ADDR_0 */ - write_rscom_for_chiplet(chip, id, MCBSA0Q, 0); + write_scom_for_chiplet(chip, id, MCBSA0Q, 0); /* MC01.MCBIST.MBA_SCOMFIR.MCBEA0Q * [0-37] MCBSA0Q_CFG_END_ADDR_0 */ - write_rscom_for_chiplet(chip, id, MCBEA0Q, PPC_BITMASK(3, 37)); + write_scom_for_chiplet(chip, id, MCBEA0Q, PPC_BITMASK(3, 37)); /* Hostboot stops MCBIST engine, die() if it is already started instead */ /* TODO: check all bits (MCBIST was ever started) or just "in progress"? */ @@ -295,7 +295,7 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [1] MCB_CNTLSTATQ_MCB_DONE * [2] MCB_CNTLSTATQ_MCB_FAIL */ - if ((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) != 0) + if ((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) != 0) die("MCBIST started already (%#16.16llx), this shouldn't happen\n", val); /* @@ -305,10 +305,10 @@ static void init_mcbist(uint8_t chip, int mcs_i) * - MBS Memory Scrub/Read Error Count Register 1 - MC01.MCBIST.MBA_SCOMFIR.MBSEC1Q * - MCBIST Fault Isolation Register - MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRQ */ - write_rscom_for_chiplet(chip, id, MCBSTATQ, 0); - write_rscom_for_chiplet(chip, id, MBSEC0Q, 0); - write_rscom_for_chiplet(chip, id, MBSEC1Q, 0); - write_rscom_for_chiplet(chip, id, MCBISTFIR, 0); + write_scom_for_chiplet(chip, id, MCBSTATQ, 0); + write_scom_for_chiplet(chip, id, MBSEC0Q, 0); + write_scom_for_chiplet(chip, id, MBSEC1Q, 0); + write_scom_for_chiplet(chip, id, MCBISTFIR, 0); /* Enable FIFO mode */ set_fifo_mode(chip, mcs_i, 1); @@ -326,9 +326,9 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [10] MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN = 1 * [12] MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES = 1 */ - write_rscom_for_chiplet(chip, id, MCBAGRAQ, - PPC_BIT(MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN) | - PPC_BIT(MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES)); + write_scom_for_chiplet(chip, id, MCBAGRAQ, + PPC_BIT(MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN) | + PPC_BIT(MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES)); /* * Configure MCBIST @@ -351,16 +351,16 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [57-58] MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE = 0 for patterns, 0b10 for scrub * [63] MCBCFGQ_CFG_ENABLE_HOST_ATTN = see above */ - write_rscom_for_chiplet(chip, id, MCBCFGQ, - PPC_PLACE(0x2, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE, - MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN)); + write_scom_for_chiplet(chip, id, MCBCFGQ, + PPC_PLACE(0x2, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE, + MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN)); /* * 
This sets up memory parameters, mostly gaps between commands. For as fast * as possible, gaps of 0 are configured here. */ /* MC01.MCBIST.MBA_SCOMFIR.MCBPARMQ */ - write_rscom_for_chiplet(chip, id, MCBPARMQ, 0); + write_scom_for_chiplet(chip, id, MCBPARMQ, 0); /* * Steps done from this point should be moved out of this function, they @@ -371,7 +371,7 @@ static void init_mcbist(uint8_t chip, int mcs_i) /* Data pattern: 8 data registers + 1 ECC register */ /* TODO: different patterns can be used */ for (i = 0; i < 9; i++) { - write_rscom_for_chiplet(chip, id, MCBFD0Q + i, patterns[0][i]); + write_scom_for_chiplet(chip, id, MCBFD0Q + i, patterns[0][i]); } /* TODO: random seeds */ @@ -394,9 +394,9 @@ static void init_mcbist(uint8_t chip, int mcs_i) * inverting. */ /* MC01.MCBIST.MBA_SCOMFIR.MCBDRCRQ */ - write_rscom_for_chiplet(chip, id, MCBDRCRQ, 0); + write_scom_for_chiplet(chip, id, MCBDRCRQ, 0); /* MC01.MCBIST.MBA_SCOMFIR.MCBDRSRQ */ - write_rscom_for_chiplet(chip, id, MCBDRSRQ, 0); + write_scom_for_chiplet(chip, id, MCBDRSRQ, 0); /* * The following step may be done just once, as long as the same set of @@ -424,13 +424,13 @@ static void init_mcbist(uint8_t chip, int mcs_i) * [56] MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE } counts all NCE * [57] MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE / */ - write_rscom_for_chiplet(chip, id, MBSTRQ, PPC_BITMASK(0, 31) | - PPC_BIT(MBSTRQ_CFG_PAUSE_ON_MPE) | - PPC_BIT(MBSTRQ_CFG_PAUSE_ON_UE) | - PPC_BIT(MBSTRQ_CFG_PAUSE_ON_AUE) | - PPC_BIT(MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE) | - PPC_BIT(MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE) | - PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); + write_scom_for_chiplet(chip, id, MBSTRQ, PPC_BITMASK(0, 31) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_MPE) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_UE) | + PPC_BIT(MBSTRQ_CFG_PAUSE_ON_AUE) | + PPC_BIT(MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE) | + PPC_BIT(MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE) | + PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE)); } static void mss_memdiag(uint8_t chips) @@ -524,8 +524,8 @@ static void mss_memdiag(uint8_t chips) /* TODO: dump error/status registers on failure */ if (!time) { die("MCBIST%d of chip %d times out (%#16.16llx)\n", mcs_i, chip, - read_rscom_for_chiplet(chip, mcs_ids[mcs_i], - MCB_CNTLSTATQ)); + read_scom_for_chiplet(chip, mcs_ids[mcs_i], + MCB_CNTLSTATQ)); } /* Unmask mainline FIRs. 
*/ diff --git a/src/soc/ibm/power9/istep_14_2.c b/src/soc/ibm/power9/istep_14_2.c index 05bc4e322bc..bb73c2adece 100644 --- a/src/soc/ibm/power9/istep_14_2.c +++ b/src/soc/ibm/power9/istep_14_2.c @@ -24,9 +24,9 @@ static void thermal_init(uint8_t chip) ~PPC_BITMASK(0, 45), PPC_BIT(10) | PPC_BIT(25) | PPC_BIT(37)); } - rscom_and_for_chiplet(chip, mcs_to_nest[mcs_ids[mcs_i]], - MCS_MCMODE0 + 0x80 * mcs_i, - PPC_BIT(21)); + scom_and_for_chiplet(chip, mcs_to_nest[mcs_ids[mcs_i]], + MCS_MCMODE0 + 0x80 * mcs_i, + PPC_BIT(21)); } } @@ -36,19 +36,19 @@ static void prog_mc_mode0(uint8_t chip, chiplet_id_t nest_target, size_t index) | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); uint64_t data = PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); - rscom_and_or_for_chiplet(chip, nest_target, MCS_MCMODE0 + 0x80 * index, ~mask, - data & mask); + scom_and_or_for_chiplet(chip, nest_target, MCS_MCMODE0 + 0x80 * index, ~mask, + data & mask); } static void throttle_sync(uint8_t chip) { for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) prog_mc_mode0(chip, mcs_to_nest[mcs_ids[mcs_i]], mcs_i); - rscom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, - ~PPC_BIT(MCS_MCSYNC_SYNC_GO_CH0)); - rscom_and_or_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(SUPER_SYNC_BIT), - PPC_BITMASK(0, 16)); - rscom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(MBA_REFRESH_SYNC_BIT)); + scom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, + ~PPC_BIT(MCS_MCSYNC_SYNC_GO_CH0)); + scom_and_or_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(SUPER_SYNC_BIT), + PPC_BITMASK(0, 16)); + scom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(MBA_REFRESH_SYNC_BIT)); } void istep_14_2(uint8_t chips) diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index 7d33254444d..81c1f7d0e64 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -43,7 +43,7 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) bool node_pump_mode = false; uint8_t dd = get_dd(); - scratch_reg6 = read_rscom(chip, MBOX_SCRATCH_REG1 + 5); + scratch_reg6 = read_scom(chip, MBOX_SCRATCH_REG1 + 5); /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); @@ -57,10 +57,10 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) * ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID = 0 * ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID = 0 */ - rscom_and_or_for_chiplet(chip, N2_CHIPLET_ID, - pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG), - ~PPC_BITMASK(0, 6), - PPC_PLACE(0, 0, 7)); + scom_and_or_for_chiplet(chip, N2_CHIPLET_ID, + pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG), + ~PPC_BITMASK(0, 6), + PPC_PLACE(0, 0, 7)); /* * Phase2 init step 1 @@ -78,8 +78,8 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) * scope */ - val = read_rscom_for_chiplet(chip, N2_CHIPLET_ID, - pec_addr(pec, PEC_PBCQHWCFG_REG)); + val = read_scom_for_chiplet(chip, N2_CHIPLET_ID, + pec_addr(pec, PEC_PBCQHWCFG_REG)); /* Set hang poll scale */ val &= ~PPC_BITMASK(0, 3); val |= PPC_PLACE(1, 0, 4); @@ -154,8 +154,8 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) if (pec == 1 || (pec == 2 && iovalid_enable[pec] != 0x4)) val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION); - write_rscom_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG), - val); + write_scom_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG), + val); /* * Phase2 init 
step 2 @@ -169,9 +169,9 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) * Set bits 00:03 = 0b1001 Enable trace, and select * inbound operations with addr information */ - rscom_and_or_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), - ~PPC_BITMASK(0, 3), - PPC_PLACE(9, 0, 4)); + scom_and_or_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), + ~PPC_BITMASK(0, 3), + PPC_PLACE(9, 0, 4)); /* * Phase2 init step 4 @@ -194,7 +194,7 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) val |= PPC_BIT(PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN); val |= PPC_PLACE(7, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT_LEN); - write_rscom_for_chiplet(chip, PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); + write_scom_for_chiplet(chip, PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); } } @@ -217,7 +217,7 @@ static void phb_write(uint8_t chip, uint8_t phb, uint64_t addr, uint64_t data) addr &= ~PPC_BITMASK(54, 57); addr |= PPC_PLACE(sat_id, 54, 4); - write_rscom_for_chiplet(chip, chiplet, addr, data); + write_scom_for_chiplet(chip, chiplet, addr, data); } /* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ @@ -244,7 +244,7 @@ static void phb_nest_write(uint8_t chip, uint8_t phb, uint64_t addr, uint64_t da addr &= ~PPC_BITMASK(54, 57); addr |= PPC_PLACE(sat_id, 54, 4); - write_rscom_for_chiplet(chip, N2_CHIPLET_ID, addr, data); + write_scom_for_chiplet(chip, N2_CHIPLET_ID, addr, data); } static void init_phbs(uint8_t chip, uint8_t phb_active_mask, const uint8_t *iovalid_enable) diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c index c4523342ee1..a555ee00eba 100644 --- a/src/soc/ibm/power9/istep_14_5.c +++ b/src/soc/ibm/power9/istep_14_5.c @@ -66,7 +66,7 @@ static void revert_mc_hb_dcbz_config(uint8_t chip) * Hostboot uses - bit 10 for MCS0/1 and bit 9 for MCS2/3. */ /* TP.TCNx.Nx.CPLT_CTRL1, x = {1,3} */ - val = read_rscom_for_chiplet(chip, nest, NEST_CPLT_CTRL1); + val = read_scom_for_chiplet(chip, nest, NEST_CPLT_CTRL1); if ((mcs_i == 0 && val & PPC_BIT(10)) || (mcs_i == 1 && val & PPC_BIT(9))) continue; @@ -79,9 +79,9 @@ static void revert_mc_hb_dcbz_config(uint8_t chip) [5-7] CHANNEL_0_GROUP_MEMBER_IDENTIFICATION = 0 // CHANNEL_1_GROUP_MEMBER_IDENTIFICATION not cleared? [13-23] GROUP_SIZE = 0 */ - rscom_and_or_for_chiplet(chip, nest, 0x0501080A + i * mul, - ~(PPC_BITMASK(0, 7) | PPC_BITMASK(13, 23)), - 0); + scom_and_or_for_chiplet(chip, nest, 0x0501080A + i * mul, + ~(PPC_BITMASK(0, 7) | PPC_BITMASK(13, 23)), + 0); /* MCMODE1 -- enable speculation, cmd bypass, fp command bypass MCS_n_MCMODE1 // undocumented, 0x05010812, 0x05010892, 0x03010812, 0x03010892 @@ -90,24 +90,24 @@ static void revert_mc_hb_dcbz_config(uint8_t chip) [54-60] DISABLE_COMMAND_BYPASS = 0 [61] DISABLE_FP_COMMAND_BYPASS = 0 */ - rscom_and_or_for_chiplet(chip, nest, 0x05010812 + i * mul, - ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), - PPC_PLACE(0x40, 33, 19)); + scom_and_or_for_chiplet(chip, nest, 0x05010812 + i * mul, + ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), + PPC_PLACE(0x40, 33, 19)); /* MCS_MCPERF1 -- enable fast path MCS_n_MCPERF1 // undocumented, 0x05010810, 0x05010890, 0x03010810, 0x03010890 [0] DISABLE_FASTPATH = 0 */ - rscom_and_or_for_chiplet(chip, nest, 0x05010810 + i * mul, - ~PPC_BIT(0), - 0); + scom_and_or_for_chiplet(chip, nest, 0x05010810 + i * mul, + ~PPC_BIT(0), + 0); /* Re-mask MCFIR. We want to ensure all MCSs are masked until the * BARs are opened later during IPL. 
MCS_n_MCFIRMASK_OR // undocumented, 0x05010805, 0x05010885, 0x03010805, 0x03010885 [all] 1 */ - write_rscom_for_chiplet(chip, nest, 0x05010805 + i * mul, ~0); + write_scom_for_chiplet(chip, nest, 0x05010805 + i * mul, ~0); } } } @@ -282,8 +282,8 @@ static void fir_unmask(uint8_t chip, int mcs_i) [0] MC_INTERNAL_RECOVERABLE_ERROR = 1 [8] COMMAND_LIST_TIMEOUT = 1 */ - write_rscom_for_chiplet(chip, nest, 0x05010807 + mcs_i * mul, - PPC_BIT(0) | PPC_BIT(8)); + write_scom_for_chiplet(chip, nest, 0x05010807 + mcs_i * mul, + PPC_BIT(0) | PPC_BIT(8)); /* MCS_MCFIRMASK (AND) // undocumented, 0x05010804 [all] 1 @@ -294,16 +294,16 @@ static void fir_unmask(uint8_t chip, int mcs_i) [5] INVALID_ADDRESS = 0 [8] COMMAND_LIST_TIMEOUT = 0 */ - write_rscom_for_chiplet(chip, nest, 0x05010804 + mcs_i * mul, - ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | - PPC_BIT(4) | PPC_BIT(5) | PPC_BIT(8))); + write_scom_for_chiplet(chip, nest, 0x05010804 + mcs_i * mul, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | + PPC_BIT(4) | PPC_BIT(5) | PPC_BIT(8))); } static void mcd_fir_mask(uint8_t chip) { /* These are set always for N1 chiplet only. */ - write_rscom_for_chiplet(chip, N1_CHIPLET_ID, MCD1_FIR_MASK_REG, ~0); - write_rscom_for_chiplet(chip, N1_CHIPLET_ID, MCD0_FIR_MASK_REG, ~0); + write_scom_for_chiplet(chip, N1_CHIPLET_ID, MCD1_FIR_MASK_REG, ~0); + write_scom_for_chiplet(chip, N1_CHIPLET_ID, MCD0_FIR_MASK_REG, ~0); } static void proc_setup_bars(uint8_t chip) @@ -325,10 +325,10 @@ static void proc_setup_bars(uint8_t chip) * 'mcfgp_regs', last two are for setting up memory hole and SMF, they * are unused now. */ - write_rscom_for_chiplet(chip, nest, 0x0501080A, mcfgp_regs[mcs_i][0]); - write_rscom_for_chiplet(chip, nest, 0x0501080C, mcfgp_regs[mcs_i][1]); - write_rscom_for_chiplet(chip, nest, 0x0501080B, 0); - write_rscom_for_chiplet(chip, nest, 0x0501080D, 0); + write_scom_for_chiplet(chip, nest, 0x0501080A, mcfgp_regs[mcs_i][0]); + write_scom_for_chiplet(chip, nest, 0x0501080C, mcfgp_regs[mcs_i][1]); + write_scom_for_chiplet(chip, nest, 0x0501080B, 0); + write_scom_for_chiplet(chip, nest, 0x0501080D, 0); } mcd_fir_mask(chip); diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index 114a2f235d2..c71477817da 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -156,8 +156,8 @@ static uint32_t calculate_topology_delay(uint8_t chip, uint8_t chips, uint8_t md */ /* For some reason this is a write, not RMW */ - write_rscom(chip, PU_PB_ELINK_RT_DELAY_CTL_REG, PPC_BITMASK(2, 3)); - uint64_t bus_mode_reg = read_rscom(chip, PU_PB_ELINK_DLY_0123_REG); + write_scom(chip, PU_PB_ELINK_RT_DELAY_CTL_REG, PPC_BITMASK(2, 3)); + uint64_t bus_mode_reg = read_scom(chip, PU_PB_ELINK_DLY_0123_REG); uint32_t bus_delay = (((bus_mode_reg & BUS_DELAY_47) >> 16) + (bus_mode_reg & BUS_DELAY_63)) / 2; @@ -192,34 +192,34 @@ static uint32_t calculate_topology_delay(uint8_t chip, uint8_t chips, uint8_t md static void calculate_m_path(uint8_t chip, uint8_t mdmt) { uint64_t dual_edge_disable = - (read_rscom(chip, PERV_ROOT_CTRL8_SCOM) & + (read_scom(chip, PERV_ROOT_CTRL8_SCOM) & PPC_BIT(PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC)) ? 
PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) : 0; if (chip == mdmt) { - rscom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, - ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | - PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | - PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | - PPC_PLACE(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, - M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | - PPC_PLACE(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET, - M_PATH_SYNC_CREATE_SPS_SELECT_LEN) | - PPC_PLACE(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, - M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | - PPC_PLACE(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET, - M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN)), - PPC_BIT(M_PATH_1_OSC_NOT_VALID) | - PPC_PLACE(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, - M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | - PPC_PLACE(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, - M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | - dual_edge_disable); + scom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, + ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | + PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | + PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | + PPC_PLACE(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + PPC_PLACE(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET, + M_PATH_SYNC_CREATE_SPS_SELECT_LEN) | + PPC_PLACE(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET, + M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN)), + PPC_BIT(M_PATH_1_OSC_NOT_VALID) | + PPC_PLACE(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + dual_edge_disable); } else { - rscom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, - ~PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE), - dual_edge_disable); + scom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, + ~PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE), + dual_edge_disable); } } @@ -228,14 +228,14 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t uint32_t topology_delay = calculate_topology_delay(chip, chips, pri_mdmt); /* Clear previous primary topology */ - write_rscom(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, 0); - write_rscom(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, 0); + write_scom(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, 0); + write_scom(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, 0); /* Workaround for HW480181: Init remote sync checker tolerance to maximum * [26-27] REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0x3 (factor 8) * [28-31] REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xF (93.75%) */ - rscom_or(chip, PERV_TOD_S_PATH_CTRL_REG, PPC_PLACE(0x3, 26, 2) | PPC_PLACE(0xF, 28, 4)); + scom_or(chip, PERV_TOD_S_PATH_CTRL_REG, PPC_PLACE(0x3, 26, 2) | PPC_PLACE(0xF, 28, 4)); /* * Set PSS_MSS_CTRL_REG for primary configuration, assumption: @@ -245,10 +245,10 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [2] PRI_M_S_DRAWER_SELECT = 1 (drawer is master) */ if (chip == pri_mdmt) { - rscom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, - ~PPC_BIT(0), PPC_BIT(1) | PPC_BIT(2)); + scom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, + ~PPC_BIT(0), PPC_BIT(1) | PPC_BIT(2)); } else { - rscom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(1)); + scom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(1)); } /* @@ -264,11 +264,11 @@ static void configure_tod(uint8_t chip, uint8_t 
chips, uint8_t pri_mdmt, uint8_t * [32-39] TOD_S_PATH_CTRL_REG_REMOTE_SYNC_MISS_COUNT_2 = 0x5 */ if (chip != pri_mdmt) { - rscom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, - ~(PPC_BIT(0) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15) | - PPC_BITMASK(26, 39)), - PPC_PLACE(0xC, 8, 4) | PPC_PLACE(0x3, 13, 3) | - PPC_PLACE(0xC, 28, 4) | PPC_PLACE(0x5, 32, 8)); + scom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15) | + PPC_BITMASK(26, 39)), + PPC_PLACE(0xC, 8, 4) | PPC_PLACE(0x3, 13, 3) | + PPC_PLACE(0xC, 28, 4) | PPC_PLACE(0x5, 32, 8)); } /* @@ -287,12 +287,12 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t int tx_sel = (chip == pri_mdmt ? 2 /*m_path_0*/ : 0); uint64_t tx_en = (chip == pri_mdmt ? PPC_BIT(21) : 0); - rscom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, - ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), - PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); - rscom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, - ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), - PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + scom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + scom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); } /* Configure M_PATH_CTRL_REG (primary) */ @@ -312,11 +312,11 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) */ - rscom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, - ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); - rscom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, - ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), - PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); + scom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, + ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); + scom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); /* Configure INIT_CHIP_CTRL_REG (primary) */ /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) @@ -327,9 +327,9 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) * [30] XSTOP_GATE = 0 (stop TOD on checkstop) */ - rscom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, - ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), - PPC_PLACE(0x3F, 10, 6)); + scom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_PLACE(0x3F, 10, 6)); /* TODO: test if we can skip repeated writes (M_PATH, I_PATH, CHIP) */ @@ -337,8 +337,8 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t /* Clear previous secondary topology */ /* NOTE: order is swapped wrt primary, does it matter? 
*/ - write_rscom(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, 0); - write_rscom(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, 0); + write_scom(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, 0); + write_scom(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, 0); /* * Set PSS_MSS_CTRL_REG for secondary configuration, assumptions as before @@ -348,10 +348,10 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [10] SEC_M_S_DRAWER_SELECT = 1 (drawer is master) */ if (chip == sec_mdmt) { - rscom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~(PPC_BIT(0) | PPC_BIT(8)), - PPC_BIT(9) | PPC_BIT(10)); + scom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~(PPC_BIT(0) | PPC_BIT(8)), + PPC_BIT(9) | PPC_BIT(10)); } else { - rscom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(9)); + scom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(9)); } /* @@ -366,11 +366,11 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [32-39] TOD_S_PATH_CTRL_REG_REMOTE_SYNC_MISS_COUNT_2 = 0x5 */ if (chip != sec_mdmt) { - rscom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, - ~(PPC_BIT(0) | PPC_BITMASK(6, 7) | PPC_BITMASK(16, 19) | - PPC_BITMASK(21, 23) | PPC_BITMASK(32, 39)), - PPC_BIT(4) | PPC_PLACE(0xC, 16, 4) | PPC_PLACE(0x3, 21, 3) | - PPC_PLACE(0x5, 32, 8)); + scom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BITMASK(6, 7) | PPC_BITMASK(16, 19) | + PPC_BITMASK(21, 23) | PPC_BITMASK(32, 39)), + PPC_BIT(4) | PPC_PLACE(0xC, 16, 4) | PPC_PLACE(0x3, 21, 3) | + PPC_PLACE(0x5, 32, 8)); } /* Configure PORT_CTRL_REGs (secondary), assumptions as above. @@ -386,12 +386,12 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t int tx_sel = (chip == sec_mdmt ? 2 /*m_path_0*/ : 0); uint64_t tx_en = (chip == sec_mdmt ? PPC_BIT(21) : 0); - rscom_and_or(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, - ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), - PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); - rscom_and_or(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, - ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), - PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + scom_and_or(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + scom_and_or(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); } /* Configure M_PATH_CTRL_REG (secondary) */ @@ -407,11 +407,11 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) */ - rscom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, - ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); - rscom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, - ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), - PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); + scom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, + ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); + scom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); /* Configure INIT_CHIP_CTRL_REG (secondary) */ /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) @@ -422,14 +422,14 @@ static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is 
incremented with each 200MHz clock cycle) * [30] XSTOP_GATE = 0 (stop TOD on checkstop) */ - rscom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, - ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), - PPC_PLACE(0x3F, 10, 6)); + scom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_PLACE(0x3F, 10, 6)); } static int core_count(uint8_t chip) { - uint64_t cores = read_rscom(chip, 0x0006C090); + uint64_t cores = read_scom(chip, 0x0006C090); return __builtin_popcount((uint32_t)cores) + __builtin_popcount(cores >> 32); } diff --git a/src/soc/ibm/power9/istep_18_12.c b/src/soc/ibm/power9/istep_18_12.c index 375a5514a1b..efe41f9248a 100644 --- a/src/soc/ibm/power9/istep_18_12.c +++ b/src/soc/ibm/power9/istep_18_12.c @@ -72,34 +72,34 @@ static void init_tod_node(uint8_t chips, uint8_t mdmt) */ for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) - write_rscom(chip, PERV_TOD_ERROR_REG, ~0); + write_scom(chip, PERV_TOD_ERROR_REG, ~0); } /* Configure MDMT */ /* Chip TOD step checkers enable */ - write_rscom(mdmt, PERV_TOD_TX_TTYPE_2_REG, - PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + write_scom(mdmt, PERV_TOD_TX_TTYPE_2_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); /* Switch local Chip TOD to 'Not Set' state */ - write_rscom(mdmt, PERV_TOD_LOAD_TOD_MOD_REG, - PPC_BIT(PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER)); + write_scom(mdmt, PERV_TOD_LOAD_TOD_MOD_REG, + PPC_BIT(PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER)); /* Switch all Chip TOD in the system to 'Not Set' state */ - write_rscom(mdmt, PERV_TOD_TX_TTYPE_5_REG, - PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + write_scom(mdmt, PERV_TOD_TX_TTYPE_5_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); /* Chip TOD load value (move TB to TOD) */ - write_rscom(mdmt, PERV_TOD_LOAD_TOD_REG, - PPC_PLACE(0x3FF, 0, 60) | PPC_PLACE(0xC, 60, 4)); + write_scom(mdmt, PERV_TOD_LOAD_TOD_REG, + PPC_PLACE(0x3FF, 0, 60) | PPC_PLACE(0xC, 60, 4)); /* Chip TOD start_tod (switch local Chip TOD to 'Running' state) */ - write_rscom(mdmt, PERV_TOD_START_TOD_REG, - PPC_BIT(PERV_TOD_START_TOD_REG_FSM_TRIGGER)); + write_scom(mdmt, PERV_TOD_START_TOD_REG, + PPC_BIT(PERV_TOD_START_TOD_REG_FSM_TRIGGER)); /* Send local Chip TOD value to all Chip TODs */ - write_rscom(mdmt, PERV_TOD_TX_TTYPE_4_REG, - PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + write_scom(mdmt, PERV_TOD_TX_TTYPE_4_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); /* In case of larger topology, replace loops with a recursion */ for (chip = 0; chip < MAX_CHIPS; chip++) { @@ -109,30 +109,30 @@ static void init_tod_node(uint8_t chips, uint8_t mdmt) continue; /* Wait until TOD is running */ - if (!wait_us(1000, read_rscom(chip, PERV_TOD_FSM_REG) & + if (!wait_us(1000, read_scom(chip, PERV_TOD_FSM_REG) & PPC_BIT(PERV_TOD_FSM_REG_IS_RUNNING))) { printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", - read_rscom(chip, PERV_TOD_ERROR_REG)); + read_scom(chip, PERV_TOD_ERROR_REG)); die("Error on chip#%d: TOD is not running!\n", chip); } /* Clear TTYPE#2, TTYPE#4, and TTYPE#5 status */ - write_rscom(chip, PERV_TOD_ERROR_REG, - PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 2) | - PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 4) | - PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 5)); + write_scom(chip, PERV_TOD_ERROR_REG, + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 2) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 4) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 5)); /* Check for real errors */ - error_reg = read_rscom(chip, PERV_TOD_ERROR_REG); + error_reg = read_scom(chip, PERV_TOD_ERROR_REG); if (error_reg != 0) { 
printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", error_reg); die("Error: TOD initialization failed!\n"); } /* Set error mask to runtime configuration (mask TTYPE informational bits) */ - write_rscom(chip, PERV_TOD_ERROR_MASK_REG, - PPC_BITMASK(PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0, - PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 + 5)); + write_scom(chip, PERV_TOD_ERROR_MASK_REG, + PPC_BITMASK(PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0, + PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 + 5)); } } diff --git a/src/soc/ibm/power9/istep_8_1.c b/src/soc/ibm/power9/istep_8_1.c index 1dc46497ce1..eca0f0c54aa 100644 --- a/src/soc/ibm/power9/istep_8_1.c +++ b/src/soc/ibm/power9/istep_8_1.c @@ -173,7 +173,7 @@ static void setup_sbe_config(uint8_t chip) scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 2), 31); - boot_flags = (read_rscom(0, MBOX_SCRATCH_REG1 + 2) >> 32); + boot_flags = (read_scom(0, MBOX_SCRATCH_REG1 + 2) >> 32); risk_level = (get_dd() < 0x23 ? 0 : 4); /* Note that the two fields overlap (boot flags include risk level), so @@ -280,7 +280,7 @@ static void setup_sbe_config(uint8_t chip) static int get_master_sbe_boot_seeprom(void) { enum { PERV_SB_CS_SCOM = 0x00050008 }; - return (read_rscom(0, PERV_SB_CS_SCOM) & SBE_BOOT_SELECT_MASK) ? 1 : 0; + return (read_scom(0, PERV_SB_CS_SCOM) & SBE_BOOT_SELECT_MASK) ? 1 : 0; } static void set_sbe_boot_seeprom(uint8_t chip, int seeprom_side) diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c index 155098dc5d7..02ff6aed3ac 100644 --- a/src/soc/ibm/power9/mcbist.c +++ b/src/soc/ibm/power9/mcbist.c @@ -71,7 +71,7 @@ static void commit_mcbist_memreg_cache(uint8_t chip, int mcs_i) die("Too many MCBIST instructions added\n"); /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ - write_rscom_for_chiplet(chip, id, MCBMR0Q + reg, mcbist_memreg_cache[chip]); + write_scom_for_chiplet(chip, id, MCBMR0Q + reg, mcbist_memreg_cache[chip]); mcbist_memreg_cache[chip] = 0; } @@ -164,7 +164,7 @@ void mcbist_execute(uint8_t chip, int mcs_i) /* Check if in progress */ /* TODO: we could force it to stop, but dying will help with debugging */ - if ((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & + if ((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) die("MCBIST in progress already (%#16.16llx), this shouldn't happen\n", val); @@ -180,17 +180,17 @@ void mcbist_execute(uint8_t chip, int mcs_i) /* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ * [0] MCB_CNTLQ_MCB_START */ - rscom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START)); + scom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START)); /* Wait for MCBIST to start. Test for IP and DONE, it may finish early. */ - if (((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & + if (((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) { /* * TODO: how long do we want to wait? Hostboot uses 10*100us polling, * but so far it seems to always be already started on the first read. 
*/ udelay(1); - if (((val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & + if (((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) & (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) die("MCBIST failed (%#16.16llx) to start twice\n", val); @@ -209,7 +209,7 @@ void mcbist_execute(uint8_t chip, int mcs_i) int mcbist_is_done(uint8_t chip, int mcs_i) { chiplet_id_t id = mcs_ids[mcs_i]; - uint64_t val = val = read_rscom_for_chiplet(chip, id, MCB_CNTLSTATQ); + uint64_t val = val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ); /* Still in progress */ if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_IP)) diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index 38d83914815..ae0422b95a7 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -104,14 +104,14 @@ struct occ_poll_response { static void pm_ocb_setup(uint8_t chip, uint32_t ocb_bar) { - write_rscom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); - write_rscom(chip, PU_OCB_PIB_OCBCSR0_CLEAR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE)); - write_rscom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)ocb_bar << 32); + write_scom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + write_scom(chip, PU_OCB_PIB_OCBCSR0_CLEAR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE)); + write_scom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)ocb_bar << 32); } static void check_ocb_mode(uint8_t chip, uint64_t ocb_csr_address, uint64_t ocb_shcs_address) { - uint64_t ocb_pib = read_rscom(chip, ocb_csr_address); + uint64_t ocb_pib = read_scom(chip, ocb_csr_address); /* * The following check for circular mode is an additional check @@ -124,7 +124,7 @@ static void check_ocb_mode(uint8_t chip, uint64_t ocb_csr_address, uint64_t ocb_ * anyway to let the PIB error response return occur. (That is * what will happen if this checking code were not here.) 
*/ - uint64_t stream_push_ctrl = read_rscom(chip, ocb_shcs_address); + uint64_t stream_push_ctrl = read_scom(chip, ocb_shcs_address); if (stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_ENABLE)) { uint8_t counter = 0; @@ -133,7 +133,7 @@ static void check_ocb_mode(uint8_t chip, uint64_t ocb_csr_address, uint64_t ocb_ if (!(stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_FULL))) break; - stream_push_ctrl = read_rscom(chip, ocb_shcs_address); + stream_push_ctrl = read_scom(chip, ocb_shcs_address); } if (counter == 4) @@ -145,20 +145,20 @@ static void check_ocb_mode(uint8_t chip, uint64_t ocb_csr_address, uint64_t ocb_ static void put_ocb_indirect(uint8_t chip, uint32_t ocb_req_length, uint32_t oci_address, uint64_t *ocb_buffer) { - write_rscom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + write_scom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); check_ocb_mode(chip, PU_OCB_PIB_OCBCSR0_RO, PU_OCB_OCI_OCBSHCS0_SCOM); for (uint32_t index = 0; index < ocb_req_length; index++) - write_rscom(chip, PU_OCB_PIB_OCBDR0, ocb_buffer[index]); + write_scom(chip, PU_OCB_PIB_OCBDR0, ocb_buffer[index]); } static void get_ocb_indirect(uint8_t chip, uint32_t ocb_req_length, uint32_t oci_address, uint64_t *ocb_buffer) { - write_rscom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + write_scom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); for (uint32_t loopCount = 0; loopCount < ocb_req_length; loopCount++) - ocb_buffer[loopCount] = read_rscom(chip, PU_OCB_PIB_OCBDR0); + ocb_buffer[loopCount] = read_scom(chip, PU_OCB_PIB_OCBDR0); } static void write_occ_sram(uint8_t chip, uint32_t address, uint64_t *buffer, size_t data_length) @@ -176,7 +176,7 @@ static void read_occ_sram(uint8_t chip, uint32_t address, uint64_t *buffer, size static void write_occ_command(uint8_t chip, uint64_t write_data) { check_ocb_mode(chip, PU_OCB_PIB_OCBCSR1_RO, PU_OCB_OCI_OCBSHCS1_SCOM); - write_rscom(chip, PU_OCB_PIB_OCBDR1, write_data); + write_scom(chip, PU_OCB_PIB_OCBDR1, write_data); } void clear_occ_special_wakeups(uint8_t chip, uint64_t cores) @@ -184,8 +184,8 @@ void clear_occ_special_wakeups(uint8_t chip, uint64_t cores) for (size_t i = 0; i < MAX_CORES_PER_CHIP; i += 2) { if (!IS_EX_FUNCTIONAL(i, cores)) continue; - rscom_and_for_chiplet(chip, EC00_CHIPLET_ID + i, EX_PPM_SPWKUP_OCC, - ~PPC_BIT(0)); + scom_and_for_chiplet(chip, EC00_CHIPLET_ID + i, EX_PPM_SPWKUP_OCC, + ~PPC_BIT(0)); } } @@ -197,9 +197,9 @@ void special_occ_wakeup_disable(uint8_t chip, uint64_t cores) if (!IS_EC_FUNCTIONAL(i, cores)) continue; - write_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP, 0); + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP, 0); /* This puts an inherent delay in the propagation of the reset transition */ - (void)read_rscom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP); + (void)read_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP); } } @@ -246,21 +246,21 @@ void occ_start_from_mem(uint8_t chip) PU_OCB_PIB_OCR_OR = 0x0006D002, }; - write_rscom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + write_scom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); /* * Set up Boot Vector Registers in SRAM: * - set bv0-2 to all 0's (illegal instructions) * - set bv3 to proper branch instruction */ - write_rscom(chip, PU_SRAM_SRBV0_SCOM, 0); - write_rscom(chip, PU_SRAM_SRBV0_SCOM + 1, 0); - write_rscom(chip, PU_SRAM_SRBV0_SCOM + 2, 0); - write_rscom(chip, PU_SRAM_SRBV0_SCOM + 3, setup_memory_boot(chip)); - - 
write_rscom(chip, PU_JTG_PIB_OJCFG_AND, ~PPC_BIT(JTG_PIB_OJCFG_DBG_HALT_BIT)); - write_rscom(chip, PU_OCB_PIB_OCR_OR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); - write_rscom(chip, PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); + write_scom(chip, PU_SRAM_SRBV0_SCOM, 0); + write_scom(chip, PU_SRAM_SRBV0_SCOM + 1, 0); + write_scom(chip, PU_SRAM_SRBV0_SCOM + 2, 0); + write_scom(chip, PU_SRAM_SRBV0_SCOM + 3, setup_memory_boot(chip)); + + write_scom(chip, PU_JTG_PIB_OJCFG_AND, ~PPC_BIT(JTG_PIB_OJCFG_DBG_HALT_BIT)); + write_scom(chip, PU_OCB_PIB_OCR_OR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); + write_scom(chip, PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); } /* Wait for OCC to reach communications checkpoint */ @@ -1311,15 +1311,15 @@ void pm_occ_fir_init(uint8_t chip) | PPC_BIT(SRAM_WRITE_ERR) | PPC_BIT(SRT_FSM_ERR) | PPC_BIT(STOP_RCV_NOTIFY_PRD) | PPC_BIT(C405_ECC_UE); - uint64_t mask = read_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_INCR); + uint64_t mask = read_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_INCR); mask &= ~action0_bits; mask &= ~action1_bits; - write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR, 0); - write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION0_INCR, action0_bits); - write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION1_INCR, action1_bits); - write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WOR_INCR, mask); - write_rscom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WAND_INCR, mask); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR, 0); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION0_INCR, action0_bits); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION1_INCR, action1_bits); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WOR_INCR, mask); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WAND_INCR, mask); } void pm_pba_fir_init(uint8_t chip) @@ -1390,9 +1390,9 @@ void pm_pba_fir_init(uint8_t chip) mask &= ~action0_bits; mask &= ~action1_bits; - write_rscom(chip, PU_PBAFIR, 0); - write_rscom(chip, PU_PBAFIR + ACTION0_INCR, action0_bits); - write_rscom(chip, PU_PBAFIR + ACTION1_INCR, action1_bits); - write_rscom(chip, PU_PBAFIR + MASK_WOR_INCR, mask); - write_rscom(chip, PU_PBAFIR + MASK_WAND_INCR, mask); + write_scom(chip, PU_PBAFIR, 0); + write_scom(chip, PU_PBAFIR + ACTION0_INCR, action0_bits); + write_scom(chip, PU_PBAFIR + ACTION1_INCR, action1_bits); + write_scom(chip, PU_PBAFIR + MASK_WOR_INCR, mask); + write_scom(chip, PU_PBAFIR + MASK_WAND_INCR, mask); } diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c index df7f7228829..68ce0178b2d 100644 --- a/src/soc/ibm/power9/powerbus.c +++ b/src/soc/ibm/power9/powerbus.c @@ -167,7 +167,7 @@ static void calculate_epsilons(struct powerbus_cfg *cfg) uint32_t i; - uint64_t scratch_reg6 = read_rscom(0, MBOX_SCRATCH_REG1 + 5); + uint64_t scratch_reg6 = read_scom(0, MBOX_SCRATCH_REG1 + 5); /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ bool node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index df0e6ed77f6..8414d5abf05 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -447,13 +447,13 @@ void main(void) timestamp_add_now(TS_INITRAM_END); /* Test if SCOM still works. Maybe should check also indirect access? 
*/ - printk(BIOS_DEBUG, "0xF000F = %llx\n", read_rscom(0, 0xF000F)); + printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0, 0xF000F)); /* * Halt to give a chance to inspect FIRs, otherwise checkstops from * ramstage may cover up the failure in romstage. */ - if (read_rscom(0, 0xF000F) == 0xFFFFFFFFFFFFFFFF) + if (read_scom(0, 0xF000F) == 0xFFFFFFFFFFFFFFFF) die("SCOM stopped working, check FIRs, halting now\n"); cbmem_initialize_empty(); diff --git a/src/soc/ibm/power9/xbus.c b/src/soc/ibm/power9/xbus.c index 8d6bd7af3c8..be53808de18 100644 --- a/src/soc/ibm/power9/xbus.c +++ b/src/soc/ibm/power9/xbus.c @@ -36,12 +36,12 @@ void put_scom(uint8_t chip, uint64_t addr, uint64_t data) { addr = xbus_addr(/*xbus=*/1, addr); - write_rscom(chip, addr, data); + write_scom(chip, addr, data); } uint64_t get_scom(uint8_t chip, uint64_t addr) { addr = xbus_addr(/*xbus=*/1, addr); - return read_rscom(chip, addr); + return read_scom(chip, addr); } diff --git a/src/soc/ibm/power9/xive.c b/src/soc/ibm/power9/xive.c index 6cc631ae072..8d6ac06e52a 100644 --- a/src/soc/ibm/power9/xive.c +++ b/src/soc/ibm/power9/xive.c @@ -41,32 +41,32 @@ void configure_xive(int core) memcpy((void *)0xEA0, hyp_virt_int, CODE_SIZE(hyp_virt_int)); /* IVPE BAR + enable bit */ - write_rscom(0, 0x05013012, IVPE_BAR | PPC_BIT(0)); + write_scom(0, 0x05013012, IVPE_BAR | PPC_BIT(0)); /* FSP BAR */ - write_rscom(0, 0x0501290B, FSP_BAR); + write_scom(0, 0x0501290B, FSP_BAR); /* PSI HB BAR + enable bit */ /* TODO: check if 2 separate writes are required */ - write_rscom(0, 0x0501290A, PSI_HB_BAR); - write_rscom(0, 0x0501290A, PSI_HB_BAR | PPC_BIT(63)); + write_scom(0, 0x0501290A, PSI_HB_BAR); + write_scom(0, 0x0501290A, PSI_HB_BAR | PPC_BIT(63)); /* Disable VPC Pull error */ - rscom_and(0, 0x05013179, ~PPC_BIT(30)); + scom_and(0, 0x05013179, ~PPC_BIT(30)); /* PSI HB ESB BAR + enable bit */ /* TODO: check if 2 separate writes are required */ - write_rscom(0, 0x05012916, PSI_HB_ESB_BAR); - write_rscom(0, 0x05012916, PSI_HB_ESB_BAR | PPC_BIT(63)); + write_scom(0, 0x05012916, PSI_HB_ESB_BAR); + write_scom(0, 0x05012916, PSI_HB_ESB_BAR | PPC_BIT(63)); /* XIVE IC BAR + enable bit */ - write_rscom(0, 0x05013010, XIVE_IC_BAR | PPC_BIT(0)); + write_scom(0, 0x05013010, XIVE_IC_BAR | PPC_BIT(0)); /* Set HB mode on P3PC register */ - rscom_or(0, 0x05013110, PPC_BIT(33)); + scom_or(0, 0x05013110, PPC_BIT(33)); /* Disable PSI interrupts */ - write_rscom(0, 0x05012913, PPC_BIT(3)); + write_scom(0, 0x05012913, PPC_BIT(3)); void *esb_bar = (void *)PSI_HB_ESB_BAR; /* Mask all interrupt sources */ From 3eb8b3b0874b10b0f5cf2d7c703b46bd1a0e6c18 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 16 Apr 2022 19:21:48 +0300 Subject: [PATCH 187/213] soc/power9/: remove XBus-specific SCOM functions Only provide address modification function and use it with generic SCOM accessors. 
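
The pattern applied at every call site in this patch is to run the raw XBus register address through xbus_addr() and hand the result to the generic SCOM accessors (read_scom(), write_scom(), scom_and(), scom_or(), scom_and_or()) instead of going through the removed wrappers from xbus.c (put_scom(), get_scom()). The sketch below only illustrates that call-site shape: the accessor bodies and the pass-through xbus_addr() are stand-ins for the real implementations in the tree, and the mask/data values are made up.

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-ins for the tree's generic SCOM accessors (the real ones perform
 * actual PIB accesses). Signatures mirror the call sites visible in this
 * patch; the backing variable exists only so this sketch can run on its own.
 */
static uint64_t fake_scom_reg;

static uint64_t read_scom(uint8_t chip, uint64_t addr)
{
	(void)chip; (void)addr;
	return fake_scom_reg;
}

static void write_scom(uint8_t chip, uint64_t addr, uint64_t data)
{
	(void)chip; (void)addr;
	fake_scom_reg = data;
}

static void scom_and_or(uint8_t chip, uint64_t addr, uint64_t and_mask, uint64_t or_mask)
{
	write_scom(chip, addr, (read_scom(chip, addr) & and_mask) | or_mask);
}

/*
 * Stand-in for xbus_addr() from xbus.h: after this patch the call sites
 * pass the register address through it and use the result with the generic
 * accessors. The real XBus address translation is not reproduced here.
 */
static uint64_t xbus_addr(uint64_t addr)
{
	return addr;
}

int main(void)
{
	const uint8_t chip = 0;
	/* XBus PHY register address taken from the istep_8_10.c hunks of this patch. */
	const uint64_t reg = xbus_addr(0x8008080006010C3Full);

	/* Old call-site shape (removed): and_or_scom(chip, addr, and_mask, or_mask); */
	/* New call-site shape: generic read-modify-write on the translated address. */
	scom_and_or(chip, reg, ~0xFFull, 0x01);

	printf("register is now %#llx\n", (unsigned long long)read_scom(chip, reg));
	return 0;
}

Keeping only the address translation in xbus.h and reusing the generic accessors means there is no parallel set of XBus-only read/write/RMW helpers to maintain, which is what deleting xbus.c in this patch boils down to.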
Change-Id: Ib4df9dfd59685e3ad313d45a77267aff9e36d644 Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/Makefile.inc | 1 - src/soc/ibm/power9/istep_10_1.c | 79 +++++++++--------- src/soc/ibm/power9/istep_8_10.c | 144 ++++++++++++++++---------------- src/soc/ibm/power9/istep_8_11.c | 6 +- src/soc/ibm/power9/istep_8_9.c | 89 ++++++++++---------- src/soc/ibm/power9/istep_9_2.c | 60 ++++++------- src/soc/ibm/power9/istep_9_4.c | 28 ++++--- src/soc/ibm/power9/istep_9_6.c | 2 +- src/soc/ibm/power9/istep_9_7.c | 18 ++-- src/soc/ibm/power9/xbus.c | 47 ----------- src/soc/ibm/power9/xbus.h | 77 +++++------------ 11 files changed, 237 insertions(+), 314 deletions(-) delete mode 100644 src/soc/ibm/power9/xbus.c diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc index 2ef412b044f..ce6978f9cd9 100644 --- a/src/soc/ibm/power9/Makefile.inc +++ b/src/soc/ibm/power9/Makefile.inc @@ -44,7 +44,6 @@ romstage-y += mcbist.c romstage-y += timer.c romstage-y += fsi.c romstage-y += sbeio.c -romstage-y += xbus.c romstage-y += xscom.c ramstage-y += chip.c ramstage-y += homer.c diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c index 5ecedefe1e1..7bcb18dc3db 100644 --- a/src/soc/ibm/power9/istep_10_1.c +++ b/src/soc/ibm/power9/istep_10_1.c @@ -9,7 +9,6 @@ #include #include "fsi.h" -#include "xbus.h" enum build_smp_adu_action { SWITCH_AB = 1, @@ -173,7 +172,7 @@ static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) uint64_t tmp; val = PPC_PLACE(is_xbus_active ? 0x08 : 0x06, 54, 5) | PPC_PLACE(0x03, 59, 5); - put_scom(chip, 0x90000CB205012011, val); + write_scom(chip, 0x90000CB205012011, val); tmp = 0; if (100 * xbus_freq_mhz >= 120 * pb_freq_mhz) @@ -185,61 +184,61 @@ static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) else if (125 * xbus_freq_mhz >= 100 * pb_freq_mhz) tmp = 0x0C; val = PPC_PLACE(tmp, 54, 5) | PPC_PLACE(3, 59, 5); - put_scom(chip, 0x90000CB305012011, val); + write_scom(chip, 0x90000CB305012011, val); val = PPC_PLACE(0x10, 51, 5) | PPC_PLACE(2, 58, 2) | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); - put_scom(chip, 0x90000CDB05011C11, val); + write_scom(chip, 0x90000CDB05011C11, val); val = PPC_PLACE(7, 49, 3) | PPC_PLACE(4, 52, 6); - put_scom(chip, 0x90000CF405011C11, val); + write_scom(chip, 0x90000CF405011C11, val); val = PPC_PLACE(0xC, 45, 4) | PPC_PLACE(1, 57, 2); - put_scom(chip, 0x90000D3F05011C11, val); + write_scom(chip, 0x90000D3F05011C11, val); val = PPC_PLACE(3, 41, 2) | PPC_PLACE(1, 43, 2) | PPC_PLACE(3, 45, 4) | PPC_PLACE(0xC0, 49, 8); - put_scom(chip, 0x90000D7805011C11, val); + write_scom(chip, 0x90000D7805011C11, val); val = PPC_PLACE(8, 38, 4) | PPC_PLACE(4, 42, 4) | PPC_PLACE(1, 57, 3) | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); - put_scom(chip, 0x90000DAA05011C11, val); + write_scom(chip, 0x90000DAA05011C11, val); val = PPC_PLACE(4, 36, 3) | PPC_PLACE(0x20, 41, 8) | PPC_BIT(49) | PPC_BIT(51) | PPC_BIT(52) | PPC_BIT(53) | PPC_BIT(55) | PPC_BIT(56) | PPC_BIT(57) | PPC_PLACE(is_xbus_active ? 
0xF : 0x8, 60, 4); - put_scom(chip, 0x90000DCC05011C11, val); + write_scom(chip, 0x90000DCC05011C11, val); val = PPC_PLACE(1, 41, 3) | PPC_PLACE(1, 44, 3) | PPC_PLACE(2, 47, 3) | PPC_PLACE(3, 50, 3) | PPC_PLACE(5, 53, 3) | PPC_PLACE(5, 57, 3); - put_scom(chip, 0x90000E0605011C11, val); + write_scom(chip, 0x90000E0605011C11, val); val = PPC_PLACE(0x06, 33, 5) | PPC_PLACE(0x0D, 38, 5) | PPC_PLACE(0x1E, 48, 5) | PPC_PLACE(0x19, 53, 5) | PPC_BIT(63); - put_scom(chip, 0x90000E4305011C11, val); + write_scom(chip, 0x90000E4305011C11, val); val = PPC_PLACE(0x400, 22, 12) | PPC_PLACE(0x400, 34, 12) | PPC_PLACE(2, 46, 3) | PPC_PLACE(2, 49, 3) | PPC_PLACE(2, 52, 3) | PPC_PLACE(2, 55, 3) | PPC_PLACE(2, 58, 3) | PPC_PLACE(2, 61, 3); - put_scom(chip, 0x90000EA205011C11, val); + write_scom(chip, 0x90000EA205011C11, val); /* 44 - set because ATTR_CHIP_EC_FEATURE_HW409019 == 1 */ val = PPC_PLACE(0x0C, 20, 8) | PPC_BIT(44); - put_scom(chip, 0x90000EC705011C11, val); + write_scom(chip, 0x90000EC705011C11, val); val = PPC_PLACE(0x4, 18, 10) | PPC_PLACE(0x141, 28, 12) | PPC_PLACE(0x21B, 40, 12) | PPC_PLACE(0x30D, 52, 12); - put_scom(chip, 0x90000EE105011C11, val); + write_scom(chip, 0x90000EE105011C11, val); val = PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(2, 31, 3) | PPC_PLACE(3, 34, 3) | PPC_PLACE(5, 37, 3) | PPC_PLACE(1, 49, 3) | PPC_PLACE(1, 52, 3) | PPC_PLACE(2, 55, 3) | PPC_PLACE(3, 58, 3) | PPC_PLACE(5, 61, 3); - put_scom(chip, 0x90000F0505011C11, val); + write_scom(chip, 0x90000F0505011C11, val); val = PPC_PLACE(0x7, 14, 10) | PPC_PLACE(0x5, 24, 10) | PPC_PLACE(0x5, 34, 10) | PPC_PLACE(0x4, 44, 10) | PPC_PLACE(0x5, 54, 10); - put_scom(chip, 0x90000F2005011C11, val); + write_scom(chip, 0x90000F2005011C11, val); val = PPC_BIT(20) | PPC_PLACE(3, 32, 2) | PPC_PLACE(7, 34, 3) | PPC_PLACE(3, 37, 2) | PPC_PLACE(1, 41, 1) | PPC_PLACE(1, 42, 1); @@ -247,15 +246,15 @@ static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) val |= PPC_PLACE(3, 24, 2) | PPC_PLACE(3, 44, 2); tmp = (pb_cfg->core_ceiling_ratio == FABRIC_CORE_CEILING_RATIO_RATIO_8_8 ? 3 : 2); val |= PPC_PLACE(tmp, 28, 2); - put_scom(chip, 0x90000F4005011811, val); - put_scom(chip, 0x90000F4005012011, val); + write_scom(chip, 0x90000F4005011811, val); + write_scom(chip, 0x90000F4005012011, val); val = PPC_BIT(12) | PPC_PLACE(4, 13, 4) | PPC_PLACE(4, 17, 4) | PPC_PLACE(4, 21, 4) | PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(1, 31, 3) | PPC_PLACE(0xFE, 34, 8) | PPC_PLACE(0xFE, 42, 8) | PPC_PLACE(1, 50, 2) | PPC_PLACE(2, 54, 3) | PPC_PLACE(2, 57, 2) | PPC_BIT(60) | PPC_BIT(61) | PPC_BIT(63); - put_scom(chip, 0x90000F4D05011C11, val); + write_scom(chip, 0x90000F4D05011C11, val); val = PPC_BIT(35) | PPC_PLACE(1, 36, 2) | PPC_PLACE(2, 39, 2) | PPC_BIT(49) | PPC_PLACE(1, 51, 2); @@ -276,8 +275,8 @@ static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) tmp = 2; val |= PPC_PLACE(tmp, 44, 2); - put_scom(chip, 0x90000E6105011811, val); - put_scom(chip, 0x90000E6105012011, val); + write_scom(chip, 0x90000E6105011811, val); + write_scom(chip, 0x90000E6105012011, val); } /* @@ -293,14 +292,14 @@ static void p9_fbc_cd_hp23_scom(uint8_t chip, bool is_xbus_active, int seq) val = PPC_PLACE(8, 38, 4) | PPC_PLACE(4, 42, 4) | PPC_PLACE(tmp, 50, 1) | PPC_PLACE(1, 57, 3) | PPC_PLACE((seq == 2 && is_xbus_active) ? 
0xF : 0x8, 60, 4); - put_scom(chip, 0x90000DAA05011C11, val); + write_scom(chip, 0x90000DAA05011C11, val); val = PPC_BIT(12) | PPC_PLACE(4, 13, 4) | PPC_PLACE(4, 17, 4) | PPC_PLACE(4, 21, 4) | PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(1, 31, 3) | PPC_PLACE(0xFE, 34, 8) | PPC_PLACE(0xFE, 42, 8) | PPC_PLACE(1, 50, 2) | PPC_PLACE(2, 54, 3) | PPC_PLACE(2, 57, 2) | PPC_PLACE(tmp, 59, 1) | PPC_PLACE(tmp, 60, 1) | PPC_BIT(61) | PPC_BIT(63); - put_scom(chip, 0x90000F4D05011C11, val); + write_scom(chip, 0x90000F4D05011C11, val); } /* Set action which will occur on fabric pmisc switch command */ @@ -315,16 +314,16 @@ static void p9_adu_coherent_utils_set_switch_action(uint8_t chip, enum adu_op ad if (adu_op == PRE_SWITCH_CD) data |= PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD); - and_or_scom(chip, PU_SND_MODE_REG, ~mask, data); + scom_and_or(chip, PU_SND_MODE_REG, ~mask, data); } static void p9_adu_coherent_utils_check_fbc_state(uint8_t chip) { /* PU_PB_CENT_SM0_PB_CENT_MODE_PB_CENT_PBIXXX_INIT */ - if (!(get_scom(chip, PU_PB_CENT_SM0_PB_CENT_MODE) & PPC_BIT(0))) + if (!(read_scom(chip, PU_PB_CENT_SM0_PB_CENT_MODE) & PPC_BIT(0))) die("FBC isn't initialized!\n"); - if (get_scom(chip, PU_SND_MODE_REG) & PPC_BIT(PU_SND_MODE_REG_PB_STOP)) + if (read_scom(chip, PU_SND_MODE_REG) & PPC_BIT(PU_SND_MODE_REG_PB_STOP)) die("FBC isn't running!\n"); } @@ -338,7 +337,7 @@ static void lock_adu(uint8_t chip) data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_CLEAR_STATUS); /* Write ADU command register to attempt lock manipulation */ - put_scom(chip, PU_ALTD_CMD_REG, data); + write_scom(chip, PU_ALTD_CMD_REG, data); } /* Setup the value for ADU option register to enable quiesce & init around a @@ -367,7 +366,7 @@ static void set_quiesce_init(uint8_t chip) /* Setup workaround for HW397129 to re-enable fastpath for DD2 */ data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_ALTD_HW397129); - put_scom(chip, PU_ALTD_OPTION_REG, data); + write_scom(chip, PU_ALTD_OPTION_REG, data); } static void p9_adu_coherent_setup_adu(uint8_t chip, enum adu_op adu_op) @@ -378,7 +377,7 @@ static void p9_adu_coherent_setup_adu(uint8_t chip, enum adu_op adu_op) /* Write the address. Not sure if operations we support actually need * this. 
*/ - put_scom(chip, PU_ALTD_ADDR_REG, 0); + write_scom(chip, PU_ALTD_ADDR_REG, 0); /* This routine assumes the lock is held by the caller, preserve this * locked state */ @@ -411,7 +410,7 @@ static void p9_adu_coherent_setup_adu(uint8_t chip, enum adu_op adu_op) PPC_INSERT(cmd, ttype, PU_ALTD_CMD_REG_FBC_TTYPE, PU_ALTD_CMD_REG_FBC_TTYPE_LEN); PPC_INSERT(cmd, tsize, PU_ALTD_CMD_REG_FBC_TSIZE, PU_ALTD_CMD_REG_FBC_TSIZE_LEN); - put_scom(chip, PU_ALTD_CMD_REG, cmd); + write_scom(chip, PU_ALTD_CMD_REG, cmd); } static void p9_adu_setup(uint8_t chip, enum adu_op adu_op) @@ -442,7 +441,7 @@ static void p9_adu_coherent_status_check(uint8_t chip, bool is_addr_only) //Check for a successful status 10 times for (i = 0; i < 10; i++) { - status = get_scom(chip, PU_ALTD_STATUS_REG); + status = read_scom(chip, PU_ALTD_STATUS_REG); if (!(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ALTD_BUSY))) break; @@ -492,8 +491,8 @@ static void p9_adu_access(uint8_t chip, enum adu_op adu_op) if (is_addr_only) { udelay(10); } else { - put_scom(chip, PU_ALTD_DATA_REG, 0); - or_scom(chip, PU_ALTD_CMD_REG, PPC_BIT(PU_ALTD_CMD_REG_FBC_START_OP)); + write_scom(chip, PU_ALTD_DATA_REG, 0); + scom_or(chip, PU_ALTD_CMD_REG, PPC_BIT(PU_ALTD_CMD_REG_FBC_START_OP)); /* If it's not a cache inhibit operation, we just want to delay * for a while and then it's done */ @@ -504,7 +503,7 @@ static void p9_adu_access(uint8_t chip, enum adu_op adu_op) p9_adu_coherent_status_check(chip, is_addr_only); /* If it's the last read/write cleanup the ADU */ - put_scom(chip, PU_ALTD_CMD_REG, 0); + write_scom(chip, PU_ALTD_CMD_REG, 0); } /* We don't write any specific data to ADU, just execute an action on it */ @@ -608,7 +607,7 @@ static void p9_fbc_ab_hp_scom(uint8_t chip, bool is_xbus_active) /* *_HP_MODE_NEXT */ - val = get_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i]); + val = read_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i]); if (!is_fabric_master) { val &= ~PPC_BIT(0); // PB_COM_PB_CFG_MASTER_CHIP_NEXT_OFF @@ -624,11 +623,11 @@ static void p9_fbc_ab_hp_scom(uint8_t chip, bool is_xbus_active) val &= ~PPC_BIT(29); // PB_COM_PB_CFG_HOP_MODE_NEXT_OFF - put_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i], val); + write_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i], val); /* *_HPX_MODE_NEXT */ - val = get_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i]); + val = read_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i]); PPC_INSERT(val, is_xbus_active, 1, 1); // PB_COM_PB_CFG_LINK_X1_EN_NEXT PPC_INSERT(val, attached_chip, 19, 3); // PB_COM_PB_CFG_LINK_X1_CHIPID_NEXT_ID @@ -646,7 +645,7 @@ static void p9_fbc_ab_hp_scom(uint8_t chip, bool is_xbus_active) tmp = (cmd_rate_4b_n / cmd_rate_d) - 1; PPC_INSERT(val, tmp, 56, 8); - put_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i], val); + write_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i], val); } } @@ -655,7 +654,7 @@ static uint64_t p9_build_smp_get_hp_ab_shadow(uint8_t chip, const uint64_t shado uint64_t last_data = 0; for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) { - const uint64_t data = get_scom(chip, shadow_regs[i]); + const uint64_t data = read_scom(chip, shadow_regs[i]); /* Check consistency of west/center/east register copies while * reading them */ @@ -672,7 +671,7 @@ static void p9_build_smp_set_hp_ab_shadow(uint8_t chip, const uint64_t shadow_re uint64_t data) { for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) - put_scom(chip, shadow_regs[i], data); + write_scom(chip, shadow_regs[i], data); } static void p9_build_smp_copy_hp_ab_next_curr(uint8_t chip) diff --git a/src/soc/ibm/power9/istep_8_10.c b/src/soc/ibm/power9/istep_8_10.c index 
263e7a00d7c..6332ea9a7e1 100644 --- a/src/soc/ibm/power9/istep_8_10.c +++ b/src/soc/ibm/power9/istep_8_10.c @@ -27,132 +27,132 @@ static void xbus_scom(uint8_t chip, uint8_t group) /* *_RX_DATA_DAC_SPARE_MODE_PL */ for (i = 0; i < 18; i++) { - uint64_t addr = 0x8000000006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x8000000006010C3F + offset + 0x100000000 * i); // 53 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_5_OFF // 54 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_6_OFF // 55 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_7_OFF - and_scom(chip, addr, ~PPC_BITMASK(53, 55)); + scom_and(chip, addr, ~PPC_BITMASK(53, 55)); } /* *_RX_DAC_CNTL1_EO_PL */ for (i = 0; i < 18; i++) { - uint64_t addr = 0x8000080006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x8000080006010C3F + offset + 0x100000000 * i); // 54 - *_RX_DAC_REGS_RX_DAC_REGS_RX_LANE_ANA_PDWN_{OFF,ON} if (i < 17) - and_scom(chip, addr, ~PPC_BIT(54)); + scom_and(chip, addr, ~PPC_BIT(54)); else - or_scom(chip, addr, PPC_BIT(54)); + scom_or(chip, addr, PPC_BIT(54)); } /* *_RX_DAC_CNTL5_EO_PL */ for (i = 0; i < 18; i++) { - uint64_t addr = 0x8000280006010C3F + offset + 0x100000000 * i; - and_scom(chip, addr, + uint64_t addr = xbus_addr(0x8000280006010C3F + offset + 0x100000000 * i); + scom_and(chip, addr, ~(PPC_BITMASK(48, 51) | PPC_BITMASK(52, 56) | PPC_BITMASK(57, 61))); } /* *_RX_DAC_CNTL6_EO_PL */ for (i = 0; i < 18; i++) { - uint64_t addr = 0x8000300006010C3F + offset + 0x100000000 * i; - and_or_scom(chip, addr, + uint64_t addr = xbus_addr(0x8000300006010C3F + offset + 0x100000000 * i); + scom_and_or(chip, addr, ~(PPC_BITMASK(53, 56) | PPC_BITMASK(48, 52)), PPC_PLACE(0x7, 53, 4) | PPC_PLACE(0x0C, 48, 5)); } /* *_RX_DAC_CNTL9_E_PL */ for (i = 0; i < 18; i++) { - uint64_t addr = 0x8000C00006010C3F + offset + 0x100000000 * i; - and_scom(chip, addr, ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 60))); + uint64_t addr = xbus_addr(0x8000C00006010C3F + offset + 0x100000000 * i); + scom_and(chip, addr, ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 60))); } /* *_RX_BIT_MODE1_EO_PL */ for (i = 0; i < 18; i++) { - uint64_t addr = 0x8002200006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x8002200006010C3F + offset + 0x100000000 * i); // 48 - *_RX_BIT_REGS_RX_LANE_DIG_PDWN_{OFF,ON} if (i < 17) - and_scom(chip, addr, ~PPC_BIT(48)); + scom_and(chip, addr, ~PPC_BIT(48)); else - or_scom(chip, addr, PPC_BIT(48)); + scom_or(chip, addr, PPC_BIT(48)); } /* *_RX_BIT_MODE1_E_PL */ for (i = 0; i < 17; i++) { - uint64_t addr = 0x8002C00006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x8002C00006010C3F + offset + 0x100000000 * i); const uint16_t data[17] = { 0x1000, 0xF03E, 0x07BC, 0x07C7, 0x03EF, 0x1F0F, 0x1800, 0x9C00, 0x1000, 0x9C00, 0x1800, 0x1F0F, 0x03EF, 0x07C7, 0x07BC, 0xF03E, 0x1000 }; - and_or_scom(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); + scom_and_or(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); } /* *_RX_BIT_MODE2_E_PL */ for (i = 0; i < 17; i++) { - uint64_t addr = 0x8002C80006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x8002C80006010C3F + offset + 0x100000000 * i); const uint8_t data[17] = { 0x42, 0x3E, 0x00, 0x60, 0x40, 0x40, 0x03, 0x03, 0x42, 0x03, 0x03, 0x40, 0x40, 0x60, 0x00, 0x3E, 0x42 }; - and_or_scom(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); + scom_and_or(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); } /* *_TX_MODE1_PL */ for (i = 0; i < 17; i++) { - uint64_t 
addr = 0x8004040006010C3F + offset + 0x100000000 * i; - and_scom(chip, addr, ~PPC_BIT(48)); + uint64_t addr = xbus_addr(0x8004040006010C3F + offset + 0x100000000 * i); + scom_and(chip, addr, ~PPC_BIT(48)); } /* *_TX_MODE2_PL */ for (i = 0; i < 17; i++) { - uint64_t addr = 0x80040C0006010C3F + offset + 0x100000000 * i; - or_scom(chip, addr, PPC_BIT(62)); + uint64_t addr = xbus_addr(0x80040C0006010C3F + offset + 0x100000000 * i); + scom_or(chip, addr, PPC_BIT(62)); } /* *_TX_BIT_MODE1_E_PL */ for (i = 0; i < 17; i++) { - uint64_t addr = 0x80043C0006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x80043C0006010C3F + offset + 0x100000000 * i); const uint16_t data[17] = { 0x000, 0x000, 0x01E, 0x01F, 0x00F, 0x07C, 0xC63, 0xE73, 0x000, 0xE73, 0xC63, 0x07C, 0x00F, 0x01F, 0x01E, 0x000, 0x000, }; - and_or_scom(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); + scom_and_or(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); } /* *_TX_BIT_MODE2_E_PL */ for (i = 0; i < 17; i++) { - uint64_t addr = 0x8004440006010C3F + offset + 0x100000000 * i; + uint64_t addr = xbus_addr(0x8004440006010C3F + offset + 0x100000000 * i); const uint8_t data[17] = { 0x01, 0x7C, 0x7B, 0x0C, 0x5E, 0x10, 0x0C, 0x4E, 0x01, 0x4E, 0x0C, 0x10, 0x5E, 0x0C, 0x7B, 0x7C, 0x01, }; - and_or_scom(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); + scom_and_or(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); } // P9A_XBUS_0_RX[01]_RX_SPARE_MODE_PG // 49 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PG_SPARE_MODE_1_ON - or_scom(chip, 0x8008000006010C3F + offset, PPC_BIT(49)); + scom_or(chip, xbus_addr(0x8008000006010C3F + offset), PPC_BIT(49)); // P9A_XBUS_0_RX[01]_RX_ID1_PG - and_or_scom(chip, 0x8008080006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8008080006010C3F + offset), ~PPC_BITMASK(48, 53), PPC_PLACE((group == 0 ? 
0x00 : 0x01), 48, 6)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE1_EO_PG // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_CLKDIST_PDWN_OFF - and_scom(chip, 0x8008100006010C3F + offset, ~PPC_BIT(48)); + scom_and(chip, xbus_addr(0x8008100006010C3F + offset), ~PPC_BIT(48)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE5_EO_PG // 51-53 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RECAL_INTERVAL_TIMEOUT_SEL_TAP5 // 54-55 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RECAL_STATUS_RPT_TIMEOUT_SEL_TAP1 - and_or_scom(chip, 0x8008300006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8008300006010C3F + offset), ~(PPC_BITMASK(51, 53) | PPC_BITMASK(54, 55)), PPC_PLACE(0x5, 51, 3) | PPC_PLACE(0x1, 54, 2)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE7_EO_PG - and_or_scom(chip, 0x8008400006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8008400006010C3F + offset), ~PPC_BITMASK(60, 63), PPC_PLACE(0xA, 60, 4)); @@ -162,12 +162,12 @@ static void xbus_scom(uint8_t chip, uint8_t group) // 59 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFEHISPD_EN_ON // 60 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFE12_EN_ON if (group == 0) { - and_or_scom(chip, 0x8008C00006010C3F, + scom_and_or(chip, xbus_addr(0x8008C00006010C3F), ~(PPC_BITMASK(48, 49) | PPC_BITMASK(55, 60)), PPC_PLACE(0x1, 48, 2) | PPC_BIT(56) | PPC_PLACE(0x3, 57, 2) | PPC_BIT(59) | PPC_BIT(60)); } else { - and_or_scom(chip, 0x8008C00006010C3F, + scom_and_or(chip, xbus_addr(0x8008C00006010C3F), ~PPC_BITMASK(48, 49), PPC_PLACE(0x1, 48, 2)); } @@ -178,22 +178,22 @@ static void xbus_scom(uint8_t chip, uint8_t group) // 59 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFEHISPD_EN_ON // 60 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFE12_EN_ON if (group == 1) { - and_or_scom(chip, 0x8008C02006010C3F, + scom_and_or(chip, xbus_addr(0x8008C02006010C3F), ~PPC_BITMASK(55, 60), PPC_BIT(56) | PPC_PLACE(0x3, 57, 2) | PPC_BIT(59) | PPC_BIT(60)); } // P9A_XBUS_0_RX0_RX_CTL_MODE29_EO_PG (identical for both groups) - and_or_scom(chip, 0x8008D00006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8008D00006010C3F + offset), ~(PPC_BITMASK(48, 55) | PPC_BITMASK(56, 63)), PPC_PLACE(0x66, 48, 8) | PPC_PLACE(0x44, 56, 8)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE27_EO_PG // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_RC_ENABLE_CTLE_1ST_LATCH_OFFSET_CAL_ON - or_scom(chip, 0x8009700006010C3F + offset, PPC_BIT(48)); + scom_or(chip, xbus_addr(0x8009700006010C3F + offset), PPC_BIT(48)); // P9A_XBUS_0_RX[01]_RX_ID2_PG - and_or_scom(chip, 0x8009800006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009800006010C3F + offset), ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(0x00, 49, 7) | PPC_PLACE(0x10, 57, 7)); @@ -201,111 +201,111 @@ static void xbus_scom(uint8_t chip, uint8_t group) // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_MASTER_MODE_MASTER // 57 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_FENCE_FENCED // 58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PDWN_LITE_DISABLE_ON - or_scom(chip, 0x8009900006010C3F + offset, + scom_or(chip, xbus_addr(0x8009900006010C3F + offset), (xbus_master_mode ? 
PPC_BIT(48) : 0) | PPC_BIT(57) | PPC_BIT(58)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE2_E_PG - and_or_scom(chip, 0x8009980006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009980006010C3F + offset), ~PPC_BITMASK(48, 52), PPC_PLACE(0x01, 48, 5)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE3_E_PG - and_or_scom(chip, 0x8009A00006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009A00006010C3F + offset), ~PPC_BITMASK(48, 51), PPC_PLACE(0xB, 48, 4)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE5_E_PG - and_or_scom(chip, 0x8009B00006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009B00006010C3F + offset), ~PPC_BITMASK(52, 55), PPC_PLACE(0x1, 52, 4)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE6_E_PG - and_or_scom(chip, 0x8009B80006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009B80006010C3F + offset), ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 61)), PPC_PLACE(0x11, 48, 7) | PPC_PLACE(0x11, 55, 7)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE8_E_PG // 55-58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RPR_ERR_CNTR1_DURATION_TAP5 - and_or_scom(chip, 0x8009C80006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009C80006010C3F + offset), ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 58) | PPC_BITMASK(61, 63)), PPC_PLACE(0xF, 48, 7) | PPC_PLACE(0x5, 55, 4) | PPC_PLACE(0x5, 61, 3)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE9_E_PG // 55-58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RPR_ERR_CNTR2_DURATION_TAP5 - and_or_scom(chip, 0x8009D00006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009D00006010C3F + offset), ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 58)), PPC_PLACE(0x3F, 48, 7) | PPC_PLACE(0x5, 55, 4)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE11_E_PG - and_scom(chip, 0x8009E00006010C3F + offset, ~PPC_BITMASK(48, 63)); + scom_and(chip, xbus_addr(0x8009E00006010C3F + offset), ~PPC_BITMASK(48, 63)); // P9A_XBUS_0_RX[01]_RX_CTL_MODE12_E_PG - and_or_scom(chip, 0x8009E80006010C3F + offset, + scom_and_or(chip, xbus_addr(0x8009E80006010C3F + offset), ~PPC_BITMASK(48, 55), PPC_PLACE(0x7F, 48, 8)); // P9A_XBUS_0_RX[01]_RX_GLBSM_SPARE_MODE_PG // 50 - IOF1_RX_RX0_RXCTL_GLBSM_REGS_RX_PG_GLBSM_SPARE_MODE_2_ON // 56 - IOF1_RX_RX0_RXCTL_GLBSM_REGS_RX_DESKEW_BUMP_AFTER_AFTER - or_scom(chip, 0x800A800006010C3F + offset, PPC_BIT(50) | PPC_BIT(56)); + scom_or(chip, xbus_addr(0x800A800006010C3F + offset), PPC_BIT(50) | PPC_BIT(56)); // P9A_XBUS_0_RX[01]_RX_GLBSM_CNTL3_EO_PG - and_or_scom(chip, 0x800AE80006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800AE80006010C3F + offset), ~PPC_BITMASK(56, 57), PPC_PLACE(0x2, 56, 2)); // P9A_XBUS_0_RX[01]_RX_GLBSM_MODE1_EO_PG - and_or_scom(chip, 0x800AF80006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800AF80006010C3F + offset), ~(PPC_BITMASK(48, 51) | PPC_BITMASK(52, 55)), PPC_PLACE(0xC, 48, 4) | PPC_PLACE(0xC, 52, 4)); // P9A_XBUS_0_RX[01]_RX_DATASM_SPARE_MODE_PG // 60 - IOF1_RX_RX0_RXCTL_DATASM_DATASM_REGS_RX_CTL_DATASM_CLKDIST_PDWN_OFF - and_scom(chip, 0x800B800006010C3F + offset, ~PPC_BIT(60)); + scom_and(chip, xbus_addr(0x800B800006010C3F + offset), ~PPC_BIT(60)); // P9A_XBUS_0_TX[01]_TX_SPARE_MODE_PG - and_scom(chip, 0x800C040006010C3F + offset, ~PPC_BITMASK(56, 57)); + scom_and(chip, xbus_addr(0x800C040006010C3F + offset), ~PPC_BITMASK(56, 57)); // P9A_XBUS_0_TX[01]_TX_ID1_PG - and_or_scom(chip, 0x800C0C0006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800C0C0006010C3F + offset), ~PPC_BITMASK(48, 53), PPC_PLACE((group == 0 ? 
0x00 : 0x01), 48, 6)); // P9A_XBUS_0_TX[01]_TX_CTL_MODE1_EO_PG // 48 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_CLKDIST_PDWN_OFF // 59 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_PDWN_LITE_DISABLE_ON - and_or_scom(chip, 0x800C140006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800C140006010C3F + offset), ~(PPC_BIT(48) | PPC_BITMASK(53, 57) | PPC_BIT(59)), PPC_PLACE(0x01, 53, 5) | PPC_BIT(59)); // P9A_XBUS_0_TX[01]_TX_CTL_MODE2_EO_PG - and_or_scom(chip, 0x800C1C0006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BITMASK(56, 62), PPC_PLACE(0x11, 56, 7)); // P9A_XBUS_0_TX[01]_TX_CTL_CNTLG1_EO_PG // 48-49 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DRV_CLK_PATTERN_GCRMSG_DRV_0S - and_scom(chip, 0x800C240006010C3F + offset, ~PPC_BITMASK(48, 49)); + scom_and(chip, xbus_addr(0x800C240006010C3F + offset), ~PPC_BITMASK(48, 49)); // P9A_XBUS_0_TX[01]_TX_ID2_PG - and_or_scom(chip, 0x800C840006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800C840006010C3F + offset), ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), PPC_PLACE(0x0, 49, 7) | PPC_PLACE(0x10, 57, 7)); // P9A_XBUS_0_TX[01]_TX_CTL_MODE1_E_PG // 55-57 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DYN_RECAL_INTERVAL_TIMEOUT_SEL_TAP5 // 58-59 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DYN_RECAL_STATUS_RPT_TIMEOUT_SEL_TAP1 - and_or_scom(chip, 0x800C8C0006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800C8C0006010C3F + offset), ~(PPC_BITMASK(55, 57) | PPC_BITMASK(58, 59)), PPC_PLACE(0x5, 55, 3) | PPC_PLACE(0x1, 58, 2)); // P9A_XBUS_0_TX[01]_TX_CTL_MODE2_E_PG - and_scom(chip, 0x800CEC0006010C3F + offset, ~PPC_BITMASK(48, 63)); + scom_and(chip, xbus_addr(0x800CEC0006010C3F + offset), ~PPC_BITMASK(48, 63)); // P9A_XBUS_0_TX[01]_TX_CTL_MODE3_E_PG - and_or_scom(chip, 0x800CF40006010C3F + offset, + scom_and_or(chip, xbus_addr(0x800CF40006010C3F + offset), ~PPC_BITMASK(48, 55), PPC_PLACE(0x7F, 48, 8)); // P9A_XBUS_0_TX[01]_TX_CTLSM_MODE1_EO_PG // 59 - IOF1_TX_WRAP_TX0_TXCTL_TX_CTL_SM_REGS_TX_FFE_BOOST_EN_ON - or_scom(chip, 0x800D2C0006010C3F + offset, PPC_BIT(59)); + scom_and(chip, xbus_addr(0x800D2C0006010C3F + offset), PPC_BIT(59)); // P9A_XBUS_0_TX_IMPCAL_P_4X_PB (identical for both groups) - and_or_scom(chip, 0x800F1C0006010C3F, + scom_and_or(chip, xbus_addr(0x800F1C0006010C3F), ~PPC_BITMASK(48, 54), PPC_PLACE(0x0E, 48, 5)); } @@ -316,13 +316,13 @@ static void set_msb_swap(uint8_t chip, int group) EDIP_TX_MSBSWAP = 58, }; - const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + const uint64_t addr = xbus_addr(TX_CTL_MODE1_EO_PG + group * XBUS_LINK_GROUP_OFFSET); /* ATTR_EI_BUS_TX_MSBSWAP seems to be 0x80 which is GROUP_0_SWAP */ if (group == 0) - or_scom(chip, TX_CTL_MODE1_EO_PG + offset, PPC_BIT(EDIP_TX_MSBSWAP)); + scom_or(chip, addr, PPC_BIT(EDIP_TX_MSBSWAP)); else - and_scom(chip, TX_CTL_MODE1_EO_PG + offset, ~PPC_BIT(EDIP_TX_MSBSWAP)); + scom_and(chip, addr, ~PPC_BIT(EDIP_TX_MSBSWAP)); } static void xbus_scominit(int group) @@ -344,11 +344,11 @@ static void xbus_scominit(int group) const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; /* Assert IO reset to power-up bus endpoint logic */ - or_scom(0, EDIP_RX_IORESET + offset, PPC_BIT(52)); - or_scom(1, EDIP_RX_IORESET + offset, PPC_BIT(52)); + scom_or(0, xbus_addr(EDIP_RX_IORESET + offset), PPC_BIT(52)); + scom_or(1, xbus_addr(EDIP_RX_IORESET + offset), PPC_BIT(52)); udelay(50); - or_scom(0, EDIP_TX_IORESET + offset, PPC_BIT(48)); - or_scom(1, EDIP_TX_IORESET + offset, PPC_BIT(48)); + scom_or(0, xbus_addr(EDIP_TX_IORESET + 
offset), PPC_BIT(48)); + scom_or(1, xbus_addr(EDIP_TX_IORESET + offset), PPC_BIT(48)); udelay(50); set_msb_swap(/*chip=*/0, group); @@ -358,10 +358,10 @@ static void xbus_scominit(int group) xbus_scom(/*chip=*/1, group); /* PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 */ - if (!(get_scom(/*chip=*/0, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & PPC_BIT(13))) { - put_scom(/*chip=*/0, XBUS_FIR_ACTION0_REG, XBUS_PHY_FIR_ACTION0); - put_scom(/*chip=*/0, XBUS_FIR_ACTION1_REG, XBUS_PHY_FIR_ACTION1); - put_scom(/*chip=*/0, XBUS_FIR_MASK_REG, XBUS_PHY_FIR_MASK); + if (!(read_scom(/*chip=*/0, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & PPC_BIT(13))) { + write_scom(/*chip=*/0, xbus_addr(XBUS_FIR_ACTION0_REG), XBUS_PHY_FIR_ACTION0); + write_scom(/*chip=*/0, xbus_addr(XBUS_FIR_ACTION1_REG), XBUS_PHY_FIR_ACTION1); + write_scom(/*chip=*/0, xbus_addr(XBUS_FIR_MASK_REG), XBUS_PHY_FIR_MASK); } } diff --git a/src/soc/ibm/power9/istep_8_11.c b/src/soc/ibm/power9/istep_8_11.c index 35e62c58a26..9fc17454c76 100644 --- a/src/soc/ibm/power9/istep_8_11.c +++ b/src/soc/ibm/power9/istep_8_11.c @@ -16,13 +16,13 @@ static void xbus_enable_ridi(uint8_t chip) }; /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ - if (get_scom(chip, PERV_NET_CTRL0) & PPC_BIT(0)) { - /* Enable Recievers, Drivers DI1 & DI2 */ + if (read_scom(chip, xbus_addr(PERV_NET_CTRL0)) & PPC_BIT(0)) { + /* Enable Receivers, Drivers DI1 & DI2 */ uint64_t val = 0; val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 - put_scom(chip, PERV_NET_CTRL0_WOR, val); + write_scom(chip, xbus_addr(PERV_NET_CTRL0_WOR), val); } } diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c index 5446b8bcbf7..2c60bdf6fc8 100644 --- a/src/soc/ibm/power9/istep_8_9.c +++ b/src/soc/ibm/power9/istep_8_9.c @@ -64,45 +64,45 @@ static void p9_fbc_no_hp_scom(bool is_xbus_active, uint8_t chip) uint64_t pb_cent_rgp_cmd_rate_dp0, pb_cent_rgp_cmd_rate_dp1; uint64_t pb_cent_sp_cmd_rate_dp0, pb_cent_sp_cmd_rate_dp1; - pb_west_mode = get_scom(chip, PB_WEST_MODE); + pb_west_mode = read_scom(chip, PB_WEST_MODE); PPC_INSERT(pb_west_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); PPC_INSERT(pb_west_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); PPC_INSERT(pb_west_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); PPC_INSERT(pb_west_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); - put_scom(chip, PB_WEST_MODE, pb_west_mode); + write_scom(chip, PB_WEST_MODE, pb_west_mode); - pb_cent_mode = get_scom(chip, PB_CENT_MODE); + pb_cent_mode = read_scom(chip, PB_CENT_MODE); PPC_INSERT(pb_cent_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); PPC_INSERT(pb_cent_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); PPC_INSERT(pb_cent_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); PPC_INSERT(pb_cent_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); - put_scom(chip, PB_CENT_MODE, pb_cent_mode); + write_scom(chip, PB_CENT_MODE, pb_cent_mode); - put_scom(chip, PB_CENT_GP_CMD_RATE_DP0, get_scom(chip, PB_CENT_GP_CMD_RATE_DP0) & 0); - put_scom(chip, PB_CENT_GP_CMD_RATE_DP1, get_scom(chip, PB_CENT_GP_CMD_RATE_DP1) & 0); + scom_and(chip, PB_CENT_GP_CMD_RATE_DP0, 0); + scom_and(chip, PB_CENT_GP_CMD_RATE_DP1, 0); - (void)get_scom(chip, PB_CENT_RGP_CMD_RATE_DP0); + (void)read_scom(chip, PB_CENT_RGP_CMD_RATE_DP0); pb_cent_rgp_cmd_rate_dp0 = (num_x_links_cfg == 0 ? 
0 : 0x030406080A0C1218); - put_scom(chip, PB_CENT_RGP_CMD_RATE_DP0, pb_cent_rgp_cmd_rate_dp0); + write_scom(chip, PB_CENT_RGP_CMD_RATE_DP0, pb_cent_rgp_cmd_rate_dp0); - (void)get_scom(chip, PB_CENT_RGP_CMD_RATE_DP1); + (void)read_scom(chip, PB_CENT_RGP_CMD_RATE_DP1); pb_cent_rgp_cmd_rate_dp1 = (num_x_links_cfg == 0 ? 0 : 0x040508080A0C1218); - put_scom(chip, PB_CENT_RGP_CMD_RATE_DP1, pb_cent_rgp_cmd_rate_dp1); + write_scom(chip, PB_CENT_RGP_CMD_RATE_DP1, pb_cent_rgp_cmd_rate_dp1); - pb_cent_sp_cmd_rate_dp0 = get_scom(chip, PB_CENT_SP_CMD_RATE_DP0); + pb_cent_sp_cmd_rate_dp0 = read_scom(chip, PB_CENT_SP_CMD_RATE_DP0); pb_cent_sp_cmd_rate_dp0 = (num_x_links_cfg == 0 ? 0 : 0x030406080A0C1218); - put_scom(chip, PB_CENT_SP_CMD_RATE_DP0, pb_cent_sp_cmd_rate_dp0); + write_scom(chip, PB_CENT_SP_CMD_RATE_DP0, pb_cent_sp_cmd_rate_dp0); - pb_cent_sp_cmd_rate_dp1 = get_scom(chip, PB_CENT_SP_CMD_RATE_DP1); + pb_cent_sp_cmd_rate_dp1 = read_scom(chip, PB_CENT_SP_CMD_RATE_DP1); pb_cent_sp_cmd_rate_dp1 = (num_x_links_cfg == 0 ? 0 : 0x030406080A0C1218); - put_scom(chip, PB_CENT_SP_CMD_RATE_DP1, pb_cent_sp_cmd_rate_dp1); + write_scom(chip, PB_CENT_SP_CMD_RATE_DP1, pb_cent_sp_cmd_rate_dp1); - pb_east_mode = get_scom(chip, PB_EAST_MODE); + pb_east_mode = read_scom(chip, PB_EAST_MODE); PPC_INSERT(pb_east_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); PPC_INSERT(pb_east_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); PPC_INSERT(pb_east_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); PPC_INSERT(pb_east_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); - put_scom(chip, PB_EAST_MODE, pb_east_mode); + write_scom(chip, PB_EAST_MODE, pb_east_mode); } static void p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) @@ -160,14 +160,14 @@ static void p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) uint64_t pb_elink_data_23_cfg_reg; uint64_t pb_misc_cfg, pb_trace_cfg; - pb_fp01_cfg = get_scom(chip, PB_FP01_CFG); + pb_fp01_cfg = read_scom(chip, PB_FP01_CFG); pb_fp01_cfg |= PPC_BIT(FP0_FMR_DISABLE); pb_fp01_cfg |= PPC_BIT(FP0_PRS_DISABLE); pb_fp01_cfg |= PPC_BIT(FP1_FMR_DISABLE); pb_fp01_cfg |= PPC_BIT(FP1_PRS_DISABLE); - put_scom(chip, PB_FP01_CFG, pb_fp01_cfg); + write_scom(chip, PB_FP01_CFG, pb_fp01_cfg); - pb_fp23_cfg = get_scom(chip, PB_FP23_CFG); + pb_fp23_cfg = read_scom(chip, PB_FP23_CFG); PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP2_FMR_DISABLE, 1); PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP2_PRS_DISABLE, 1); @@ -182,18 +182,18 @@ static void p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) PPC_INSERT(pb_fp23_cfg, 0x15 - (dd2_lo_limit_n / dd2_lo_limit_d), 36, 8); } - put_scom(chip, PB_FP23_CFG, pb_fp23_cfg); + write_scom(chip, PB_FP23_CFG, pb_fp23_cfg); - pb_fp45_cfg = get_scom(chip, PB_FP45_CFG); + pb_fp45_cfg = read_scom(chip, PB_FP45_CFG); pb_fp45_cfg |= PPC_BIT(FP4_FMR_DISABLE); pb_fp45_cfg |= PPC_BIT(FP4_PRS_DISABLE); pb_fp45_cfg |= PPC_BIT(FP5_FMR_DISABLE); pb_fp45_cfg |= PPC_BIT(FP5_PRS_DISABLE); - put_scom(chip, PB_FP45_CFG, pb_fp45_cfg); + write_scom(chip, PB_FP45_CFG, pb_fp45_cfg); - put_scom(chip, PB_ELINK_DATA_01_CFG_REG, get_scom(chip, PB_ELINK_DATA_01_CFG_REG)); + write_scom(chip, PB_ELINK_DATA_01_CFG_REG, read_scom(chip, PB_ELINK_DATA_01_CFG_REG)); - pb_elink_data_23_cfg_reg = get_scom(chip, PB_ELINK_DATA_23_CFG_REG); + pb_elink_data_23_cfg_reg = read_scom(chip, PB_ELINK_DATA_23_CFG_REG); if (is_xbus_active) { PPC_INSERT(pb_elink_data_23_cfg_reg, 0x1F, 24, 5); PPC_INSERT(pb_elink_data_23_cfg_reg, 0x40, 1, 7); @@ -203,24 +203,24 @@ static void 
p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 17, 7); PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 49, 7); } - put_scom(chip, PB_ELINK_DATA_23_CFG_REG, pb_elink_data_23_cfg_reg); + write_scom(chip, PB_ELINK_DATA_23_CFG_REG, pb_elink_data_23_cfg_reg); - put_scom(chip, PB_ELINK_DATA_45_CFG_REG, get_scom(chip, PB_ELINK_DATA_45_CFG_REG)); + write_scom(chip, PB_ELINK_DATA_45_CFG_REG, read_scom(chip, PB_ELINK_DATA_45_CFG_REG)); - pb_misc_cfg = get_scom(chip, PB_MISC_CFG); + pb_misc_cfg = read_scom(chip, PB_MISC_CFG); PPC_INSERT(pb_misc_cfg, 0x00, IOE01_IS_LOGICAL_PAIR, 1); PPC_INSERT(pb_misc_cfg, is_xbus_active, IOE23_IS_LOGICAL_PAIR, 1); PPC_INSERT(pb_misc_cfg, 0x00, IOE45_IS_LOGICAL_PAIR, 1); - put_scom(chip, PB_MISC_CFG, pb_misc_cfg); + write_scom(chip, PB_MISC_CFG, pb_misc_cfg); - pb_trace_cfg = get_scom(chip, PB_TRACE_CFG); + pb_trace_cfg = read_scom(chip, PB_TRACE_CFG); if (is_xbus_active) { PPC_INSERT(pb_trace_cfg, 0x4, 16, 4); PPC_INSERT(pb_trace_cfg, 0x4, 24, 4); PPC_INSERT(pb_trace_cfg, 0x1, 20, 4); PPC_INSERT(pb_trace_cfg, 0x1, 28, 4); } - put_scom(chip, PB_TRACE_CFG, pb_trace_cfg); + write_scom(chip, PB_TRACE_CFG, pb_trace_cfg); } static void p9_fbc_ioe_dl_scom(uint8_t chip) @@ -242,25 +242,25 @@ static void p9_fbc_ioe_dl_scom(uint8_t chip) uint64_t ioel_config, ioel_replay_threshold, ioel_sl_ecc_threshold; - ioel_config = get_scom(chip, IOEL_CONFIG); + ioel_config = read_scom(chip, xbus_addr(IOEL_CONFIG)); ioel_config |= PPC_BIT(LL1_CONFIG_LINK_PAIR); ioel_config |= PPC_BIT(LL1_CONFIG_CRC_LANE_ID); ioel_config |= PPC_BIT(LL1_CONFIG_SL_UE_CRC_ERR); PPC_INSERT(ioel_config, 0xF, 11, 5); PPC_INSERT(ioel_config, 0xF, 28, 4); - put_scom(chip, IOEL_CONFIG, ioel_config); + write_scom(chip, xbus_addr(IOEL_CONFIG), ioel_config); - ioel_replay_threshold = get_scom(chip, IOEL_REPLAY_THRESHOLD); + ioel_replay_threshold = read_scom(chip, xbus_addr(IOEL_REPLAY_THRESHOLD)); PPC_INSERT(ioel_replay_threshold, 0x7, 8, 3); PPC_INSERT(ioel_replay_threshold, 0xF, 4, 4); PPC_INSERT(ioel_replay_threshold, 0x6, 0, 4); - put_scom(chip, IOEL_REPLAY_THRESHOLD, ioel_replay_threshold); + write_scom(chip, xbus_addr(IOEL_REPLAY_THRESHOLD), ioel_replay_threshold); - ioel_sl_ecc_threshold = get_scom(chip, IOEL_SL_ECC_THRESHOLD); + ioel_sl_ecc_threshold = read_scom(chip, xbus_addr(IOEL_SL_ECC_THRESHOLD)); PPC_INSERT(ioel_sl_ecc_threshold, 0x7, 8, 3); PPC_INSERT(ioel_sl_ecc_threshold, 0xF, 4, 4); PPC_INSERT(ioel_sl_ecc_threshold, 0x7, 0, 4); - put_scom(chip, IOEL_SL_ECC_THRESHOLD, ioel_sl_ecc_threshold); + write_scom(chip, xbus_addr(IOEL_SL_ECC_THRESHOLD), ioel_sl_ecc_threshold); } static void chiplet_fabric_scominit(bool is_xbus_active, uint8_t chip) @@ -300,28 +300,31 @@ static void chiplet_fabric_scominit(bool is_xbus_active, uint8_t chip) p9_fbc_ioe_tl_scom(is_xbus_active, chip); /* TL/DL FIRs are configured by us only if not already setup by SBE */ - fbc_cent_fir = get_scom(chip, PU_PB_CENT_SM0_FIR_REG); + fbc_cent_fir = read_scom(chip, PU_PB_CENT_SM0_FIR_REG); init_firs = !(fbc_cent_fir & PPC_BIT(PU_PB_CENT_SM0_FIR_MASK_REG_SPARE_13)); if (init_firs) { uint64_t fir_mask; - put_scom(chip, PU_PB_IOE_FIR_ACTION0_REG, FBC_IOE_TL_FIR_ACTION0); - put_scom(chip, PU_PB_IOE_FIR_ACTION1_REG, FBC_IOE_TL_FIR_ACTION1); + write_scom(chip, PU_PB_IOE_FIR_ACTION0_REG, FBC_IOE_TL_FIR_ACTION0); + write_scom(chip, PU_PB_IOE_FIR_ACTION1_REG, FBC_IOE_TL_FIR_ACTION1); fir_mask = FBC_IOE_TL_FIR_MASK | FBC_IOE_TL_FIR_MASK_X0_NF | FBC_IOE_TL_FIR_MASK_X2_NF; - put_scom(chip, 
PU_PB_IOE_FIR_MASK_REG, fir_mask); + write_scom(chip, PU_PB_IOE_FIR_MASK_REG, fir_mask); } /* Setup IOE (XBUS FBC IO) DL SCOMs */ p9_fbc_ioe_dl_scom(chip); if (init_firs) { - put_scom(chip, XBUS_LL0_IOEL_FIR_ACTION0_REG, FBC_IOE_DL_FIR_ACTION0); - put_scom(chip, XBUS_LL0_IOEL_FIR_ACTION1_REG, FBC_IOE_DL_FIR_ACTION1); - put_scom(chip, XBUS_LL0_IOEL_FIR_MASK_REG, FBC_IOE_DL_FIR_MASK); + write_scom(chip, xbus_addr(XBUS_LL0_IOEL_FIR_ACTION0_REG), + FBC_IOE_DL_FIR_ACTION0); + write_scom(chip, xbus_addr(XBUS_LL0_IOEL_FIR_ACTION1_REG), + FBC_IOE_DL_FIR_ACTION1); + write_scom(chip, xbus_addr(XBUS_LL0_IOEL_FIR_MASK_REG), + FBC_IOE_DL_FIR_MASK); } } diff --git a/src/soc/ibm/power9/istep_9_2.c b/src/soc/ibm/power9/istep_9_2.c index e22a827d50e..bbf67eb12eb 100644 --- a/src/soc/ibm/power9/istep_9_2.c +++ b/src/soc/ibm/power9/istep_9_2.c @@ -92,8 +92,8 @@ static void config_run_bus_group_mode(uint8_t chip, int group) const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; /* Same registers are read for both groups */ - uint32_t pval = (get_scom(chip, P9A_XBUS_TX_IMPCAL_PVAL_PB) >> 7) & 0x1FF; - uint32_t nval = (get_scom(chip, P9A_XBUS_TX_IMPCAL_NVAL_PB) >> 7) & 0x1FF; + uint32_t pval = (read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PVAL_PB)) >> 7) & 0x1FF; + uint32_t nval = (read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_NVAL_PB)) >> 7) & 0x1FF; uint32_t sel_margin_pu; uint32_t sel_margin_pd; @@ -113,56 +113,56 @@ static void config_run_bus_group_mode(uint8_t chip, int group) sel_margin_pd = MIN(sel_margin_pd, MIN(p.en_margin_pd, MIN(n.en_margin_pd, sel_margin_pu))); - val = get_scom(chip, 0x800D340006010C3F + offset); + val = read_scom(chip, xbus_addr(0x800D340006010C3F + offset)); /* EDIP_TX_PSEG_PRE_EN (pre bank pseg enable) */ PPC_INSERT(val, convert_4r_with_2r(PRE_4R_TOTAL, PRE_WIDTH), 51, 5); /* EDIP_TX_PSEG_PRE_SEL (pre bank pseg mode selection) */ PPC_INSERT(val, convert_4r_with_2r(p.sel_pre, PRE_WIDTH), 56, 5); - put_scom(chip, 0x800D340006010C3F + offset, val); - val = get_scom(chip, 0x800D3C0006010C3F + offset); + write_scom(chip, xbus_addr(0x800D340006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D3C0006010C3F + offset)); /* EDIP_TX_NSEG_PRE_EN (pre bank nseg enable) */ PPC_INSERT(val, convert_4r_with_2r(PRE_4R_TOTAL, PRE_WIDTH), 51, 5); /* EDIP_TX_NSEG_PRE_SEL (pre bank nseg mode selection) */ PPC_INSERT(val, convert_4r_with_2r(n.sel_pre, PRE_WIDTH), 56, 5); - put_scom(chip, 0x800D3C0006010C3F + offset, val); - val = get_scom(chip, 0x800D440006010C3F + offset); + write_scom(chip, xbus_addr(0x800D3C0006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D440006010C3F + offset)); /* EDIP_TX_PSEG_MARGINPD_EN (margin pull-down bank pseg enable) */ PPC_INSERT(val, convert_4r(p.en_margin_pd), 56, 8); /* EDIP_TX_PSEG_MARGINPU_EN (margin pull-up bank pseg enable) */ PPC_INSERT(val, convert_4r(p.en_margin_pu), 48, 8); - put_scom(chip, 0x800D440006010C3F + offset, val); - val = get_scom(chip, 0x800D4C0006010C3F + offset); + write_scom(chip, xbus_addr(0x800D440006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D4C0006010C3F + offset)); /* EDIP_TX_NSEG_MARGINPD_EN (margin pull-down bank nseg enable) */ PPC_INSERT(val, convert_4r(n.en_margin_pd), 56, 8); /* EDIP_TX_NSEG_MARGINPU_EN (margin pull-up bank nseg enable) */ PPC_INSERT(val, convert_4r(n.en_margin_pu), 48, 8); - put_scom(chip, 0x800D4C0006010C3F + offset, val); - val = get_scom(chip, 0x800D540006010C3F + offset); + write_scom(chip, xbus_addr(0x800D4C0006010C3F + offset), val); + val = read_scom(chip, 
xbus_addr(0x800D540006010C3F + offset)); /* EDIP_TX_MARGINPD_SEL (margin pull-down bank mode selection) */ PPC_INSERT(val, convert_4r(sel_margin_pd), 56, 8); /* EDIP_TX_MARGINPU_SEL (margin pull-up bank mode selection) */ PPC_INSERT(val, convert_4r(sel_margin_pu), 48, 8); - put_scom(chip, 0x800D540006010C3F + offset, val); + write_scom(chip, xbus_addr(0x800D540006010C3F + offset), val); /* EDIP_TX_PSEG_MAIN_EN (main bank pseg enable) */ - val = get_scom(chip, 0x800D5C0006010C3F + offset); + val = read_scom(chip, xbus_addr(0x800D5C0006010C3F + offset)); PPC_INSERT(val, convert_4r_with_2r(p.en_main, 13), 51, 13); - put_scom(chip, 0x800D5C0006010C3F + offset, val); + write_scom(chip, xbus_addr(0x800D5C0006010C3F + offset), val); /* EDIP_TX_NSEG_MAIN_EN (main bank nseg enable) */ - val = get_scom(chip, 0x800D640006010C3F + offset); + val = read_scom(chip, xbus_addr(0x800D640006010C3F + offset)); PPC_INSERT(val, convert_4r_with_2r(n.en_main, 13), 51, 13); - put_scom(chip, 0x800D640006010C3F + offset, val); + write_scom(chip, xbus_addr(0x800D640006010C3F + offset), val); } static void config_run_bus_mode(uint8_t chip) @@ -176,15 +176,15 @@ static void config_run_bus_mode(uint8_t chip) long time; /* Set EDIP_TX_ZCAL_REQ to start Tx Impedance Calibration */ - or_scom(chip, P9A_XBUS_TX_IMPCAL_PB, PPC_BIT(49)); + scom_or(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PB), PPC_BIT(49)); mdelay(20); - time = wait_us(200 * 10, get_scom(chip, P9A_XBUS_TX_IMPCAL_PB) & + time = wait_us(200 * 10, read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PB)) & (PPC_BIT(EDIP_TX_ZCAL_DONE) | PPC_BIT(EDIP_TX_ZCAL_ERROR))); if (!time) die("Timed out waiting for I/O EDI+ Xbus Tx Z Calibration\n"); - if (get_scom(chip, P9A_XBUS_TX_IMPCAL_PB) & PPC_BIT(EDIP_TX_ZCAL_ERROR)) + if (read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PB)) & PPC_BIT(EDIP_TX_ZCAL_ERROR)) die("I/O EDI+ Xbus Tx Z Calibration failed\n"); config_run_bus_group_mode(chip, /*group=*/0); @@ -201,7 +201,8 @@ static void rx_dc_calibration_start(uint8_t chip, int group) for (int i = 0; i < XBUS_LANE_COUNT; i++) { uint64_t lane_offset = PPC_PLACE(i, 27, 5); /* EDIP_RX_LANE_INVALID */ - and_scom(chip, 0x8002400006010C3F | offset | lane_offset, ~PPC_BIT(50)); + scom_and(chip, xbus_addr(0x8002400006010C3F | offset | lane_offset), + ~PPC_BIT(50)); } /* Start Cleanup Pll */ @@ -212,7 +213,7 @@ static void rx_dc_calibration_start(uint8_t chip, int group) * 51 - EDIP_RX_PLL_REFCLKSEL_SCOM_EN (0 - pll controls selects refclk, * 1 - and gcr register does it) */ - or_scom(chip, 0x8009F80006010C3F + offset, PPC_BIT(50) | PPC_BIT(51)); + scom_or(chip, xbus_addr(0x8009F80006010C3F + offset), PPC_BIT(50) | PPC_BIT(51)); udelay(150); /* @@ -220,21 +221,21 @@ static void rx_dc_calibration_start(uint8_t chip, int group) * 48 - EDIP_RX_WT_CU_PLL_PGOOD (0 - places rx pll in reset, * 1 - sets pgood on rx pll for locking) */ - or_scom(chip, 0x8009F80006010C3F + offset, PPC_BIT(48)); + scom_or(chip, xbus_addr(0x8009F80006010C3F + offset), PPC_BIT(48)); udelay(5); /* * EDIP_RX_DC_CALIBRATE_DONE * (when this bit is read as a 1, the dc calibration steps have been completed) */ - and_scom(chip, 0x800A380006010C3F + offset, ~PPC_BIT(53)); + scom_and(chip, xbus_addr(0x800A380006010C3F + offset), ~PPC_BIT(53)); /* * EDIP_RX_START_DC_CALIBRATE * (when this register is written to a 1 the training state machine will run the dc * calibrate substeps defined in eye optimizations) */ - or_scom(chip, 0x8009F00006010C3F + offset, PPC_BIT(53)); + scom_or(chip, xbus_addr(0x8009F00006010C3F + offset), PPC_BIT(53)); } 
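The scom_or()/scom_and()/scom_and_or() calls that replace or_scom()/and_scom()/and_or_scom() in these hunks are not defined by this patch; judging from the 1:1 substitution (and from the old inline wrappers deleted from xbus.h further down), they are assumed to be the usual read-modify-write built on the read_scom()/write_scom() primitives, roughly:

	/*
	 * Sketch of the assumed semantics of the renamed RMW helper; the real
	 * definition lives outside this patch.  scom_and(chip, addr, mask) and
	 * scom_or(chip, addr, mask) are the AND-only and OR-only variants.
	 */
	static inline void scom_and_or(uint8_t chip, uint64_t addr,
				       uint64_t and, uint64_t or)
	{
		uint64_t data = read_scom(chip, addr);

		data &= and;
		data |= or;
		write_scom(chip, addr, data);
	}

What does change is that the XBus link remap is no longer hidden inside the wrapper: for group 0 (offset 0), scom_or(chip, xbus_addr(0x8009F00006010C3F + offset), PPC_BIT(53)) above first rewrites the ring field of the address from link 0 to link 1 (0x8009F00006010C3F becomes 0x8009F0000601103F, assuming XB_CHIPLET_ID is 0x06 as these XBus addresses imply) and only then performs the read-modify-write that sets EDIP_RX_START_DC_CALIBRATE.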
static void rx_dc_calibration_poll(uint8_t chip, int group) @@ -247,7 +248,8 @@ static void rx_dc_calibration_poll(uint8_t chip, int group) * EDIP_RX_DC_CALIBRATE_DONE * (when this bit is read as a 1, the dc calibration steps have been completed) */ - time = wait_ms(200 * 10, get_scom(chip, 0x800A380006010C3F + offset) & PPC_BIT(53)); + time = wait_ms(200 * 10, + read_scom(chip, xbus_addr(0x800A380006010C3F + offset)) & PPC_BIT(53)); if (!time) die("Timed out waiting for Rx Dc Calibration\n"); @@ -256,7 +258,7 @@ static void rx_dc_calibration_poll(uint8_t chip, int group) * (when this register is written to a 1 the training state machine will run the dc * calibrate substeps defined in eye optimizations) */ - and_scom(chip, 0x8009F00006010C3F + offset, ~PPC_BIT(53)); + scom_and(chip, xbus_addr(0x8009F00006010C3F + offset), ~PPC_BIT(53)); /* * EDIP_RX_CTL_CNTL4_E_PG @@ -266,14 +268,16 @@ static void rx_dc_calibration_poll(uint8_t chip, int group) * 51 - EDIP_RX_PLL_REFCLKSEL_SCOM_EN (0 - pll controls selects refclk, * 1 - and gcr register does it) */ - and_scom(chip, 0x8009F80006010C3F + offset, ~(PPC_BIT(48) | PPC_BIT(50) | PPC_BIT(51))); + scom_and(chip, xbus_addr(0x8009F80006010C3F + offset), + ~(PPC_BIT(48) | PPC_BIT(50) | PPC_BIT(51))); udelay(111); /* Restore the invalid bits, Wiretest will modify these as training is run */ for (int i = 0; i < XBUS_LANE_COUNT; i++) { uint64_t lane_offset = PPC_PLACE(i, 27, 5); /* EDIP_RX_LANE_INVALID */ - or_scom(chip, 0x8002400006010C3F | offset | lane_offset, PPC_BIT(50)); + scom_or(chip, xbus_addr(0x8002400006010C3F | offset | lane_offset), + PPC_BIT(50)); } } diff --git a/src/soc/ibm/power9/istep_9_4.c b/src/soc/ibm/power9/istep_9_4.c index 23e9209f31b..47ce3611fc1 100644 --- a/src/soc/ibm/power9/istep_9_4.c +++ b/src/soc/ibm/power9/istep_9_4.c @@ -18,24 +18,24 @@ static void tx_serializer_sync_power_on(uint8_t master_chip, uint8_t slave_chip, * it should not be necessary to use the sync logic on the clock slice * since it has no fifo but control is available just in case) */ - and_scom(master_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(50)); - and_scom(slave_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(50)); + scom_and(master_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(50)); + scom_and(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(50)); /* * EDIP_TX_CLK_RUN_COUNT * (set to 1 to enable the tx clock slice serializer; this should be * enabled at all times but control is available just in case) */ - and_scom(master_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(51)); - and_scom(slave_chip, 0x800C1C0006010C3F + offset, ~PPC_BIT(51)); + scom_and(master_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(51)); + scom_and(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(51)); /* EDIP_TX_CLK_RUN_COUNT (see above) */ - or_scom(master_chip, 0x800C1C0006010C3F + offset, PPC_BIT(51)); - or_scom(slave_chip, 0x800C1C0006010C3F + offset, PPC_BIT(51)); + scom_or(master_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(51)); + scom_or(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(51)); /* EDIP_TX_CLK_UNLOAD_CLK_DISABLE (see above) */ - or_scom(master_chip, 0x800C1C0006010C3F + offset, PPC_BIT(50)); - or_scom(slave_chip, 0x800C1C0006010C3F + offset, PPC_BIT(50)); + scom_or(master_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(50)); + scom_or(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(50)); for (int i = 0; i < XBUS_LANE_COUNT; ++i) { uint64_t lane_offset = PPC_PLACE(i, 27, 5); @@ -44,8 
+44,10 @@ static void tx_serializer_sync_power_on(uint8_t master_chip, uint8_t slave_chip, * (set to 0 to enable sync of tx custom serializer via tx_fifo_init register, * set to 1 to clock off sync logic and save power) */ - and_scom(master_chip, 0x80040C0006010C3F | offset | lane_offset, ~PPC_BIT(56)); - and_scom(slave_chip, 0x80040C0006010C3F | offset | lane_offset, ~PPC_BIT(56)); + scom_and(master_chip, xbus_addr(0x80040C0006010C3F | offset | lane_offset), + ~PPC_BIT(56)); + scom_and(slave_chip, xbus_addr(0x80040C0006010C3F | offset | lane_offset), + ~PPC_BIT(56)); } } @@ -81,9 +83,9 @@ static void xbus_linktrain(uint8_t master_chip, uint8_t slave_chip, int group) * EDIP_RX_START_WDERF_ALIAS (alias for rx_start_* bits) * Slave training must start first. */ - and_or_scom(slave_chip, 0x8009F00006010C3F + offset, + scom_and_or(slave_chip, xbus_addr(0x8009F00006010C3F + offset), ~PPC_BITMASK(48, 52), PPC_PLACE(WDERF, 48, 5)); - and_or_scom(master_chip, 0x8009F00006010C3F + offset, + scom_and_or(master_chip, xbus_addr(0x8009F00006010C3F + offset), ~PPC_BITMASK(48, 52), PPC_PLACE(WDERF, 48, 5)); /* @@ -91,7 +93,7 @@ static void xbus_linktrain(uint8_t master_chip, uint8_t slave_chip, int group) * 56-60 EDIP_RX_WDERF_FAILED_ALIAS (alias for rx_*_failed bits) */ wait_ms(100 * 1, - (tmp = get_scom(master_chip, 0x800A380006010C3F + offset), + (tmp = read_scom(master_chip, xbus_addr(0x800A380006010C3F + offset)), (((tmp >> 11) & 0x1F) == WDERF || ((tmp >> 3) & 0x1F) != 0))); if (((tmp >> 3) & 0x1F) != 0) die("I/O EDI+ Xbus link training failed.\n"); diff --git a/src/soc/ibm/power9/istep_9_6.c b/src/soc/ibm/power9/istep_9_6.c index b21f0037749..f552c5419eb 100644 --- a/src/soc/ibm/power9/istep_9_6.c +++ b/src/soc/ibm/power9/istep_9_6.c @@ -19,7 +19,7 @@ static void smp_link_layer(uint8_t chip) /* Hostboot uses PUTSCOMMASK operation of SBE IO. Assuming that it's * equivalent to a RMW sequence. 
*/ - or_scom(chip, XBUS_LL1_IOEL_CONTROL, + scom_or(chip, xbus_addr(XBUS_LL1_IOEL_CONTROL), PPC_BIT(XBUS_LL0_IOEL_CONTROL_LINK0_STARTUP) | PPC_BIT(XBUS_LL0_IOEL_CONTROL_LINK1_STARTUP)); } diff --git a/src/soc/ibm/power9/istep_9_7.c b/src/soc/ibm/power9/istep_9_7.c index af068e7fc50..0061f3e1fa8 100644 --- a/src/soc/ibm/power9/istep_9_7.c +++ b/src/soc/ibm/power9/istep_9_7.c @@ -23,7 +23,7 @@ static void p9_fab_iovalid_link_validate(uint8_t chip) /* Only OBus seems to be retrained, so this XBus-only code is * much simpler compared to corresponding code in Hostboot */ - uint64_t dl_fir_reg = get_scom(chip, XBUS_LL1_IOEL_FIR_REG); + uint64_t dl_fir_reg = read_scom(chip, xbus_addr(XBUS_LL1_IOEL_FIR_REG)); bool dl_trained = (dl_fir_reg & PPC_BIT(DL_FIR_LINK0_TRAINED_BIT)) && (dl_fir_reg & PPC_BIT(DL_FIR_LINK1_TRAINED_BIT)); @@ -57,23 +57,23 @@ static void p9_fab_iovalid(uint8_t chip) p9_fab_iovalid_link_validate(chip); /* Clear RAS FIR mask for link if not already set up by SBE */ - fbc_cent_fir_data = get_scom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG); + fbc_cent_fir_data = read_scom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG); if (!(fbc_cent_fir_data & PPC_BIT(PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13))) { - and_scom(chip, PU_PB_CENT_SM1_EXTFIR_ACTION0_REG, + scom_and(chip, PU_PB_CENT_SM1_EXTFIR_ACTION0_REG, ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); - and_scom(chip, PU_PB_CENT_SM1_EXTFIR_ACTION1_REG, - ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); - put_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_AND, + scom_and(chip, PU_PB_CENT_SM1_EXTFIR_ACTION1_REG, ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + write_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_AND, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); } /* * Use AND/OR mask registers to atomically update link specific fields * in iovalid control register. */ - put_scom(chip, PERV_XB_CPLT_CONF1_OR, - PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D) | - PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D + 1)); + write_scom(chip, xbus_addr(PERV_XB_CPLT_CONF1_OR), + PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D) | + PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D + 1)); } void istep_9_7(uint8_t chips) diff --git a/src/soc/ibm/power9/xbus.c b/src/soc/ibm/power9/xbus.c deleted file mode 100644 index be53808de18..00000000000 --- a/src/soc/ibm/power9/xbus.c +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#include "xbus.h" - -#include -#include - -/* Updates address that targets XBus chiplet to use specified XBus link number. - * Does nothing to non-XBus addresses. 
*/ -static uint64_t xbus_addr(uint8_t xbus, uint64_t addr) -{ - enum { - XBUS_COUNT = 0x3, // number of XBus links - XB_IOX_0_RING_ID = 0x3, // IOX_0 - XB_PBIOX_0_RING_ID = 0x6, // PBIOX_0 - }; - - uint8_t ring = (addr >> 10) & 0xF; - uint8_t chiplet = (addr >> 24) & 0x3F; - - if (chiplet != XB_CHIPLET_ID) - return addr; - - if (ring >= XB_IOX_0_RING_ID && ring < XB_IOX_0_RING_ID + XBUS_COUNT) - ring = XB_IOX_0_RING_ID + xbus; - else if (ring >= XB_PBIOX_0_RING_ID && ring < XB_PBIOX_0_RING_ID + XBUS_COUNT) - ring = XB_PBIOX_0_RING_ID + xbus; - - addr &= ~PPC_BITMASK(50, 53); - addr |= PPC_PLACE(ring, 50, 4); - - return addr; -} - -void put_scom(uint8_t chip, uint64_t addr, uint64_t data) -{ - addr = xbus_addr(/*xbus=*/1, addr); - - write_scom(chip, addr, data); -} - -uint64_t get_scom(uint8_t chip, uint64_t addr) -{ - addr = xbus_addr(/*xbus=*/1, addr); - - return read_scom(chip, addr); -} diff --git a/src/soc/ibm/power9/xbus.h b/src/soc/ibm/power9/xbus.h index 6455b95fcfa..77cf87b694d 100644 --- a/src/soc/ibm/power9/xbus.h +++ b/src/soc/ibm/power9/xbus.h @@ -3,73 +3,36 @@ #ifndef __SOC_IBM_POWER9_XBUS_H #define __SOC_IBM_POWER9_XBUS_H +#include #include -/* - * Define DEBUG_XBUS before including this header to get debug prints from this - * unit - */ - #define XBUS_LANE_COUNT 17 #define XBUS_LINK_GROUP_OFFSET 0x2000000000 -/* - * The API below is meant to be used after SBE for the second CPU is up (so - * after istep 8.4), but prior to XSCOM working for it, which covers range of - * isteps that initialize XBus and SMP. - * - * The functions use XSCOM for the first CPU and SBE IO for the second one. When - * SCOM address targets XBus chiplet, ring part of the address is updated to - * XBus link #1 if necessary (addresses in code use link #0, which also matches - * Hostboot logs). - * - * No need to use this interface once Powerbus is activated (after istep 10.1) - * and XSCOM can access SCOMs on both CPUs. - */ - -void put_scom(uint8_t chip, uint64_t addr, uint64_t data); -uint64_t get_scom(uint8_t chip, uint64_t addr); - -#ifdef DEBUG_XBUS -#include +/* Updates address that targets XBus chiplet to use a specific XBus link number. + * Does nothing to non-XBus addresses. 
*/ +static inline uint64_t xbus_addr(uint64_t addr) +{ + enum { + XBUS_COUNT = 0x3, // number of XBus links + XBUS_LINK = 0x1, // hard-coded link number + XB_IOX_0_RING_ID = 0x3, // IOX_0 + XB_PBIOX_0_RING_ID = 0x6, // PBIOX_0 + }; -#define put_scom(c, x, y) \ -({ \ - uint8_t __cw = c; \ - uint64_t __xw = x; \ - uint64_t __yw = y; \ - printk(BIOS_EMERG, "PUTSCOM %d %016llX %016llX\n", __cw, __xw, __yw); \ - put_scom(__cw, __xw, __yw); \ -}) + uint8_t ring = (addr >> 10) & 0xF; + uint8_t chiplet = (addr >> 24) & 0x3F; -#define get_scom(c, x) \ -({ \ - uint8_t __cr = c; \ - uint64_t __xr = x; \ - uint64_t __yr = get_scom(__cr, __xr); \ - printk(BIOS_EMERG, "GETSCOM %d %016llX %016llX\n", __cr, __xr, __yr); \ - __yr; \ -}) + if (chiplet != XB_CHIPLET_ID) + return addr; -#endif + if (ring >= XB_IOX_0_RING_ID && ring < XB_IOX_0_RING_ID + XBUS_COUNT) + PPC_INSERT(addr, XB_IOX_0_RING_ID + XBUS_LINK, 50, 4); + else if (ring >= XB_PBIOX_0_RING_ID && ring < XB_PBIOX_0_RING_ID + XBUS_COUNT) + PPC_INSERT(addr, XB_PBIOX_0_RING_ID + XBUS_LINK, 50, 4); -static inline void and_scom(uint8_t chip, uint64_t addr, uint64_t mask) -{ - put_scom(chip, addr, get_scom(chip, addr) & mask); -} - -static inline void or_scom(uint8_t chip, uint64_t addr, uint64_t mask) -{ - put_scom(chip, addr, get_scom(chip, addr) | mask); -} - -static inline void and_or_scom(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) -{ - uint64_t data = get_scom(chip, addr); - data &= and; - data |= or; - put_scom(chip, addr, data); + return addr; } #endif /* __SOC_IBM_POWER9_XBUS_H */ From 870b2a57150f20b02a5586178c7d939f1841ca44 Mon Sep 17 00:00:00 2001 From: Kacper Stojek Date: Thu, 14 Jul 2022 12:11:13 +0200 Subject: [PATCH 188/213] soc/power9/: move istep logs to report_istep() I've moved all start istep messages to report_istep() and removed end istep messages, also changed lof level to INFO. 
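With report_istep() itself printing the message (see the arch/io.h hunk below), every istep entry point is left with a single call that covers both the BIOS_INFO line and the step/substep writes to ports 0x81 and 0x82. For illustration, this is how one of the touched functions reads after the change (istep_10_12, from the hunk further down; comments added here for explanation):

	void istep_10_12(uint8_t chips)
	{
		/* Logs "starting istep 10.12" at BIOS_INFO and posts the
		 * step/substep to ports 0x81/0x82; no separate start/end
		 * printk calls remain in the istep itself. */
		report_istep(10, 12);

		for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
			if (chips & (1 << chip))
				enable_ridi(chip);
		}
	}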
Change-Id: Ifb3180afe2942ccd23429dbd1fa141b74e3b2129 Signed-off-by: Kacper Stojek --- src/arch/ppc64/include/arch/io.h | 2 ++ src/soc/ibm/power9/istep_10_1.c | 3 --- src/soc/ibm/power9/istep_10_10.c | 3 --- src/soc/ibm/power9/istep_10_12.c | 3 --- src/soc/ibm/power9/istep_10_13.c | 4 ---- src/soc/ibm/power9/istep_10_6.c | 3 --- src/soc/ibm/power9/istep_13_10.c | 3 --- src/soc/ibm/power9/istep_13_11.c | 3 --- src/soc/ibm/power9/istep_13_13.c | 3 --- src/soc/ibm/power9/istep_13_2.c | 3 --- src/soc/ibm/power9/istep_13_3.c | 3 --- src/soc/ibm/power9/istep_13_4.c | 3 --- src/soc/ibm/power9/istep_13_6.c | 3 --- src/soc/ibm/power9/istep_13_8.c | 3 --- src/soc/ibm/power9/istep_13_9.c | 3 --- src/soc/ibm/power9/istep_14_1.c | 4 ---- src/soc/ibm/power9/istep_14_2.c | 3 --- src/soc/ibm/power9/istep_14_3.c | 3 --- src/soc/ibm/power9/istep_14_5.c | 3 --- src/soc/ibm/power9/istep_18_11.c | 3 --- src/soc/ibm/power9/istep_18_12.c | 2 -- src/soc/ibm/power9/istep_8_1.c | 3 --- src/soc/ibm/power9/istep_8_10.c | 3 --- src/soc/ibm/power9/istep_8_11.c | 3 --- src/soc/ibm/power9/istep_8_2.c | 3 --- src/soc/ibm/power9/istep_8_3.c | 3 --- src/soc/ibm/power9/istep_8_4.c | 3 --- src/soc/ibm/power9/istep_8_9.c | 3 --- src/soc/ibm/power9/istep_9_2.c | 3 --- src/soc/ibm/power9/istep_9_4.c | 3 --- src/soc/ibm/power9/istep_9_6.c | 3 --- src/soc/ibm/power9/istep_9_7.c | 3 --- 32 files changed, 2 insertions(+), 94 deletions(-) diff --git a/src/arch/ppc64/include/arch/io.h b/src/arch/ppc64/include/arch/io.h index 5c65d75e67b..5c0ea4c0e15 100644 --- a/src/arch/ppc64/include/arch/io.h +++ b/src/arch/ppc64/include/arch/io.h @@ -4,6 +4,7 @@ #define _ASM_IO_H #include +#include /* Set MSB to 1 to ignore HRMOR */ #define MMIO_GROUP0_CHIP0_LPC_BASE_ADDR 0x8006030000000000 @@ -64,6 +65,7 @@ static inline uint32_t inl(uint16_t port) static inline void report_istep(uint8_t step, uint8_t substep) { + printk(BIOS_INFO, "starting istep %d.%d\n", step, substep); outb(step, 0x81); outb(substep, 0x82); } diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c index 7bcb18dc3db..e88ddddc33f 100644 --- a/src/soc/ibm/power9/istep_10_1.c +++ b/src/soc/ibm/power9/istep_10_1.c @@ -760,7 +760,6 @@ static void p9_build_smp(uint8_t chips) void istep_10_1(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 10.1\n"); report_istep(10,1); p9_build_smp(chips); @@ -774,6 +773,4 @@ void istep_10_1(uint8_t chips) fsi_reset_pib2opb(/*chip=*/1); } - - printk(BIOS_EMERG, "ending istep 10.1\n"); } diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c index d671bff94bb..fa966726078 100644 --- a/src/soc/ibm/power9/istep_10_10.c +++ b/src/soc/ibm/power9/istep_10_10.c @@ -603,7 +603,6 @@ static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, void istep_10_10(uint8_t chips, struct pci_info *pci_info) { - printk(BIOS_EMERG, "starting istep 10.10\n"); report_istep(10, 10); for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { @@ -620,6 +619,4 @@ void istep_10_10(uint8_t chips, struct pci_info *pci_info) phase1(chip, pec_cfgs, pci_info[chip].iovalid_enable); } - - printk(BIOS_EMERG, "ending istep 10.10\n"); } diff --git a/src/soc/ibm/power9/istep_10_12.c b/src/soc/ibm/power9/istep_10_12.c index dfdae731e04..8fe939bf92e 100644 --- a/src/soc/ibm/power9/istep_10_12.c +++ b/src/soc/ibm/power9/istep_10_12.c @@ -35,13 +35,10 @@ static void enable_ridi(uint8_t chip) void istep_10_12(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 10.12\n"); report_istep(10, 12); for (uint8_t chip = 0; chip < MAX_CHIPS; 
chip++) { if (chips & (1 << chip)) enable_ridi(chip); } - - printk(BIOS_EMERG, "ending istep 10.12\n"); } diff --git a/src/soc/ibm/power9/istep_10_13.c b/src/soc/ibm/power9/istep_10_13.c index d8909a3bcd1..e961697ace7 100644 --- a/src/soc/ibm/power9/istep_10_13.c +++ b/src/soc/ibm/power9/istep_10_13.c @@ -88,14 +88,10 @@ void istep_10_13(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 10.13\n"); - report_istep(10, 13); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) host_rng_bist(chip); } - - printk(BIOS_EMERG, "ending istep 10.13\n"); } diff --git a/src/soc/ibm/power9/istep_10_6.c b/src/soc/ibm/power9/istep_10_6.c index 4458e8faf40..d67e33f6e5b 100644 --- a/src/soc/ibm/power9/istep_10_6.c +++ b/src/soc/ibm/power9/istep_10_6.c @@ -410,7 +410,6 @@ void istep_10_6(uint8_t chips) { uint8_t dd = get_dd(); // XXX: this should probably be chip-specific - printk(BIOS_EMERG, "starting istep 10.6\n"); report_istep(10, 6); for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { @@ -419,6 +418,4 @@ void istep_10_6(uint8_t chips) psi_scom(chip); } } - - printk(BIOS_EMERG, "ending istep 10.6\n"); } diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c index 85bee5f715a..af9d6e0d0fa 100644 --- a/src/soc/ibm/power9/istep_13_10.c +++ b/src/soc/ibm/power9/istep_13_10.c @@ -530,13 +530,10 @@ void istep_13_10(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.10\n"); report_istep(13, 10); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) mss_draminit(chip); } - - printk(BIOS_EMERG, "ending istep 13.10\n"); } diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 020b9a83def..397657fb661 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -1364,13 +1364,10 @@ void istep_13_11(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.11\n"); report_istep(13, 11); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) mss_draminit_training(chip); } - - printk(BIOS_EMERG, "ending istep 13.11\n"); } diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c index 7a70be8421a..80e041338bb 100644 --- a/src/soc/ibm/power9/istep_13_13.c +++ b/src/soc/ibm/power9/istep_13_13.c @@ -719,13 +719,10 @@ void istep_13_13(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.13\n"); report_istep(13, 13); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) mss_draminit_mc(chip); } - - printk(BIOS_EMERG, "ending istep 13.13\n"); } diff --git a/src/soc/ibm/power9/istep_13_2.c b/src/soc/ibm/power9/istep_13_2.c index 8dd8bfdc845..ccccf13f95f 100644 --- a/src/soc/ibm/power9/istep_13_2.c +++ b/src/soc/ibm/power9/istep_13_2.c @@ -172,7 +172,6 @@ void istep_13_2(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.2\n"); report_istep(13, 2); /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ @@ -181,6 +180,4 @@ void istep_13_2(uint8_t chips) if (chips & (1 << chip)) mem_pll_reset(chip); } - - printk(BIOS_EMERG, "ending istep 13.2\n"); } diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index eeb6ef20705..f2debcf829f 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -127,13 +127,10 @@ void istep_13_3(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.3\n"); report_istep(13, 3); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) 
mem_pll_initf(chip); } - - printk(BIOS_EMERG, "ending istep 13.3\n"); } diff --git a/src/soc/ibm/power9/istep_13_4.c b/src/soc/ibm/power9/istep_13_4.c index 191c42e3dd3..e920e3c62ff 100644 --- a/src/soc/ibm/power9/istep_13_4.c +++ b/src/soc/ibm/power9/istep_13_4.c @@ -121,7 +121,6 @@ void istep_13_4(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.4\n"); report_istep(13, 4); /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ @@ -130,6 +129,4 @@ void istep_13_4(uint8_t chips) if (chips & (1 << chip)) mem_pll_setup(chip); } - - printk(BIOS_EMERG, "ending istep 13.4\n"); } diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c index a3dd602560b..55eaef3bcd7 100644 --- a/src/soc/ibm/power9/istep_13_6.c +++ b/src/soc/ibm/power9/istep_13_6.c @@ -324,7 +324,6 @@ void istep_13_6(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.6\n"); report_istep(13, 6); /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ @@ -333,6 +332,4 @@ void istep_13_6(uint8_t chips) if (chips & (1 << chip)) mem_startclocks(chip); } - - printk(BIOS_EMERG, "ending istep 13.6\n"); } diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c index b123ae4a5fb..3d23c6d1b42 100644 --- a/src/soc/ibm/power9/istep_13_8.c +++ b/src/soc/ibm/power9/istep_13_8.c @@ -2400,13 +2400,10 @@ void istep_13_8(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.8\n"); report_istep(13, 8); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) mss_scominit(chip); } - - printk(BIOS_EMERG, "ending istep 13.8\n"); } diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c index 07e9ba3e2a1..0c4f4859612 100644 --- a/src/soc/ibm/power9/istep_13_9.c +++ b/src/soc/ibm/power9/istep_13_9.c @@ -805,13 +805,10 @@ void istep_13_9(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 13.9\n"); report_istep(13, 9); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) mss_ddr_phy_reset(chip); } - - printk(BIOS_EMERG, "ending istep 13.9\n"); } diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c index f91538c2c03..d6f8c72cf11 100644 --- a/src/soc/ibm/power9/istep_14_1.c +++ b/src/soc/ibm/power9/istep_14_1.c @@ -539,10 +539,6 @@ static void mss_memdiag(uint8_t chips) void istep_14_1(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 14.1\n"); report_istep(14, 1); - mss_memdiag(chips); - - printk(BIOS_EMERG, "ending istep 14.1\n"); } diff --git a/src/soc/ibm/power9/istep_14_2.c b/src/soc/ibm/power9/istep_14_2.c index bb73c2adece..c929a2b461d 100644 --- a/src/soc/ibm/power9/istep_14_2.c +++ b/src/soc/ibm/power9/istep_14_2.c @@ -56,7 +56,6 @@ void istep_14_2(uint8_t chips) uint8_t chip; report_istep(14, 2); - printk(BIOS_EMERG, "starting istep 14.2\n"); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) { @@ -64,6 +63,4 @@ void istep_14_2(uint8_t chips) throttle_sync(chip); } } - - printk(BIOS_EMERG, "ending istep 14.2\n"); } diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index 81c1f7d0e64..53577cf5447 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -503,7 +503,6 @@ static void init_phbs(uint8_t chip, uint8_t phb_active_mask, const uint8_t *iova void istep_14_3(uint8_t chips, const struct pci_info *pci_info) { - printk(BIOS_EMERG, "starting istep 14.3\n"); report_istep(14, 3); for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { @@ 
-513,6 +512,4 @@ void istep_14_3(uint8_t chips, const struct pci_info *pci_info) init_pecs(chip, pci_info[chip].iovalid_enable); init_phbs(chip, pci_info[chip].phb_active_mask, pci_info[chip].iovalid_enable); } - - printk(BIOS_EMERG, "ending istep 14.3\n"); } diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c index a555ee00eba..805e3fa5d83 100644 --- a/src/soc/ibm/power9/istep_14_5.c +++ b/src/soc/ibm/power9/istep_14_5.c @@ -338,7 +338,6 @@ void istep_14_5(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 14.5\n"); report_istep(14, 5); /* Start MCS reset */ @@ -348,6 +347,4 @@ void istep_14_5(uint8_t chips) if (chips & (1 << chip)) proc_setup_bars(chip); } - - printk(BIOS_EMERG, "ending istep 14.5\n"); } diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c index c71477817da..98f392066cb 100644 --- a/src/soc/ibm/power9/istep_18_11.c +++ b/src/soc/ibm/power9/istep_18_11.c @@ -439,7 +439,6 @@ void istep_18_11(uint8_t chips, uint8_t *mdmt) uint8_t sec_mdmt; uint8_t chip; - printk(BIOS_EMERG, "starting istep 18.11\n"); report_istep(18, 11); if (chips != 0x01 && chips != 0x03) @@ -462,6 +461,4 @@ void istep_18_11(uint8_t chips, uint8_t *mdmt) if (chips & (1 << chip)) configure_tod(chip, chips, pri_mdmt, sec_mdmt); } - - printk(BIOS_EMERG, "ending istep 18.11\n"); } diff --git a/src/soc/ibm/power9/istep_18_12.c b/src/soc/ibm/power9/istep_18_12.c index efe41f9248a..c58c4725d85 100644 --- a/src/soc/ibm/power9/istep_18_12.c +++ b/src/soc/ibm/power9/istep_18_12.c @@ -138,8 +138,6 @@ static void init_tod_node(uint8_t chips, uint8_t mdmt) void istep_18_12(uint8_t chips, uint8_t mdmt) { - printk(BIOS_EMERG, "starting istep 18.12\n"); report_istep(18, 12); init_tod_node(chips, mdmt); - printk(BIOS_EMERG, "ending istep 18.12\n"); } diff --git a/src/soc/ibm/power9/istep_8_1.c b/src/soc/ibm/power9/istep_8_1.c index eca0f0c54aa..1cd0c25d70d 100644 --- a/src/soc/ibm/power9/istep_8_1.c +++ b/src/soc/ibm/power9/istep_8_1.c @@ -303,7 +303,6 @@ void istep_8_1(uint8_t chips) { int boot_seeprom_side; - printk(BIOS_EMERG, "starting istep 8.1\n"); report_istep(8, 1); boot_seeprom_side = get_master_sbe_boot_seeprom(); @@ -315,6 +314,4 @@ void istep_8_1(uint8_t chips) set_sbe_boot_seeprom(chip, boot_seeprom_side); } } - - printk(BIOS_EMERG, "ending istep 8.1\n"); } diff --git a/src/soc/ibm/power9/istep_8_10.c b/src/soc/ibm/power9/istep_8_10.c index 6332ea9a7e1..d581042052c 100644 --- a/src/soc/ibm/power9/istep_8_10.c +++ b/src/soc/ibm/power9/istep_8_10.c @@ -367,13 +367,10 @@ static void xbus_scominit(int group) void istep_8_10(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 8.10\n"); report_istep(8,10); if (chips != 0x01) { xbus_scominit(/*group=*/0); xbus_scominit(/*group=*/1); } - - printk(BIOS_EMERG, "ending istep 8.10\n"); } diff --git a/src/soc/ibm/power9/istep_8_11.c b/src/soc/ibm/power9/istep_8_11.c index 9fc17454c76..08ffeb782d8 100644 --- a/src/soc/ibm/power9/istep_8_11.c +++ b/src/soc/ibm/power9/istep_8_11.c @@ -30,13 +30,10 @@ void istep_8_11(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "starting istep 8.11\n"); report_istep(8,11); for (chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) xbus_enable_ridi(chip); } - - printk(BIOS_EMERG, "ending istep 8.11\n"); } diff --git a/src/soc/ibm/power9/istep_8_2.c b/src/soc/ibm/power9/istep_8_2.c index ee7b16d5350..51a4e6eb740 100644 --- a/src/soc/ibm/power9/istep_8_2.c +++ b/src/soc/ibm/power9/istep_8_2.c @@ -36,7 +36,6 @@ static void set_fsi_gp_shadow(uint8_t chip) 
void istep_8_2(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 8.2\n"); report_istep(8, 2); /* Skipping master chip */ @@ -44,6 +43,4 @@ void istep_8_2(uint8_t chips) if (chips & (1 << chip)) set_fsi_gp_shadow(chip); } - - printk(BIOS_EMERG, "ending istep 8.2\n"); } diff --git a/src/soc/ibm/power9/istep_8_3.c b/src/soc/ibm/power9/istep_8_3.c index 6836374cf28..2cfcb871280 100644 --- a/src/soc/ibm/power9/istep_8_3.c +++ b/src/soc/ibm/power9/istep_8_3.c @@ -126,7 +126,6 @@ static void start_cbs(uint8_t chip) void istep_8_3(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 8.3\n"); report_istep(8, 3); /* Skipping master chip */ @@ -141,6 +140,4 @@ void istep_8_3(uint8_t chips) start_cbs(chip); } } - - printk(BIOS_EMERG, "ending istep 8.3\n"); } diff --git a/src/soc/ibm/power9/istep_8_4.c b/src/soc/ibm/power9/istep_8_4.c index 5182153888d..545120b0abb 100644 --- a/src/soc/ibm/power9/istep_8_4.c +++ b/src/soc/ibm/power9/istep_8_4.c @@ -98,7 +98,6 @@ static bool sbe_run_extract_msg_reg(uint8_t chip) void istep_8_4(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 8.4\n"); report_istep(8, 4); /* Skipping master chip */ @@ -108,6 +107,4 @@ void istep_8_4(uint8_t chips) die("SBE for chip #%d did not boot properly.\n", chip); } } - - printk(BIOS_EMERG, "ending istep 8.4\n"); } diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c index 2c60bdf6fc8..6e2abafc8df 100644 --- a/src/soc/ibm/power9/istep_8_9.c +++ b/src/soc/ibm/power9/istep_8_9.c @@ -330,7 +330,6 @@ static void chiplet_fabric_scominit(bool is_xbus_active, uint8_t chip) void istep_8_9(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 8.9\n"); report_istep(8,9); /* Not skipping master chip and initializing it even if we don't have a second chip */ @@ -338,6 +337,4 @@ void istep_8_9(uint8_t chips) if (chips & (1 << chip)) chiplet_fabric_scominit(/*is_xbus_active=*/chips == 0x03, chip); } - - printk(BIOS_EMERG, "ending istep 8.9\n"); } diff --git a/src/soc/ibm/power9/istep_9_2.c b/src/soc/ibm/power9/istep_9_2.c index bbf67eb12eb..a16cac2dcd4 100644 --- a/src/soc/ibm/power9/istep_9_2.c +++ b/src/soc/ibm/power9/istep_9_2.c @@ -302,7 +302,6 @@ static void config_bus_mode(void) void istep_9_2(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 9.2\n"); report_istep(9,2); if (chips != 0x01) { @@ -311,6 +310,4 @@ void istep_9_2(uint8_t chips) config_bus_mode(); } - - printk(BIOS_EMERG, "ending istep 9.2\n"); } diff --git a/src/soc/ibm/power9/istep_9_4.c b/src/soc/ibm/power9/istep_9_4.c index 47ce3611fc1..48fac5d49d6 100644 --- a/src/soc/ibm/power9/istep_9_4.c +++ b/src/soc/ibm/power9/istep_9_4.c @@ -104,13 +104,10 @@ static void xbus_linktrain(uint8_t master_chip, uint8_t slave_chip, int group) void istep_9_4(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 9.4\n"); report_istep(9,4); if (chips != 0x01) { xbus_linktrain(/*master_chip=*/0, /*slave_chip=*/1, /*group=*/0); xbus_linktrain(/*master_chip=*/0, /*slave_chip=*/1, /*group=*/1); } - - printk(BIOS_EMERG, "ending istep 9.4\n"); } diff --git a/src/soc/ibm/power9/istep_9_6.c b/src/soc/ibm/power9/istep_9_6.c index f552c5419eb..befa14feb3c 100644 --- a/src/soc/ibm/power9/istep_9_6.c +++ b/src/soc/ibm/power9/istep_9_6.c @@ -26,13 +26,10 @@ static void smp_link_layer(uint8_t chip) void istep_9_6(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 9.6\n"); report_istep(9,6); if (chips != 0x01) { smp_link_layer(/*chip=*/0); smp_link_layer(/*chip=*/1); } - - printk(BIOS_EMERG, "ending istep 9.6\n"); } diff --git a/src/soc/ibm/power9/istep_9_7.c 
b/src/soc/ibm/power9/istep_9_7.c index 0061f3e1fa8..9281ef42682 100644 --- a/src/soc/ibm/power9/istep_9_7.c +++ b/src/soc/ibm/power9/istep_9_7.c @@ -78,7 +78,6 @@ static void p9_fab_iovalid(uint8_t chip) void istep_9_7(uint8_t chips) { - printk(BIOS_EMERG, "starting istep 9.7\n"); report_istep(9,7); if (chips != 0x01) { @@ -94,6 +93,4 @@ void istep_9_7(uint8_t chips) p9_fab_iovalid(/*chip=*/0); p9_fab_iovalid(/*chip=*/1); } - - printk(BIOS_EMERG, "ending istep 9.7\n"); } From 26d819d13b57ec883ea9e9ba72c320a690556983 Mon Sep 17 00:00:00 2001 From: Kacper Stojek Date: Thu, 14 Jul 2022 12:24:31 +0200 Subject: [PATCH 189/213] soc/power9/romstage.c: change 0xF000F log level from DEBUG to WARNING Change-Id: I4f12d13ac560b2f7cb42b057922343d25af6bfb2 Signed-off-by: Kacper Stojek --- src/soc/ibm/power9/romstage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index 8414d5abf05..d63047a0330 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -447,7 +447,7 @@ void main(void) timestamp_add_now(TS_INITRAM_END); /* Test if SCOM still works. Maybe should check also indirect access? */ - printk(BIOS_DEBUG, "0xF000F = %llx\n", read_scom(0, 0xF000F)); + printk(BIOS_WARNING, "0xF000F = %llx\n", read_scom(0, 0xF000F)); /* * Halt to give a chance to inspect FIRs, otherwise checkstops from From 9dabdf4926bd02bef6ed7b9468c77c0b0cf43bee Mon Sep 17 00:00:00 2001 From: Kacper Stojek Date: Thu, 14 Jul 2022 13:59:02 +0200 Subject: [PATCH 190/213] soc/power9/: change log levels in istep files Change-Id: Ibe35da73ebf116f6a1821df758c13b054d57a96a Signed-off-by: Kacper Stojek Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/homer.c | 18 +++++++++--------- src/soc/ibm/power9/istep_13_11.c | 2 +- src/soc/ibm/power9/istep_13_3.c | 2 +- src/soc/ibm/power9/istep_14_3.c | 4 ++-- src/soc/ibm/power9/istep_8_4.c | 2 +- src/soc/ibm/power9/occ.c | 25 ++++++++++++++++--------- src/soc/ibm/power9/romstage.c | 6 +++--- 7 files changed, 33 insertions(+), 26 deletions(-) diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index 299bcbb054e..cb957807686 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -925,7 +925,7 @@ static void psu_command(uint8_t flags, long time) die("MBOX to SBE busy, this should not happen\n"); if (read_scom(0, 0x000D0063) & PPC_BIT(0)) { - printk(BIOS_ERR, "SBE to Host doorbell already active, clearing it\n"); + printk(BIOS_WARNING, "SBE to Host doorbell already active, clearing it\n"); write_scom(0, 0x000D0064, ~PPC_BIT(0)); } @@ -1075,7 +1075,7 @@ static void istep_16_1(int this_core) * This will request SBE to wake us up after we enter STOP 15. Hopefully * we will come back to the place where we were before. 
*/ - printk(BIOS_ERR, "XIVE configured, entering dead man loop\n"); + printk(BIOS_DEBUG, "XIVE configured, entering dead man loop\n"); psu_command(DEADMAN_LOOP_START, time); block_wakeup_int(this_core, 1); @@ -1327,7 +1327,7 @@ static void pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores /* OCCFLG2_PGPE_HCODE_FIT_ERR_INJ | OCCFLG2_PGPE_HCODE_PSTATE_REQ_ERR_INJ */ write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); - printk(BIOS_ERR, "Attempting PGPE activation...\n"); + printk(BIOS_DEBUG, "Attempting PGPE activation...\n"); write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); @@ -1338,7 +1338,7 @@ static void pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores (read_scom(chip, PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); if (read_scom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) - printk(BIOS_ERR, "PGPE was activated successfully\n"); + printk(BIOS_DEBUG, "PGPE was activated successfully\n"); else die("Failed to activate PGPE\n"); @@ -1528,16 +1528,16 @@ static void istep_21_1(uint8_t chips, struct homer_st *homers, const uint64_t *c load_pm_complex(chip, &homers[chip]); } - printk(BIOS_ERR, "Starting PM complex...\n"); + printk(BIOS_DEBUG, "Starting PM complex...\n"); for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) start_pm_complex(chip, &homers[chip], cores[chip]); } - printk(BIOS_ERR, "Done starting PM complex\n"); + printk(BIOS_DEBUG, "Done starting PM complex\n"); - printk(BIOS_ERR, "Activating OCC...\n"); + printk(BIOS_DEBUG, "Activating OCC...\n"); activate_occ(chips, homers); - printk(BIOS_ERR, "Done activating OCC\n"); + printk(BIOS_DEBUG, "Done activating OCC\n"); } /* Extracts rings for a specific Programmable PowerPC-lite Engine */ @@ -2570,7 +2570,7 @@ void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_ if (this_core == -1) die("Couldn't found active core\n"); - printk(BIOS_ERR, "DD%2.2x, boot core: %d\n", dd, this_core); + printk(BIOS_DEBUG, "DD%2.2x, boot core: %d\n", dd, this_core); /* HOMER must be aligned to 4M because CME HRMOR has bit for 2M set */ if (!IS_ALIGNED((uint64_t) homer_bar, 4 * MiB)) diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index 397657fb661..f2cbb752dae 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -144,7 +144,7 @@ static void dump_cal_errors(uint8_t chip, int mcs_i, int mca_i) printk(BIOS_ERR, "DP %d\n", dp); printk(BIOS_ERR, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + printk(RAM_DEBUG, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); printk(BIOS_ERR, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c index f2debcf829f..686023e438a 100644 --- a/src/soc/ibm/power9/istep_13_3.c +++ b/src/soc/ibm/power9/istep_13_3.c @@ -115,7 +115,7 @@ static void mem_pll_initf(uint8_t chip) /* This may depend on the requested frequency, but for current setup in our * lab this is ~3ms both for coreboot and Hostboot. 
*/ - printk(BIOS_EMERG, "putRing took %ld ms\n", time); + printk(RAM_DEBUG, "putRing took %ld ms\n", time); // Clear SBE->host doorbell // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c index 53577cf5447..be3b1f5a200 100644 --- a/src/soc/ibm/power9/istep_14_3.c +++ b/src/soc/ibm/power9/istep_14_3.c @@ -51,7 +51,7 @@ static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { uint64_t val = 0; - printk(BIOS_EMERG, "Initializing PEC%d...\n", pec); + printk(BIOS_INFO, "Initializing PEC%d...\n", pec); /* * ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID = 0 @@ -318,7 +318,7 @@ static void init_phbs(uint8_t chip, uint8_t phb_active_mask, const uint8_t *iova if (!(phb_active_mask & (PHB0_MASK >> phb))) continue; - printk(BIOS_EMERG, "Initializing PHB%d...\n", phb); + printk(BIOS_INFO, "Initializing PHB%d...\n", phb); /* * Phase2 init step 12_a (yes, out of order) diff --git a/src/soc/ibm/power9/istep_8_4.c b/src/soc/ibm/power9/istep_8_4.c index 545120b0abb..2a3521141b5 100644 --- a/src/soc/ibm/power9/istep_8_4.c +++ b/src/soc/ibm/power9/istep_8_4.c @@ -66,7 +66,7 @@ static bool sbe_run_extract_msg_reg(uint8_t chip) break; if ((i * SBE_WAIT_SLEEP_MS) % 1000 == 0) - printk(BIOS_EMERG, "SBE for chip #%d is booting...\n", chip); + printk(BIOS_NOTICE, "SBE for chip #%d is booting...\n", chip); /* Hostboot resets watchdog before sleeping, we might want to do it too or just increase timer after experimenting */ diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index ae0422b95a7..33dacbaac36 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -485,9 +485,11 @@ static bool write_occ_cmd(uint8_t chip, struct homer_st *homer, uint8_t occ_cmd, occ_cmd); } - printk(BIOS_WARNING, "Received OCC response:\n"); - hexdump(response, *response_len); - printk(BIOS_WARNING, "Failed to parse OCC response\n"); + if (console_log_level(BIOS_WARNING)) { + printk(BIOS_WARNING, "Received OCC response:\n"); + hexdump(response, *response_len); + printk(BIOS_WARNING, "Failed to parse OCC response\n"); + } return false; } @@ -514,7 +516,7 @@ static void send_occ_cmd(uint8_t chip, struct homer_st *homer, uint8_t occ_cmd, break; if (i < MAX_TRIES - 1) - printk(BIOS_WARNING, "Retrying running OCC command 0x%02x\n", occ_cmd); + printk(BIOS_DEBUG, "Retrying running OCC command 0x%02x\n", occ_cmd); } if (i == MAX_TRIES) @@ -545,8 +547,10 @@ static void handle_occ_error(uint8_t chip, struct homer_st *homer, read_occ_sram(chip, response->error_address, (uint64_t *)error_log_buf, error_length); - printk(BIOS_WARNING, "OCC error log:\n"); - hexdump(error_log_buf, error_length); + if (console_log_level(BIOS_WARNING)) { + printk(BIOS_WARNING, "OCC error log:\n"); + hexdump(error_log_buf, error_length); + } /* Confirm to OCC that we've read the log */ send_occ_cmd(chip, homer, OCC_CMD_CLEAR_ERROR_LOG, @@ -580,8 +584,10 @@ static void poll_occ(uint8_t chip, struct homer_st *homer, bool flush_all_errors --max_more_errors; if (max_more_errors == 0) { - printk(BIOS_WARNING, "Last OCC poll response:\n"); - hexdump(response, response_len); + if (console_log_level(BIOS_WARNING)) { + printk(BIOS_WARNING, "Last OCC poll response:\n"); + hexdump(response, response_len); + } die("Hit too many errors on polling OCC\n"); } } @@ -1171,7 +1177,8 @@ static void set_occ_state(uint8_t chip, struct homer_st *homer, uint8_t state) poll_occ(chip, homer, /*flush_all_errors=*/true, &poll_response); if 
(poll_response.state != state) - die("State of OCC is 0x%02x instead of 0x%02x.\n", poll_response.state, state); + die("State of OCC is 0x%02x instead of 0x%02x.\n", + poll_response.state, state); } static void set_occ_active_state(uint8_t chip, struct homer_st *homer) diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c index d63047a0330..56a48157191 100644 --- a/src/soc/ibm/power9/romstage.c +++ b/src/soc/ibm/power9/romstage.c @@ -362,7 +362,7 @@ static void build_mvpds(uint8_t chips) { uint8_t chip; - printk(BIOS_EMERG, "Building MVPDs...\n"); + printk(BIOS_NOTICE, "Building MVPDs...\n"); /* Calling mvpd_get_available_cores() triggers building and caching of MVPD */ for (chip = 0; chip < MAX_CHIPS; ++chip) { @@ -393,10 +393,10 @@ void main(void) */ (void)ipmi_init_and_start_bmc_wdt(CONFIG_BMC_BT_BASE, 120, TIMEOUT_HARD_RESET); - printk(BIOS_EMERG, "Initializing FSI...\n"); + printk(BIOS_DEBUG, "Initializing FSI...\n"); fsi_init(); chips = fsi_get_present_chips(); - printk(BIOS_EMERG, "Initialized FSI (chips mask: 0x%02X)\n", chips); + printk(BIOS_DEBUG, "Initialized FSI (chips mask: 0x%02X)\n", chips); build_mvpds(chips); From eeb1e8487b01f8a363a9362fe6e604314c6cff33 Mon Sep 17 00:00:00 2001 From: Kacper Stojek Date: Fri, 15 Jul 2022 07:06:09 +0200 Subject: [PATCH 191/213] soc/power9/: change logs in dump_cal_errors() Change-Id: Idea5bd1ff8785da99f808e18b4dfa2765fe88788 Signed-off-by: Kacper Stojek Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/ccs.c | 73 ++++++++++++++++---------------- src/soc/ibm/power9/istep_13_11.c | 40 +++++++++-------- 2 files changed, 55 insertions(+), 58 deletions(-) diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c index 36612c70267..be7603db5ca 100644 --- a/src/soc/ibm/power9/ccs.c +++ b/src/soc/ibm/power9/ccs.c @@ -104,55 +104,54 @@ static void dump_cal_errors(uint8_t chip, chiplet_id_t id, int mca_i) /* Stop CCS so it won't mess up with the values */ write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); -#if CONFIG(DEBUG_RAM_SETUP) int dp; for (dp = 0; dp < 5; dp++) { - printk(BIOS_ERR, "DP %d\n", dp); - printk(BIOS_ERR, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - WR_ERROR0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - RD_STATUS0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS2\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR0\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR1\n", - dp_mca_read(id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); + printk(RAM_SPEW, "DP %d\n", dp); + printk(RAM_SPEW, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - 
DQS_BIT_DISABLE_RP0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_ERROR0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_STATUS0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS2\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR1\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); } - printk(BIOS_ERR, "%#16.16llx - APB_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); + printk(RAM_SPEW, "%#16.16llx - APB_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - RC_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); + printk(RAM_SPEW, "%#16.16llx - RC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - SEQ_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); + printk(RAM_SPEW, "%#16.16llx - SEQ_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - WC_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); + printk(RAM_SPEW, "%#16.16llx - WC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - PC_ERROR_STATUS0\n", - mca_read(id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); + printk(RAM_SPEW, "%#16.16llx - PC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_ERROR\n", - mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_ERROR\n", + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); - printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_STATUS\n", - mca_read(id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_STATUS\n", + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + + printk(RAM_SPEW, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", + mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); - printk(BIOS_ERR, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", - mca_read(id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); -#endif die("CCS execution timeout\n"); } diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c index f2cbb752dae..b6e134f86d8 100644 --- a/src/soc/ibm/power9/istep_13_11.c +++ b/src/soc/ibm/power9/istep_13_11.c @@ -130,7 +130,6 @@ static void clear_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) static void dump_cal_errors(uint8_t chip, int mcs_i, int mca_i) { -#if CONFIG(DEBUG_RAM_SETUP) chiplet_id_t id = mcs_ids[mcs_i]; int dp; @@ -141,55 +140,54 @@ static void dump_cal_errors(uint8_t chip, int mcs_i, int mca_i) * Serial over LAN and may not print few last characters. 
*/ for (dp = 0; dp < 5; dp++) { - printk(BIOS_ERR, "DP %d\n", dp); - printk(BIOS_ERR, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", + printk(RAM_SPEW, "DP %d\n", dp); + printk(RAM_SPEW, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); - printk(RAM_DEBUG, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + printk(RAM_SPEW, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", + printk(RAM_SPEW, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - WR_ERROR0\n", + printk(RAM_SPEW, "\t%#16.16llx - WR_ERROR0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - RD_STATUS0\n", + printk(RAM_SPEW, "\t%#16.16llx - RD_STATUS0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS2\n", + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS2\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - RD_LVL_STATUS0\n", + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR0\n", + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR0\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); - printk(BIOS_ERR, "\t%#16.16llx - WR_VREF_ERROR1\n", + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR1\n", dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); } - printk(BIOS_ERR, "%#16.16llx - APB_ERROR_STATUS0\n", + printk(RAM_SPEW, "%#16.16llx - APB_ERROR_STATUS0\n", mca_read(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - RC_ERROR_STATUS0\n", + printk(RAM_SPEW, "%#16.16llx - RC_ERROR_STATUS0\n", mca_read(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - SEQ_ERROR_STATUS0\n", + printk(RAM_SPEW, "%#16.16llx - SEQ_ERROR_STATUS0\n", mca_read(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - WC_ERROR_STATUS0\n", + printk(RAM_SPEW, "%#16.16llx - WC_ERROR_STATUS0\n", mca_read(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - PC_ERROR_STATUS0\n", + printk(RAM_SPEW, "%#16.16llx - PC_ERROR_STATUS0\n", mca_read(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); - printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_ERROR\n", + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_ERROR\n", mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); /* 0x8000 on success for first rank, 0x4000 for second */ - printk(BIOS_ERR, "%#16.16llx - PC_INIT_CAL_STATUS\n", + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_STATUS\n", mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); - printk(BIOS_ERR, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", + printk(RAM_SPEW, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); - printk(BIOS_ERR, "%#16.16llx - MBACALFIRQ\n", + printk(RAM_SPEW, "%#16.16llx - MBACALFIRQ\n", mca_read(chip, id, mca_i, MBACALFIR)); -#endif } /* Based on ATTR_MSS_MRW_RESET_DELAY_BEFORE_CAL, by default do it. 
*/ From 608afe0245eba65ae27e5d90ac7e62df78314554 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 22 Mar 2023 21:15:33 +0100 Subject: [PATCH 192/213] lib: make fields in CBMEM always little endian Fields for coreboot table checksums in commonlib/coreboot_tables.h were split from uint32_t to two uint16_t (checksum and padding) each. This doesn't change anything for LE targets, but allows for accessing correct bytes on BE platforms without shifting or pointer juggling. Signed-off-by: Krystian Hebel Change-Id: I52f6b8bf1110fd3cb0d499be15de0ad147605c4b --- .../include/commonlib/coreboot_tables.h | 6 +- src/lib/bootmem.c | 12 +- src/lib/cbmem_console.c | 43 +++-- src/lib/coreboot_table.c | 175 +++++++++--------- src/lib/imd_cbmem.c | 11 +- 5 files changed, 131 insertions(+), 116 deletions(-) diff --git a/src/commonlib/include/commonlib/coreboot_tables.h b/src/commonlib/include/commonlib/coreboot_tables.h index 422fedfa99b..864a11894f1 100644 --- a/src/commonlib/include/commonlib/coreboot_tables.h +++ b/src/commonlib/include/commonlib/coreboot_tables.h @@ -109,9 +109,11 @@ typedef __aligned(4) uint64_t lb_uint64_t; struct lb_header { uint8_t signature[4]; /* LBIO */ uint32_t header_bytes; - uint32_t header_checksum; + uint16_t header_checksum; + uint16_t pad0; uint32_t table_bytes; - uint32_t table_checksum; + uint16_t table_checksum; + uint16_t pad1; uint32_t table_entries; }; diff --git a/src/lib/bootmem.c b/src/lib/bootmem.c index 078f9609325..7722d791866 100644 --- a/src/lib/bootmem.c +++ b/src/lib/bootmem.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,7 @@ void bootmem_write_memory_table(struct lb_memory *mem) { const struct range_entry *r; struct lb_memory_range *lb_r; + uint32_t entry_size = le32toh(mem->size); lb_r = &mem->map[0]; @@ -110,14 +112,16 @@ void bootmem_write_memory_table(struct lb_memory *mem) bootmem_dump_ranges(); memranges_each_entry(r, &bootmem_os) { - lb_r->start = range_entry_base(r); - lb_r->size = range_entry_size(r); - lb_r->type = bootmem_to_lb_tag(range_entry_tag(r)); + lb_r->start = htole64(range_entry_base(r)); + lb_r->size = htole64(range_entry_size(r)); + lb_r->type = htole32(bootmem_to_lb_tag(range_entry_tag(r))); lb_r++; - mem->size += sizeof(struct lb_memory_range); + entry_size += sizeof(struct lb_memory_range); } + mem->size = htole32(entry_size); + table_written = 1; } diff --git a/src/lib/cbmem_console.c b/src/lib/cbmem_console.c index ad3b99bbba9..7ca5008d45e 100644 --- a/src/lib/cbmem_console.c +++ b/src/lib/cbmem_console.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -57,9 +58,9 @@ static u8 static_console[STATIC_CONSOLE_SIZE]; static int buffer_valid(struct cbmem_console *cbm_cons_p, u32 total_space) { - return (cbm_cons_p->cursor & CURSOR_MASK) < cbm_cons_p->size && - cbm_cons_p->size <= MAX_SIZE && - cbm_cons_p->size == total_space - sizeof(struct cbmem_console); + return (le32toh(cbm_cons_p->cursor) & CURSOR_MASK) < le32toh(cbm_cons_p->size) && + le32toh(cbm_cons_p->size) <= MAX_SIZE && + le32toh(cbm_cons_p->size) == total_space - sizeof(struct cbmem_console); } static void init_console_ptr(void *storage, u32 total_space) @@ -72,7 +73,7 @@ static void init_console_ptr(void *storage, u32 total_space) } if (!buffer_valid(cbm_cons_p, total_space)) { - cbm_cons_p->size = total_space - sizeof(struct cbmem_console); + cbm_cons_p->size = htole32(total_space - sizeof(struct cbmem_console)); cbm_cons_p->cursor = 0; } @@ -102,16 +103,16 @@ void cbmemc_tx_byte(unsigned 
char data) if (!current_console || !current_console->size || console_paused) return; - u32 flags = current_console->cursor & ~CURSOR_MASK; - u32 cursor = current_console->cursor & CURSOR_MASK; + u32 flags = le32toh(current_console->cursor) & ~CURSOR_MASK; + u32 cursor = le32toh(current_console->cursor) & CURSOR_MASK; current_console->body[cursor++] = data; - if (cursor >= current_console->size) { + if (cursor >= le32toh(current_console->size)) { cursor = 0; flags |= OVERFLOW; } - current_console->cursor = flags | cursor; + current_console->cursor = htole32(flags | cursor); } /* @@ -128,17 +129,17 @@ static void copy_console_buffer(struct cbmem_console *src_cons_p) if (!src_cons_p) return; - if (src_cons_p->cursor & OVERFLOW) { + if (le32toh(src_cons_p->cursor) & OVERFLOW) { const char overflow_warning[] = "\n*** Pre-CBMEM " ENV_STRING " console overflowed, log truncated! ***\n"; for (c = 0; c < sizeof(overflow_warning) - 1; c++) cbmemc_tx_byte(overflow_warning[c]); - for (c = src_cons_p->cursor & CURSOR_MASK; - c < src_cons_p->size; c++) + for (c = le32toh(src_cons_p->cursor) & CURSOR_MASK; + c < le32toh(src_cons_p->size); c++) cbmemc_tx_byte(src_cons_p->body[c]); } - for (c = 0; c < (src_cons_p->cursor & CURSOR_MASK); c++) + for (c = 0; c < (le32toh(src_cons_p->cursor) & CURSOR_MASK); c++) cbmemc_tx_byte(src_cons_p->body[c]); /* Invalidate the source console, so it will be reinitialized on the @@ -184,9 +185,9 @@ void cbmem_dump_console_to_uart(void) console_index = get_uart_for_console(); uart_init(console_index); - if (current_console->cursor & OVERFLOW) { - for (cursor = current_console->cursor & CURSOR_MASK; - cursor < current_console->size; cursor++) { + if (le32toh(current_console->cursor) & OVERFLOW) { + for (cursor = le32toh(current_console->cursor) & CURSOR_MASK; + cursor < le32toh(current_console->size); cursor++) { if (BIOS_LOG_IS_MARKER(current_console->body[cursor])) continue; if (current_console->body[cursor] == '\n') @@ -194,7 +195,8 @@ void cbmem_dump_console_to_uart(void) uart_tx_byte(console_index, current_console->body[cursor]); } } - for (cursor = 0; cursor < (current_console->cursor & CURSOR_MASK); cursor++) { + for (cursor = 0; cursor < (le32toh(current_console->cursor) & CURSOR_MASK); + cursor++) { if (BIOS_LOG_IS_MARKER(current_console->body[cursor])) continue; if (current_console->body[cursor] == '\n') @@ -212,12 +214,13 @@ void cbmem_dump_console(void) console_paused = true; - if (current_console->cursor & OVERFLOW) - for (cursor = current_console->cursor & CURSOR_MASK; - cursor < current_console->size; cursor++) + if (le32toh(current_console->cursor) & OVERFLOW) + for (cursor = le32toh(current_console->cursor) & CURSOR_MASK; + cursor < le32toh(current_console->size); cursor++) if (!BIOS_LOG_IS_MARKER(current_console->body[cursor])) do_putchar(current_console->body[cursor]); - for (cursor = 0; cursor < (current_console->cursor & CURSOR_MASK); cursor++) + for (cursor = 0; cursor < (le32toh(current_console->cursor) & CURSOR_MASK); + cursor++) if (!BIOS_LOG_IS_MARKER(current_console->body[cursor])) do_putchar(current_console->body[cursor]); diff --git a/src/lib/coreboot_table.c b/src/lib/coreboot_table.c index 77e449cd510..67523d5b67a 100644 --- a/src/lib/coreboot_table.c +++ b/src/lib/coreboot_table.c @@ -49,7 +49,7 @@ static struct lb_header *lb_table_init(unsigned long addr) header->signature[1] = 'B'; header->signature[2] = 'I'; header->signature[3] = 'O'; - header->header_bytes = sizeof(*header); + header->header_bytes = htole32(sizeof(*header)); 
header->header_checksum = 0; header->table_bytes = 0; header->table_checksum = 0; @@ -68,7 +68,7 @@ static struct lb_record *lb_last_record(struct lb_header *header) { struct lb_record *rec; rec = (void *)(((char *)header) + sizeof(*header) - + header->table_bytes); + + le32toh(header->table_bytes)); return rec; } @@ -77,11 +77,12 @@ struct lb_record *lb_new_record(struct lb_header *header) struct lb_record *rec; rec = lb_last_record(header); if (header->table_entries) - header->table_bytes += rec->size; + header->table_bytes = htole32(le32toh(header->table_bytes) + + le32toh(rec->size)); rec = lb_last_record(header); - header->table_entries++; - rec->tag = LB_TAG_UNUSED; - rec->size = sizeof(*rec); + header->table_entries = htole32(le32toh(header->table_entries) + 1); + rec->tag = htole32(LB_TAG_UNUSED); + rec->size = htole32(sizeof(*rec)); return rec; } @@ -91,8 +92,8 @@ static struct lb_memory *lb_memory(struct lb_header *header) struct lb_memory *mem; rec = lb_new_record(header); mem = (struct lb_memory *)rec; - mem->tag = LB_TAG_MEMORY; - mem->size = sizeof(*mem); + mem->tag = htole32(LB_TAG_MEMORY); + mem->size = htole32(sizeof(*mem)); return mem; } @@ -102,14 +103,14 @@ void lb_add_serial(struct lb_serial *new_serial, void *data) struct lb_serial *serial; serial = (struct lb_serial *)lb_new_record(header); - serial->tag = LB_TAG_SERIAL; - serial->size = sizeof(*serial); - serial->type = new_serial->type; - serial->baseaddr = new_serial->baseaddr; - serial->baud = new_serial->baud; - serial->regwidth = new_serial->regwidth; - serial->input_hertz = new_serial->input_hertz; - serial->uart_pci_addr = new_serial->uart_pci_addr; + serial->tag = htole32(LB_TAG_SERIAL); + serial->size = htole32(sizeof(*serial)); + serial->type = htole32(new_serial->type); + serial->baseaddr = htole32(new_serial->baseaddr); + serial->baud = htole32(new_serial->baud); + serial->regwidth = htole32(new_serial->regwidth); + serial->input_hertz = htole32(new_serial->input_hertz); + serial->uart_pci_addr = htole32(new_serial->uart_pci_addr); } void lb_add_console(uint16_t consoletype, void *data) @@ -118,14 +119,16 @@ void lb_add_console(uint16_t consoletype, void *data) struct lb_console *console; console = (struct lb_console *)lb_new_record(header); - console->tag = LB_TAG_CONSOLE; - console->size = sizeof(*console); - console->type = consoletype; + console->tag = htole32(LB_TAG_CONSOLE); + console->size = htole32(sizeof(*console)); + console->type = htole16(consoletype); } static void lb_pcie(struct lb_header *header) { - struct lb_pcie pcie = { .tag = LB_TAG_PCIE, .size = sizeof(pcie) }; + struct lb_pcie pcie = { .tag = htole32(LB_TAG_PCIE), + .size = htole32(sizeof(pcie)), + }; if (lb_fill_pcie(&pcie) != CB_SUCCESS) return; @@ -143,8 +146,8 @@ static void lb_framebuffer(struct lb_header *header) framebuffer = (struct lb_framebuffer *)lb_new_record(header); memcpy(framebuffer, &fb, sizeof(*framebuffer)); - framebuffer->tag = LB_TAG_FRAMEBUFFER; - framebuffer->size = sizeof(*framebuffer); + framebuffer->tag = htole32(LB_TAG_FRAMEBUFFER); + framebuffer->size = htole32(sizeof(*framebuffer)); if (CONFIG(BOOTSPLASH)) { uint8_t *fb_ptr = (uint8_t *)(uintptr_t)framebuffer->physical_address; @@ -160,9 +163,9 @@ void lb_add_gpios(struct lb_gpios *gpios, const struct lb_gpio *gpio_table, { size_t table_size = count * sizeof(struct lb_gpio); - memcpy(&gpios->gpios[gpios->count], gpio_table, table_size); - gpios->count += count; - gpios->size += table_size; + memcpy(&gpios->gpios[le32toh(gpios->count)], gpio_table, 
table_size); + gpios->count = htole32(le32toh(gpios->count) + count); + gpios->size = htole32(le32toh(gpios->size) + table_size); } static void lb_gpios(struct lb_header *header) @@ -171,25 +174,25 @@ static void lb_gpios(struct lb_header *header) struct lb_gpio *g; gpios = (struct lb_gpios *)lb_new_record(header); - gpios->tag = LB_TAG_GPIO; - gpios->size = sizeof(*gpios); + gpios->tag = htole32(LB_TAG_GPIO); + gpios->size = htole32(sizeof(*gpios)); gpios->count = 0; fill_lb_gpios(gpios); printk(BIOS_INFO, "Passing %u GPIOs to payload:\n" " NAME | PORT | POLARITY | VALUE\n", - gpios->count); - for (g = &gpios->gpios[0]; g < &gpios->gpios[gpios->count]; g++) { + le32toh(gpios->count)); + for (g = &gpios->gpios[0]; g < &gpios->gpios[le32toh(gpios->count)]; g++) { printk(BIOS_INFO, "%16.16s | ", g->name); if (g->port == -1) printk(BIOS_INFO, " undefined | "); else - printk(BIOS_INFO, "%#.8x | ", g->port); - if (g->polarity == ACTIVE_HIGH) + printk(BIOS_INFO, "%#.8x | ", le32toh(g->port)); + if (g->polarity == htole32(ACTIVE_HIGH)) printk(BIOS_INFO, " high | "); else printk(BIOS_INFO, " low | "); - switch (g->value) { + switch (htole32(g->value)) { case 0: printk(BIOS_INFO, " low\n"); break; @@ -221,14 +224,14 @@ static void lb_boot_media_params(struct lb_header *header) return; bmp = (struct lb_boot_media_params *)lb_new_record(header); - bmp->tag = LB_TAG_BOOT_MEDIA_PARAMS; - bmp->size = sizeof(*bmp); + bmp->tag = htole32(LB_TAG_BOOT_MEDIA_PARAMS); + bmp->size = htole32(sizeof(*bmp)); - bmp->cbfs_offset = region_device_offset(&cbd->rdev); - bmp->cbfs_size = region_device_sz(&cbd->rdev); - bmp->boot_media_size = region_device_sz(boot_dev); + bmp->cbfs_offset = htole64(region_device_offset(&cbd->rdev)); + bmp->cbfs_size = htole64(region_device_sz(&cbd->rdev)); + bmp->boot_media_size = htole64(region_device_sz(boot_dev)); - bmp->fmap_offset = get_fmap_flash_offset(); + bmp->fmap_offset = htole64(get_fmap_flash_offset()); } static void lb_mmc_info(struct lb_header *header) @@ -242,9 +245,9 @@ static void lb_mmc_info(struct lb_header *header) rec = (struct lb_mmc_info *)lb_new_record(header); - rec->tag = LB_TAG_MMC_INFO; - rec->size = sizeof(*rec); - rec->early_cmd1_status = *ms_cbmem; + rec->tag = htole32(LB_TAG_MMC_INFO); + rec->size = htole32(sizeof(*rec)); + rec->early_cmd1_status = htole32(*ms_cbmem); } static void add_cbmem_pointers(struct lb_header *header) @@ -283,9 +286,9 @@ static void add_cbmem_pointers(struct lb_header *header) printk(BIOS_ERR, "No more room in coreboot table!\n"); break; } - cbmem_ref->tag = sid->table_tag; - cbmem_ref->size = sizeof(*cbmem_ref); - cbmem_ref->cbmem_addr = (unsigned long)cbmem_addr; + cbmem_ref->tag = htole32(sid->table_tag); + cbmem_ref->size = htole32(sizeof(*cbmem_ref)); + cbmem_ref->cbmem_addr = htole64((uintptr_t)cbmem_addr); } } @@ -295,11 +298,11 @@ static struct lb_mainboard *lb_mainboard(struct lb_header *header) struct lb_mainboard *mainboard; rec = lb_new_record(header); mainboard = (struct lb_mainboard *)rec; - mainboard->tag = LB_TAG_MAINBOARD; + mainboard->tag = htole32(LB_TAG_MAINBOARD); - mainboard->size = ALIGN_UP(sizeof(*mainboard) + - strlen(mainboard_vendor) + 1 + - strlen(mainboard_part_number) + 1, 8); + mainboard->size = htole32(ALIGN_UP(sizeof(*mainboard) + + strlen(mainboard_vendor) + 1 + + strlen(mainboard_part_number) + 1, 8)); mainboard->vendor_idx = 0; mainboard->part_number_idx = strlen(mainboard_vendor) + 1; @@ -319,23 +322,23 @@ static struct lb_board_config *lb_board_config(struct lb_header *header) rec = 
lb_new_record(header); config = (struct lb_board_config *)rec; - config->tag = LB_TAG_BOARD_CONFIG; - config->size = sizeof(*config); + config->tag = htole32(LB_TAG_BOARD_CONFIG); + config->size = htole32(sizeof(*config)); const uint64_t fw_config = fw_config_get(); - config->board_id = board_id(); - config->ram_code = ram_code(); - config->sku_id = sku_id(); - config->fw_config = fw_config; - - if (config->board_id != UNDEFINED_STRAPPING_ID) - printk(BIOS_INFO, "Board ID: %d\n", config->board_id); - if (config->ram_code != UNDEFINED_STRAPPING_ID) - printk(BIOS_INFO, "RAM code: %d\n", config->ram_code); - if (config->sku_id != UNDEFINED_STRAPPING_ID) - printk(BIOS_INFO, "SKU ID: %d\n", config->sku_id); - if (fw_config != UNDEFINED_FW_CONFIG) - printk(BIOS_INFO, "FW config: %#" PRIx64 "\n", fw_config); + config->board_id = htole32(board_id()); + config->ram_code = htole32(ram_code()); + config->sku_id = htole32(sku_id()); + config->fw_config = htole64(fw_config); + + if (config->board_id != htole32(UNDEFINED_STRAPPING_ID)) + printk(BIOS_INFO, "Board ID: %d\n", htole32(config->board_id)); + if (config->ram_code != htole32(UNDEFINED_STRAPPING_ID)) + printk(BIOS_INFO, "RAM code: %d\n", htole32(config->ram_code)); + if (config->sku_id != htole32(UNDEFINED_STRAPPING_ID)) + printk(BIOS_INFO, "SKU ID: %d\n", htole32(config->sku_id)); + if (fw_config != htole64(UNDEFINED_FW_CONFIG)) + printk(BIOS_INFO, "FW config: %#" PRIx64 "\n", htole64(fw_config)); return config; } @@ -347,14 +350,14 @@ static struct cmos_checksum *lb_cmos_checksum(struct lb_header *header) struct cmos_checksum *cmos_checksum; rec = lb_new_record(header); cmos_checksum = (struct cmos_checksum *)rec; - cmos_checksum->tag = LB_TAG_OPTION_CHECKSUM; + cmos_checksum->tag = htole32(LB_TAG_OPTION_CHECKSUM); - cmos_checksum->size = (sizeof(*cmos_checksum)); + cmos_checksum->size = htole32(sizeof(*cmos_checksum)); - cmos_checksum->range_start = LB_CKS_RANGE_START * 8; - cmos_checksum->range_end = (LB_CKS_RANGE_END * 8) + 7; - cmos_checksum->location = LB_CKS_LOC * 8; - cmos_checksum->type = CHECKSUM_PCBIOS; + cmos_checksum->range_start = htole32(LB_CKS_RANGE_START * 8); + cmos_checksum->range_end = htole32((LB_CKS_RANGE_END * 8) + 7); + cmos_checksum->location = htole32(LB_CKS_LOC * 8); + cmos_checksum->type = htole32(CHECKSUM_PCBIOS); return cmos_checksum; } @@ -377,8 +380,8 @@ static void lb_strings(struct lb_header *header) size_t len; rec = (struct lb_string *)lb_new_record(header); len = strlen(strings[i].string); - rec->tag = strings[i].tag; - rec->size = ALIGN_UP(sizeof(*rec) + len + 1, 8); + rec->tag = htole32(strings[i].tag); + rec->size = htole32(ALIGN_UP(sizeof(*rec) + len + 1, 8)); memcpy(rec->string, strings[i].string, len+1); } @@ -388,9 +391,9 @@ static void lb_record_version_timestamp(struct lb_header *header) { struct lb_timestamp *rec; rec = (struct lb_timestamp *)lb_new_record(header); - rec->tag = LB_TAG_VERSION_TIMESTAMP; - rec->size = sizeof(*rec); - rec->timestamp = coreboot_version_timestamp; + rec->tag = htole32(LB_TAG_VERSION_TIMESTAMP); + rec->size = htole32(sizeof(*rec)); + rec->timestamp = htole32(coreboot_version_timestamp); } void __weak lb_board(struct lb_header *header) { /* NOOP */ } @@ -410,9 +413,9 @@ static struct lb_forward *lb_forward(struct lb_header *header, struct lb_forward *forward; rec = lb_new_record(header); forward = (struct lb_forward *)rec; - forward->tag = LB_TAG_FORWARD; - forward->size = sizeof(*forward); - forward->forward = (uint64_t)(unsigned long)next_header; + forward->tag = 
htole32(LB_TAG_FORWARD); + forward->size = htole32(sizeof(*forward)); + forward->forward = htole64((uint64_t)(unsigned long)next_header); return forward; } @@ -421,17 +424,19 @@ static unsigned long lb_table_fini(struct lb_header *head) struct lb_record *rec, *first_rec; rec = lb_last_record(head); if (head->table_entries) - head->table_bytes += rec->size; + head->table_bytes = htole32(le32toh(head->table_bytes) + + le32toh(rec->size)); first_rec = lb_first_record(head); + /* compute_ip_checksum() returns checksum in correct endianness */ head->table_checksum = compute_ip_checksum(first_rec, - head->table_bytes); + le32toh(head->table_bytes)); head->header_checksum = 0; head->header_checksum = compute_ip_checksum(head, sizeof(*head)); printk(BIOS_DEBUG, "Wrote coreboot table at: %p, 0x%x bytes, checksum %x\n", - head, head->table_bytes, head->table_checksum); - return (unsigned long)rec + rec->size; + head, le32toh(head->table_bytes), le32toh(head->table_checksum)); + return (unsigned long)rec + le32toh(rec->size); } static void lb_add_acpi_rsdp(struct lb_header *head) @@ -439,9 +444,9 @@ static void lb_add_acpi_rsdp(struct lb_header *head) struct lb_acpi_rsdp *acpi_rsdp; struct lb_record *rec = lb_new_record(head); acpi_rsdp = (struct lb_acpi_rsdp *)rec; - acpi_rsdp->tag = LB_TAG_ACPI_RSDP; - acpi_rsdp->size = sizeof(*acpi_rsdp); - acpi_rsdp->rsdp_pointer = get_coreboot_rsdp(); + acpi_rsdp->tag = htole32(LB_TAG_ACPI_RSDP); + acpi_rsdp->size = htole32(sizeof(*acpi_rsdp)); + acpi_rsdp->rsdp_pointer = htole64(get_coreboot_rsdp()); } size_t write_coreboot_forwarding_table(uintptr_t entry, uintptr_t target) @@ -475,7 +480,7 @@ static uintptr_t write_coreboot_table(uintptr_t rom_table_end) /* Copy the option config table, it's already a * lb_record... */ - memcpy(rec_dest, option_table, option_table->size); + memcpy(rec_dest, option_table, le32toh(option_table->size)); /* Create CMOS checksum entry in coreboot table */ lb_cmos_checksum(head); } else { diff --git a/src/lib/imd_cbmem.c b/src/lib/imd_cbmem.c index a855cf18b3f..3af2effc185 100644 --- a/src/lib/imd_cbmem.c +++ b/src/lib/imd_cbmem.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -242,10 +243,10 @@ void cbmem_add_records_to_cbtable(struct lb_header *header) continue; lbe = (struct lb_cbmem_entry *)lb_new_record(header); - lbe->tag = LB_TAG_CBMEM_ENTRY; - lbe->size = sizeof(*lbe); - lbe->address = (uintptr_t)imd_entry_at(&imd, e); - lbe->entry_size = imd_entry_size(e); - lbe->id = id; + lbe->tag = htole32(LB_TAG_CBMEM_ENTRY); + lbe->size = htole32(sizeof(*lbe)); + lbe->address = htole64((uintptr_t)imd_entry_at(&imd, e)); + lbe->entry_size = htole32(imd_entry_size(e)); + lbe->id = htole32(id); } } From 8a71dd5354c3b5dfc121159d4c9dd6227c7e5cca Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 29 Mar 2023 18:56:18 +0200 Subject: [PATCH 193/213] security/tpm: allow for TPM2 log format for TPM1 devices Signed-off-by: Krystian Hebel Change-Id: Ie501915df4a51e7ca394b3c4a542c3faa0e2bf9a --- src/security/tpm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/security/tpm/Kconfig b/src/security/tpm/Kconfig index 8466d80dbe6..7f2aa9c5fce 100644 --- a/src/security/tpm/Kconfig +++ b/src/security/tpm/Kconfig @@ -112,7 +112,7 @@ config TPM_LOG_TPM1 See "TCG PC Client Specific Implementation Specification for Conventional BIOS". config TPM_LOG_TPM2 bool "TPM 2.0 format" - depends on TPM2 + depends on TPM2 || TPM1 help Log per TPM 2.0 specification. 
See "TCG PC Client Platform Firmware Profile Specification". From 274fc3d06229c99f0f3ee580ddb25c8a8ffbfea7 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 4 Jun 2022 16:25:11 +0300 Subject: [PATCH 194/213] drivers/i2c/tpm/tpm.c: fix endianness of DID_VID constants It's not clear why bytes were swapped only in check for 9535, but updating both constants seems to make sense. Signed-off-by: Sergii Dmytruk --- src/drivers/i2c/tpm/tpm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/drivers/i2c/tpm/tpm.c b/src/drivers/i2c/tpm/tpm.c index 840b947ee74..f325687b151 100644 --- a/src/drivers/i2c/tpm/tpm.c +++ b/src/drivers/i2c/tpm/tpm.c @@ -41,8 +41,8 @@ #define MAX_COUNT_LONG 50 /* expected value for DIDVID register */ -#define TPM_TIS_I2C_DID_VID_9635 0x000b15d1L -#define TPM_TIS_I2C_DID_VID_9645 0x001a15d1L +#define TPM_TIS_I2C_DID_VID_9635 be32_to_cpu(0xd1150b00L) +#define TPM_TIS_I2C_DID_VID_9645 be32_to_cpu(0xd1151a00L) enum i2c_chip_type { SLB9635, @@ -527,7 +527,7 @@ int tpm_vendor_init(struct tpm_chip *chip, unsigned int bus, uint32_t dev_addr) if (vendor == TPM_TIS_I2C_DID_VID_9645) { tpm_dev.chip_type = SLB9645; - } else if (be32_to_cpu(vendor) == TPM_TIS_I2C_DID_VID_9635) { + } else if (vendor == TPM_TIS_I2C_DID_VID_9635) { tpm_dev.chip_type = SLB9635; } else { printk(BIOS_DEBUG, "Vendor ID 0x%08x not recognized.\n", From 7445e8fc4b9c99c147ad3805443fc59ee156ac75 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 5 Jun 2022 16:26:36 +0300 Subject: [PATCH 195/213] security/tpm/tspi/tspi.c: allow larger stack for tpm_measure_region() Change-Id: I469d0c12a40b9bbb3bec53d486511c4bfc326499 Signed-off-by: Sergii Dmytruk --- src/security/tpm/tspi/tspi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/security/tpm/tspi/tspi.c b/src/security/tpm/tspi/tspi.c index 633834918cd..758b1a6558c 100644 --- a/src/security/tpm/tspi/tspi.c +++ b/src/security/tpm/tspi/tspi.c @@ -252,6 +252,10 @@ uint32_t tpm_extend_pcr(int pcr, enum vb2_hash_algorithm digest_algo, } #if CONFIG(VBOOT_LIB) +#pragma GCC diagnostic push +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic ignored "-Wstack-usage=" +#endif uint32_t tpm_measure_region(const struct region_device *rdev, uint8_t pcr, const char *rname) { @@ -293,4 +297,5 @@ uint32_t tpm_measure_region(const struct region_device *rdev, uint8_t pcr, } return tpm_extend_pcr(pcr, TPM_MEASURE_ALGO, digest, digest_len, rname); } +#pragma GCC diagnostic pop #endif /* VBOOT_LIB */ From 675541cea3ce0e223027940240ebd38e93b0825d Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 5 Jun 2022 16:28:40 +0300 Subject: [PATCH 196/213] soc/power9/: optional support for Infineon I2C TPM1 chips Change-Id: I9a8bc4386c637ebf8398af0d1c4d61bc051cc001 Signed-off-by: Sergii Dmytruk --- src/mainboard/raptor-cs/talos-2/Kconfig | 18 +++++++++ src/mainboard/raptor-cs/talos-2/memlayout.ld | 1 + src/soc/ibm/power9/chip.c | 42 ++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig index 0b6ace4c307..e4fd4d3aeba 100644 --- a/src/mainboard/raptor-cs/talos-2/Kconfig +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -27,6 +27,24 @@ config BOARD_SPECIFIC_OPTIONS select IPMI_BT_ROMSTAGE select FLATTENED_DEVICE_TREE +config TALOS_2_INFINEON_TPM_1 + bool "I2C TPM1 chip compatible with SLB9635TT" + default n + select I2C_TPM + select MAINBOARD_HAS_TPM1 + select TPM_MEASURED_BOOT + select TPM_LOG_TPM2 + +config DRIVER_TPM_I2C_BUS + hex + 
default 0x02 + depends on TALOS_2_INFINEON_TPM_1 + +config DRIVER_TPM_I2C_ADDR + hex "I2C TPM chip address" + default 0x20 + depends on TALOS_2_INFINEON_TPM_1 + config MEMLAYOUT_LD_FILE string default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/memlayout.ld" diff --git a/src/mainboard/raptor-cs/talos-2/memlayout.ld b/src/mainboard/raptor-cs/talos-2/memlayout.ld index e4422b63e97..749a920964f 100644 --- a/src/mainboard/raptor-cs/talos-2/memlayout.ld +++ b/src/mainboard/raptor-cs/talos-2/memlayout.ld @@ -63,6 +63,7 @@ SECTIONS FMAP_CACHE( 0xF8230000, 4K) CBFS_MCACHE( 0xF8231000, 8K) TIMESTAMP( 0xF8233000, 4K) + TPM_LOG( 0xF8234000, 2K) ROMSTAGE( 0xF8240000, 256K) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index c216fe68fb8..c761131514f 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -13,6 +13,8 @@ #include #include #include // xzalloc +#include + #include "istep_13_scom.h" #include "chip.h" @@ -489,6 +491,45 @@ static void add_cb_fdt_data(struct device_tree *tree) dt_add_reg_prop(coreboot_node, reg_addrs, reg_sizes, 2, addr_cells, size_cells); } +static void add_tpm_node(struct device_tree *tree) +{ +#if CONFIG(TALOS_2_INFINEON_TPM_1) + uint32_t xscom_base = 0xA0000 | (CONFIG_DRIVER_TPM_I2C_BUS << 12); + uint8_t port = (CONFIG_DRIVER_TPM_I2C_ADDR & 0x80 ? 1 : 0); + uint8_t addr = (CONFIG_DRIVER_TPM_I2C_ADDR & 0x7F); + + struct device_tree_node *tpm; + struct device_tree_node *sb; + char path[64]; + + /* TODO: is the XSCOM address always the same? */ + snprintf(path, sizeof(path), "/xscom@603fc00000000/i2cm@%x/i2c-bus@%x/tpm@%x", + xscom_base, port, addr); + + tpm = dt_find_node_by_path(tree, path, NULL, NULL, 1); + + dt_add_string_prop(tpm, "compatible", "infineon,slb9645tt"); + dt_add_u32_prop(tpm, "reg", addr); + +#if CONFIG(TPM_MEASURED_BOOT) + const struct cbmem_entry *evtlog; + + evtlog = cbmem_entry_find(CBMEM_ID_TPM2_TCG_LOG); + if (evtlog == NULL) + die("TPM events log is missing from CBMEM!"); + + dt_add_u64_prop(tpm, "ibm,sml-base", (uintptr_t)cbmem_entry_start(evtlog)); + dt_add_u32_prop(tpm, "ibm,sml-size", cbmem_entry_size(evtlog)); + + /* Not hard-coding into DTS-file in case will need to store key hash here */ + sb = dt_find_node_by_path(tree, "/ibm,secureboot", NULL, NULL, 1); + dt_add_string_prop(sb, "compatible", "ibm,secureboot-v1-softrom"); + dt_add_string_prop(sb, "hash-algo", "sha512"); + dt_add_u32_prop(sb, "trusted-enabled", 1); +#endif +#endif +} + /* * Device tree passed to Skiboot has to have phandles set either for all nodes * or none at all. 
Because relative phandles are set for cpu->l2_cache->l3_cache @@ -503,6 +544,7 @@ static int dt_platform_update(struct device_tree *tree, uint8_t chips) add_memory_nodes(tree); add_dimm_sensor_nodes(tree, chips); add_cb_fdt_data(tree); + add_tpm_node(tree); /* Find "cpus" node */ cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0); From 85dc271b994f59b9274dbd5867d64d555f6cc855 Mon Sep 17 00:00:00 2001 From: Kacper Stojek Date: Thu, 14 Jul 2022 13:59:02 +0200 Subject: [PATCH 197/213] soc/power9/: change log levels in istep files Change-Id: Ibe35da73ebf116f6a1821df758c13b054d57a96a Signed-off-by: Kacper Stojek Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/fsi.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/fsi.h b/src/soc/ibm/power9/fsi.h index 7c9cf26b0c2..9b872e04160 100644 --- a/src/soc/ibm/power9/fsi.h +++ b/src/soc/ibm/power9/fsi.h @@ -25,11 +25,18 @@ uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_ static inline uint32_t read_fsi(uint8_t chip, uint32_t addr) { - return fsi_op(chip, addr, /*data=*/0, /*is_read=*/true, /*size=*/4); + uint32_t data = fsi_op(chip, addr, /*data=*/0, /*is_read=*/true, /*size=*/4); +#ifdef DEBUG_FSI + printk(BIOS_DEBUG, "read_fsi(%d, 0x%08x) = 0x%08x\n", chip, addr, data); +#endif + return data; } static inline void write_fsi(uint8_t chip, uint32_t addr, uint32_t data) { +#ifdef DEBUG_FSI + printk(BIOS_DEBUG, "write_fsi(%d, 0x%08x) = 0x%08x\n", chip, addr, data); +#endif (void)fsi_op(chip, addr, data, /*is_read=*/false, /*size=*/4); } @@ -55,7 +62,11 @@ static inline uint32_t cfam_addr_to_fsi(uint32_t cfam) static inline uint32_t read_cfam(uint8_t chip, uint32_t addr) { - return read_fsi(chip, cfam_addr_to_fsi(addr)); + uint32_t data = read_fsi(chip, cfam_addr_to_fsi(addr)); +#ifdef DEBUG_FSI + printk(BIOS_DEBUG, "read_cfam(%d, 0x%08x) = 0x%08x\n", chip, addr, data); +#endif + return data; } static inline void write_cfam(uint8_t chip, uint32_t addr, uint32_t data) From 99289490b59e507a2967247b6d2b231da3bfa30e Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 29 Nov 2022 14:12:27 +0100 Subject: [PATCH 198/213] commonlib/helpers.h: use unsigned literals in definitions This protects against some of unintentional integer promotions, e.g.: uint16_t freq_mhz = 2148; // or bigger uint64_t freq = freq_mhz * MHz; In this example, MHz used to be treated as int32_t, and because int32_t can represent every value that uin16_t can, multiplication was being performed with both arguments treated as int32_t. During assignment, result of multiplication is promoted to int64_t (because it can represent each value that int32_t can), and finally implicitly converted to uint64_t. Promotions preserve the value, including the sign, so if result of multiplication is negative, the same negative number (but extended to 64 bits) is converted to unsigned number. Signed-off-by: Krystian Hebel Change-Id: Ie60a6ee82db80328e44639175272cc8097f36c3b --- src/commonlib/bsd/include/commonlib/bsd/helpers.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/commonlib/bsd/include/commonlib/bsd/helpers.h b/src/commonlib/bsd/include/commonlib/bsd/helpers.h index 49953055b04..73cd25f8f42 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/helpers.h +++ b/src/commonlib/bsd/include/commonlib/bsd/helpers.h @@ -72,13 +72,13 @@ } while (0) /* Standard units. 
*/ -#define KiB (1<<10) -#define MiB (1<<20) -#define GiB (1<<30) +#define KiB (1U<<10) +#define MiB (1U<<20) +#define GiB (1U<<30) -#define KHz (1000) -#define MHz (1000 * KHz) -#define GHz (1000 * MHz) +#define KHz (1000U) +#define MHz (1000U * KHz) +#define GHz (1000U * MHz) #ifndef offsetof #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) From 80e6155a3779a77283bc8e96ad895fa09a74e534 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 8 Dec 2022 20:23:02 +0100 Subject: [PATCH 199/213] soc/power9: prepare loop for secondary threads Signed-off-by: Krystian Hebel Change-Id: I2bc14191268e6049cd63832cf0ff27bda0cc032d --- src/soc/ibm/power9/chip.c | 11 +++++----- src/soc/ibm/power9/homer.c | 25 +++++++++++++++++++++++ src/soc/ibm/power9/int_vectors.S | 35 +++++++++++++++++++++++++++++++- 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index c761131514f..0c0569f53f4 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -814,6 +814,8 @@ static void *load_fdt(const char *dtb_file, uint8_t chips) return fdt; } +extern struct prog *__payload; + void platform_prog_run(struct prog *prog) { uint8_t chips = fsi_get_present_chips(); @@ -845,12 +847,11 @@ void platform_prog_run(struct prog *prog) /* * Now that the payload and its interrupt vectors are already loaded - * perform 16.2. - * - * This MUST be done as late as possible so that none of the newly - * activated threads start execution before current thread jumps into - * the payload. + * let secondary threads jump into payload. The order of jumping into + * Skiboot doesn't matter, as long as the thread that lands as first + * has FDT in %r3. */ + __payload = prog; for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) activate_slave_cores(chip); diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index cb957807686..ffb3782a7e6 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -969,6 +969,22 @@ static void block_wakeup_int(int core, int state) write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); } +struct prog *__payload; + +static void secondary_entry(void) +{ + while (__payload == NULL) + asm volatile("stop" ::: "memory"); + + /* + * arch_prog_run(__payload); + * + * Doing it asm way to avoid using stack and overwriting %lr. + */ + register void *r3 asm ("r3") = __payload; + asm volatile("b arch_prog_run" : "+r"(r3)); +} + /* * Some time will be lost between entering and exiting STOP 15, but we don't * have a way of calculating it. In theory we could read tick count from one of @@ -982,6 +998,8 @@ struct save_state { uint64_t nia; uint64_t tb; uint64_t lr; + uint64_t bsp_pir; + void *sec_entry; } sstate; static void cpu_winkle(void) @@ -1000,6 +1018,13 @@ static void cpu_winkle(void) write_spr(SPR_LPCR, lpcr); write_spr(SPR_PSSCR, 0x00000000003F00FF); sstate.msr = read_msr(); + sstate.bsp_pir = read_spr(SPR_PIR); + + /* + * Not used by current thread, but will be used later by secondary + * threads, may as well set this now. Note that this is OPD address. 
+ */ + sstate.sec_entry = secondary_entry; /* * Cannot clobber: diff --git a/src/soc/ibm/power9/int_vectors.S b/src/soc/ibm/power9/int_vectors.S index 4a7b58db1fc..455d68b71ba 100644 --- a/src/soc/ibm/power9/int_vectors.S +++ b/src/soc/ibm/power9/int_vectors.S @@ -23,21 +23,35 @@ /* * System reset vector (0x100) * + * Main thread: * - reload r1 and r2 from saved state * - add saved TB value to the current value (self-restore took some time) * - TB can't be written with one mtspr * - have to use 3 writes to deal with possible overflow of lower half - * - move nia and msr to HSRR0/1 + * - move NIA and MSR to HSRR0/1 * - return from hypervisor interrupt * - due to clobbers in inline assembly in cpu_winkle all other registers are * reloaded by compiler * - contents of vector and floating point registers are lost + * + * Secondary threads: + * - copy MSR from main thread + * - return into stack-less secondary_entry() + * - can't loop in this handler, it will be overwritten by the payload + * - must also load TOC pointer to access global data */ .globl sys_reset_int sys_reset_int: li %r0, 0 /* WARNING: this assumes that ramstage is not relocatable */ LOAD_IMM64(%r3, sstate) + + /* PIR of main thread */ + ld %r4, 48(%r3) + mfspr %r5, 1023 + cmpd %r4, %r5 + bne .secondary + /* Time Base */ ld %r2, 32(%r3) mftb %r4 @@ -46,6 +60,7 @@ sys_reset_int: mttbl %r0 mttbu %r5 mttbl %r4 + /* Stack */ ld %r1, 0(%r3) /* TOC */ @@ -60,6 +75,24 @@ sys_reset_int: ld %r4, 40(%r3) mtlr %r4 hrfid + +.secondary: + /* MSR -> HSRR1 */ + ld %r4, 16(%r3) + mtspr 315, %r4 + /* NIA -> HSRR0 and TOC pointer */ + ld %r2, 56(%r3) + ld %r4, 0(%r2) + mtspr 314, %r4 + ld %r2, 8(%r2) + /* Link register */ + li %r4, 0x100 + mtlr %r4 + /* Set PSSCR for STOP 1 */ + lis %r0, 0x004f + ori %r0, %r0, 0x0311 + mtspr 855, %r0 + hrfid .globl sys_reset_int_end sys_reset_int_end: From e937676ce7ea19d250476ec2695132f246cb2f37 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 8 Dec 2022 20:31:20 +0100 Subject: [PATCH 200/213] soc/power9: enable auto special wakeup for functional cores This is required for proper power management by OS. Signed-off-by: Krystian Hebel Change-Id: I6c16db1dc9c7af9cf33bf2ab271af21507a20266 --- src/soc/ibm/power9/chip.c | 132 +++++++++++++++++++++++++------------ src/soc/ibm/power9/homer.c | 4 -- 2 files changed, 90 insertions(+), 46 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 0c0569f53f4..9cc759011d6 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include // xzalloc @@ -679,6 +680,59 @@ static void rng_init(uint8_t chips) } } +static void activate_secondary_threads(uint8_t chip) +{ + enum { DOORBELL_MSG_TYPE = 0x0000000028000000 }; + + uint8_t i; + + /* Read OCC CCSR written by the code earlier */ + const uint64_t functional_cores = read_scom(chip, 0x0006C090); + + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. 
+ */ + const uint64_t chip_msg = DOORBELL_MSG_TYPE | PPC_PLACE(chip, 49, 4); + + /* Find and process the first core in a separate loop to slightly + * simplify processing of all the other cores by removing a conditional */ + for (i = 0; i < MAX_CORES_PER_CHIP; ++i) { + uint8_t thread; + uint64_t core_msg; + + if (!IS_EC_FUNCTIONAL(i, functional_cores)) + continue; + + /* Message value for thread 0 of the current core */ + core_msg = chip_msg | (i << 2); + + /* Skip sending doorbell to the current thread of the current core */ + for (thread = (chip == 0 ? 1 : 0); thread < 4; ++thread) { + register uint64_t msg = core_msg | thread; + asm volatile("msgsnd %0" :: "r" (msg)); + } + + break; + } + + for (++i; i < MAX_CORES_PER_CHIP; ++i) { + uint8_t thread; + uint64_t core_msg; + + if (!IS_EC_FUNCTIONAL(i, functional_cores)) + continue; + + /* Message value for thread 0 of the i-th core */ + core_msg = chip_msg | (i << 2); + + for (thread = 0; thread < 4; ++thread) { + register uint64_t msg = core_msg | thread; + asm volatile("msgsnd %0" :: "r" (msg)); + } + } +} + static void enable_soc_dev(struct device *dev) { int chip, idx = 0; @@ -737,59 +791,53 @@ static void enable_soc_dev(struct device *dev) rng_init(chips); istep_18_11(chips, &tod_mdmt); istep_18_12(chips, tod_mdmt); -} - -static void activate_slave_cores(uint8_t chip) -{ - enum { DOORBELL_MSG_TYPE = 0x0000000028000000 }; - - uint8_t i; - - /* Read OCC CCSR written by the code earlier */ - const uint64_t functional_cores = read_scom(chip, 0x0006C090); /* - * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, - * when chip ID is actually a group ID and "chip ID" field is zero. + * We have to disable FSP special wakeups on all cores, but to do so + * the core has to be powered up. It would be enough to start just one + * thread on each core, but this makes hand-off to payload much more + * complicated. To keep things simple, start each thread now. They will + * stay in STOP 1 until platform_prog_run() tells them to start the + * payload. */ - const uint64_t chip_msg = DOORBELL_MSG_TYPE | PPC_PLACE(chip, 49, 4); + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + activate_secondary_threads(chip); + } - /* Find and process the first core in a separate loop to slightly - * simplify processing of all the other cores by removing a conditional */ - for (i = 0; i < MAX_CORES_PER_CHIP; ++i) { - uint8_t thread; - uint64_t core_msg; + /* + * Give some time for cores to actually wake up. 3800 us is "estimated + * target and subject to change" latency for wake-up from STOP 11 + * according to POWER9 Processor User's Manual, but this proven to be + * not enough in tests. 5 ms worked each time, using twice as much to + * be safe. + */ + mdelay(10); - if (!IS_EC_FUNCTIONAL(i, functional_cores)) + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) continue; - /* Message value for thread 0 of the current core */ - core_msg = chip_msg | (i << 2); + const uint64_t cores = read_scom(chip, 0x0006C090); - /* Skip sending doorbell to the current thread of the current core */ - for (thread = (chip == 0 ? 
1 : 0); thread < 4; ++thread) { - register uint64_t msg = core_msg | thread; - asm volatile("msgsnd %0" :: "r" (msg)); - } - - break; - } - - for (++i; i < MAX_CORES_PER_CHIP; ++i) { - uint8_t thread; - uint64_t core_msg; - - if (!IS_EC_FUNCTIONAL(i, functional_cores)) - continue; + for (uint8_t core = 0; core < MAX_CORES_PER_CHIP; ++core) { + if (!IS_EC_FUNCTIONAL(core, cores)) + continue; - /* Message value for thread 0 of the i-th core */ - core_msg = chip_msg | (i << 2); + /* Enable auto special wakeup for functional cores. */ + write_scom_for_chiplet(chip, EP00_CHIPLET_ID + core/4, + 0x1001203B + 0x400 * ((core/2) % 2), + PPC_BIT(12 + (core % 2))); - for (thread = 0; thread < 4; ++thread) { - register uint64_t msg = core_msg | thread; - asm volatile("msgsnd %0" :: "r" (msg)); + /* De-assert FSP special wakeup before activate_occ(). */ + scom_and_for_chiplet(chip, EC00_CHIPLET_ID + core, 0x200F010B, + ~PPC_BIT(0)); } } + + printk(BIOS_DEBUG, "Activating OCC...\n"); + activate_occ(chips, (void *)(top * 1024)); + printk(BIOS_DEBUG, "Done activating OCC\n"); } static void *load_fdt(const char *dtb_file, uint8_t chips) @@ -854,7 +902,7 @@ void platform_prog_run(struct prog *prog) __payload = prog; for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { if (chips & (1 << chip)) - activate_slave_cores(chip); + activate_secondary_threads(chip); } } diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c index ffb3782a7e6..2ad897caa98 100644 --- a/src/soc/ibm/power9/homer.c +++ b/src/soc/ibm/power9/homer.c @@ -1559,10 +1559,6 @@ static void istep_21_1(uint8_t chips, struct homer_st *homers, const uint64_t *c start_pm_complex(chip, &homers[chip], cores[chip]); } printk(BIOS_DEBUG, "Done starting PM complex\n"); - - printk(BIOS_DEBUG, "Activating OCC...\n"); - activate_occ(chips, homers); - printk(BIOS_DEBUG, "Done activating OCC\n"); } /* Extracts rings for a specific Programmable PowerPC-lite Engine */ From be5a5c8ddfa89281de834b1bd4c5ffb42aa84e89 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 28 Mar 2023 21:22:55 +0200 Subject: [PATCH 201/213] util/cbmem: add option to read CBMEM from file instead of RAM CBMEM contents may be dumped to file and parsed offline, e.g. for QEMU targets. This will also be used to access CBMEM exposed in sysfs after coreboot driver in Linux is updated. Signed-off-by: Krystian Hebel Change-Id: I273e1eed320b3d99a57ae5b361b79238f40cf825 --- util/cbmem/cbmem.c | 141 +++++++++++++++++++++++++++++++-------------- 1 file changed, 99 insertions(+), 42 deletions(-) diff --git a/util/cbmem/cbmem.c b/util/cbmem/cbmem.c index b4106a1568b..9609d79d5d4 100644 --- a/util/cbmem/cbmem.c +++ b/util/cbmem/cbmem.c @@ -58,12 +58,20 @@ static int verbose = 0; #define debug(x...) if(verbose) printf(x) /* File handle used to access /dev/mem */ -static int mem_fd; +static int mem_fd = -1; static struct mapping lbtable_mapping; +/* File handle used to parse CBMEM from file instead of RAM */ +static int file_fd = -1; + /* TSC frequency from the LB_TAG_TSC_INFO record. 0 if not present. 
*/ static uint32_t tsc_freq_khz = 0; +static struct lb_cbmem_ref timestamps; +static struct lb_cbmem_ref console; +static struct lb_cbmem_ref tcpa_log; +static struct lb_memory_range cbmem; + static void die(const char *msg) { if (msg) @@ -101,40 +109,74 @@ static void *mapping_virt(const struct mapping *mapping) static void *map_memory_with_prot(struct mapping *mapping, unsigned long long phys, size_t sz, int prot) { - void *v; - unsigned long long page_size; + if (file_fd < 0) { + void *v; + unsigned long long page_size; - page_size = system_page_size(); + page_size = system_page_size(); - mapping->virt = NULL; - mapping->offset = phys % page_size; - mapping->virt_size = sz + mapping->offset; - mapping->size = sz; - mapping->phys = phys; - - if (size_to_mib(mapping->virt_size) == 0) { - debug("Mapping %zuB of physical memory at 0x%llx (requested 0x%llx).\n", - mapping->virt_size, phys - mapping->offset, phys); - } else { - debug("Mapping %zuMB of physical memory at 0x%llx (requested 0x%llx).\n", - size_to_mib(mapping->virt_size), phys - mapping->offset, - phys); - } + mapping->virt = NULL; + mapping->offset = phys % page_size; + mapping->virt_size = sz + mapping->offset; + mapping->size = sz; + mapping->phys = phys; - v = mmap(NULL, mapping->virt_size, prot, MAP_SHARED, mem_fd, - phys - mapping->offset); + if (size_to_mib(mapping->virt_size) == 0) { + debug("Mapping %zuB of physical memory at 0x%llx (requested 0x%llx).\n", + mapping->virt_size, phys - mapping->offset, phys); + } else { + debug("Mapping %zuMB of physical memory at 0x%llx (requested 0x%llx).\n", + size_to_mib(mapping->virt_size), phys - mapping->offset, + phys); + } - if (v == MAP_FAILED) { - debug("Mapping failed %zuB of physical memory at 0x%llx.\n", - mapping->virt_size, phys - mapping->offset); - return NULL; - } + v = mmap(NULL, mapping->virt_size, prot, MAP_SHARED, mem_fd, + phys - mapping->offset); + + if (v == MAP_FAILED) { + debug("Mapping failed %zuB of physical memory at 0x%llx.\n", + mapping->virt_size, phys - mapping->offset); + return NULL; + } + + mapping->virt = v; + + if (mapping->offset != 0) + debug(" ... padding virtual address with 0x%zx bytes.\n", + mapping->offset); + } else { + ssize_t ret; + mapping->virt = malloc(sz); + mapping->offset = 0; + mapping->virt_size = sz; + mapping->size = sz; + mapping->phys = phys; + + debug("map_memory phys = %llx, size = %zuB, cbmem.start = %lx\n", phys, sz, cbmem.start); + + if (mapping->virt == NULL) { + debug("Couldn't allocate %zuB of memory.\n", sz); + return NULL; + } - mapping->virt = v; + if (lseek(file_fd, mapping->phys - cbmem.start, SEEK_SET) < 0) { + debug("Couldn't read file from offset %llx.\n", + mapping->phys - cbmem.start); + free(mapping->virt); + return NULL; + } - if (mapping->offset != 0) - debug(" ... 
padding virtual address with 0x%zx bytes.\n", - mapping->offset); + ret = read(file_fd, mapping->virt, sz); + if (ret < 0) { + debug("Error reading file: %s\n", strerror(errno)); + free(mapping->virt); + return NULL; + } + if ((size_t)ret != sz) { + debug("Truncated read from offset %llx, requested %zuB, got %zuB.\n", + mapping->phys - cbmem.start, sz, ret); + } + } return mapping_virt(mapping); } @@ -153,7 +195,11 @@ static int unmap_memory(struct mapping *mapping) if (mapping->virt == NULL) return -1; - munmap(mapping->virt, mapping->virt_size); + if (file_fd < 0) + munmap(mapping->virt, mapping->virt_size); + else + free(mapping->virt); + mapping->virt = NULL; mapping->offset = 0; mapping->virt_size = 0; @@ -265,11 +311,6 @@ static int find_cbmem_entry(uint32_t id, uint64_t *addr, size_t *size) * none found. */ -static struct lb_cbmem_ref timestamps; -static struct lb_cbmem_ref console; -static struct lb_cbmem_ref tpm_cb_log; -static struct lb_memory_range cbmem; - /* This is a work-around for a nasty problem introduced by initially having * pointer sized entries in the lb_cbmem_ref structures. This caused problems * on 64bit x86 systems because coreboot is 32bit on those systems. @@ -1340,6 +1381,7 @@ static void print_usage(const char *name, int exit_code) " -S | --stacked-timestamps: print stacked timestamps (e.g. for flame graph tools)\n" " -a | --add-timestamp ID: append timestamp with ID\n" " -L | --tcpa-log print TPM log\n" + " -f | --file FILE: read CBMEM from FILE instead of memory\n" " -V | --verbose: verbose (debugging) output\n" " -v | --version: print the version\n" " -h | --help: print this help\n" @@ -1493,12 +1535,13 @@ int main(int argc, char** argv) {"add-timestamp", required_argument, 0, 'a'}, {"hexdump", 0, 0, 'x'}, {"rawdump", required_argument, 0, 'r'}, + {"file", required_argument, 0, 'f'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, 'v'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; - while ((opt = getopt_long(argc, argv, "c12B:CltTSa:LxVvh?r:", + while ((opt = getopt_long(argc, argv, "c12B:CltTSa:LxVvh?r:f:", long_options, &option_index)) != EOF) { switch (opt) { case 'c': @@ -1539,6 +1582,14 @@ int main(int argc, char** argv) print_defaults = 0; rawdump_id = strtoul(optarg, NULL, 16); break; + case 'f': + file_fd = open(optarg, O_RDONLY, 0); + if (file_fd < 0) { + fprintf(stderr, "Failed to open file '%s': %s\n", + optarg, strerror(errno)); + return 1; + } + break; case 't': timestamp_type = TIMESTAMPS_PRINT_NORMAL; print_defaults = 0; @@ -1580,11 +1631,13 @@ int main(int argc, char** argv) print_usage(argv[0], 1); } - mem_fd = open("/dev/mem", timestamp_id ? O_RDWR : O_RDONLY, 0); - if (mem_fd < 0) { - fprintf(stderr, "Failed to gain memory access: %s\n", - strerror(errno)); - return 1; + if (file_fd < 0) { + mem_fd = open("/dev/mem", timestamp_id ? O_RDWR : O_RDONLY, 0); + if (mem_fd < 0) { + fprintf(stderr, "Failed to gain memory access: %s\n", + strerror(errno)); + return 1; + } } #if defined(__arm__) || defined(__aarch64__) @@ -1681,6 +1734,10 @@ int main(int argc, char** argv) unmap_memory(&lbtable_mapping); - close(mem_fd); + if (file_fd >= 0) + close(file_fd); + if (mem_fd >= 0) + close(mem_fd); + return 0; } From eb5f9c83d124de1fcef7574fa8bd865da3fd116d Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 3 Jul 2023 13:29:35 +0200 Subject: [PATCH 202/213] util/cbmem: search for CBMEM in sysfs by default Kernel driver may expose CBMEM in sysfs. By doing so, it is possible to use cbmem utility even with CONFIG_STRICT_DEVMEM. 
If there is no file exposed, cbmem falls back to probing memory directly. Signed-off-by: Krystian Hebel --- util/cbmem/cbmem.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/util/cbmem/cbmem.c b/util/cbmem/cbmem.c index 9609d79d5d4..96595f542a0 100644 --- a/util/cbmem/cbmem.c +++ b/util/cbmem/cbmem.c @@ -1631,6 +1631,11 @@ int main(int argc, char** argv) print_usage(argv[0], 1); } + /* Check if Linux driver exposes CBMEM in sysfs. */ + if (file_fd < 0) { + file_fd = open("/sys/firmware/cbmem", O_RDONLY, 0); + } + if (file_fd < 0) { mem_fd = open("/dev/mem", timestamp_id ? O_RDWR : O_RDONLY, 0); if (mem_fd < 0) { From dcc5e1a6941beda4728b331663b834acff4b5ef0 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 22 Jun 2022 16:27:57 +0300 Subject: [PATCH 203/213] soc/power9/chip.c: exclude reserved ranges from system RAM This way this memory will be available for mapping via /dev/mem even when kernel is configured with CONFIG_[IO_]STRICT_DEVMEM. This makes CBMEM available in host via cbmem tool. Change-Id: I968067ebb8dfcf816fe72ea9c00985d0d10f5fbb Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/chip.c | 103 +++++++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 34 deletions(-) diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c index 9cc759011d6..f1c18fcafe2 100644 --- a/src/soc/ibm/power9/chip.c +++ b/src/soc/ibm/power9/chip.c @@ -28,6 +28,11 @@ */ #define CHIP_ID(chip) ((chip) << 3) +struct mem_map { + struct memranges mem; + struct memranges reserved; +}; + /* Copy of data put together by the romstage */ mcbist_data_t mem_data[MAX_CHIPS]; @@ -276,14 +281,15 @@ static void fill_cpu_node(struct device_tree *tree, dt_add_u32_prop(node, "tlb-sets", 4); } -static void add_memory_node(struct device_tree *tree, uint8_t chip, uint64_t reg) +static void add_memory_node(struct device_tree *tree, uint8_t chip, uint64_t start, + uint64_t len) { struct device_tree_node *node; /* /memory@0123456789abcdef - 24 characters + null byte */ char path[26] = {}; - union {uint32_t u32[2]; uint64_t u64;} addr = { .u64 = base_k(reg) * KiB }; - union {uint32_t u32[2]; uint64_t u64;} size = { .u64 = size_k(reg) * KiB }; + union {uint32_t u32[2]; uint64_t u64;} addr = { .u64 = start }; + union {uint32_t u32[2]; uint64_t u64;} size = { .u64 = len }; snprintf(path, sizeof(path), "/memory@%llx", addr.u64); node = dt_find_node_by_path(tree, path, NULL, NULL, 1); @@ -296,54 +302,83 @@ static void add_memory_node(struct device_tree *tree, uint8_t chip, uint64_t reg dt_add_u32_prop(node, "ibm,chip-id", chip << 3); } -static bool add_mem_reserve_node(const struct range_entry *r, void *arg) +static bool build_memory_map(const struct range_entry *r, void *arg) { - struct device_tree *tree = arg; + struct mem_map *map = arg; - if (range_entry_tag(r) != BM_MEM_RAM) { - struct device_tree_reserve_map_entry *entry = xzalloc(sizeof(*entry)); - entry->start = range_entry_base(r); - entry->size = range_entry_size(r); + /* + * Kernel likes its available memory areas at least 1MB + * aligned, let's trim the regions such that unaligned padding + * is added to reserved memory. 
+ */ + if (range_entry_tag(r) == BM_MEM_RAM) { + uint64_t new_start = ALIGN_UP(range_entry_base(r), 1 * MiB); + uint64_t new_end = ALIGN_DOWN(range_entry_end(r), 1 * MiB); - list_insert_after(&entry->list_node, &tree->reserve_map); + if (new_start != range_entry_base(r)) + memranges_insert(&map->reserved, range_entry_base(r), + new_start - range_entry_base(r), BM_MEM_RESERVED); + + if (new_start != new_end) + memranges_insert(&map->mem, new_start, new_end - new_start, BM_MEM_RAM); + + if (new_end != range_entry_end(r)) + memranges_insert(&map->reserved, new_end, range_entry_end(r) - new_end, + BM_MEM_RESERVED); + } else { + memranges_insert(&map->reserved, range_entry_base(r), range_entry_size(r), + BM_MEM_RESERVED); } return true; } +static void add_reserved_memory_node(struct device_tree *tree, uint64_t start, uint64_t size) +{ + struct device_tree_node *node; + char path[45]; + + snprintf(path, sizeof(path), "/reserved-memory/coreboot@%llx", start); + node = dt_find_node_by_path(tree, path, NULL, NULL, 1); + /* Use 2 cells each for address and size */ + dt_add_reg_prop(node, &start, &size, 1, 2, 2); +} + static void add_memory_nodes(struct device_tree *tree) { - uint8_t chip; - uint8_t chips = fsi_get_present_chips(); + struct mem_map map; + const struct range_entry *r; - /* - * Not using bootmem_walk_os_mem() to be consistent with Hostboot, - * whose "memory" nodes include reserved regions. - */ - for (chip = 0; chip < MAX_CHIPS; chip++) { - int mcs_i; + memranges_init_empty(&map.mem, NULL, 0); + memranges_init_empty(&map.reserved, NULL, 0); - if (!(chips & (1 << chip))) - continue; + bootmem_walk_os_mem(build_memory_map, &map); - for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { - uint64_t reg; - chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + memranges_each_entry(r, &map.mem) { + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + */ + uint8_t chip = (range_entry_base(r) >> 45) & 0xF; - /* These registers are undocumented, see istep 14.5. 
*/ - /* MCS_MCFGP */ - reg = read_scom_for_chiplet(chip, nest, 0x0501080A); - if (reg & PPC_BIT(0)) - add_memory_node(tree, chip, reg); + add_memory_node(tree, chip, range_entry_base(r), range_entry_size(r)); + } - /* MCS_MCFGPM */ - reg = read_scom_for_chiplet(chip, nest, 0x0501080C); - if (reg & PPC_BIT(0)) - add_memory_node(tree, chip, reg); - } + /* Createe properly initialized /reserved-memory/ node */ + (void)dt_init_reserved_memory_node(tree); + + memranges_each_entry(r, &map.reserved) { + struct device_tree_reserve_map_entry *entry = xzalloc(sizeof(*entry)); + entry->start = range_entry_base(r); + entry->size = range_entry_size(r); + + add_reserved_memory_node(tree, entry->start, entry->size); + + list_insert_after(&entry->list_node, &tree->reserve_map); } - bootmem_walk_os_mem(add_mem_reserve_node, tree); + memranges_teardown(&map.mem); + memranges_teardown(&map.reserved); } /* Finds first root complex for a given chip that's present in DT else returns NULL */ From a73dac76634f16a9ecfb72c6337a2befe29fa1f8 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Thu, 30 Mar 2023 13:33:41 +0200 Subject: [PATCH 204/213] payloads/external/skiboot/Kconfig: change repo and revision Signed-off-by: Krystian Hebel Change-Id: Ia2f5687f8b7901847bc8ff3143609bde434f49cb --- payloads/external/skiboot/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/payloads/external/skiboot/Kconfig b/payloads/external/skiboot/Kconfig index 22024cd5fa1..3cb7b96c108 100644 --- a/payloads/external/skiboot/Kconfig +++ b/payloads/external/skiboot/Kconfig @@ -8,14 +8,14 @@ config PAYLOAD_FILE config SKIBOOT_GIT_REPO string "Git repository of skiboot payload" default "https://github.com/open-power/skiboot" if !BOARD_RAPTOR_CS_TALOS_2 - default "https://git.raptorcs.com/git/talos-skiboot" if BOARD_RAPTOR_CS_TALOS_2 + default "https://github.com/Dasharo/skiboot.git" if BOARD_RAPTOR_CS_TALOS_2 help Git repository which will be used to clone skiboot. config SKIBOOT_REVISION string "Revision of skiboot payload" default "d93ddbd39b4eeac0bc11dacbdadea76df2996c13" if BOARD_EMULATION_QEMU_POWER9 - default "9858186353f2203fe477f316964e03609d12fd1d" if BOARD_RAPTOR_CS_TALOS_2 + default "fa060c2c98b6eefde06a8bd78d1d8096c2bede37" if BOARD_RAPTOR_CS_TALOS_2 help Revision, that skiboot repository will be checked out to, before building an image. From d3427dfbffce0119b28a167f178fe7826d25972a Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 23 Oct 2022 00:55:03 +0300 Subject: [PATCH 205/213] util/cbmem: add parsing of TPM logs per standard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit coreboot is made to export the range allocated for the log. The range is helpful as there is no easy way to determine the size of the log from its header without parsing vendor info. 
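For illustration only (not part of this change): a minimal sketch of how a consumer
could locate the exported log. The record layout mirrors the struct lb_range and
LB_TAG_TPM_STD_LOG definitions added below, the walk assumes a little-endian host
just like util/cbmem itself does, and find_tpm_log() is a hypothetical helper, not
an existing API.

#include <stddef.h>
#include <stdint.h>

#define LB_TAG_TPM_STD_LOG 0x0045	/* tag value added by this patch */

/*
 * Simplified mirrors of the records used here; the authoritative
 * definitions live in commonlib/include/commonlib/coreboot_tables.h.
 */
struct lb_record {
	uint32_t tag;
	uint32_t size;
};

struct lb_range {
	uint32_t tag;
	uint32_t size;
	uint64_t range_start;
	uint32_t range_size;
};

/*
 * Walk `len` bytes of already-mapped table records and return the TPM
 * log range record, or NULL if the tag is absent.
 */
static const struct lb_range *find_tpm_log(const void *records, size_t len)
{
	const uint8_t *p = records;
	const uint8_t *end = p + len;

	while (p + sizeof(struct lb_record) <= end) {
		const struct lb_record *rec = (const void *)p;

		if (rec->tag == LB_TAG_TPM_STD_LOG)
			return (const struct lb_range *)rec;
		if (rec->size == 0)
			break;	/* malformed table, avoid an endless loop */
		p += rec->size;
	}

	return NULL;
}

The returned range_start/range_size pair is exactly what dump_tpm_log() below maps
before deciding between the TPM 1.2 and TPM 2.0 parsers.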
Change-Id: Ib76dc7dec56dd1789a219539a1ac05a958f47a5c Ticket: https://ticket.coreboot.org/issues/425 Signed-off-by: Krystian Hebel Signed-off-by: Michał Żygowski Signed-off-by: Sergii Dmytruk --- .../bsd/include/commonlib/bsd/tpm_log_defs.h | 64 +++++ .../include/commonlib/coreboot_tables.h | 1 + src/lib/coreboot_table.c | 26 ++ util/cbmem/cbmem.c | 243 +++++++++++++++++- 4 files changed, 331 insertions(+), 3 deletions(-) diff --git a/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h index 144d55a3319..d90182d021c 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h +++ b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h @@ -9,6 +9,26 @@ #define TCPA_SPEC_ID_EVENT_SIGNATURE "Spec ID Event00" #define TCG_EFI_SPEC_ID_EVENT_SIGNATURE "Spec ID Event03" +struct tcpa_log_entry { + uint32_t pcr; + uint32_t event_type; + uint8_t digest[20]; + uint32_t event_data_size; + uint8_t event[0]; +} __packed; + +struct tcpa_spec_entry { + struct tcpa_log_entry entry; + uint8_t signature[16]; + uint32_t platform_class; + uint8_t spec_version_minor; + uint8_t spec_version_major; + uint8_t spec_errata; + uint8_t reserved; + uint8_t vendor_info_size; + uint8_t vendor_info[0]; +} __packed; + #define TPM2_ALG_ERROR 0x0000 #define TPM2_ALG_HMAC 0x0005 #define TPM2_ALG_NULL 0x0010 @@ -54,6 +74,28 @@ struct spec_id_event_data { uint8_t vendor_info_size; } __packed; +union tpm_hash_digest { + uint8_t sha1[SHA1_DIGEST_SIZE]; + uint8_t sha256[SHA256_DIGEST_SIZE]; + uint8_t sm3_256[SM3_256_DIGEST_SIZE]; + uint8_t sha384[SHA384_DIGEST_SIZE]; + uint8_t sha512[SHA512_DIGEST_SIZE]; +}; + +struct tpm_hash_algorithm { + uint16_t hashAlg; + union tpm_hash_digest digest; +} __packed; + +struct tcg_pcr_event2_header { + uint32_t pcr_index; + uint32_t event_type; + uint32_t digest_count; + uint8_t digests[0]; + /* uint32_t event_size; */ + /* uint8_t event[0]; */ +} __packed; + struct tpm_digest_sizes { uint16_t alg_id; uint16_t digest_size; @@ -76,4 +118,26 @@ struct tcg_efi_spec_id_event { /* uint8_t vendor_info[vendor_info_size]; */ } __packed; +static const char *tpm_event_types[] __maybe_unused = { + [EV_PREBOOT_CERT] = "Reserved", + [EV_POST_CODE] = "POST code", + [EV_UNUSED] = "Unused", + [EV_NO_ACTION] = "No action", + [EV_SEPARATOR] = "Separator", + [EV_ACTION] = "Action", + [EV_EVENT_TAG] = "Event tag", + [EV_S_CRTM_CONTENTS] = "S-CRTM contents", + [EV_S_CRTM_VERSION] = "S-CRTM version", + [EV_CPU_MICROCODE] = "CPU microcode", + [EV_PLATFORM_CONFIG_FLAGS] = "Platform configuration flags", + [EV_TABLE_OF_DEVICES] = "Table of devices", + [EV_COMPACT_HASH] = "Compact hash", + [EV_IPL] = "IPL", + [EV_IPL_PARTITION_DATA] = "IPL partition data", + [EV_NONHOST_CODE] = "Non-host code", + [EV_NONHOST_CONFIG] = "Non-host configuration", + [EV_NONHOST_INFO] = "Non-host information", + [EV_OMIT_BOOT_DEVICE_EVENTS] = "Omit boot device events", +}; + #endif diff --git a/src/commonlib/include/commonlib/coreboot_tables.h b/src/commonlib/include/commonlib/coreboot_tables.h index 864a11894f1..f574aed968b 100644 --- a/src/commonlib/include/commonlib/coreboot_tables.h +++ b/src/commonlib/include/commonlib/coreboot_tables.h @@ -87,6 +87,7 @@ enum { LB_TAG_TYPE_C_INFO = 0x0042, LB_TAG_ACPI_RSDP = 0x0043, LB_TAG_PCIE = 0x0044, + LB_TAG_TPM_STD_LOG = 0x0045, /* The following options are CMOS-related */ LB_TAG_CMOS_OPTION_TABLE = 0x00c8, LB_TAG_OPTION = 0x00c9, diff --git a/src/lib/coreboot_table.c b/src/lib/coreboot_table.c index 67523d5b67a..a818704a986 100644 
--- a/src/lib/coreboot_table.c +++ b/src/lib/coreboot_table.c @@ -23,6 +23,7 @@ #include #include #include +#include #if CONFIG(USE_OPTION_TABLE) #include @@ -292,6 +293,28 @@ static void add_cbmem_pointers(struct lb_header *header) } } +static void lb_tpm_std_log(struct lb_header *header) +{ + struct lb_range *lb_range; + const struct cbmem_entry *entry; + + entry = cbmem_entry_find(CBMEM_ID_TCPA_TCG_LOG); + if (entry == NULL) + entry = cbmem_entry_find(CBMEM_ID_TPM2_TCG_LOG); + if (entry == NULL) + return; /* The section is not present */ + + lb_range = (struct lb_range *)lb_new_record(header); + if (lb_range == NULL) { + printk(BIOS_ERR, "No more room in coreboot table!\n"); + return; + } + lb_range->tag = htole32(LB_TAG_TPM_STD_LOG); + lb_range->size = htole32(sizeof(*lb_range)); + lb_range->range_start = htole64((uintptr_t)cbmem_entry_start(entry)); + lb_range->range_size = htole32(cbmem_entry_size(entry)); +} + static struct lb_mainboard *lb_mainboard(struct lb_header *header) { struct lb_record *rec; @@ -493,6 +516,9 @@ static uintptr_t write_coreboot_table(uintptr_t rom_table_end) /* Serialize resource map into mem table types (LB_MEM_*) */ bootmem_write_memory_table(lb_memory(head)); + /* Record reference to TPM log composed according to specification (either one) */ + lb_tpm_std_log(head); + /* Record our motherboard */ lb_mainboard(head); diff --git a/util/cbmem/cbmem.c b/util/cbmem/cbmem.c index 96595f542a0..37c5245176a 100644 --- a/util/cbmem/cbmem.c +++ b/util/cbmem/cbmem.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -69,7 +70,8 @@ static uint32_t tsc_freq_khz = 0; static struct lb_cbmem_ref timestamps; static struct lb_cbmem_ref console; -static struct lb_cbmem_ref tcpa_log; +static struct lb_cbmem_ref tpm_cb_log; +static struct lb_range tpm_std_log; static struct lb_memory_range cbmem; static void die(const char *msg) @@ -332,6 +334,13 @@ static struct lb_cbmem_ref parse_cbmem_ref(const struct lb_cbmem_ref *cbmem_ref) return ret; } +static struct lb_range parse_range(const struct lb_range *range) +{ + struct lb_range ret; + aligned_memcpy(&ret, range, sizeof(ret)); + return ret; +} + static void parse_memory_tags(const struct lb_memory *mem) { int num_entries; @@ -387,6 +396,11 @@ static int parse_cbtable_entries(const struct mapping *table_mapping) debug(" Found TSC info.\n"); tsc_freq_khz = ((struct lb_tsc_info *)lbr_p)->freq_khz; continue; + case LB_TAG_TPM_STD_LOG: { + debug(" Found TPM standard log table.\n"); + tpm_std_log = parse_range((struct lb_range *)lbr_p); + continue; + } case LB_TAG_FORWARD: { int ret; /* @@ -884,6 +898,225 @@ static void timestamp_add_now(uint32_t timestamp_id) unmap_memory(×tamp_mapping); } +static bool can_print(const uint8_t *data, size_t len) +{ + unsigned int i; + for (i = 0; i < len; i++) { + if (!isprint(data[i]) && !isspace(data[i])) { + /* If printable prefix is followed by zeroes, this is a valid string */ + for (; i < len; i++) { + if (data[i] != 0) + return false; + } + return true; + } + } + return true; +} + +static void print_hex(uint8_t *hex, size_t len) +{ + unsigned int i; + for (i = 0; i < len; i++) + printf("%02x", *(hex + i)); + printf("\n"); +} + +static void parse_tpm12_log(const struct tcpa_spec_entry *spec_log) +{ + static uint8_t zero_block[sizeof(struct tcpa_spec_entry)]; + + uintptr_t current; + uint32_t counter = 0; + + printf("TCPA log:\n"); + printf("\tSpecification: %d.%d%d", + spec_log->spec_version_major, + spec_log->spec_version_minor, + spec_log->spec_errata); + 
printf("\tPlatform class: %s\n", + le32toh(spec_log->platform_class) == 0 ? "PC Client" : + le32toh(spec_log->platform_class) == 1 ? "Server" : "Unknown"); + + current = (uintptr_t)&spec_log->vendor_info[spec_log->vendor_info_size]; + while (memcmp((const void *)current, (const void *)zero_block, sizeof(zero_block))) { + uint32_t len; + struct tcpa_log_entry *log_entry = (void *)current; + uint32_t event_type = le32toh(log_entry->event_type); + + printf("TCPA log entry %u:\n", ++counter); + printf("\tPCR: %d\n", le32toh(log_entry->pcr)); + if (event_type >= ARRAY_SIZE(tpm_event_types)) + printf("\tEvent type: Unknown (0x%x)\n", event_type); + else + printf("\tEvent type: %s\n", tpm_event_types[event_type]); + printf("\tDigest: "); + print_hex(log_entry->digest, SHA1_DIGEST_SIZE); + current += sizeof(struct tcpa_log_entry); + len = le32toh(log_entry->event_data_size); + if (len != 0) { + current += len; + printf("\tEvent data: "); + if (can_print(log_entry->event, len)) + printf("%.*s\n", len, log_entry->event); + else + print_hex(log_entry->event, len); + } else { + printf("\tEvent data not provided\n"); + } + } +} + +static uint32_t print_tpm2_digests(struct tcg_pcr_event2_header *log_entry) +{ + unsigned int i; + uintptr_t current = (uintptr_t)log_entry->digests; + + for (i = 0; i < le32toh(log_entry->digest_count); i++) { + struct tpm_hash_algorithm *hash = (struct tpm_hash_algorithm *)current; + switch (le16toh(hash->hashAlg)) { + case TPM2_ALG_SHA1: + printf("\t\t SHA1: "); + print_hex(hash->digest.sha1, SHA1_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA1_DIGEST_SIZE; + break; + case TPM2_ALG_SHA256: + printf("\t\t SHA256: "); + print_hex(hash->digest.sha256, SHA256_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA256_DIGEST_SIZE; + break; + case TPM2_ALG_SHA384: + printf("\t\t SHA384: "); + print_hex(hash->digest.sha384, SHA384_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA384_DIGEST_SIZE; + break; + case TPM2_ALG_SHA512: + printf("\t\t SHA512: "); + print_hex(hash->digest.sha512, SHA512_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA512_DIGEST_SIZE; + break; + case TPM2_ALG_SM3_256: + printf("\t\t SM3: "); + print_hex(hash->digest.sm3_256, SM3_256_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SM3_256_DIGEST_SIZE; + break; + default: + die("Unknown hash algorithm\n"); + } + } + + return current - (uintptr_t)&log_entry->digest_count; +} + +static void parse_tpm2_log(const struct tcg_efi_spec_id_event *tpm2_log) +{ + static uint8_t zero_block[12]; /* Only PCR index, event type and digest count */ + + uintptr_t current; + uint32_t counter = 0; + + printf("TPM2 log:\n"); + printf("\tSpecification: %d.%d%d\n", + tpm2_log->spec_version_major, + tpm2_log->spec_version_minor, + tpm2_log->spec_errata); + printf("\tPlatform class: %s\n", + le32toh(tpm2_log->platform_class) == 0 ? "PC Client" : + le32toh(tpm2_log->platform_class) == 1 ? 
"Server" : "Unknown"); + + /* Start after the first variable-sized part of the header */ + current = (uintptr_t)&tpm2_log->digest_sizes[le32toh(tpm2_log->num_of_algorithms)]; + /* current is at `uint8_t vendor_info_size` here */ + current += 1 + *(uint8_t *)current; + + while (memcmp((const void *)current, (const void *)zero_block, sizeof(zero_block))) { + uint32_t len; + struct tcg_pcr_event2_header *log_entry = (void *)current; + uint32_t event_type = le32toh(log_entry->event_type); + + printf("TPM2 log entry %u:\n", ++counter); + printf("\tPCR: %d\n", le32toh(log_entry->pcr_index)); + if (event_type >= ARRAY_SIZE(tpm_event_types)) + printf("\tEvent type: Unknown (0x%x)\n", event_type); + else + printf("\tEvent type: %s\n", tpm_event_types[event_type]); + + current = (uintptr_t)&log_entry->digest_count; + if (le32toh(log_entry->digest_count) > 0) { + printf("\tDigests:\n"); + current += print_tpm2_digests(log_entry); + } else { + printf("\tNo digests in this log entry\n"); + current += sizeof(log_entry->digest_count); + } + /* Now event size and event are left to be parsed */ + len = le32toh(*(uint32_t *)current); + current += sizeof(uint32_t); + if (len != 0) { + printf("\tEvent data: %d %.*s\n", len, len, (const char *)current); + current += len; + } else { + printf("\tEvent data not provided\n"); + } + } +} + +/* Dump the TPM log table in format defined by specifications */ +static void dump_tpm_log(void) +{ + const void *event_log; + const struct tcpa_spec_entry *tspec_entry; + const struct tcg_efi_spec_id_event *tcg_spec_entry; + uint64_t addr; + size_t size; + struct mapping log_mapping; + + if (tpm_std_log.tag != LB_TAG_TPM_STD_LOG) { + fprintf(stderr, "No TPM log found in coreboot table.\n"); + return; + } + + addr = tpm_std_log.range_start; + size = tpm_std_log.range_size; + + event_log = map_memory(&log_mapping, addr, size); + if (!event_log) + die("Unable to map TPM eventlog\n"); + + tspec_entry = event_log; + if (!strcmp((const char *)tspec_entry->signature, TCPA_SPEC_ID_EVENT_SIGNATURE)) { + if (tspec_entry->spec_version_major == 1 && + tspec_entry->spec_version_minor == 2 && + tspec_entry->spec_errata >= 1 && + le32toh(tspec_entry->entry.event_type) == EV_NO_ACTION) { + parse_tpm12_log(tspec_entry); + } else { + fprintf(stderr, "Unknown TPM1.2 log specification\n"); + } + unmap_memory(&log_mapping); + return; + } + + tcg_spec_entry = event_log; + if (!strcmp((const char *)tcg_spec_entry->signature, TCG_EFI_SPEC_ID_EVENT_SIGNATURE)) { + if (tcg_spec_entry->spec_version_major == 2 && + tcg_spec_entry->spec_version_minor == 0 && + le32toh(tcg_spec_entry->event_type) == EV_NO_ACTION) { + parse_tpm2_log(tcg_spec_entry); + } else { + fprintf(stderr, "Unknown TPM2 log specification.\n"); + } + unmap_memory(&log_mapping); + return; + } + + fprintf(stderr, "Unknown TPM log specification: %.*s\n", + (int)sizeof(tcg_spec_entry->signature), + (const char *)tcg_spec_entry->signature); + + unmap_memory(&log_mapping); +} + /* dump the TPM CB log table */ static void dump_tpm_cb_log(void) { @@ -1734,8 +1967,12 @@ int main(int argc, char** argv) if (timestamp_type != TIMESTAMPS_PRINT_NONE) dump_timestamps(timestamp_type); - if (print_tcpa_log) - dump_tpm_cb_log(); + if (print_tcpa_log) { + if (tpm_std_log.tag != LB_TAG_UNUSED) + dump_tpm_log(); + else + dump_tpm_cb_log(); + } unmap_memory(&lbtable_mapping); From 68d4bd2f006cf78b832ce55627d28371cc673198 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Wed, 29 Mar 2023 13:18:51 +0200 Subject: [PATCH 206/213] .gitmodules: use absolute URLs 
Signed-off-by: Krystian Hebel Change-Id: Ib0fbe66baf4bd6d109a09493935186a847c65b25 --- .gitmodules | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4d0a18d202e..3e12f48b332 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,69 +1,69 @@ [submodule "3rdparty/blobs"] path = 3rdparty/blobs - url = ../blobs.git + url = https://review.coreboot.org/blobs.git update = none ignore = dirty [submodule "util/nvidia-cbootimage"] path = util/nvidia/cbootimage - url = ../nvidia-cbootimage.git + url = https://review.coreboot.org/nvidia-cbootimage.git [submodule "vboot"] path = 3rdparty/vboot - url = ../vboot.git + url = https://review.coreboot.org/vboot.git branch = main [submodule "arm-trusted-firmware"] path = 3rdparty/arm-trusted-firmware - url = ../arm-trusted-firmware.git + url = https://review.coreboot.org/arm-trusted-firmware.git [submodule "3rdparty/chromeec"] path = 3rdparty/chromeec - url = ../chrome-ec.git + url = https://review.coreboot.org/chrome-ec.git [submodule "libhwbase"] path = 3rdparty/libhwbase - url = ../libhwbase.git + url = https://review.coreboot.org/libhwbase.git [submodule "libgfxinit"] path = 3rdparty/libgfxinit - url = ../libgfxinit.git + url = https://review.coreboot.org/libgfxinit.git [submodule "3rdparty/fsp"] path = 3rdparty/fsp - url = ../fsp.git + url = https://review.coreboot.org/fsp.git update = none ignore = dirty [submodule "opensbi"] path = 3rdparty/opensbi - url = ../opensbi.git + url = https://review.coreboot.org/opensbi.git [submodule "intel-microcode"] path = 3rdparty/intel-microcode - url = ../intel-microcode.git + url = https://review.coreboot.org/intel-microcode.git update = none ignore = dirty branch = main [submodule "3rdparty/ffs"] path = 3rdparty/ffs - url = ../ffs.git + url = https://review.coreboot.org/ffs.git [submodule "3rdparty/amd_blobs"] path = 3rdparty/amd_blobs - url = ../amd_blobs + url = https://review.coreboot.org/amd_blobs update = none ignore = dirty [submodule "3rdparty/cmocka"] path = 3rdparty/cmocka - url = ../cmocka.git + url = https://review.coreboot.org/cmocka.git update = none branch = stable-1.1 [submodule "3rdparty/qc_blobs"] path = 3rdparty/qc_blobs - url = ../qc_blobs.git + url = https://review.coreboot.org/qc_blobs.git update = none ignore = dirty [submodule "3rdparty/intel-sec-tools"] path = 3rdparty/intel-sec-tools - url = ../9esec-security-tooling.git + url = https://review.coreboot.org/9esec-security-tooling.git [submodule "3rdparty/stm"] path = 3rdparty/stm - url = ../STM + url = https://review.coreboot.org/STM branch = stmpe [submodule "util/goswid"] path = util/goswid - url = ../goswid + url = https://review.coreboot.org/goswid branch = trunk [submodule "3rdparty/sb-signing-utils"] path = 3rdparty/sb-signing-utils From 4845c31190f469472e5030c35ed050d4db6c5a08 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 20 May 2023 21:35:25 +0300 Subject: [PATCH 207/213] soc/power9/occ.c: make sensor IDs chip-specific IDs of sensors of processor state and sensors of core temperature and frequency are chip-specific. Without this change BMC could fail to control fan of the second CPU. 
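For reviewers, a worked example of the chip-aware calculation introduced below (not
part of the change). MAX_CORES_PER_CHIP is assumed to be 24 here, matching the
"24*2 IDs" comment in this file; the authoritative IDs still come from talos.xml.

/* Chip 0, core 0: 0x5B + 0*24 + 0 = 0x5B (same as before this patch). */
/* Chip 1, core 0: 0x5B + 1*24 + 0 = 0x73 (previously also 0x5B, which
 * is how the BMC could end up steering the wrong CPU's fan). */
#define PROC0_CORE0_TEMP_ID	0x5B
#define MAX_CORES_PER_CHIP	24

static const unsigned int chip1_core0_temp_id =
	PROC0_CORE0_TEMP_ID + 1 * MAX_CORES_PER_CHIP + 0;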
Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/occ.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index 33dacbaac36..00d4ed78903 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -745,9 +745,9 @@ static void add_sensor_id(uint8_t *data, uint16_t *index, uint32_t sensor_id) * Sensors IDs listed here are valid for Talos II. Values come from talos.xml * and may or may not be different for other boards. */ -#define PROC_CALLOUT_ID 0x08 -#define CORE0_TEMP_ID 0x5B -#define CORE0_FREQ_ID 0xA0 +#define PROC0_CALLOUT_ID 0x08 +#define PROC0_CORE0_TEMP_ID 0x5B +#define PROC0_CORE0_FREQ_ID 0xA0 #define BACKPLANE_CALLOUT_ID 0x8C #define APSS_CALLOUT_ID 0x93 /* Same as Backplane Callout ID */ @@ -785,15 +785,17 @@ static void get_sys_cfg_msg_data(const struct occ_cfg_inputs *inputs, data[index++] = system_type; /* Processor Callout Sensor ID */ - add_sensor_id(data, &index, PROC_CALLOUT_ID); + add_sensor_id(data, &index, PROC0_CALLOUT_ID + inputs->chip); /* Next 24*2 IDs are for core sensors */ for (i = 0; i < MAX_CORES_PER_CHIP; ++i) { /* Core Temp Sensor ID */ - add_sensor_id(data, &index, CORE0_TEMP_ID + i); + add_sensor_id(data, &index, + PROC0_CORE0_TEMP_ID + inputs->chip * MAX_CORES_PER_CHIP + i); /* Core Frequency Sensor ID */ - add_sensor_id(data, &index, CORE0_FREQ_ID + i); + add_sensor_id(data, &index, + PROC0_CORE0_FREQ_ID + inputs->chip * MAX_CORES_PER_CHIP + i); } /* Backplane Callout Sensor ID */ From cd43a42bed8314749a78f40a4b70636c849d0e8c Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sun, 21 May 2023 00:45:22 +0300 Subject: [PATCH 208/213] soc/power9/occ.c: replace a TODO about activation Sending those IPMI messages seems to have no effect and Hostboot likely has a bug in doing it, so there is probably no need to add the corresponding code. Signed-off-by: Sergii Dmytruk --- src/soc/ibm/power9/occ.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c index 00d4ed78903..a5a2837f933 100644 --- a/src/soc/ibm/power9/occ.c +++ b/src/soc/ibm/power9/occ.c @@ -1223,8 +1223,13 @@ void activate_occ(uint8_t chips, struct homer_st *homers) /* Switch for OCC to active state (sent only to master OCC) */ set_occ_active_state(/*chip=*/0, &homers[0]); - /* TODO: Hostboot sets active sensors for all OCCs here, so BMC can start - communication with OCCs. */ + /* + * Hostboot sets active sensors for all OCCs via IPMI here, so BMC can + * start communication with them. However, in practice that seems to + * make no difference and Hostboot seems to have a bug in that it uses + * wrong operation code when it tries to change sensor's state likely + * turning the interaction into a no-op. + */ } void pm_occ_fir_init(uint8_t chip) From c94aea8bc273e68555bda7a77db6bacd872362d4 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Fri, 2 Jun 2023 00:25:54 +0300 Subject: [PATCH 209/213] mb/raptor-cs/talos-2/1-cpu.dts: set 2s I2C timeout for TPM port Without this skiboot uses its default of 1ms, which is inappropriate for a TPM device as it performs clock stretching for a second or two. 
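For illustration (not part of this change), a sketch of the timing argument.
i2c_transfer_done() and delay_us() are placeholders only; skiboot's real I2C driver
is structured differently.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helpers, declared only to keep the sketch self-contained. */
bool i2c_transfer_done(void);
void delay_us(uint32_t us);

/*
 * Poll for completion of one transfer.  With timeout_ms = 1 (skiboot's
 * default) the loop gives up long before a clock-stretching TPM answers;
 * timeout-ms = <2000> from the device tree covers the 1-2 s stretch.
 */
static bool wait_for_transfer(uint32_t timeout_ms)
{
	uint32_t waited_us;

	for (waited_us = 0; waited_us < timeout_ms * 1000u; waited_us += 100) {
		if (i2c_transfer_done())
			return true;
		delay_us(100);
	}

	return false;
}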
Signed-off-by: Sergii Dmytruk --- src/mainboard/raptor-cs/talos-2/1-cpu.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mainboard/raptor-cs/talos-2/1-cpu.dts b/src/mainboard/raptor-cs/talos-2/1-cpu.dts index 9a749e4eb36..6573c7925cc 100644 --- a/src/mainboard/raptor-cs/talos-2/1-cpu.dts +++ b/src/mainboard/raptor-cs/talos-2/1-cpu.dts @@ -392,6 +392,7 @@ #address-cells = <0x01>; compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; bus-frequency = <0x61a80>; + timeout-ms = <2000>; eeprom@50 { reg = <0x50>; From 68307ed8b690d30ff9f8a2a298e8f1d597f04bfa Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Sat, 20 May 2023 21:30:48 +0300 Subject: [PATCH 210/213] payloads/external/skiboot/Kconfig: update revision To pick up fix in Infineon TPM driver that caused the device to be disabled by skiboot during load even when it functions well (just returns all ones for registers sometimes). Signed-off-by: Sergii Dmytruk --- payloads/external/skiboot/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/payloads/external/skiboot/Kconfig b/payloads/external/skiboot/Kconfig index 3cb7b96c108..90e29dbaa13 100644 --- a/payloads/external/skiboot/Kconfig +++ b/payloads/external/skiboot/Kconfig @@ -15,7 +15,7 @@ config SKIBOOT_GIT_REPO config SKIBOOT_REVISION string "Revision of skiboot payload" default "d93ddbd39b4eeac0bc11dacbdadea76df2996c13" if BOARD_EMULATION_QEMU_POWER9 - default "fa060c2c98b6eefde06a8bd78d1d8096c2bede37" if BOARD_RAPTOR_CS_TALOS_2 + default "1b14dd0b695b6113805186faad9b2def1d1bfeca" if BOARD_RAPTOR_CS_TALOS_2 help Revision, that skiboot repository will be checked out to, before building an image. From bf4e30fd16618d14b194e714cd4737583f293c27 Mon Sep 17 00:00:00 2001 From: Sergii Dmytruk Date: Wed, 7 Jun 2023 01:06:49 +0300 Subject: [PATCH 211/213] payloads/external/skiboot/Makefile: build only skiboot.elf Not specifying target results in building of unused LID image and also building tools (for GCOV and for building LID) using host GCC whose version isn't known for sure and can cause a build failure. Since we only need skiboot.elf, don't build anything more. Signed-off-by: Sergii Dmytruk --- payloads/external/skiboot/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/payloads/external/skiboot/Makefile b/payloads/external/skiboot/Makefile index e4db9bb5616..b3920cd95b1 100644 --- a/payloads/external/skiboot/Makefile +++ b/payloads/external/skiboot/Makefile @@ -14,7 +14,7 @@ unexport $(COREBOOT_EXPORTS) all: $(skiboot_elf) $(skiboot_elf): | $(skiboot_dir) $(build_dir) - +$(MAKE) -C $(skiboot_dir) CROSS="$(skiboot_cross)" + +$(MAKE) -C $(skiboot_dir) CROSS="$(skiboot_cross)" skiboot.elf cp $(skiboot_dir)/skiboot.elf $@ # skiboot is always built with debug information due to unconditional -ggdb $(skiboot_cross)strip $@ From cfc885d714b2e3584c6ba14208d49e468535d09a Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Mon, 3 Jul 2023 13:28:00 +0200 Subject: [PATCH 212/213] src/Kconfig: enable timestamps by default on PPC64 PPC64 has architectural reliable timer, so there is no reason to have timestamps disabled. 
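For context (illustration only, not part of this change): the PPC64 Time Base is an
architected, always-running 64-bit counter that can be read cheaply from problem
state, which is what makes timestamp collection essentially free here. A minimal
sketch of reading it; coreboot's actual timestamp_get() plumbing for this SoC may be
wired up differently.

#include <stdint.h>

static inline uint64_t read_time_base(void)
{
	uint64_t tb;

	/* PPC64-only: mftb reads the full 64-bit Time Base on 64-bit parts. */
	asm volatile("mftb %0" : "=r"(tb));
	return tb;
}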
Signed-off-by: Krystian Hebel --- src/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Kconfig b/src/Kconfig index 0d3879ecbf7..aa8c5e5632e 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -229,7 +229,7 @@ config INCLUDE_CONFIG_FILE config COLLECT_TIMESTAMPS bool "Create a table of timestamps collected during boot" - default y if ARCH_X86 + default y if ARCH_X86 || ARCH_PPC64 help Make coreboot create a table of timer-ID/timer-value pairs to allow measuring time spent at different phases of the boot process. From c8aed443c631042ad2b0326c35cd0b774752b924 Mon Sep 17 00:00:00 2001 From: Krystian Hebel Date: Tue, 11 Jul 2023 12:07:25 +0200 Subject: [PATCH 213/213] lib/timestamp: explicitly write multibyte fields as little endian This change allows for proper parsing of timestamp table on platforms that use big endian in coreboot and little endian in target OS. POWER9 is an example of such architecture. util/cbmem assumes that it runs on little endian system in most of its code, not only related to timestamps. This patch doesn't change that. TEST=run cbmem -t on Talos 2, get sane results Signed-off-by: Krystian Hebel --- src/lib/timestamp.c | 64 +++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/lib/timestamp.c b/src/lib/timestamp.c index c7e1c0cf4ce..3dad8cdb658 100644 --- a/src/lib/timestamp.c +++ b/src/lib/timestamp.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -18,11 +19,11 @@ static struct timestamp_table *glob_ts_table; static void timestamp_cache_init(struct timestamp_table *ts_cache, uint64_t base) { - ts_cache->num_entries = 0; - ts_cache->base_time = base; - ts_cache->max_entries = (REGION_SIZE(timestamp) - + ts_cache->num_entries = htole32(0); + ts_cache->base_time = htole64(base); + ts_cache->max_entries = htole16((REGION_SIZE(timestamp) - offsetof(struct timestamp_table, entries)) - / sizeof(struct timestamp_entry); + / sizeof(struct timestamp_entry)); } static struct timestamp_table *timestamp_cache_get(void) @@ -52,9 +53,9 @@ static struct timestamp_table *timestamp_alloc_cbmem_table(void) if (!tst) return NULL; - tst->base_time = 0; - tst->max_entries = MAX_TIMESTAMPS; - tst->num_entries = 0; + tst->base_time = htole64(0); + tst->max_entries = htole16(MAX_TIMESTAMPS); + tst->num_entries = htole32(0); return tst; } @@ -105,15 +106,17 @@ static void timestamp_add_table_entry(struct timestamp_table *ts_table, enum timestamp_id id, int64_t ts_time) { struct timestamp_entry *tse; + uint32_t num_entries = le32toh(ts_table->num_entries); - if (ts_table->num_entries >= ts_table->max_entries) + if (num_entries >= le16toh(ts_table->max_entries)) return; - tse = &ts_table->entries[ts_table->num_entries++]; - tse->entry_id = id; - tse->entry_stamp = ts_time; + tse = &ts_table->entries[num_entries++]; + ts_table->num_entries = htole32(num_entries); + tse->entry_id = htole32(id); + tse->entry_stamp = htole64(ts_time); - if (ts_table->num_entries == ts_table->max_entries) + if (num_entries == le16toh(ts_table->max_entries)) printk(BIOS_ERR, "Timestamp table full\n"); } @@ -131,7 +134,7 @@ void timestamp_add(enum timestamp_id id, int64_t ts_time) return; } - ts_time -= ts_table->base_time; + ts_time -= le64toh(ts_table->base_time); timestamp_add_table_entry(ts_table, id, ts_time); if (CONFIG(TIMESTAMPS_ON_CONSOLE)) @@ -196,14 +199,18 @@ static void timestamp_sync_cache_to_cbmem(struct timestamp_table *ts_cbmem_table /* Inherit cache base_time. 
*/ ts_cbmem_table->base_time = ts_cache_table->base_time; - for (i = 0; i < ts_cache_table->num_entries; i++) { + for (i = 0; i < le32toh(ts_cache_table->num_entries); i++) { struct timestamp_entry *tse = &ts_cache_table->entries[i]; - timestamp_add_table_entry(ts_cbmem_table, tse->entry_id, - tse->entry_stamp); + /* timestamp_add_table_entry() converts endianness, but it is already + * converted to LE in cache. We need to swap it back to host endianness + * so it will be properly written as LE to cbmem. + */ + timestamp_add_table_entry(ts_cbmem_table, le32toh(tse->entry_id), + le64toh(tse->entry_stamp)); } /* Cache no longer required. */ - ts_cache_table->num_entries = 0; + ts_cache_table->num_entries = htole32(0); } static void timestamp_reinit(int is_recovery) @@ -233,7 +240,7 @@ static void timestamp_reinit(int is_recovery) /* Seed the timestamp tick frequency in ENV_PAYLOAD_LOADER. */ if (ENV_PAYLOAD_LOADER) - ts_cbmem_table->tick_freq_mhz = timestamp_tick_freq_mhz(); + ts_cbmem_table->tick_freq_mhz = htole16(timestamp_tick_freq_mhz()); timestamp_table_set(ts_cbmem_table); } @@ -241,6 +248,7 @@ static void timestamp_reinit(int is_recovery) void timestamp_rescale_table(uint16_t N, uint16_t M) { uint32_t i; + uint64_t base_time; struct timestamp_table *ts_table; if (!timestamp_should_run()) @@ -257,12 +265,18 @@ void timestamp_rescale_table(uint16_t N, uint16_t M) return; } - ts_table->base_time /= M; - ts_table->base_time *= N; - for (i = 0; i < ts_table->num_entries; i++) { + base_time = le64toh(ts_table->base_time); + base_time /= M; + base_time *= N; + ts_table->base_time = htole64(base_time); + + for (i = 0; i < le32toh(ts_table->num_entries); i++) { + int64_t entry_stamp; struct timestamp_entry *tse = &ts_table->entries[i]; - tse->entry_stamp /= M; - tse->entry_stamp *= N; + entry_stamp = le64toh(tse->entry_stamp); + entry_stamp /= M; + entry_stamp *= N; + tse->entry_stamp = htole64(entry_stamp); } } @@ -274,9 +288,9 @@ uint32_t get_us_since_boot(void) { struct timestamp_table *ts = timestamp_table_get(); - if (ts == NULL || ts->tick_freq_mhz == 0) + if (ts == NULL || le16toh(ts->tick_freq_mhz) == 0) return 0; - return (timestamp_get() - ts->base_time) / ts->tick_freq_mhz; + return (timestamp_get() - le64toh(ts->base_time)) / le16toh(ts->tick_freq_mhz); } CBMEM_READY_HOOK(timestamp_reinit);
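
For illustration (not part of the patch): the store/load pairing this change relies
on can be exercised on the host with the glibc <endian.h> helpers, which mirror the
ones coreboot provides. On a big-endian coreboot build htole64() byte-swaps on the
way into CBMEM; on the little-endian host running util/cbmem the matching le64toh()
is a no-op.

#include <assert.h>
#include <endian.h>
#include <stdint.h>

int main(void)
{
	uint64_t base_time = 0x1122334455667788ULL;

	uint64_t stored = htole64(base_time);	/* what coreboot now writes   */
	assert(le64toh(stored) == base_time);	/* what util/cbmem reads back */

	return 0;
}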