diff --git a/.checkpatch.conf b/.checkpatch.conf index dbb1aaa7444..809d27a3f3b 100644 --- a/.checkpatch.conf +++ b/.checkpatch.conf @@ -38,3 +38,4 @@ --exclude src/vendorcode/cavium --exclude src/vendorcode/intel --exclude src/vendorcode/mediatek +--exclude src/vendorcode/ibm diff --git a/.gitmodules b/.gitmodules index 6f62952f435..3e12f48b332 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,67 +1,70 @@ [submodule "3rdparty/blobs"] path = 3rdparty/blobs - url = ../blobs.git + url = https://review.coreboot.org/blobs.git update = none ignore = dirty [submodule "util/nvidia-cbootimage"] path = util/nvidia/cbootimage - url = ../nvidia-cbootimage.git + url = https://review.coreboot.org/nvidia-cbootimage.git [submodule "vboot"] path = 3rdparty/vboot - url = ../vboot.git + url = https://review.coreboot.org/vboot.git branch = main [submodule "arm-trusted-firmware"] path = 3rdparty/arm-trusted-firmware - url = ../arm-trusted-firmware.git + url = https://review.coreboot.org/arm-trusted-firmware.git [submodule "3rdparty/chromeec"] path = 3rdparty/chromeec - url = ../chrome-ec.git + url = https://review.coreboot.org/chrome-ec.git [submodule "libhwbase"] path = 3rdparty/libhwbase - url = ../libhwbase.git + url = https://review.coreboot.org/libhwbase.git [submodule "libgfxinit"] path = 3rdparty/libgfxinit - url = ../libgfxinit.git + url = https://review.coreboot.org/libgfxinit.git [submodule "3rdparty/fsp"] path = 3rdparty/fsp - url = ../fsp.git + url = https://review.coreboot.org/fsp.git update = none ignore = dirty [submodule "opensbi"] path = 3rdparty/opensbi - url = ../opensbi.git + url = https://review.coreboot.org/opensbi.git [submodule "intel-microcode"] path = 3rdparty/intel-microcode - url = ../intel-microcode.git + url = https://review.coreboot.org/intel-microcode.git update = none ignore = dirty branch = main [submodule "3rdparty/ffs"] path = 3rdparty/ffs - url = ../ffs.git + url = https://review.coreboot.org/ffs.git [submodule "3rdparty/amd_blobs"] path = 3rdparty/amd_blobs - url = ../amd_blobs + url = https://review.coreboot.org/amd_blobs update = none ignore = dirty [submodule "3rdparty/cmocka"] path = 3rdparty/cmocka - url = ../cmocka.git + url = https://review.coreboot.org/cmocka.git update = none branch = stable-1.1 [submodule "3rdparty/qc_blobs"] path = 3rdparty/qc_blobs - url = ../qc_blobs.git + url = https://review.coreboot.org/qc_blobs.git update = none ignore = dirty [submodule "3rdparty/intel-sec-tools"] path = 3rdparty/intel-sec-tools - url = ../9esec-security-tooling.git + url = https://review.coreboot.org/9esec-security-tooling.git [submodule "3rdparty/stm"] path = 3rdparty/stm - url = ../STM + url = https://review.coreboot.org/STM branch = stmpe [submodule "util/goswid"] path = util/goswid - url = ../goswid + url = https://review.coreboot.org/goswid branch = trunk +[submodule "3rdparty/sb-signing-utils"] + path = 3rdparty/sb-signing-utils + url = https://github.com/open-power/sb-signing-utils.git diff --git a/3rdparty/sb-signing-utils b/3rdparty/sb-signing-utils new file mode 160000 index 00000000000..591c8f53482 --- /dev/null +++ b/3rdparty/sb-signing-utils @@ -0,0 +1 @@ +Subproject commit 591c8f53482243626901e1cc8a4ae321f314040d diff --git a/Documentation/drivers/ipmi_bt.md b/Documentation/drivers/ipmi_bt.md new file mode 100644 index 00000000000..18b97c6c8a0 --- /dev/null +++ b/Documentation/drivers/ipmi_bt.md @@ -0,0 +1,34 @@ +# IPMI BT driver + +The driver can be found in `src/drivers/ipmi/` (same as KCS). 
It works with BMCs
+that provide a BT I/O interface as specified in the [IPMI] standard.
+
+The driver detects the IPMI version and reserves the I/O space in coreboot's
+resource allocator.
+
+## For developers
+
+To use the driver, select the `IPMI_BT` Kconfig and add the following PNP
+device (in this example, for a BT interface at 0xe4):
+
+```
+    chip drivers/ipmi
+        device pnp e4.0 on end # IPMI BT
+    end
+```
+
+**Note:** The I/O base address needs to be aligned to 4.
+
+The following registers can be set:
+
+* `wait_for_bmc`
+  * Boolean
+  * Wait for BMC to boot. This can be used if the BMC takes a long time to boot
+    after PoR.
+* `bmc_boot_timeout`
+  * Integer
+  * The timeout in seconds to wait for the IPMI service to be loaded.
+    Will be used if wait_for_bmc is true.
+
+
+[IPMI]: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf
diff --git a/Documentation/drivers/ipmi_kcs.md b/Documentation/drivers/ipmi_kcs.md
index f6f0fb986a6..c4db492e008 100644
--- a/Documentation/drivers/ipmi_kcs.md
+++ b/Documentation/drivers/ipmi_kcs.md
@@ -42,6 +42,15 @@ The following registers can be set:
 * `gpe_interrupt`
   * Integer
   * The bit in GPE (SCI) used to notify about a change on the KCS.
+* `wait_for_bmc`
+  * Boolean
+  * Wait for BMC to boot. This can be used if the BMC takes a long time to boot
+    after PoR:
+      - AST2400 on Supermicro X11SSH: 34 s
+* `bmc_boot_timeout`
+  * Integer
+  * The timeout in seconds to wait for the IPMI service to be loaded.
+    Will be used if wait_for_bmc is true.
 
 [IPMI]: https://www.intel.com/content/dam/www/public/us/en/documents/product-briefs/ipmi-second-gen-interface-spec-v2-rev1-1.pdf
diff --git a/Makefile.inc b/Makefile.inc
index 0dd4864e20d..ca285bfe4b0 100644
--- a/Makefile.inc
+++ b/Makefile.inc
@@ -519,7 +519,7 @@ endif
 
 additional-dirs += $(objutil)/cbfstool $(objutil)/ifdtool \
 		   $(objutil)/options $(objutil)/amdfwtool \
-		   $(objutil)/cbootimage
+		   $(objutil)/cbootimage $(objutil)/ffs
 
 export $(COREBOOT_EXPORTS)
 
@@ -561,6 +561,8 @@ IFITTOOL:=$(objutil)/cbfstool/ifittool
 AMDCOMPRESS:=$(objutil)/cbfstool/amdcompress
 CSE_FPT:=$(objutil)/cbfstool/cse_fpt
 CSE_SERGER:=$(objutil)/cbfstool/cse_serger
+ECCTOOL:=$(objutil)/ffs/ecc/ecc
+SBSIGNTOOLS:=$(objutil)/sb-signing-utils/create-container
 
 $(obj)/cbfstool: $(CBFSTOOL)
 	cp $< $@
@@ -601,6 +603,18 @@ IFDTOOL:=$(objutil)/ifdtool/ifdtool
 
 AMDFWTOOL:=$(objutil)/amdfwtool/amdfwtool
 
+$(ECCTOOL):
+	@printf " Compile ECCTOOL\n"
+	cp -r $(top)/3rdparty/ffs $(objutil)
+	cd $(objutil)/ffs && autoreconf -i && ./configure
+	+$(MAKE) -C $(objutil)/ffs
+
+$(SBSIGNTOOLS):
+	@printf " Compile SB SIGNING UTILS\n"
+	cp -r $(top)/3rdparty/sb-signing-utils $(objutil)
+	cd $(objutil)/sb-signing-utils && autoreconf -i -Wno-unsupported && ./configure
+	+$(MAKE) -C $(objutil)/sb-signing-utils
+
 APCB_EDIT_TOOL:=$(top)/util/apcb/apcb_edit.py
 
 APCB_V3_EDIT_TOOL:=$(top)/util/apcb/apcb_v3_edit.py
@@ -701,7 +715,7 @@ install-git-commit-clangfmt:
 include util/crossgcc/Makefile.inc
 
 .PHONY: tools
-tools: $(objutil)/kconfig/conf $(objutil)/kconfig/toada $(CBFSTOOL) $(objutil)/cbfstool/cbfs-compression-tool $(FMAPTOOL) $(RMODTOOL) $(IFWITOOL) $(objutil)/nvramtool/nvramtool $(objutil)/sconfig/sconfig $(IFDTOOL) $(CBOOTIMAGE) $(AMDFWTOOL) $(AMDCOMPRESS) $(FUTILITY) $(BINCFG) $(IFITTOOL) $(objutil)/supermicro/smcbiosinfo $(CSE_FPT) $(CSE_SERGER)
+tools: $(objutil)/kconfig/conf $(objutil)/kconfig/toada $(CBFSTOOL) $(objutil)/cbfstool/cbfs-compression-tool $(FMAPTOOL) $(RMODTOOL) $(IFWITOOL) 
$(objutil)/nvramtool/nvramtool $(objutil)/sconfig/sconfig $(IFDTOOL) $(CBOOTIMAGE) $(AMDFWTOOL) $(AMDCOMPRESS) $(FUTILITY) $(BINCFG) $(IFITTOOL) $(objutil)/supermicro/smcbiosinfo $(CSE_FPT) $(CSE_SERGER) $(ECCTOOL) $(SBSIGNTOOLS) ########################################################################### # Common recipes for all stages @@ -1146,7 +1160,9 @@ add_intermediate = \ $(1): $(obj)/coreboot.pre $(2) | $(INTERMEDIATE) \ $(eval INTERMEDIATE+=$(1)) $(eval PHONY+=$(1)) -$(obj)/coreboot.rom: $(obj)/coreboot.pre $(CBFSTOOL) $(IFITTOOL) $$(INTERMEDIATE) +KEYLOC?=/tmp/keys + +$(obj)/coreboot.rom: $(obj)/coreboot.pre $(CBFSTOOL) $(IFITTOOL) $(ECCTOOL) $(SBSIGNTOOLS) $$(INTERMEDIATE) @printf " CBFS $(subst $(obj)/,,$(@))\n" # The full ROM may be larger than the CBFS part, so create an empty # file (filled with \377 = 0xff) and copy the CBFS image over it. @@ -1173,6 +1189,28 @@ ifeq ($(CONFIG_CBFS_VERIFICATION),y) exit 1 ;\ fi endif # CONFIG_CBFS_VERIFICATION +ifeq ($(CONFIG_ARCH_PPC64),y) + cp -r $(top)/3rdparty/sb-signing-utils/test/keys /tmp + @printf " SBSIGN $(subst $(obj)/,,$(@))\n" + $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ + -p $(KEYLOC)/hw_key_a.key --payload $(top)/$@ --imagefile $(top)/$@.signed + @printf " ECC $(subst $(obj)/,,$(@))\n" + $(ECCTOOL) --inject $(top)/$@.signed --output $(top)/$@.signed.ecc --p8 +ifeq ($(CONFIG_BOOTBLOCK_IN_SEEPROM),y) + @printf " ECC bootblock\n" + $(ECCTOOL) --inject $(top)/$(objcbfs)/bootblock.bin --output $(obj)/bootblock.ecc --p8 +else + @printf " SBSIGN bootblock\n" + $(SBSIGNTOOLS) -a $(KEYLOC)/hw_key_a.key -b $(KEYLOC)/hw_key_b.key -c $(KEYLOC)/hw_key_c.key \ + -p $(KEYLOC)/hw_key_a.key --payload $(top)/$(objcbfs)/bootblock.bin \ + --imagefile $(top)/$(obj)/bootblock.signed + $(ECCTOOL) --inject $(top)/$@ --output $(top)/$@.ecc --p8 + @printf " ECC bootblock\n" + dd if=$(obj)/bootblock.signed of=$(obj)/bootblock.signed.pad ibs=25486 conv=sync 2> /dev/null + $(ECCTOOL) --inject $(obj)/bootblock.signed.pad --output $(obj)/bootblock.signed.ecc --p8 + rm $(obj)/bootblock.signed $(obj)/bootblock.signed.pad +endif # CONFIG_BOOTBLOCK_IN_SEEPROM +endif # CONFIG_ARCH_PPC64 cbfs-files-y += $(CONFIG_CBFS_PREFIX)/romstage $(CONFIG_CBFS_PREFIX)/romstage-file := $(objcbfs)/romstage.elf diff --git a/configs/config.raptor-cs-talos-2 b/configs/config.raptor-cs-talos-2 new file mode 100644 index 00000000000..800300f3869 --- /dev/null +++ b/configs/config.raptor-cs-talos-2 @@ -0,0 +1,2 @@ +CONFIG_VENDOR_RAPTOR_CS=y +CONFIG_PAYLOAD_SKIBOOT=y diff --git a/payloads/external/skiboot/Kconfig b/payloads/external/skiboot/Kconfig index 3198358ecba..90e29dbaa13 100644 --- a/payloads/external/skiboot/Kconfig +++ b/payloads/external/skiboot/Kconfig @@ -7,13 +7,15 @@ config PAYLOAD_FILE config SKIBOOT_GIT_REPO string "Git repository of skiboot payload" - default "https://github.com/open-power/skiboot" + default "https://github.com/open-power/skiboot" if !BOARD_RAPTOR_CS_TALOS_2 + default "https://github.com/Dasharo/skiboot.git" if BOARD_RAPTOR_CS_TALOS_2 help Git repository which will be used to clone skiboot. config SKIBOOT_REVISION string "Revision of skiboot payload" default "d93ddbd39b4eeac0bc11dacbdadea76df2996c13" if BOARD_EMULATION_QEMU_POWER9 + default "1b14dd0b695b6113805186faad9b2def1d1bfeca" if BOARD_RAPTOR_CS_TALOS_2 help Revision, that skiboot repository will be checked out to, before building an image. 
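The `coreboot.rom` recipe above signs the image with create-container using the test hardware keys (overridable via KEYLOC) and then injects P8 ECC bytes. A minimal host-side sanity check for the resulting `.signed` file is sketched below; the 4 KiB header size and the big-endian magic word 0x17082011 are assumptions about the sb-signing-utils container layout, not values taken from this change:

```
/* Hedged sketch: verify that a file starts with a secure-boot container.
 * ROM_MAGIC and HEADER_SIZE are assumed, not defined by this patch. */
#include <stdint.h>
#include <stdio.h>

#define ROM_MAGIC	0x17082011u	/* assumed container magic */
#define HEADER_SIZE	4096u		/* assumed prepended header size */

static int check_container(const char *path)
{
	uint8_t hdr[4];
	uint32_t magic;
	FILE *f = fopen(path, "rb");

	if (!f)
		return -1;
	if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr)) {
		fclose(f);
		return -1;
	}
	fclose(f);

	/* The header is stored big-endian, independent of the host */
	magic = (uint32_t)hdr[0] << 24 | (uint32_t)hdr[1] << 16 |
		(uint32_t)hdr[2] << 8 | hdr[3];
	if (magic != ROM_MAGIC) {
		fprintf(stderr, "%s: no container header found\n", path);
		return -1;
	}

	printf("%s: container OK, payload expected at offset %u\n",
	       path, HEADER_SIZE);
	return 0;
}
```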
diff --git a/payloads/external/skiboot/Makefile b/payloads/external/skiboot/Makefile index 5cf630ea254..b3920cd95b1 100644 --- a/payloads/external/skiboot/Makefile +++ b/payloads/external/skiboot/Makefile @@ -14,7 +14,7 @@ unexport $(COREBOOT_EXPORTS) all: $(skiboot_elf) $(skiboot_elf): | $(skiboot_dir) $(build_dir) - +$(MAKE) -C $(skiboot_dir) CROSS="$(skiboot_cross)" + +$(MAKE) -C $(skiboot_dir) CROSS="$(skiboot_cross)" skiboot.elf cp $(skiboot_dir)/skiboot.elf $@ # skiboot is always built with debug information due to unconditional -ggdb $(skiboot_cross)strip $@ @@ -32,5 +32,6 @@ distclean: clean clean: # Redefine RM because it's used like `$(RM) non-existent-file` # Also ignore useless messages about removing test files - [ ! -d $(skiboot_dir) ] || $(MAKE) -C $(skiboot_dir) RM="rm -rf" clean > /dev/null + [ ! -d $(skiboot_dir) ] || \ + $(MAKE) -C $(skiboot_dir) RM="rm -rf" CROSS="$(skiboot_cross)" clean > /dev/null rm -rf $(build_dir) diff --git a/src/Kconfig b/src/Kconfig index 0d3879ecbf7..aa8c5e5632e 100644 --- a/src/Kconfig +++ b/src/Kconfig @@ -229,7 +229,7 @@ config INCLUDE_CONFIG_FILE config COLLECT_TIMESTAMPS bool "Create a table of timestamps collected during boot" - default y if ARCH_X86 + default y if ARCH_X86 || ARCH_PPC64 help Make coreboot create a table of timer-ID/timer-value pairs to allow measuring time spent at different phases of the boot process. diff --git a/src/arch/ppc64/Makefile.inc b/src/arch/ppc64/Makefile.inc index 8ccd62bfab5..144a04ec091 100644 --- a/src/arch/ppc64/Makefile.inc +++ b/src/arch/ppc64/Makefile.inc @@ -1,8 +1,8 @@ ## SPDX-License-Identifier: GPL-2.0-only -ppc64_flags = -I$(src)/arch/ppc64/ -mbig-endian -mcpu=power8 -mtune=power8 +ppc64_flags = -I$(src)/arch/ppc64/ -mbig-endian -mcpu=power8 -mtune=power8 -mno-pointers-to-nested-functions -ppc64_asm_flags = +ppc64_asm_flags = -Wa,--fatal-warnings ################################################################################ ## bootblock @@ -12,7 +12,6 @@ ifeq ($(CONFIG_ARCH_BOOTBLOCK_PPC64),y) bootblock-y = bootblock_crt0.S bootblock-y += arch_timer.c bootblock-y += boot.c -bootblock-y += rom_media.c bootblock-y += \ $(top)/src/lib/memchr.c \ $(top)/src/lib/memcmp.c \ @@ -38,7 +37,6 @@ ifeq ($(CONFIG_ARCH_ROMSTAGE_PPC64),y) romstage-y += arch_timer.c romstage-y += boot.c romstage-y += stages.c -romstage-y += rom_media.c romstage-y += \ $(top)/src/lib/memchr.c \ $(top)/src/lib/memcmp.c \ @@ -46,8 +44,6 @@ romstage-y += \ $(top)/src/lib/memmove.c \ $(top)/src/lib/memset.c -romstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c - # Build the romstage $(objcbfs)/romstage.debug: $$(romstage-objs) @@ -64,7 +60,7 @@ endif ################################################################################ ifeq ($(CONFIG_ARCH_RAMSTAGE_PPC64),y) -ramstage-y += rom_media.c +ramstage-y += arch_timer.c ramstage-y += stages.c ramstage-y += arch_timer.c ramstage-y += boot.c @@ -78,8 +74,6 @@ ramstage-y += \ $(eval $(call create_class_compiler,rmodules,power8)) -ramstage-$(CONFIG_COLLECT_TIMESTAMPS) += timestamp.c - ramstage-srcs += src/mainboard/$(MAINBOARDDIR)/mainboard.c # Build the ramstage diff --git a/src/arch/ppc64/bootblock_crt0.S b/src/arch/ppc64/bootblock_crt0.S index 5a9496024e2..4254a191eba 100644 --- a/src/arch/ppc64/bootblock_crt0.S +++ b/src/arch/ppc64/bootblock_crt0.S @@ -2,6 +2,7 @@ /* * Early initialization code for POWER8/POWER9. 
*/ +#include #include @@ -27,15 +28,67 @@ oris r,r, (e)@h; \ ori r,r, (e)@l; +/* Load an immediate 32-bit value into a register */ +#define LOAD_IMM32(r, e) \ + li r, 0; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l; + +/* + * On POWER, 0 is wherever HRMOR points to rather than physical DRAM start. + * HRMOR is ORed with address, not added to it, meaning that memory space + * overlaps after 2^(least significant set bit of HRMOR). This becomes + * chaotic when nonconsecutive bits are set... + * + * Two and a half possible cases: + * 0. bootblock started with QEMU in hb-mode + * - NIA = 0x10 (bug?) + * - HRMOR = 0x08000000 (128M) + * - no physical memory to enable/train, everything accessible from start + * 1. bootblock loaded by HBBL + * - NIA = 0 + * - HRMOR = 0xF8000000 (4G - 128M) + * - initialized L3 = 0x400000 (4M) + * - top address before RAM = 0xF8400000 + * 2. bootblock in SEEPROM, loaded by SBE + * - NIA = 0x3000 (placeholder for int. vectors) + * - HRMOR = 0xF8200000 (4G - 128 M + 2 M) + * - initialized L3 = 0x8000 (bootblock/HBBL size = 32K) + * - no way 32K will be enough, must initialize more L3 in bootblock + * - HRMOR still applies, so memory overlaps every 2M + * + * Common subset (assuming 2. initializes as much memory as possible) is + * 0xF8200000-0xF8400000. 2M should be more than enough for pre-RAM code, + * but it isn't enough to load ramstage. We could implement postcar stage, + * but KISS: initialize L3 from _ebootblock to 0xF8980000: up to 9.5M into + * cache, leaving bottom 2M (0xF8000000-0xF8200000) either uninitialized + * (when started from SEEPROM) or just unused for anything but bootblock + * (loaded by HBBL). Last 0.5M of L3 cache is left for interrupt vectors + * normally located at address 0. + * + * Set HRMOR to 0 before jumping to C code in bootblock and forget it even + * exists. + * + * For QEMU s/0xF8/0x08/ in above description but code remains the same. + * L3 initialization is unnecessary in this case but won't break anything. + * + * TODO: there is a structure with SBE->HBBL data at 0 in 2nd option. It + * holds some useful data like XSCOM BAR and LPC BAR. If, for any reason, + * these addresses are different than default, they should be used instead + * of predefined values. + */ + .section ".text._start", "ax", %progbits .globl _start _start: /* QEMU with hb-mode=on starts at address 0x10, while hardware at 0x0 */ +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) nop nop nop nop FIXUP_ENDIAN +#endif /* Store FDT address provided by QEMU in %r3 to pass it later to * payload */ @@ -44,32 +97,54 @@ _start: /* Set program priority to medium */ or %r2, %r2, %r2 - /* Stack */ - lis %r1, _estack@ha - addi %r1, %r1, _estack@l + li %r10, 1 + rotldi %r10, %r10, 63 /* MSB is "ignore HRMOR" */ - /* Clear .bss section */ - /* Currently not needed, .bss is zeroed in the file. If it were to be - * used, make sure that .bss is 128B aligned (size of cache line), - * otherwise dcbz will clear (part of) .opd section! 
*/ -/* - lis %r5, _bss@ha - addi %r5, %r5, _bss@l - lis %r6, _ebss@ha - addi %r6, %r6, _ebss@l + /* Assumption: we are linked at address that isn't changed by HRMOR */ + LOAD_IMM32(%r7, ignoreHRMOR) + or %r9, %r7, %r10 + + mtlr %r9 + blr + +ignoreHRMOR: + /* Now we are at 0x8000000000000000 | linked address */ + li %r0, 0 + mtspr SPR_HRMOR, %r0 /* Clear HRMOR */ + isync + + /* We can't just "b stopIgnoringHRMOR", it would use relative offset */ + addi %r9, %r7, stopIgnoringHRMOR - ignoreHRMOR + mtlr %r9 + blr + +stopIgnoringHRMOR: + /* Now we are at linked address */ + slbia 7 + sync + isync + + /* + * When coming from SBE, L3 cache is invalid except for [2M, end of HBBL] + * range. Make the rest of it valid, or embrace the checkstops. + */ + + /* Validate and initialize to zeroes [end of HBBL, 9.5M] range */ + LOAD_IMM32(%r5, _ebootblock) /* Assume it is at least 128B aligned */ + LOAD_IMM32(%r6, _epreram_cbfs_cache) /* Same */ addi %r6, %r6, -1 1: dcbz 0, %r5 addi %r5, %r5, 128 cmpld cr7, %r5, %r6 blt cr7, 1b -*/ + + /* Stack */ + LOAD_IMM32(%r1, _estack) /* This is tested by checkstack() just before jumping to payload */ LOAD_IMM64(%r3, 0xDEADBEEFDEADBEEF) - lis %r5, _stack@ha - addi %r5, %r5, _stack@l - subi %r5, %r5, 8 + LOAD_IMM32(%r5, _stack - 8) sub %r4, %r1, %r5 sradi %r4, %r4, 3 /* Divide by 8 */ mtctr %r4 @@ -87,11 +162,10 @@ _start: mfmsr %r3 ori %r3, %r3, 0x2000 /* FP = 1 */ oris %r3, %r3, 0x0280 /* VEC = 1, VSX = 1 */ - mtmsr %r3 + mtmsrd %r3 /* Load official procedure descriptor address for main() */ - lis %r12, main@ha - addi %r12, %r12, main@l + LOAD_IMM32(%r12, main) /* Load TOC pointer and jump to main() */ ld %r2, 8(%r12) diff --git a/src/arch/ppc64/include/arch/byteorder.h b/src/arch/ppc64/include/arch/byteorder.h index 8ff857675cf..8b8c0865b3c 100644 --- a/src/arch/ppc64/include/arch/byteorder.h +++ b/src/arch/ppc64/include/arch/byteorder.h @@ -11,8 +11,65 @@ #ifndef __ASSEMBLER__ #include + +/* + * Assigns part of a 64-bit value: lhs[pos:pos + len] = rhs + */ +#define PPC_INSERT(lhs, rhs, pos, len) do { \ + uint64_t __placed = PPC_PLACE(rhs, pos, len); \ + uint64_t __mask = PPC_BITMASK(pos, (pos) + (len) - 1); \ + (lhs) = ((lhs) & ~__mask) | __placed; \ + } while (0) + +/* + * The pos parameter specifies MSB/leftmost bit. 
Passing compile-time constants + * (literals or expressions) for parameters allows for the following + * compile-time checks (not all are performed, depends on which parameter values + * are known at compile-time): + * - pos is in range [0; 63] + * - len is in range [1; 64] + * - (pos + len) <= 64 + * - (val & ~len-based-mask) == 0 + */ +#define PPC_PLACE(val, pos, len) \ + /* Incorrect arguments detected in PPC_PLACE */ __builtin_choose_expr( \ + PPC_PLACE_GOOD_ARGS(val, pos, len), \ + PPC_PLACE_IMPL(val, pos, len), \ + (void)0) + +#define PPC_PLACE_GOOD_ARGS(val, pos, len) ( \ + /* pos value */ \ + __builtin_choose_expr( \ + __builtin_constant_p(pos), \ + ((pos) >= 0) && ((pos) <= 63), \ + 1) && \ + /* len value */ \ + __builtin_choose_expr( \ + __builtin_constant_p(len), \ + ((len) >= 1) && ((len) <= 64), \ + 1) && \ + /* range */ \ + __builtin_choose_expr( \ + __builtin_constant_p(pos) && __builtin_constant_p(len), \ + (pos) + (len) <= 64, \ + 1) && \ + /* value */ \ + __builtin_choose_expr( \ + __builtin_constant_p(val) && __builtin_constant_p(len), \ + ((val) & ~(((uint64_t)1 << (len)) - 1)) == 0, \ + 1) \ + ) + +#define PPC_PLACE_IMPL(val, pos, len) \ + PPC_SHIFT((val) & (((uint64_t)1 << (len)) - 1), ((pos) + ((len) - 1))) + #define PPC_SHIFT(val, lsb) (((uint64_t)(val)) << (63 - (lsb))) +/* Sanity checks and usage examples for PPC_PLACE */ +_Static_assert(PPC_PLACE(0x12345, 0, 20) == 0x1234500000000000, ""); +_Static_assert(PPC_PLACE(0x12345, 0, 24) == 0x0123450000000000, ""); +_Static_assert(PPC_PLACE(0x12345, 8, 24) == 0x0001234500000000, ""); + #else #define PPC_SHIFT(val, lsb) ((val) << (63 - (lsb))) #endif diff --git a/src/arch/ppc64/include/arch/io.h b/src/arch/ppc64/include/arch/io.h index cfaae33f600..5c0ea4c0e15 100644 --- a/src/arch/ppc64/include/arch/io.h +++ b/src/arch/ppc64/include/arch/io.h @@ -4,14 +4,16 @@ #define _ASM_IO_H #include +#include /* Set MSB to 1 to ignore HRMOR */ #define MMIO_GROUP0_CHIP0_LPC_BASE_ADDR 0x8006030000000000 #define LPCHC_IO_SPACE 0xD0010000 +#define LPCHC_FW_SPACE 0xF0000000 #define FLASH_IO_SPACE 0xFC000000 +#define FW_SPACE_SIZE 0x10000000 #define LPC_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_IO_SPACE) #define FLASH_BASE_ADDR (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + FLASH_IO_SPACE) -#define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000 /* Enforce In-order Execution of I/O */ static inline void eieio(void) @@ -63,6 +65,7 @@ static inline uint32_t inl(uint16_t port) static inline void report_istep(uint8_t step, uint8_t substep) { + printk(BIOS_INFO, "starting istep %d.%d\n", step, substep); outb(step, 0x81); outb(substep, 0x82); } diff --git a/src/arch/ppc64/stages.c b/src/arch/ppc64/stages.c index 01b9efaba8d..6d5ae20a3c4 100644 --- a/src/arch/ppc64/stages.c +++ b/src/arch/ppc64/stages.c @@ -14,6 +14,7 @@ #include #include #include +#include void stage_entry(uintptr_t stage_arg) { @@ -23,6 +24,8 @@ void stage_entry(uintptr_t stage_arg) if (!ENV_ROMSTAGE_OR_BEFORE) _cbmem_top_ptr = stage_arg; + else + timestamp_init(read_spr(SPR_TB)); #if ENV_RAMSTAGE hrmor = read_spr(SPR_HRMOR); diff --git a/src/arch/x86/car.ld b/src/arch/x86/car.ld index 132937f4ee9..1b8307ddf32 100644 --- a/src/arch/x86/car.ld +++ b/src/arch/x86/car.ld @@ -20,9 +20,9 @@ VBOOT2_WORK(., 12K) #endif #if CONFIG(TPM_MEASURED_BOOT) - /* Vboot measured boot TCPA log measurements. + /* Vboot measured boot TPM log measurements. * Needs to be transferred until CBMEM is available */ - TPM_TCPA_LOG(., 2K) + TPM_LOG(., 2K) #endif /* Stack for CAR stages. 
Since it persists across all stages that * use CAR it can be reused. The chipset/SoC is expected to provide diff --git a/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h b/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h index 0cf9c7fdef6..76e495af720 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h +++ b/src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h @@ -59,10 +59,10 @@ #define CBMEM_ID_STAGEx_CACHE 0x57a9e100 #define CBMEM_ID_STAGEx_RAW 0x57a9e200 #define CBMEM_ID_STORAGE_DATA 0x53746f72 -#define CBMEM_ID_TCPA_LOG 0x54435041 -#define CBMEM_ID_TCPA_TCG_LOG 0x54445041 +#define CBMEM_ID_TPM_CB_LOG 0x54435041 /* TPM log in coreboot-specific format */ +#define CBMEM_ID_TCPA_TCG_LOG 0x54445041 /* TPM log per TPM 1.2 specification */ #define CBMEM_ID_TIMESTAMP 0x54494d45 -#define CBMEM_ID_TPM2_TCG_LOG 0x54504d32 +#define CBMEM_ID_TPM2_TCG_LOG 0x54504d32 /* TPM log per TPM 2.0 specification */ #define CBMEM_ID_TPM_PPI 0x54505049 #define CBMEM_ID_VBOOT_HANDOFF 0x780074f0 /* deprecated */ #define CBMEM_ID_VBOOT_SEL_REG 0x780074f1 /* deprecated */ @@ -135,7 +135,7 @@ { CBMEM_ID_SMBIOS, "SMBIOS " }, \ { CBMEM_ID_SMM_SAVE_SPACE, "SMM BACKUP " }, \ { CBMEM_ID_STORAGE_DATA, "SD/MMC/eMMC" }, \ - { CBMEM_ID_TCPA_LOG, "TCPA LOG " }, \ + { CBMEM_ID_TPM_CB_LOG, "TPM CB LOG " }, \ { CBMEM_ID_TCPA_TCG_LOG, "TCPA TCGLOG" }, \ { CBMEM_ID_TIMESTAMP, "TIME STAMP " }, \ { CBMEM_ID_TPM2_TCG_LOG, "TPM2 TCGLOG" }, \ diff --git a/src/commonlib/bsd/include/commonlib/bsd/helpers.h b/src/commonlib/bsd/include/commonlib/bsd/helpers.h index 49953055b04..73cd25f8f42 100644 --- a/src/commonlib/bsd/include/commonlib/bsd/helpers.h +++ b/src/commonlib/bsd/include/commonlib/bsd/helpers.h @@ -72,13 +72,13 @@ } while (0) /* Standard units. */ -#define KiB (1<<10) -#define MiB (1<<20) -#define GiB (1<<30) +#define KiB (1U<<10) +#define MiB (1U<<20) +#define GiB (1U<<30) -#define KHz (1000) -#define MHz (1000 * KHz) -#define GHz (1000 * MHz) +#define KHz (1000U) +#define MHz (1000U * KHz) +#define GHz (1000U * MHz) #ifndef offsetof #define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) diff --git a/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h new file mode 100644 index 00000000000..d90182d021c --- /dev/null +++ b/src/commonlib/bsd/include/commonlib/bsd/tpm_log_defs.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef COMMONLIB_BSD_TPM_LOG_DEFS_H +#define COMMONLIB_BSD_TPM_LOG_DEFS_H + +#include +#include + +#define TCPA_SPEC_ID_EVENT_SIGNATURE "Spec ID Event00" +#define TCG_EFI_SPEC_ID_EVENT_SIGNATURE "Spec ID Event03" + +struct tcpa_log_entry { + uint32_t pcr; + uint32_t event_type; + uint8_t digest[20]; + uint32_t event_data_size; + uint8_t event[0]; +} __packed; + +struct tcpa_spec_entry { + struct tcpa_log_entry entry; + uint8_t signature[16]; + uint32_t platform_class; + uint8_t spec_version_minor; + uint8_t spec_version_major; + uint8_t spec_errata; + uint8_t reserved; + uint8_t vendor_info_size; + uint8_t vendor_info[0]; +} __packed; + +#define TPM2_ALG_ERROR 0x0000 +#define TPM2_ALG_HMAC 0x0005 +#define TPM2_ALG_NULL 0x0010 +#define TPM2_ALG_SHA1 0x0004 +#define TPM2_ALG_SHA256 0x000B +#define TPM2_ALG_SHA384 0x000C +#define TPM2_ALG_SHA512 0x000D +#define TPM2_ALG_SM3_256 0x0012 + +#define SHA1_DIGEST_SIZE 20 +#define SHA256_DIGEST_SIZE 32 +#define SHA384_DIGEST_SIZE 48 +#define SHA512_DIGEST_SIZE 64 +#define SM3_256_DIGEST_SIZE 32 + +#define EV_PREBOOT_CERT 0x00000000 +#define EV_POST_CODE 0x00000001 
+#define EV_UNUSED 0x00000002 +#define EV_NO_ACTION 0x00000003 +#define EV_SEPARATOR 0x00000004 +#define EV_ACTION 0x00000005 +#define EV_EVENT_TAG 0x00000006 +#define EV_S_CRTM_CONTENTS 0x00000007 +#define EV_S_CRTM_VERSION 0x00000008 +#define EV_CPU_MICROCODE 0x00000009 +#define EV_PLATFORM_CONFIG_FLAGS 0x0000000A +#define EV_TABLE_OF_DEVICES 0x0000000B +#define EV_COMPACT_HASH 0x0000000C +#define EV_IPL 0x0000000D +#define EV_IPL_PARTITION_DATA 0x0000000E +#define EV_NONHOST_CODE 0x0000000F +#define EV_NONHOST_CONFIG 0x00000010 +#define EV_NONHOST_INFO 0x00000011 +#define EV_OMIT_BOOT_DEVICE_EVENTS 0x00000012 + +struct spec_id_event_data { + char signature[16]; + uint32_t platform_class; + uint8_t spec_version_minor; + uint8_t spec_version_major; + uint8_t spec_errata; + uint8_t reserved; + uint8_t vendor_info_size; +} __packed; + +union tpm_hash_digest { + uint8_t sha1[SHA1_DIGEST_SIZE]; + uint8_t sha256[SHA256_DIGEST_SIZE]; + uint8_t sm3_256[SM3_256_DIGEST_SIZE]; + uint8_t sha384[SHA384_DIGEST_SIZE]; + uint8_t sha512[SHA512_DIGEST_SIZE]; +}; + +struct tpm_hash_algorithm { + uint16_t hashAlg; + union tpm_hash_digest digest; +} __packed; + +struct tcg_pcr_event2_header { + uint32_t pcr_index; + uint32_t event_type; + uint32_t digest_count; + uint8_t digests[0]; + /* uint32_t event_size; */ + /* uint8_t event[0]; */ +} __packed; + +struct tpm_digest_sizes { + uint16_t alg_id; + uint16_t digest_size; +} __packed; + +struct tcg_efi_spec_id_event { + uint32_t pcr_index; + uint32_t event_type; + uint8_t digest[20]; + uint32_t event_size; + uint8_t signature[16]; + uint32_t platform_class; + uint8_t spec_version_minor; + uint8_t spec_version_major; + uint8_t spec_errata; + uint8_t uintn_size; + uint32_t num_of_algorithms; + struct tpm_digest_sizes digest_sizes[0]; /* variable number of members */ + /* uint8_t vendor_info_size; */ + /* uint8_t vendor_info[vendor_info_size]; */ +} __packed; + +static const char *tpm_event_types[] __maybe_unused = { + [EV_PREBOOT_CERT] = "Reserved", + [EV_POST_CODE] = "POST code", + [EV_UNUSED] = "Unused", + [EV_NO_ACTION] = "No action", + [EV_SEPARATOR] = "Separator", + [EV_ACTION] = "Action", + [EV_EVENT_TAG] = "Event tag", + [EV_S_CRTM_CONTENTS] = "S-CRTM contents", + [EV_S_CRTM_VERSION] = "S-CRTM version", + [EV_CPU_MICROCODE] = "CPU microcode", + [EV_PLATFORM_CONFIG_FLAGS] = "Platform configuration flags", + [EV_TABLE_OF_DEVICES] = "Table of devices", + [EV_COMPACT_HASH] = "Compact hash", + [EV_IPL] = "IPL", + [EV_IPL_PARTITION_DATA] = "IPL partition data", + [EV_NONHOST_CODE] = "Non-host code", + [EV_NONHOST_CONFIG] = "Non-host configuration", + [EV_NONHOST_INFO] = "Non-host information", + [EV_OMIT_BOOT_DEVICE_EVENTS] = "Omit boot device events", +}; + +#endif diff --git a/src/commonlib/include/commonlib/coreboot_tables.h b/src/commonlib/include/commonlib/coreboot_tables.h index 3f7ff2df29a..f574aed968b 100644 --- a/src/commonlib/include/commonlib/coreboot_tables.h +++ b/src/commonlib/include/commonlib/coreboot_tables.h @@ -77,7 +77,7 @@ enum { LB_TAG_MAC_ADDRS = 0x0033, LB_TAG_VBOOT_WORKBUF = 0x0034, LB_TAG_MMC_INFO = 0x0035, - LB_TAG_TCPA_LOG = 0x0036, + LB_TAG_TPM_CB_LOG = 0x0036, LB_TAG_FMAP = 0x0037, LB_TAG_PLATFORM_BLOB_VERSION = 0x0038, LB_TAG_SMMSTOREV2 = 0x0039, @@ -87,6 +87,7 @@ enum { LB_TAG_TYPE_C_INFO = 0x0042, LB_TAG_ACPI_RSDP = 0x0043, LB_TAG_PCIE = 0x0044, + LB_TAG_TPM_STD_LOG = 0x0045, /* The following options are CMOS-related */ LB_TAG_CMOS_OPTION_TABLE = 0x00c8, LB_TAG_OPTION = 0x00c9, @@ -109,9 +110,11 @@ typedef __aligned(4) uint64_t 
lb_uint64_t; struct lb_header { uint8_t signature[4]; /* LBIO */ uint32_t header_bytes; - uint32_t header_checksum; + uint16_t header_checksum; + uint16_t pad0; uint32_t table_bytes; - uint32_t table_checksum; + uint16_t table_checksum; + uint16_t pad1; uint32_t table_entries; }; diff --git a/src/commonlib/include/commonlib/tcpa_log_serialized.h b/src/commonlib/include/commonlib/tcpa_log_serialized.h deleted file mode 100644 index 4190a7db647..00000000000 --- a/src/commonlib/include/commonlib/tcpa_log_serialized.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ - -#ifndef __TCPA_LOG_SERIALIZED_H__ -#define __TCPA_LOG_SERIALIZED_H__ - -#include - -#define MAX_TCPA_LOG_ENTRIES 50 -#define TCPA_DIGEST_MAX_LENGTH 64 -#define TCPA_PCR_HASH_NAME 50 -#define TCPA_PCR_HASH_LEN 10 -/* Assumption of 2K TCPA log size reserved for CAR/SRAM */ -#define MAX_PRERAM_TCPA_LOG_ENTRIES 15 - -struct tcpa_entry { - uint32_t pcr; - char digest_type[TCPA_PCR_HASH_LEN]; - uint8_t digest[TCPA_DIGEST_MAX_LENGTH]; - uint32_t digest_length; - char name[TCPA_PCR_HASH_NAME]; -} __packed; - -struct tcpa_table { - uint16_t max_entries; - uint16_t num_entries; - struct tcpa_entry entries[0]; /* Variable number of entries */ -} __packed; - -#endif diff --git a/src/commonlib/include/commonlib/tpm_log_serialized.h b/src/commonlib/include/commonlib/tpm_log_serialized.h new file mode 100644 index 00000000000..8372f94dab6 --- /dev/null +++ b/src/commonlib/include/commonlib/tpm_log_serialized.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef COMMONLIB_TPM_LOG_SERIALIZED_H +#define COMMONLIB_TPM_LOG_SERIALIZED_H + +#include +#include + +#define TPM_CB_LOG_DIGEST_MAX_LENGTH 64 +#define TPM_CB_LOG_PCR_HASH_NAME 50 +#define TPM_CB_LOG_PCR_HASH_LEN 10 + +struct tpm_cb_log_entry { + uint32_t pcr; + char digest_type[TPM_CB_LOG_PCR_HASH_LEN]; + uint8_t digest[TPM_CB_LOG_DIGEST_MAX_LENGTH]; + uint32_t digest_length; + char name[TPM_CB_LOG_PCR_HASH_NAME]; +} __packed; + +struct tpm_cb_log_table { + uint16_t max_entries; + uint16_t num_entries; + struct tpm_cb_log_entry entries[0]; /* Variable number of entries */ +} __packed; + +#endif diff --git a/src/cpu/power9/Kconfig.debug_cpu b/src/cpu/power9/Kconfig.debug_cpu new file mode 100644 index 00000000000..b3679f05026 --- /dev/null +++ b/src/cpu/power9/Kconfig.debug_cpu @@ -0,0 +1,10 @@ +config DEBUG_SCOM + bool "Print SCOM accesses" if DEFAULT_CONSOLE_LOGLEVEL_8 || CONSOLE_OVERRIDE_LOGLEVEL + default n + help + This option enables SCOM debug messages. + + Some accesses are time-critical or happen too often to be logged. + In such cases, '#define SKIP_SCOM_DEBUG' before first inclusion of + scom.h can be used to disable logging for a given file. Messages + are currently suppressed for CCS, MCBIST and I2C. 
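As the DEBUG_SCOM help text above describes, a file with time-critical SCOM paths can opt out of per-access logging by defining SKIP_SCOM_DEBUG before the first inclusion of scom.h. Below is a minimal sketch of that pattern, combined with the read_scom()/write_scom() accessors and the PPC_INSERT() helper added elsewhere in this change; the header paths and the register address are illustrative assumptions:

```
/* Time-critical code: suppress SCOM debug messages for this file only.
 * Must come before the first (possibly indirect) inclusion of scom.h. */
#define SKIP_SCOM_DEBUG
#include <cpu/power/scom.h>	/* assumed header path */
#include <arch/byteorder.h>	/* assumed path for PPC_INSERT() */

#define EXAMPLE_REG	0x000F001A	/* illustrative SCOM address only */

static void example_rmw(uint8_t chip)
{
	/* Read-modify-write of a SCOM register without log spam */
	uint64_t val = read_scom(chip, EXAMPLE_REG);

	PPC_INSERT(val, 0x5, 12, 4);	/* val[12:15] = 0x5 */
	write_scom(chip, EXAMPLE_REG, val);
}
```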
diff --git a/src/cpu/power9/Makefile.inc b/src/cpu/power9/Makefile.inc index 2fe9e57a96d..bcab6df03e7 100644 --- a/src/cpu/power9/Makefile.inc +++ b/src/cpu/power9/Makefile.inc @@ -1,6 +1,5 @@ ## SPDX-License-Identifier: GPL-2.0-or-later -ramstage-y += power9.c - -bootblock-y += scom.c romstage-y += scom.c + +ramstage-y += power9.c diff --git a/src/cpu/power9/scom.c b/src/cpu/power9/scom.c index e55d149bff3..06bc89da42b 100644 --- a/src/cpu/power9/scom.c +++ b/src/cpu/power9/scom.c @@ -1,135 +1,39 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Avoids defining read/write_scom as a macro */ +#define SKIP_SCOM_DEBUG + #include -#include // HMER #include -#define XSCOM_DATA_IND_READ PPC_BIT(0) -#define XSCOM_DATA_IND_COMPLETE PPC_BIT(32) -#define XSCOM_DATA_IND_ERR PPC_BITMASK(33, 35) -#define XSCOM_DATA_IND_DATA PPC_BITMASK(48, 63) -#define XSCOM_DATA_IND_FORM1_DATA PPC_BITMASK(12, 63) -#define XSCOM_IND_MAX_RETRIES 10 - -#define XSCOM_RCVED_STAT_REG 0x00090018 -#define XSCOM_LOG_REG 0x00090012 -#define XSCOM_ERR_REG 0x00090013 - -uint64_t read_scom_direct(uint64_t reg_address) -{ - uint64_t val; - uint64_t hmer = 0; - do { - /* - * Clearing HMER on every SCOM access seems to slow down CCS up - * to a point where it starts hitting timeout on "less ideal" - * DIMMs for write centering. Clear it only if this do...while - * executes more than once. - */ - if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) - clear_hmer(); +extern uint64_t read_xscom(uint8_t chip, uint64_t addr); +extern void write_xscom(uint8_t chip, uint64_t addr, uint64_t data); - eieio(); - asm volatile( - "ldcix %0, %1, %2" : - "=r"(val) : - "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR), - "r"(reg_address << 3)); - eieio(); - hmer = read_hmer(); - } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); +extern uint64_t read_sbe_scom(uint8_t chip, uint64_t addr); +extern void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data); - if (hmer & SPR_HMER_XSCOM_STATUS) { - reset_scom_engine(); - /* - * All F's are returned in case of error, but code polls for a set bit - * after changes that can make such error appear (e.g. clock settings). - * Return 0 so caller won't have to test for all F's in that case. - */ - return 0; - } - return val; -} +/* Start with SBEIO. Change this to FSI if needed. 
*/ +static uint64_t (*read_scom_secondary)(uint8_t, uint64_t) = read_sbe_scom; +static void (*write_scom_secondary)(uint8_t, uint64_t, uint64_t) = write_sbe_scom; -void write_scom_direct(uint64_t reg_address, uint64_t data) +void switch_secondary_scom_to_xscom(void) { - uint64_t hmer = 0; - do { - /* See comment in read_scom_direct() */ - if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) - clear_hmer(); - - eieio(); - asm volatile( - "stdcix %0, %1, %2":: - "r"(data), - "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR), - "r"(reg_address << 3)); - eieio(); - hmer = read_hmer(); - } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); - - if (hmer & SPR_HMER_XSCOM_STATUS) - reset_scom_engine(); + read_scom_secondary = read_xscom; + write_scom_secondary = write_xscom; } -void write_scom_indirect(uint64_t reg_address, uint64_t value) +uint64_t read_scom(uint8_t chip, uint64_t addr) { - uint64_t addr; - uint64_t data; - addr = reg_address & 0x7FFFFFFF; - data = reg_address & XSCOM_ADDR_IND_ADDR; - data |= value & XSCOM_ADDR_IND_DATA; - - write_scom_direct(addr, data); - - for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { - data = read_scom_direct(addr); - if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { - return; - } else if (data & XSCOM_DATA_IND_COMPLETE) { - printk(BIOS_EMERG, "SCOM WR error %16.16llx = %16.16llx : %16.16llx\n", - reg_address, value, data); - } - // TODO: delay? - } -} - -uint64_t read_scom_indirect(uint64_t reg_address) -{ - uint64_t addr; - uint64_t data; - addr = reg_address & 0x7FFFFFFF; - data = XSCOM_DATA_IND_READ | (reg_address & XSCOM_ADDR_IND_ADDR); - - write_scom_direct(addr, data); - - for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { - data = read_scom_direct(addr); - if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { - break; - } else if (data & XSCOM_DATA_IND_COMPLETE) { - printk(BIOS_EMERG, "SCOM RD error %16.16llx : %16.16llx\n", - reg_address, data); - } - // TODO: delay? - } - - return data & XSCOM_DATA_IND_DATA; + if (chip == 0) + return read_xscom(chip, addr); + else + return read_scom_secondary(chip, addr); } -/* This function should be rarely called, don't make it inlined */ -void reset_scom_engine(void) +void write_scom(uint8_t chip, uint64_t addr, uint64_t data) { - /* - * With cross-CPU SCOM accesses, first register should be cleared on the - * executing CPU, the other two on target CPU. In that case it may be - * necessary to do the remote writes in assembly directly to skip checking - * HMER and possibly end in a loop. 
- */ - write_scom_direct(XSCOM_RCVED_STAT_REG, 0); - write_scom_direct(XSCOM_LOG_REG, 0); - write_scom_direct(XSCOM_ERR_REG, 0); - clear_hmer(); - eieio(); + if (chip == 0) + write_xscom(chip, addr, data); + else + write_scom_secondary(chip, addr, data); } diff --git a/src/device/dram/Makefile.inc b/src/device/dram/Makefile.inc index b91b128f85c..04c25759d60 100644 --- a/src/device/dram/Makefile.inc +++ b/src/device/dram/Makefile.inc @@ -1,3 +1,3 @@ -romstage-y += lpddr4.c ddr4.c ddr3.c ddr2.c ddr_common.c +romstage-y += lpddr4.c ddr4.c ddr3.c ddr2.c ddr_common.c rcd.c ramstage-y += lpddr4.c ddr4.c ddr3.c ddr2.c ddr_common.c spd.c diff --git a/src/device/dram/ddr4.c b/src/device/dram/ddr4.c index a66ee86fd15..314bc6b7e42 100644 --- a/src/device/dram/ddr4.c +++ b/src/device/dram/ddr4.c @@ -248,8 +248,11 @@ int spd_decode_ddr4(struct dimm_attr_ddr4_st *dimm, spd_raw_data spd) dimm->vdd_voltage = 1200; /* calculate size */ + /* FIXME: this is wrong for 3DS devices */ dimm->size_mb = cap_per_die_mbit / 8 * bus_width / sdram_width * dimm->ranks; + dimm->ecc_extension = spd[13] & SPD_ECC_8BIT; + /* make sure we have the manufacturing information block */ if (spd_bytes_used > 320) { dimm->manufacturer_id = (spd[351] << 8) | spd[350]; @@ -325,3 +328,224 @@ enum cb_err spd_add_smbios17_ddr4(const u8 channel, const u8 slot, const u16 sel return CB_SUCCESS; } + +static mrs_cmd_t ddr4_wr_to_mr0_map(u8 wr) +{ + static const u16 enc[] = {0, 1, 2, 3, 4, 5, 7, 6, 8}; + int wr_idx = wr/2 - 5; + if (wr_idx < 0 || wr_idx >= ARRAY_SIZE(enc)) + die("WR out of bounds\n"); + + return enc[wr_idx] << 9; +} + +static mrs_cmd_t ddr4_cas_to_mr0_map(u8 cas) +{ + static const u16 enc[] = + { + /* V VVV V + *111111 + *5432109876543210 */ + 0b0000000000000000, /* CL = 9 */ + 0b0000000000000100, /* CL = 10 */ + 0b0000000000010000, /* CL = 11 */ + 0b0000000000010100, /* CL = 12 */ + 0b0000000000100000, /* CL = 13 */ + 0b0000000000100100, /* CL = 14 */ + 0b0000000000110000, /* CL = 15 */ + 0b0000000000110100, /* CL = 16 */ + 0b0000000001100100, /* CL = 17 */ + 0b0000000001000000, /* CL = 18 */ + 0b0000000001110000, /* CL = 19 */ + 0b0000000001000100, /* CL = 20 */ + 0b0000000001110100, /* CL = 21 */ + 0b0000000001010000, /* CL = 22 */ + 0b0000000001100000, /* CL = 23 */ + 0b0000000001010100, /* CL = 24 */ + 0b0001000000000000, /* CL = 25 */ + 0b0001000000000100, /* CL = 26 */ + 0b0001000000010000, /* CL = 27 (only 3DS) */ + 0b0001000000010100, /* CL = 28 */ + 0b0001000000100000, /* reserved for CL = 29 */ + 0b0001000000100100, /* CL = 30 */ + 0b0001000000110000, /* reserved for CL = 31 */ + 0b0001000000110100, /* CL = 32 */ + }; + + int cas_idx = cas - 9; + if (cas_idx < 0 || cas_idx >= ARRAY_SIZE(enc)) + die("CL out of bounds\n"); + + return enc[cas_idx]; +} + +mrs_cmd_t ddr4_get_mr0(u8 write_recovery, + enum ddr4_mr0_dll_reset dll_reset, + enum ddr4_mr0_mode mode, + u8 cas, + enum ddr4_mr0_burst_type burst_type, + enum ddr4_mr0_burst_length burst_length) +{ + mrs_cmd_t cmd = 0 << 20; + + cmd |= ddr4_wr_to_mr0_map(write_recovery); + cmd |= dll_reset << 8; + cmd |= mode << 7; + cmd |= ddr4_cas_to_mr0_map(cas); + cmd |= burst_type << 3; + cmd |= burst_length << 0; + + return cmd; +} + +mrs_cmd_t ddr4_get_mr1(enum ddr4_mr1_qoff qoff, + enum ddr4_mr1_tqds tqds, + enum ddr4_mr1_rtt_nom rtt_nom, + enum ddr4_mr1_write_leveling write_leveling, + enum ddr4_mr1_odimp output_drive_impedance, + enum ddr4_mr1_additive_latency additive_latency, + enum ddr4_mr1_dll dll_enable) +{ + mrs_cmd_t cmd = 1 << 20; + + cmd |= qoff << 12; + cmd |= 
tqds << 11; + cmd |= rtt_nom << 8; + cmd |= write_leveling << 7; + cmd |= output_drive_impedance << 1; + cmd |= additive_latency << 3; + cmd |= dll_enable << 0; + + return cmd; +} + +static mrs_cmd_t ddr4_cwl_to_mr2_map(u8 cwl) +{ + /* Encoding is (starting with 0): 9, 10, 11, 12, 14, 16, 18, 20 */ + if (cwl < 14) { + cwl -= 9; + } else { + cwl = (cwl - 14) / 2 + 4; + } + + return cwl << 3; +} + +mrs_cmd_t ddr4_get_mr2(enum ddr4_mr2_wr_crc wr_crc, + enum ddr4_mr2_rtt_wr rtt_wr, + enum ddr4_mr2_lp_asr self_refresh, u8 cwl) +{ + mrs_cmd_t cmd = 2 << 20; + + cmd |= wr_crc << 12; + cmd |= rtt_wr << 9; + cmd |= self_refresh << 6; + cmd |= ddr4_cwl_to_mr2_map(cwl); + + return cmd; +} + +mrs_cmd_t ddr4_get_mr3(enum ddr4_mr3_mpr_read_format mpr_read_format, + enum ddr4_mr3_wr_cmd_lat_crc_dm command_latency_crc_dm, + enum ddr4_mr3_fine_gran_ref fine_refresh, + enum ddr4_mr3_temp_sensor_readout temp_sensor, + enum ddr4_mr3_pda pda, + enum ddr4_mr3_geardown_mode geardown, + enum ddr4_mr3_mpr_operation mpr_operation, + u8 mpr_page) +{ + mrs_cmd_t cmd = 3 << 20; + + cmd |= mpr_read_format << 11; + cmd |= command_latency_crc_dm << 9; + cmd |= fine_refresh << 6; + cmd |= temp_sensor << 5; + cmd |= pda << 4; + cmd |= geardown << 3; + cmd |= mpr_operation << 2; + cmd |= (mpr_page & 3) << 0; + + return cmd; +} + +mrs_cmd_t ddr4_get_mr4(enum ddr4_mr4_hppr hppr, + enum ddr4_mr4_wr_preamble wr_preamble, + enum ddr4_mr4_rd_preamble rd_preamble, + enum ddr4_mr4_rd_preamble_training rd_preamble_train, + enum ddr4_mr4_self_refr_abort self_ref_abrt, + enum ddr4_mr4_cs_to_cmd_latency cs2cmd_lat, + enum ddr4_mr4_sppr sppr, + enum ddr4_mr4_internal_vref_mon int_vref_mon, + enum ddr4_mr4_temp_controlled_refr temp_ctrl_ref, + enum ddr4_mr4_max_pd_mode max_pd) +{ + mrs_cmd_t cmd = 4 << 20; + + cmd |= hppr << 13; + cmd |= wr_preamble << 12; + cmd |= rd_preamble << 11; + cmd |= rd_preamble_train << 10; + cmd |= self_ref_abrt << 9; + cmd |= cs2cmd_lat << 6; + cmd |= sppr << 5; + cmd |= int_vref_mon << 4; + cmd |= temp_ctrl_ref << 2; + cmd |= max_pd << 1; + + return cmd; +} + +mrs_cmd_t ddr4_get_mr5(enum ddr4_mr5_rd_dbi rd_dbi, + enum ddr4_mr5_wr_dbi wr_dbi, + enum ddr4_mr5_data_mask dm, + enum ddr4_mr5_rtt_park rtt_park, + enum ddr4_mr5_odt_pd odt_pd, + enum ddr4_mr5_ca_parity_lat pl) +{ + mrs_cmd_t cmd = 5 << 20; + + cmd |= rd_dbi << 12; + cmd |= wr_dbi << 11; + cmd |= dm << 10; + cmd |= rtt_park << 6; + cmd |= odt_pd << 5; + cmd |= pl << 0; + + return cmd; +} + +static mrs_cmd_t ddr4_tccd_l_to_mr6_map(u8 tccd_l) +{ + if (tccd_l < 4 || tccd_l > 8) + die("tCCD_l out of range\n"); + + return (tccd_l - 4) << 10; +} + +mrs_cmd_t ddr4_get_mr6(u8 tccd_l, + enum ddr4_mr6_vrefdq_training vrefdq_training, + enum ddr4_mr6_vrefdq_training_range range, + u8 vrefdq_value) +{ + mrs_cmd_t cmd = 6 << 20; + + cmd |= ddr4_tccd_l_to_mr6_map(tccd_l); + cmd |= vrefdq_training << 7; + cmd |= range << 6; + cmd |= vrefdq_value & 0x3F; + + return cmd; +} + +/* + * ZQCL: A16 = H, A15 = H, A14 = L, A10 = H, rest either L or H + * ZQCS: A16 = H, A15 = H, A14 = L, A10 = L, rest either L or H + */ +mrs_cmd_t ddr4_get_zqcal_cmd(enum ddr4_zqcal_ls long_short) +{ + mrs_cmd_t cmd = 1 << 16 | 1 << 15; + + cmd |= long_short << 10; + + return cmd; +} diff --git a/src/device/dram/rcd.c b/src/device/dram/rcd.c new file mode 100644 index 00000000000..588bed79067 --- /dev/null +++ b/src/device/dram/rcd.c @@ -0,0 +1,214 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +/** + * + * Registering Clock Driver (RCD) is 
responsible for driving address and control
+ * nets on RDIMM and LRDIMM applications. Its operation is configurable by a set
+ * of Register Control Words (RCWs). There are two ways of accessing RCWs:
+ * in-band on the memory channel as MRS commands ("MR7") or through I2C.
+ *
+ * From JESD82-31: "For changes to the control word setting, (...) the
+ * controller needs to wait tMRD after _the last control word access_, before
+ * further access _to the DRAM_ can take place". MRS is passed to rank 0 of the
+ * DRAM, but MR7 is reserved so it is ignored by the DRAM. tMRD (8nCK) applies
+ * here, unless a longer delay is needed for RCWs which control the clock timing
+ * (see JESD82-31 for a list of those). This makes sense from the DRAM's point
+ * of view; however, we are talking to the Registering Clock Driver (RCD), not
+ * the DRAM. From the parts marked in the sentence above one may assume that
+ * only one delay at the end is necessary and RCWs can be written back to back;
+ * however, in the same document in table 141 tMRD is defined as "Number of
+ * clock cycles between two control word accesses, MRS accesses, or any DRAM
+ * commands".
+ *
+ * I2C access to RCWs is required to support byte (8b), word (16b) and double
+ * word (32b) write sizes. Bigger blocks are not required. Reads must always be
+ * 32b, 32b-aligned blocks, even when reading just one RCW. The RCD ignores the
+ * two lowest bits, so unaligned accesses would return shifted values. RCWs are
+ * tightly packed in I2C space, so it is not possible to write just one 4b RCW
+ * without writing its neighbor. This is especially important for F0RC06, the
+ * Command Space Control Word, as it is able to reset the state of the RCD. For
+ * this reason, the mentioned register has a NOP command (all 1's). JESD82-31
+ * does not specify timeouts required for such multi-RCW writes, or any other
+ * writes. These are not MRS accesses, so it would be strange to apply those
+ * timeouts. Perhaps only the registers that actually change the clock settings
+ * require time to stabilize. On the other hand, I2C is relatively slow, so it
+ * is possible that the write itself is long enough.
+ *
+ * The RCD I2C address is 0xBx (or 0x58 + DIMM number, depending on convention);
+ * it is located on the same bus as the SPD. It uses a bus command encoding, see
+ * section 3.3 in JESD82-31 for a description of reading and writing register
+ * values.
+ *
+ * This file includes only functions for access through I2C - it is generic,
+ * while MRS commands are passed to memory controller registers in an
+ * implementation-specific way.
+ */
+
+#define RCD_CMD_BEGIN		0x80
+#define RCD_CMD_END		0x40
+#define RCD_CMD_PEC		0x10
+#define RCD_CMD_RD_DWORD	0x00
+#define RCD_CMD_WR_BYTE		0x04
+#define RCD_CMD_WR_WORD		0x08
+#define RCD_CMD_WR_DWORD	0x0C
+#define RCD_CMD_BUS_BYTE	0x00
+#define RCD_CMD_BUS_BLOCK	0x02
+
+/* Shorthand for block transfers */
+#define RCD_CMD_BLOCK		(RCD_CMD_BEGIN | RCD_CMD_END | RCD_CMD_BUS_BLOCK)
+
+/* Excluding size of data */
+#define RCD_CMD_BYTES		4
+
+/* Use byte fields to get rid of endianness issues.
*/ +struct rcd_i2c_cmd { + uint8_t cmd; + uint8_t bytes; /* From next byte up to PEC (excluding) */ + uint8_t reserved; + uint8_t devfun; + uint8_t reg_h; + uint8_t reg_l; + union { /* Not used for reads, can use 1, 2 or 4 for writes */ + uint8_t bdata[4]; + uint16_t wdata[2]; + uint32_t ddata; + }; + /* Optional PEC */ +} __packed; + +#define RCD_STS_SUCCESS 0x01 +#define RCD_STS_INTERNAL_TARGET_ABORT 0x10 + +/* Always 4 bytes data + status (for block commands) */ +#define RCD_RSP_BYTES 5 + +struct rcd_i2c_rsp { + uint8_t bytes; /* From next byte up to PEC (excluding) */ + uint8_t status; + union { + uint8_t bdata[4]; + uint32_t ddata; + }; + /* Optional PEC */ +} __packed; + +static inline int rcd_readd(unsigned int bus, uint8_t slave, uint8_t reg, + uint32_t *data) +{ + struct i2c_msg seg[2]; + struct rcd_i2c_cmd cmd = { + .cmd = RCD_CMD_BLOCK | RCD_CMD_RD_DWORD, + .bytes = RCD_CMD_BYTES, + .reg_l = reg + }; + struct rcd_i2c_rsp rsp = { 0xaa, 0x55 }; + + seg[0].flags = 0; + seg[0].slave = slave; + seg[0].buf = (uint8_t *)&cmd; + seg[0].len = cmd.bytes + 2; /* + .cmd and .bytes fields */ + + i2c_transfer(bus, seg, 1); + + seg[0].len = 1; /* Send just the command again */ + seg[1].flags = I2C_M_RD; + seg[1].slave = slave; + seg[1].buf = (uint8_t *)&rsp; + seg[1].len = RCD_RSP_BYTES + 1; /* + .bytes field */ + + i2c_transfer(bus, seg, ARRAY_SIZE(seg)); + + /* Data is sent MSB to LSB, i.e. higher registers to lower, reverse it. */ + *data = swab32(rsp.ddata); + + return rsp.status == RCD_STS_SUCCESS; +} + +static inline int rcd_writed(unsigned int bus, uint8_t slave, uint8_t reg, + uint32_t data) +{ + struct i2c_msg seg; + struct rcd_i2c_cmd cmd = { + .cmd = RCD_CMD_BLOCK | RCD_CMD_WR_DWORD, + .bytes = RCD_CMD_BYTES + sizeof(data), + .reg_l = reg, + .ddata = swab32(data) + }; + + seg.flags = 0; + seg.slave = slave; + seg.buf = (uint8_t *)&cmd; + seg.len = cmd.bytes + 2; /* + .cmd and .bytes fields */ + + return i2c_transfer(bus, &seg, 1); +} + +static inline int rcd_writeb(unsigned int bus, uint8_t slave, uint8_t reg, + uint8_t data) +{ + struct i2c_msg seg; + struct rcd_i2c_cmd cmd = { + .cmd = RCD_CMD_BLOCK | RCD_CMD_WR_BYTE, + .bytes = RCD_CMD_BYTES + sizeof(data), + .reg_l = reg, + .bdata[0] = data + }; + + seg.flags = 0; + seg.slave = slave; + seg.buf = (uint8_t *)&cmd; + seg.len = cmd.bytes + 2; /* + .cmd and .bytes fields */ + + return i2c_transfer(bus, &seg, 1); +} + +int rcd_write_reg(unsigned int bus, uint8_t slave, enum rcw_idx reg, + uint8_t data) +{ + if (reg < F0RC00_01 || reg > F0RCFx) { + printk(BIOS_ERR, "Trying to write to illegal RCW %#2.2x\n", + reg); + return 0; + } + + return rcd_writeb(bus, slave, reg, data); +} + +int rcd_write_32b(unsigned int bus, uint8_t slave, enum rcw_idx reg, + uint32_t data) +{ + if (reg < F0RC00_01 || reg > F0RCFx) { + printk(BIOS_ERR, "Trying to write to illegal RCW %#2.2x\n", + reg); + return 0; + } + + if (reg & 3) { + /* + * RCD would silently mask out the lowest bits, assume that this + * is not what caller wanted. 
+ */ + printk(BIOS_ERR, "Unaligned RCW %#2.2x, aborting\n", reg); + return 0; + } + + return rcd_writed(bus, slave, reg, data); +} + +void dump_rcd(unsigned int bus, u8 addr) +{ + /* Can only read in 32b chunks */ + uint8_t buf[RCW_ALL_ALIGNED]; + int i; + + for (i = 0; i < RCW_ALL_ALIGNED; i += sizeof(uint32_t)) { + rcd_readd(bus, addr, i, (uint32_t *) &buf[i]); + } + + printk(BIOS_DEBUG, "RCD dump for I2C address %#2.2x:\n", addr); + hexdump(buf, sizeof(buf)); +} diff --git a/src/drivers/i2c/tpm/tpm.c b/src/drivers/i2c/tpm/tpm.c index 840b947ee74..f325687b151 100644 --- a/src/drivers/i2c/tpm/tpm.c +++ b/src/drivers/i2c/tpm/tpm.c @@ -41,8 +41,8 @@ #define MAX_COUNT_LONG 50 /* expected value for DIDVID register */ -#define TPM_TIS_I2C_DID_VID_9635 0x000b15d1L -#define TPM_TIS_I2C_DID_VID_9645 0x001a15d1L +#define TPM_TIS_I2C_DID_VID_9635 be32_to_cpu(0xd1150b00L) +#define TPM_TIS_I2C_DID_VID_9645 be32_to_cpu(0xd1151a00L) enum i2c_chip_type { SLB9635, @@ -527,7 +527,7 @@ int tpm_vendor_init(struct tpm_chip *chip, unsigned int bus, uint32_t dev_addr) if (vendor == TPM_TIS_I2C_DID_VID_9645) { tpm_dev.chip_type = SLB9645; - } else if (be32_to_cpu(vendor) == TPM_TIS_I2C_DID_VID_9635) { + } else if (vendor == TPM_TIS_I2C_DID_VID_9635) { tpm_dev.chip_type = SLB9635; } else { printk(BIOS_DEBUG, "Vendor ID 0x%08x not recognized.\n", diff --git a/src/drivers/ipmi/Kconfig b/src/drivers/ipmi/Kconfig index 012f67887c6..ef19ca43422 100644 --- a/src/drivers/ipmi/Kconfig +++ b/src/drivers/ipmi/Kconfig @@ -53,3 +53,31 @@ config DRIVERS_IPMI_SUPERMICRO_OEM The following features are implemented: * Communicates the BIOS version to the BMC * Communicates the BIOS date to the BMC + +config IPMI_BT + bool + default n + depends on !IPMI_KCS + +config IPMI_BT_ROMSTAGE + bool + default n + depends on IPMI_BT + help + IPMI BT support in romstage. + +config BMC_BT_BASE + hex + default 0xe4 + depends on IPMI_BT + help + The PNP base address of BMC BT. It must be equal to the + pnp port value defined in devicetree for chip drivers/ipmi. + +config IPMI_BT_TIMEOUT_MS + int + default 5000 + depends on IPMI_BT + help + The time unit is millisecond for each IPMI BT transfer. + The default is the same as for KCS as the implementation uses polling. 
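With the options above selected and the PNP device declared in the devicetree, higher-level code talks to the BMC through the generic ipmi_message() entry point implemented by this BT backend. Below is a hedged sketch of a Get Device ID query at the Kconfig-selected base port; the include paths are assumptions and error handling is minimal:

```
#include <console/console.h>
#include <types.h>
#include "ipmi_if.h"	/* assumed location of ipmi_message() and IPMI macros */
#include "ipmi_ops.h"	/* assumed location of struct ipmi_devid_rsp */

static void bt_probe_bmc(void)
{
	struct ipmi_devid_rsp rsp;
	int ret;

	/* Netfn/cmd pair as used by ipmi_get_device_id() later in this change */
	ret = ipmi_message(CONFIG_BMC_BT_BASE, IPMI_NETFN_APPLICATION, 0,
			   IPMI_BMC_GET_DEVICE_ID, NULL, 0,
			   (uint8_t *)&rsp, sizeof(rsp));

	if (ret < (int)sizeof(struct ipmi_rsp) || rsp.resp.completion_code)
		printk(BIOS_ERR, "BT: Get Device ID failed (ret=%d)\n", ret);
	else
		printk(BIOS_INFO, "BT: IPMI version %d.%d\n",
		       IPMI_IPMI_VERSION_MAJOR(rsp.ipmi_version),
		       IPMI_IPMI_VERSION_MINOR(rsp.ipmi_version));
}
```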
diff --git a/src/drivers/ipmi/Makefile.inc b/src/drivers/ipmi/Makefile.inc index e4bcf313ebc..83859c6c613 100644 --- a/src/drivers/ipmi/Makefile.inc +++ b/src/drivers/ipmi/Makefile.inc @@ -1,8 +1,20 @@ +ramstage-$(CONFIG_IPMI_KCS) += ipmi_if.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_kcs.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_kcs_ops.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_ops.c ramstage-$(CONFIG_IPMI_KCS) += ipmi_fru.c ramstage-$(CONFIG_DRIVERS_IPMI_SUPERMICRO_OEM) += supermicro_oem.c -romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_kcs_ops_premem.c +romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_if.c +romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops_premem.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_kcs.c romstage-$(CONFIG_IPMI_KCS_ROMSTAGE) += ipmi_ops.c + +ramstage-$(CONFIG_IPMI_BT) += ipmi_if.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_bt.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_bt_ops.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_ops.c +ramstage-$(CONFIG_IPMI_BT) += ipmi_fru.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_if.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_ops_premem.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_bt.c +romstage-$(CONFIG_IPMI_BT_ROMSTAGE) += ipmi_ops.c diff --git a/src/drivers/ipmi/chip.h b/src/drivers/ipmi/chip.h index 4e9d9e1985b..3b970c9fa40 100644 --- a/src/drivers/ipmi/chip.h +++ b/src/drivers/ipmi/chip.h @@ -8,6 +8,7 @@ #include struct drivers_ipmi_config { +#if CONFIG(IPMI_KCS) u8 bmc_i2c_address; u8 have_nv_storage; u8 nv_storage_device_address; @@ -25,6 +26,9 @@ struct drivers_ipmi_config { /* "POST complete" GPIO and polarity */ u32 post_complete_gpio; bool post_complete_invert; + unsigned int uid; /* Auto-filled by ipmi_ssdt() */ +#endif + /* * Wait for BMC to boot. * This can be used if the BMC takes a long time to boot after PoR: @@ -36,7 +40,6 @@ struct drivers_ipmi_config { * Will be used if wait_for_bmc is true. */ u16 bmc_boot_timeout; - unsigned int uid; /* Auto-filled by ipmi_ssdt() */ }; #endif /* _IMPI_CHIP_H_ */ diff --git a/src/drivers/ipmi/ipmi_bt.c b/src/drivers/ipmi/ipmi_bt.c new file mode 100644 index 00000000000..0fefcef0db0 --- /dev/null +++ b/src/drivers/ipmi/ipmi_bt.c @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * IPMI specification: + * https://www.intel.com/content/www/us/en/servers/ipmi/ipmi-intelligent-platform-mgt-interface-spec-2nd-gen-v2-0-spec-update.html + * + * LUN seems to be always zero. + */ + +#include "ipmi_bt.h" + +#include +#include +#include +#include +#include +#include + +#include "ipmi_if.h" + +#define MAX_SIZE 255 +#define MAX_PAYLOAD_SIZE (MAX_SIZE - 4) + +#define BT_CTRL_INC 0 // Typical address of BT_CTRL is 0xE4 +#define HOST2BMC_INC 1 // Typical address of HOST2BMC is 0xE5 +#define BMC2HOST_INC 1 // Typical address of BMC2HOST is 0xE5 + +/* Bits of BT_CTRL */ +#define B_BUSY (1 << 7) +#define H_BUSY (1 << 6) +#define OEM0 (1 << 5) +#define EVT_ATN (1 << 4) +#define B2H_ATN (1 << 3) +#define H2B_ATN (1 << 2) +#define CLR_RD_PTR (1 << 1) +#define CLR_WR_PTR (1 << 0) + +static int wait_for_control_bit(uint16_t port, uint8_t bit, int set) +{ + uint16_t bt_ctrl_port = port + BT_CTRL_INC; + if (!wait_ms(CONFIG_IPMI_BT_TIMEOUT_MS, ((inb(bt_ctrl_port) & bit) != 0) == set)) { + printk(BIOS_ERR, "%s(0x%04x, 0x%02x, %d) timeout!\n", + __func__, port, bit, set); + return 1; + } + + return 0; +} + +int ipmi_bt_clear(uint16_t port) +{ + uint8_t bt_ctrl; + + /* + * First, set H_BUSY (if not set already) so BMC won't try to write new + * commands while we're resetting pointers. 
+ */ + if ((inb(port + BT_CTRL_INC) & H_BUSY) == 0) + outb(H_BUSY, port + BT_CTRL_INC); + + /* If BMC is already in the process of writing, wait until it's done */ + if (wait_for_control_bit(port, B_BUSY, 0)) + return 1; + + bt_ctrl = inb(port + BT_CTRL_INC); + + printk(BIOS_SPEW, "BT_CTRL = %#2.2x\n", bt_ctrl); + + /* + * Clear all bits which are already set (they are either toggle bits or + * write-1-to-clear) and reset buffer pointers. This also clears H_BUSY. + */ + outb(bt_ctrl | CLR_RD_PTR | CLR_WR_PTR, port + BT_CTRL_INC); + + return 0; +} + +static int ipmi_bt_send(uint16_t port, uint8_t addr, uint8_t cmd, + const uint8_t *payload, uint8_t payload_len, + uint8_t seq_num) +{ + uint16_t i; + uint16_t len; + uint8_t buf[MAX_SIZE]; + + len = 3 + payload_len; + + buf[0] = len; + buf[1] = addr; + buf[2] = seq_num; + buf[3] = cmd; + memcpy(&buf[4], payload, payload_len); + + /* Wait for BMC to be available */ + if (wait_for_control_bit(port, B_BUSY, 0)) + return 1; + + /* Clear write pointer */ + outb(CLR_WR_PTR, port + BT_CTRL_INC); + + /* Send our message */ + for (i = 0; i < len + 1; ++i) + outb(buf[i], port + HOST2BMC_INC); + + /* Tell BMC to process the data */ + outb(H2B_ATN, port + BT_CTRL_INC); + + return 0; +} + +static int ipmi_bt_recv(uint16_t port, uint8_t addr, uint8_t cmd, + uint8_t *response, uint8_t response_len, + uint8_t seq_num) +{ + uint16_t i; + uint16_t len; + uint8_t buf[MAX_SIZE]; + + /* Wait for BMC's response */ + if (wait_for_control_bit(port, B2H_ATN, 1)) + return -1; + + /* Tell BMC that host is busy */ + outb(H_BUSY, port + BT_CTRL_INC); + + /* Acknowledge that response is being processed */ + outb(B2H_ATN, port + BT_CTRL_INC); + + /* Clear read pointer */ + outb(CLR_RD_PTR, port + BT_CTRL_INC); + + /* Receive response */ + len = inb(port + BMC2HOST_INC); + for (i = 0; i < len; ++i) + buf[i] = inb(port + BMC2HOST_INC); + + /* Indicate that the host is done working with the buffer */ + outb(H_BUSY, port + BT_CTRL_INC); + + if (buf[0] != addr) { + printk(BIOS_ERR, + "Invalid NETFN/LUN field in IPMI BT response: 0x%02x instead of 0x%02x\n", + buf[0], addr); + goto error; + } + if (buf[1] != seq_num) { + printk(BIOS_ERR, + "Invalid SEQ field in IPMI BT response: 0x%02x instead of 0x%02x\n", + buf[1], seq_num); + goto error; + } + if (buf[2] != cmd) { + printk(BIOS_ERR, + "Invalid CMD field in IPMI BT response: 0x%02x instead of 0x%02x\n", + buf[2], cmd); + goto error; + } + + if (response_len < len) + len = response_len; + + /* + * Copy response skipping sequence number to match KCS messages. + * Sequence number is really an implementation detail anyway. 
+	 */
+	if (response_len != 0)
+		response[0] = buf[0];
+	memcpy(&response[1], &buf[2], len - 1);
+
+	return len;
+
+error:
+	printk(BIOS_ERR, "  IPMI response length field: 0x%02x\n", len);
+	printk(BIOS_ERR, "  IPMI netfn/lun: 0x%02x\n", addr);
+	printk(BIOS_ERR, "  IPMI SEQ: 0x%02x\n", seq_num);
+	printk(BIOS_ERR, "  IPMI command: 0x%02x\n", cmd);
+	return -1;
+}
+
+int ipmi_message(int port, int netfn, int lun, int cmd,
+		 const uint8_t *payload, int payload_len,
+		 uint8_t *response, int response_len)
+{
+	static uint8_t seq_num = 0xff;
+
+	uint8_t addr;
+
+	assert(payload_len >= 0 && payload_len < MAX_PAYLOAD_SIZE);
+	assert(netfn >= 0 && netfn <= 0x3f);
+	assert(lun >= 0 && lun <= 0x3);
+
+	addr = (netfn << 2) | (lun & 0x3);
+	if (ipmi_bt_send(port, addr, cmd, payload, payload_len, ++seq_num)) {
+		printk(BIOS_ERR, "Failed to send IPMI BT command 0x%02x\n", cmd);
+		return -1;
+	}
+
+	addr = ((netfn + 1) << 2) | (lun & 0x3);
+	return ipmi_bt_recv(port, addr, cmd, response, response_len, seq_num);
+}
diff --git a/src/drivers/ipmi/ipmi_bt.h b/src/drivers/ipmi/ipmi_bt.h
new file mode 100644
index 00000000000..afd4adaf894
--- /dev/null
+++ b/src/drivers/ipmi/ipmi_bt.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IPMI_BT_H
+#define __IPMI_BT_H
+
+#include <types.h>
+
+/* Drops events from BMC and resets state of the BT interface, returns zero on success. */
+int ipmi_bt_clear(uint16_t port);
+
+#endif /* __IPMI_BT_H */
diff --git a/src/drivers/ipmi/ipmi_bt_ops.c b/src/drivers/ipmi/ipmi_bt_ops.c
new file mode 100644
index 00000000000..669ca8cff82
--- /dev/null
+++ b/src/drivers/ipmi/ipmi_bt_ops.c
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Place in devicetree.cb:
+ *
+ * chip drivers/ipmi
+ *	device pnp e4.0 on end # IPMI BT
+ * end
+ */
+
+#include <console/console.h>
+#include <device/device.h>
+#include <string.h>
+
+#include "ipmi_if.h"
+#include "ipmi_bt.h"
+
+static void ipmi_bt_init(struct device *dev)
+{
+	struct ipmi_devid_rsp rsp;
+	struct drivers_ipmi_config *conf = dev->chip_info;
+
+	if (!conf) {
+		printk(BIOS_WARNING, "IPMI: chip_info is missing! Skip init.\n");
+		return;
+	}
+
+	printk(BIOS_DEBUG, "IPMI: PNP BT 0x%x\n", dev->path.pnp.port);
+
+	if (ipmi_process_self_test_result(dev))
+		dev->enabled = 0;
+
+	if (!ipmi_get_device_id(dev, &rsp)) {
+		uint32_t man_id = 0;
+		uint32_t prod_id = 0;
+
+		/* 4 bit encoding */
+		u8 ipmi_revision_minor = IPMI_IPMI_VERSION_MINOR(rsp.ipmi_version);
+		u8 ipmi_revision_major = IPMI_IPMI_VERSION_MAJOR(rsp.ipmi_version);
+
+		memcpy(&man_id, rsp.manufacturer_id, sizeof(rsp.manufacturer_id));
+
+		memcpy(&prod_id, rsp.product_id, sizeof(rsp.product_id));
+
+		printk(BIOS_INFO, "IPMI: Found man_id 0x%06x, prod_id 0x%04x\n",
+		       man_id, prod_id);
+
+		printk(BIOS_INFO, "IPMI: Version %01x.%01x\n",
+		       ipmi_revision_major, ipmi_revision_minor);
+	} else {
+		dev->enabled = 0;
+	}
+
+	if (ipmi_bt_clear(dev->path.pnp.port))
+		dev->enabled = 0;
+}
+
+static void ipmi_set_resources(struct device *dev)
+{
+	struct resource *res;
+
+	for (res = dev->resource_list; res; res = res->next) {
+		if (!(res->flags & IORESOURCE_ASSIGNED))
+			continue;
+
+		res->flags |= IORESOURCE_STORED;
+		report_resource_stored(dev, res, "");
+	}
+}
+
+static void ipmi_read_resources(struct device *dev)
+{
+	struct resource *res = new_resource(dev, 0);
+	res->base = dev->path.pnp.port;
+	res->size = 3;
+	res->flags = IORESOURCE_IO | IORESOURCE_ASSIGNED | IORESOURCE_FIXED;
+}
+
+static struct device_operations ops = {
+	.read_resources = ipmi_read_resources,
+	.set_resources = ipmi_set_resources,
+	.init = ipmi_bt_init,
+};
+
+static void enable_dev(struct device *dev)
+{
+	if (dev->path.type != DEVICE_PATH_PNP)
+		printk(BIOS_ERR, "%s: Unsupported device type\n",
+		       dev_path(dev));
+	else if (dev->path.pnp.port & 3)
+		printk(BIOS_ERR, "%s: Base address needs to be aligned to 4\n",
+		       dev_path(dev));
+	else
+		dev->ops = &ops;
+}
+
+struct chip_operations drivers_ipmi_ops = {
+	CHIP_NAME("IPMI BT")
+	.enable_dev = enable_dev,
+};
diff --git a/src/drivers/ipmi/ipmi_fru.c b/src/drivers/ipmi/ipmi_fru.c
index 822e5bfa23c..f16530705fe 100644
--- a/src/drivers/ipmi/ipmi_fru.c
+++ b/src/drivers/ipmi/ipmi_fru.c
@@ -5,6 +5,7 @@
 #include
 #include
 
+#include "ipmi_if.h"
 #include "ipmi_ops.h"
 
 #define MAX_FRU_BUSY_RETRY 5
@@ -34,7 +35,7 @@ static enum cb_err ipmi_read_fru(const int port, struct ipmi_read_fru_data_req *
 	req->count = CONFIG_IPMI_FRU_SINGLE_RW_SZ;
 
 	while (retry_count <= MAX_FRU_BUSY_RETRY) {
-		ret = ipmi_kcs_message(port, IPMI_NETFN_STORAGE, 0x0,
+		ret = ipmi_message(port, IPMI_NETFN_STORAGE, 0x0,
 				IPMI_READ_FRU_DATA, (const unsigned char *) req,
 				sizeof(*req), (unsigned char *) &rsp, sizeof(rsp));
 		if (rsp.resp.completion_code == 0x81) {
diff --git a/src/drivers/ipmi/ipmi_if.c b/src/drivers/ipmi/ipmi_if.c
new file mode 100644
index 00000000000..4ff90047417
--- /dev/null
+++ b/src/drivers/ipmi/ipmi_if.c
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include "ipmi_if.h"
+
+#include <console/console.h>
+#include <delay.h>
+
+#include "chip.h"
+
+int ipmi_get_device_id(const struct device *dev, struct ipmi_devid_rsp *rsp)
+{
+	int ret;
+
+	ret = ipmi_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0,
+			   IPMI_BMC_GET_DEVICE_ID, NULL, 0, (u8 *)rsp,
+			   sizeof(*rsp));
+	if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) {
+		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n",
+		       __func__, ret, rsp->resp.completion_code);
+		return 1;
+	}
+	if (ret != sizeof(*rsp)) {
+		printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+static int ipmi_get_bmc_self_test_result(const struct device *dev,
+					 struct ipmi_selftest_rsp *rsp)
+{
+	int ret;
+
+	ret = ipmi_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0,
+			   IPMI_BMC_GET_SELFTEST_RESULTS, NULL, 0, (u8 *)rsp,
+			   sizeof(*rsp));
+
+	if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) {
+		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n",
+		       __func__, ret, rsp->resp.completion_code);
+		return 1;
+	}
+	if (ret != sizeof(*rsp)) {
+		printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__);
+		return 1;
+	}
+
+	return 0;
+}
+
+int ipmi_process_self_test_result(const struct device *dev)
+{
+	int failure = 0;
+	uint8_t retry_count = 0;
+	struct ipmi_selftest_rsp selftestrsp = {0};
+
+	const struct drivers_ipmi_config *conf = dev->chip_info;
+	uint8_t retry_limit = 0;
+
+	if (conf && conf->wait_for_bmc)
+		retry_limit = conf->bmc_boot_timeout;
+
+	if (retry_limit == 0)
+		/* Try to get self-test results at least once */
+		retry_limit = 1;
+
+	printk(BIOS_INFO, "Get BMC self test result...");
+	for (retry_count = 0; retry_count < retry_limit; retry_count++) {
+		if (!ipmi_get_bmc_self_test_result(dev, &selftestrsp))
+			break;
+
+		mdelay(1000);
+	}
+
+	switch (selftestrsp.result) {
+	case IPMI_APP_SELFTEST_NO_ERROR: /* 0x55 */
+		printk(BIOS_DEBUG, "No Error\n");
+		break;
+	case IPMI_APP_SELFTEST_NOT_IMPLEMENTED: /* 0x56 */
+		printk(BIOS_DEBUG, "Function Not Implemented\n");
+		break;
+	case IPMI_APP_SELFTEST_ERROR: /* 0x57 */
+		printk(BIOS_ERR, "BMC: Corrupted or inaccessible data or device\n");
+		failure = 1;
+		break;
+	case IPMI_APP_SELFTEST_FATAL_HW_ERROR: /* 0x58 */
+		printk(BIOS_ERR, "BMC: Fatal Hardware Error\n");
+		failure = 1;
+		break;
+	case IPMI_APP_SELFTEST_RESERVED: /* 0xFF */
+		printk(BIOS_DEBUG, "Reserved\n");
+		break;
+
+	default: /* Other Device Specific Hardware Error */
+		printk(BIOS_ERR, "BMC: Device Specific Error: 0x%02x\n", selftestrsp.result);
+		failure = 1;
+		break;
+	}
+
+	return failure;
+}
diff --git a/src/drivers/ipmi/ipmi_if.h b/src/drivers/ipmi/ipmi_if.h
new file mode 100644
index 00000000000..22b5db38c26
--- /dev/null
+++ b/src/drivers/ipmi/ipmi_if.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __IPMI_IF_H
+#define __IPMI_IF_H
+
+/* Common API and code for different IPMI interfaces in different stages */
+
+#include <types.h>
+
+#define IPMI_NETFN_CHASSIS 0x00
+#define IPMI_NETFN_BRIDGE 0x02
+#define IPMI_NETFN_SENSOREVENT 0x04
+#define IPMI_NETFN_APPLICATION 0x06
+#define IPMI_BMC_GET_DEVICE_ID 0x01
+#define IPMI_IPMI_VERSION_MINOR(x) ((x) >> 4)
+#define IPMI_IPMI_VERSION_MAJOR(x) ((x) & 0xf)
+#define IPMI_BMC_GET_SELFTEST_RESULTS 0x04
+#define IPMI_APP_SELFTEST_RESERVED 0xFF
+#define IPMI_APP_SELFTEST_NO_ERROR 0x55
+#define IPMI_APP_SELFTEST_NOT_IMPLEMENTED 0x56
+#define IPMI_APP_SELFTEST_ERROR 0x57
+#define IPMI_APP_SELFTEST_FATAL_HW_ERROR 0x58
+
+#define IPMI_NETFN_FIRMWARE 0x08
+#define IPMI_NETFN_STORAGE 0x0a
+#define IPMI_READ_FRU_DATA 0x11
+#define IPMI_ADD_SEL_ENTRY 0x44
+#define IPMI_NETFN_TRANSPORT 0x0c
+
+#define IPMI_CMD_ACPI_POWERON 0x06
+
+struct ipmi_rsp {
+	uint8_t lun;
+	uint8_t cmd;
+	uint8_t completion_code;
+} __packed;
+
+/* Get Device ID */
+struct ipmi_devid_rsp {
+	struct ipmi_rsp resp;
+	uint8_t device_id;
+	uint8_t device_revision;
+	uint8_t fw_rev1;
+	uint8_t fw_rev2;
+	uint8_t ipmi_version;
+	uint8_t additional_device_support;
+	uint8_t manufacturer_id[3];
+	uint8_t product_id[2];
+} __packed;
+
+/* Get Self Test Results */
+struct ipmi_selftest_rsp {
+	struct ipmi_rsp resp;
+	uint8_t result;
+	uint8_t param;
+} __packed;
+
+struct device;
+
+/*
+ * Sends a command and reads its response. Input buffer is for payload, but
+ * output includes `struct ipmi_rsp` as a header. Returns number of bytes copied
+ * into the buffer or -1.
+ */
+int ipmi_message(int port, int netfn, int lun, int cmd,
+		 const unsigned char *inmsg, int inlen,
+		 unsigned char *outmsg, int outlen);
+
+/* Run basic IPMI init functions in romstage from the provided PnP device,
+ * returns CB_SUCCESS on success and CB_ERR if an error occurred. */
+enum cb_err ipmi_premem_init(const uint16_t port, const uint16_t device);
+
+int ipmi_get_device_id(const struct device *dev, struct ipmi_devid_rsp *rsp);
+
+int ipmi_process_self_test_result(const struct device *dev);
+
+#endif /* __IPMI_IF_H */
diff --git a/src/drivers/ipmi/ipmi_kcs.c b/src/drivers/ipmi/ipmi_kcs.c
index 12cbe82f141..271a6a25386 100644
--- a/src/drivers/ipmi/ipmi_kcs.c
+++ b/src/drivers/ipmi/ipmi_kcs.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include "ipmi_if.h"
 #include "ipmi_kcs.h"
 
 #define IPMI_KCS_STATE(_x) ((_x) >> 6)
@@ -219,9 +220,9 @@ static int ipmi_kcs_read_message(int port, unsigned char *msg, int len)
 	return ret;
 }
 
-int ipmi_kcs_message(int port, int netfn, int lun, int cmd,
-		     const unsigned char *inmsg, int inlen,
-		     unsigned char *outmsg, int outlen)
+int ipmi_message(int port, int netfn, int lun, int cmd,
+		 const unsigned char *inmsg, int inlen,
+		 unsigned char *outmsg, int outlen)
 {
 	if (ipmi_kcs_send_message(port, netfn, lun, cmd, inmsg, inlen)) {
 		printk(BIOS_ERR, "ipmi_kcs_send_message failed\n");
diff --git a/src/drivers/ipmi/ipmi_kcs.h b/src/drivers/ipmi/ipmi_kcs.h
index 33ddd5f016f..17298b1038c 100644
--- a/src/drivers/ipmi/ipmi_kcs.h
+++ b/src/drivers/ipmi/ipmi_kcs.h
@@ -3,62 +3,8 @@
 #ifndef __IPMI_KCS_H
 #define __IPMI_KCS_H
 
-#define IPMI_NETFN_CHASSIS 0x00
-#define IPMI_NETFN_BRIDGE 0x02
-#define IPMI_NETFN_SENSOREVENT 0x04
-#define IPMI_NETFN_APPLICATION 0x06
-#define IPMI_BMC_GET_DEVICE_ID 0x01
-#define IPMI_IPMI_VERSION_MINOR(x) ((x) >> 4)
-#define IPMI_IPMI_VERSION_MAJOR(x) ((x) & 0xf)
-#define IPMI_BMC_GET_SELFTEST_RESULTS 0x04
-#define IPMI_APP_SELFTEST_RESERVED 0xFF
-#define IPMI_APP_SELFTEST_NO_ERROR 0x55
-#define IPMI_APP_SELFTEST_NOT_IMPLEMENTED 0x56
-#define IPMI_APP_SELFTEST_ERROR 0x57
-#define IPMI_APP_SELFTEST_FATAL_HW_ERROR 0x58
-
-#define IPMI_NETFN_FIRMWARE 0x08
-#define IPMI_NETFN_STORAGE 0x0a
-#define IPMI_READ_FRU_DATA 0x11
-#define IPMI_ADD_SEL_ENTRY 0x44
-#define IPMI_NETFN_TRANSPORT 0x0c
-
-#define IPMI_CMD_ACPI_POWERON 0x06
-
-extern int ipmi_kcs_message(int port, int netfn, int lun, int cmd,
-			    const unsigned char *inmsg, int inlen,
-			    unsigned char *outmsg, int outlen);
-
-/* Run basic IPMI init functions in romstage from the provided PnP device,
- * returns CB_SUCCESS on success and CB_ERR if an error occurred. */
-enum cb_err ipmi_kcs_premem_init(const u16 port, const u16 device);
+#include <types.h>
 
 void ipmi_bmc_version(uint8_t *ipmi_bmc_major_revision,
 		      uint8_t *ipmi_bmc_minor_revision);
 
-struct ipmi_rsp {
-	uint8_t lun;
-	uint8_t cmd;
-	uint8_t completion_code;
-} __packed;
-
-/* Get Device ID */
-struct ipmi_devid_rsp {
-	struct ipmi_rsp resp;
-	uint8_t device_id;
-	uint8_t device_revision;
-	uint8_t fw_rev1;
-	uint8_t fw_rev2;
-	uint8_t ipmi_version;
-	uint8_t additional_device_support;
-	uint8_t manufacturer_id[3];
-	uint8_t product_id[2];
-} __packed;
-
-/* Get Self Test Results */
-struct ipmi_selftest_rsp {
-	struct ipmi_rsp resp;
-	uint8_t result;
-	uint8_t param;
-} __packed;
-
 #endif
diff --git a/src/drivers/ipmi/ipmi_kcs_ops.c b/src/drivers/ipmi/ipmi_kcs_ops.c
index 4ffa91fe231..48b8e065ef0 100644
--- a/src/drivers/ipmi/ipmi_kcs_ops.c
+++ b/src/drivers/ipmi/ipmi_kcs_ops.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include "ipmi_kcs.h"
+#include "ipmi_if.h"
 #include "ipmi_supermicro_oem.h"
 #include "chip.h"
 
@@ -37,46 +38,6 @@ static u8 bmc_revision_minor = 0x0;
 
 static struct boot_state_callback bscb_post_complete;
 
-static int ipmi_get_device_id(struct device *dev, struct ipmi_devid_rsp *rsp)
-{
-	int ret;
-
-	ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0,
-			       IPMI_BMC_GET_DEVICE_ID, NULL, 0, (u8 *)rsp,
-			       sizeof(*rsp));
-	if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) {
-		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n",
-		       __func__, ret, rsp->resp.completion_code);
-		return 1;
-	}
-	if (ret != sizeof(*rsp)) {
-		printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__);
-		return 1;
-	}
-	return 0;
-}
-
-static int ipmi_get_bmc_self_test_result(struct device *dev, struct ipmi_selftest_rsp *rsp)
-{
-	int ret;
-
-	ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0,
-			       IPMI_BMC_GET_SELFTEST_RESULTS, NULL, 0, (u8 *)rsp,
-			       sizeof(*rsp));
-
-	if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) {
-		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n",
-		       __func__, ret, rsp->resp.completion_code);
-		return 1;
-	}
-	if (ret != sizeof(*rsp)) {
-		printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__);
-		return 1;
-	}
-
-	return 0;
-}
-
 static void bmc_set_post_complete_gpio_callback(void *arg)
 {
 	struct drivers_ipmi_config *conf = arg;
@@ -103,8 +64,6 @@ static void ipmi_kcs_init(struct device *dev)
 	uint32_t man_id = 0, prod_id = 0;
 	struct drivers_ipmi_config *conf = dev->chip_info;
 	const struct gpio_operations *gpio_ops;
-	struct ipmi_selftest_rsp selftestrsp = {0};
-	uint8_t retry_count;
 
 	if (!conf) {
 		printk(BIOS_WARNING, "IPMI: chip_info is missing! Skip init.\n");
@@ -154,41 +113,9 @@ static void ipmi_kcs_init(struct device *dev)
 		}
 	}
 
-	printk(BIOS_INFO, "Get BMC self test result...");
-	for (retry_count = 0; retry_count < conf->bmc_boot_timeout; retry_count++) {
-		if (!ipmi_get_bmc_self_test_result(dev, &selftestrsp))
-			break;
-
-		mdelay(1000);
-	}
-
-	switch (selftestrsp.result) {
-	case IPMI_APP_SELFTEST_NO_ERROR: /* 0x55 */
-		printk(BIOS_DEBUG, "No Error\n");
-		break;
-	case IPMI_APP_SELFTEST_NOT_IMPLEMENTED: /* 0x56 */
-		printk(BIOS_DEBUG, "Function Not Implemented\n");
-		break;
-	case IPMI_APP_SELFTEST_ERROR: /* 0x57 */
-		printk(BIOS_ERR, "BMC: Corrupted or inaccessible data or device\n");
-		/* Don't write tables if communication failed */
-		dev->enabled = 0;
-		break;
-	case IPMI_APP_SELFTEST_FATAL_HW_ERROR: /* 0x58 */
-		printk(BIOS_ERR, "BMC: Fatal Hardware Error\n");
-		/* Don't write tables if communication failed */
-		dev->enabled = 0;
-		break;
-	case IPMI_APP_SELFTEST_RESERVED: /* 0xFF */
-		printk(BIOS_DEBUG, "Reserved\n");
-		break;
-
-	default: /* Other Device Specific Hardware Error */
-		printk(BIOS_ERR, "BMC: Device Specific Error\n");
+	if (ipmi_process_self_test_result(dev))
 		/* Don't write tables if communication failed */
 		dev->enabled = 0;
-		break;
-	}
 
 	if (!ipmi_get_device_id(dev, &rsp)) {
 		/* Queried the IPMI revision from BMC */
diff --git a/src/drivers/ipmi/ipmi_kcs_ops_premem.c b/src/drivers/ipmi/ipmi_kcs_ops_premem.c
deleted file mode 100644
index e1ae0dc3e5b..00000000000
--- a/src/drivers/ipmi/ipmi_kcs_ops_premem.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-
-#include <arch/io.h>
-#include <console/console.h>
-#include <delay.h>
-#include <device/device.h>
-#include <timer.h>
-
-#include "ipmi_kcs.h"
-#include "chip.h"
-
-static int ipmi_get_bmc_self_test_result(const struct device *dev,
-					 struct ipmi_selftest_rsp *rsp)
-{
-	int ret;
-
-	ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_APPLICATION, 0,
-			       IPMI_BMC_GET_SELFTEST_RESULTS, NULL, 0, (u8 *)rsp,
-			       sizeof(*rsp));
-
-	if (ret < sizeof(struct ipmi_rsp) || rsp->resp.completion_code) {
-		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n",
-		       __func__, ret, rsp->resp.completion_code);
-		return 1;
-	}
-	if (ret != sizeof(*rsp)) {
-		printk(BIOS_ERR, "IPMI: %s response truncated\n", __func__);
-		return 1;
-	}
-
-	return 0;
-}
-
-enum cb_err ipmi_kcs_premem_init(const u16 port, const u16 device)
-{
-	const struct drivers_ipmi_config *conf = NULL;
-	struct ipmi_selftest_rsp selftestrsp = {0};
-	uint8_t retry_count;
-	const struct device *dev;
-
-	/* Find IPMI PNP device from devicetree in romstage */
-	dev = dev_find_slot_pnp(port, device);
-
-	if (!dev) {
-		printk(BIOS_ERR, "IPMI: Cannot find PNP device port: %x, device %x\n",
-		       port, device);
-		return CB_ERR;
-	}
-	if (!dev->enabled) {
-		printk(BIOS_ERR, "IPMI: device is not enabled\n");
-		return CB_ERR;
-	}
-	printk(BIOS_DEBUG, "IPMI: romstage PNP KCS 0x%x\n", dev->path.pnp.port);
-	if (dev->chip_info)
-		conf = dev->chip_info;
-
-	if (conf && conf->wait_for_bmc && conf->bmc_boot_timeout) {
-		struct stopwatch sw;
-		stopwatch_init_msecs_expire(&sw, conf->bmc_boot_timeout * 1000);
-		printk(BIOS_DEBUG, "IPMI: Waiting for BMC...\n");
-
-		while (!stopwatch_expired(&sw)) {
-			if (inb(dev->path.pnp.port) != 0xff)
-				break;
-			mdelay(100);
-		}
-		if (stopwatch_expired(&sw)) {
-			printk(BIOS_INFO, "IPMI: Waiting for BMC timed out\n");
-			return CB_ERR;
-		}
-	}
-
-	printk(BIOS_INFO, "Get BMC self test result...");
-	if (conf && conf->bmc_boot_timeout) {
-		for (retry_count = 0; retry_count < conf->bmc_boot_timeout; retry_count++) {
-			if (!ipmi_get_bmc_self_test_result(dev, &selftestrsp))
-				break;
-
-			mdelay(1000);
-		}
-	} else {
-		/* At least run once */
-		ipmi_get_bmc_self_test_result(dev, &selftestrsp);
-	}
-
-	int ret = CB_ERR;
-	switch (selftestrsp.result) {
-	case IPMI_APP_SELFTEST_NO_ERROR: /* 0x55 */
-		printk(BIOS_DEBUG, "No Error\n");
-		ret = CB_SUCCESS;
-		break;
-	case IPMI_APP_SELFTEST_NOT_IMPLEMENTED: /* 0x56 */
-		printk(BIOS_DEBUG, "Function Not Implemented\n");
-		ret = CB_SUCCESS;
-		break;
-	case IPMI_APP_SELFTEST_ERROR: /* 0x57 */
-		printk(BIOS_ERR, "Corrupted or inaccessible data or device\n");
-		break;
-	case IPMI_APP_SELFTEST_FATAL_HW_ERROR: /* 0x58 */
-		printk(BIOS_ERR, "Fatal Hardware Error\n");
-		break;
-	case IPMI_APP_SELFTEST_RESERVED: /* 0xFF */
-		printk(BIOS_DEBUG, "Reserved\n");
-		ret = CB_SUCCESS;
-		break;
-
-	default: /* Other Device Specific Hardware Error */
-		printk(BIOS_ERR, "Device Specific Error 0x%x 0x%x\n", selftestrsp.result,
-		       selftestrsp.param);
-		break;
-	}
-	return ret;
-}
diff --git a/src/drivers/ipmi/ipmi_ops.c b/src/drivers/ipmi/ipmi_ops.c
index 73a02e1f37c..d9b3256eec3 100644
--- a/src/drivers/ipmi/ipmi_ops.c
+++ b/src/drivers/ipmi/ipmi_ops.c
@@ -2,6 +2,7 @@
 
 #include
 #include "ipmi_ops.h"
+#include "ipmi_if.h"
 #include
 #include
@@ -18,7 +19,7 @@ enum cb_err ipmi_init_and_start_bmc_wdt(const int port, uint16_t countdown,
 	/* clear BIOS FRB2 expiration flag */
 	req.timer_use_expiration_flags_clr = 2;
 	req.initial_countdown_val = countdown;
-	ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0,
+	ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0,
 			IPMI_BMC_SET_WDG_TIMER,
 			(const unsigned char *) &req, sizeof(req),
 			(unsigned char *) &rsp, sizeof(rsp));
@@ -32,7 +33,7 @@
 	}
 
 	/* Reset command to start timer */
-	ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0,
+	ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0,
 			IPMI_BMC_RESET_WDG_TIMER, NULL, 0,
 			(unsigned char *) &rsp, sizeof(rsp));
 
@@ -56,7 +57,7 @@ enum cb_err ipmi_stop_bmc_wdt(const int port)
 	struct ipmi_rsp resp;
 
 	/* Get current timer first */
-	ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0,
+	ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0,
 			IPMI_BMC_GET_WDG_TIMER, NULL, 0,
 			(unsigned char *) &rsp, sizeof(rsp));
 
@@ -76,7 +77,7 @@
 	rsp.data.timer_use &= ~(1 << 6);
 	rsp.data.initial_countdown_val = 0;
 	req = rsp.data;
-	ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0,
+	ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0,
 			IPMI_BMC_SET_WDG_TIMER,
 			(const unsigned char *) &req, sizeof(req),
 			(unsigned char *) &resp, sizeof(resp));
@@ -104,7 +105,7 @@ enum cb_err ipmi_get_system_guid(const int port, uint8_t *uuid)
 		return CB_ERR;
 	}
 
-	ret = ipmi_kcs_message(port, IPMI_NETFN_APPLICATION, 0x0,
+	ret = ipmi_message(port, IPMI_NETFN_APPLICATION, 0x0,
 			IPMI_BMC_GET_SYSTEM_GUID, NULL, 0,
 			(unsigned char *) &rsp, sizeof(rsp));
 
@@ -128,7 +129,7 @@ enum cb_err ipmi_add_sel(const int port, struct sel_event_record *sel)
 		return CB_ERR;
 	}
 
-	ret = ipmi_kcs_message(port, IPMI_NETFN_STORAGE, 0x0,
+	ret = ipmi_message(port, IPMI_NETFN_STORAGE, 0x0,
 			IPMI_ADD_SEL_ENTRY, (const unsigned char *) sel,
 			16, (unsigned char *) &rsp, sizeof(rsp));
 
diff --git a/src/drivers/ipmi/ipmi_ops.h b/src/drivers/ipmi/ipmi_ops.h
index d900272e38a..7a92a28121a 100644
--- a/src/drivers/ipmi/ipmi_ops.h
+++ b/src/drivers/ipmi/ipmi_ops.h
@@ -4,7 +4,7 @@
 #define __IPMI_OPS_H
 #include
-#include "ipmi_kcs.h"
+#include "ipmi_if.h"
 #define IPMI_BMC_RESET_WDG_TIMER 0x22
 #define IPMI_BMC_SET_WDG_TIMER 0x24
 #define IPMI_BMC_GET_WDG_TIMER 0x25
diff --git a/src/drivers/ipmi/ipmi_ops_premem.c b/src/drivers/ipmi/ipmi_ops_premem.c
new file mode 100644
index 00000000000..3aae0a8a3d6
--- /dev/null
+++ b/src/drivers/ipmi/ipmi_ops_premem.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <arch/io.h>
+#include <console/console.h>
+#include <delay.h>
+#include <device/device.h>
+#include <timer.h>
+
+#include "ipmi_if.h"
+#include "chip.h"
+
+#if CONFIG(IPMI_BT)
+#include "ipmi_bt.h"
+#endif
+
+enum cb_err ipmi_premem_init(const u16 port, const u16 device)
+{
+	const struct drivers_ipmi_config *conf = NULL;
+	const struct device *dev;
+
+	/* Find IPMI PNP device from devicetree in romstage */
+	dev = dev_find_slot_pnp(port, device);
+
+	if (!dev) {
+		printk(BIOS_ERR, "IPMI: Cannot find PNP device port: %x, device %x\n",
+		       port, device);
+		return CB_ERR;
+	}
+	if (!dev->enabled) {
+		printk(BIOS_ERR, "IPMI: device is not enabled\n");
+		return CB_ERR;
+	}
+	printk(BIOS_DEBUG, "IPMI: romstage PNP %s 0x%x\n",
+	       CONFIG(IPMI_KCS) ? "KCS" : "BT", dev->path.pnp.port);
+	if (dev->chip_info)
+		conf = dev->chip_info;
+
+	if (conf && conf->wait_for_bmc && conf->bmc_boot_timeout) {
+		struct stopwatch sw;
+		stopwatch_init_msecs_expire(&sw, conf->bmc_boot_timeout * 1000);
+		printk(BIOS_DEBUG, "IPMI: Waiting for BMC...\n");
+
+		while (!stopwatch_expired(&sw)) {
+			if (inb(dev->path.pnp.port) != 0xff)
+				break;
+			mdelay(100);
+		}
+		if (stopwatch_expired(&sw)) {
+			printk(BIOS_INFO, "IPMI: Waiting for BMC timed out\n");
+			return CB_ERR;
+		}
+	}
+
+	if (ipmi_process_self_test_result(dev))
+		return CB_ERR;
+
+#if CONFIG(IPMI_BT)
+	if (ipmi_bt_clear(dev->path.pnp.port))
+		return CB_ERR;
+#endif
+
+	return CB_SUCCESS;
+}
diff --git a/src/drivers/ipmi/ocp/ipmi_ocp.c b/src/drivers/ipmi/ocp/ipmi_ocp.c
index 11161a8ae47..9f583be934e 100644
--- a/src/drivers/ipmi/ocp/ipmi_ocp.c
+++ b/src/drivers/ipmi/ocp/ipmi_ocp.c
@@ -10,7 +10,7 @@
 #include
 #include
 #include
-#include <drivers/ipmi/ipmi_kcs.h>
+#include <drivers/ipmi/ipmi_if.h>
 #include
 #include
 
@@ -28,8 +28,9 @@ static enum cb_err ipmi_set_ppin(struct device *dev)
 		req.cpu1_lo = xeon_sp_ppin[1].lo;
 		req.cpu1_hi = xeon_sp_ppin[1].hi;
 	}
-	ret = ipmi_kcs_message(dev->path.pnp.port, IPMI_NETFN_OEM, 0x0, IPMI_OEM_SET_PPIN,
-		(const unsigned char *) &req, sizeof(req), (unsigned char *) &rsp, sizeof(rsp));
+	ret = ipmi_message(dev->path.pnp.port, IPMI_NETFN_OEM, 0x0, IPMI_OEM_SET_PPIN,
+			   (const unsigned char *) &req, sizeof(req),
+			   (unsigned char *) &rsp, sizeof(rsp));
 
 	if (ret < sizeof(struct ipmi_rsp) || rsp.completion_code) {
 		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d resp=0x%x)\n",
diff --git a/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c b/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c
index 8e43d8d159e..7b0b9ea0360 100644
--- a/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c
+++ b/src/drivers/ipmi/ocp/ipmi_ocp_romstage.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 
 #include
-#include <drivers/ipmi/ipmi_kcs.h>
+#include <drivers/ipmi/ipmi_if.h>
 
 #include "ipmi_ocp.h"
 
@@ -10,9 +10,9 @@ enum cb_err ipmi_set_post_start(const int port)
 	int ret;
 	struct ipmi_rsp rsp;
 
-	ret = ipmi_kcs_message(port, IPMI_NETFN_OEM, 0x0,
-			       IPMI_BMC_SET_POST_START, NULL, 0, (u8 *) &rsp,
-			       sizeof(rsp));
+	ret = ipmi_message(port, IPMI_NETFN_OEM, 0x0,
+			   IPMI_BMC_SET_POST_START, NULL, 0, (u8 *) &rsp,
+			   sizeof(rsp));
 
 	if (ret < sizeof(struct ipmi_rsp) || rsp.completion_code) {
 		printk(BIOS_ERR, "IPMI: %s command failed (ret=%d rsp=0x%x)\n",
@@ -42,10 +42,10 @@ enum cb_err ipmi_set_cmos_clear(void)
 
 	/* IPMI OEM get bios boot order command to check if the valid bit and the CMOS clear
 	   bit are both set from the response BootMode byte. */
-	ret = ipmi_kcs_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0,
-			       IPMI_OEM_GET_BIOS_BOOT_ORDER,
-			       NULL, 0,
-			       (unsigned char *) &rsp, sizeof(rsp));
+	ret = ipmi_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0,
+			   IPMI_OEM_GET_BIOS_BOOT_ORDER,
+			   NULL, 0,
+			   (unsigned char *) &rsp, sizeof(rsp));
 
 	if (ret < sizeof(struct ipmi_rsp) || rsp.resp.completion_code) {
 		printk(BIOS_ERR, "IPMI: %s command failed (read ret=%d resp=0x%x)\n",
@@ -56,10 +56,10 @@
 	if (!IS_CMOS_AND_VALID_BIT(rsp.data.boot_mode)) {
 		req = rsp.data;
 		SET_CMOS_AND_VALID_BIT(req.boot_mode);
-		ret = ipmi_kcs_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0,
-				       IPMI_OEM_SET_BIOS_BOOT_ORDER,
-				       (const unsigned char *) &req, sizeof(req),
-				       (unsigned char *) &rsp, sizeof(rsp));
+		ret = ipmi_message(CONFIG_BMC_KCS_BASE, IPMI_NETFN_OEM, 0x0,
+				   IPMI_OEM_SET_BIOS_BOOT_ORDER,
+				   (const unsigned char *) &req, sizeof(req),
+				   (unsigned char *) &rsp, sizeof(rsp));
 
 		if (ret < sizeof(struct ipmi_rsp) || rsp.resp.completion_code) {
 			printk(BIOS_ERR, "IPMI: %s command failed (sent ret=%d resp=0x%x)\n",
diff --git a/src/drivers/ipmi/supermicro_oem.c b/src/drivers/ipmi/supermicro_oem.c
index 9d5ffc77a46..7af4e3b8a47 100644
--- a/src/drivers/ipmi/supermicro_oem.c
+++ b/src/drivers/ipmi/supermicro_oem.c
@@ -3,7 +3,7 @@
 
 #include
 #include
-#include <drivers/ipmi/ipmi_kcs.h>
+#include <drivers/ipmi/ipmi_if.h>
 #include
 #include
 #include "ipmi_supermicro_oem.h"
@@ -35,9 +35,9 @@ static void set_coreboot_ver(const uint16_t kcs_port)
 	bios_ver.str[i] = 0;
 	bios_ver.ver = IPMI_LUN0_AC_SET_BIOS_VER;
 
-	ret = ipmi_kcs_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING,
-			       (const unsigned char *) &bios_ver, sizeof(bios_ver),
-			       (unsigned char *) &rsp, sizeof(rsp));
+	ret = ipmi_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING,
+			   (const unsigned char *) &bios_ver, sizeof(bios_ver),
+			   (unsigned char *) &rsp, sizeof(rsp));
 	if (ret < sizeof(rsp) || rsp.completion_code) {
 		printk(BIOS_ERR, "BMC_IPMI: %s command failed (ret=%d resp=0x%x)\n",
 		       __func__, ret, rsp.completion_code);
@@ -54,9 +54,9 @@ static void set_coreboot_date(const uint16_t kcs_port)
 	bios_ver.str[15] = 0;
 	bios_ver.ver = IPMI_LUN0_AC_SET_BIOS_DATE;
 
-	ret = ipmi_kcs_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING,
-			       (const unsigned char *) &bios_ver, sizeof(bios_ver),
-			       (unsigned char *) &rsp, sizeof(rsp));
+	ret = ipmi_message(kcs_port, IPMI_NETFN_OEM, 0, IPMI_LUN0_SET_BIOS_STRING,
+			   (const unsigned char *) &bios_ver, sizeof(bios_ver),
+			   (unsigned char *) &rsp, sizeof(rsp));
 	if (ret < sizeof(rsp) || rsp.completion_code) {
 		printk(BIOS_ERR, "BMC_IPMI: %s command failed (ret=%d resp=0x%x)\n",
 		       __func__, ret, rsp.completion_code);
diff --git a/src/include/cpu/power/istep_10.h b/src/include/cpu/power/istep_10.h
new file mode 100644
index 00000000000..5c74840b90c
--- /dev/null
+++ b/src/include/cpu/power/istep_10.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef CPU_PPC64_ISTEP10_H
+#define CPU_PPC64_ISTEP10_H
+
+#include <stdint.h>
+
+struct pci_info;
+
+void istep_10_1(uint8_t chips);
+void istep_10_6(uint8_t chips);
+void istep_10_10(uint8_t chips, struct pci_info *pci_info);
+void istep_10_12(uint8_t chips);
+void istep_10_13(uint8_t chips);
+
+#endif /* CPU_PPC64_ISTEP10_H */
diff --git a/src/include/cpu/power/istep_13.h b/src/include/cpu/power/istep_13.h
new file mode 100644
index 00000000000..f321a8d6cb1
--- /dev/null
+++ b/src/include/cpu/power/istep_13.h
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
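+/* Data types, timing helpers and SCOM accessors shared by the istep 13 (DRAM initialization) code. */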
+#include +#include +#include +#include +#include +#include + +/* These should be in one of the SPD headers. */ +/* + * Note: code in 13.3 depends on width/density having values as encoded in SPD + * and below. Please do not change them. + */ +#define WIDTH_x4 0 +#define WIDTH_x8 1 + +#define DENSITY_256Mb 0 +#define DENSITY_512Mb 1 +#define DENSITY_1Gb 2 +#define DENSITY_2Gb 3 +#define DENSITY_4Gb 4 +#define DENSITY_8Gb 5 +#define DENSITY_16Gb 6 +#define DENSITY_32Gb 7 + +#define PSEC_PER_NSEC 1000 +#define PSEC_PER_USEC 1000000 + +/* Values are the same across all supported speed bins */ +static const int tMRD = 8; +static const int tMOD = 24; +static const int tZQinit = 1024; + +typedef struct { + bool present; + uint8_t mranks; + uint8_t log_ranks; // In total, not per mrank + uint8_t width; + uint8_t density; + uint8_t *spd; + uint8_t rcd_i2c_addr; + uint16_t size_gb; // 2S8Rx4 8Gb DIMMs are 256GB +} rdimm_data_t; + +typedef struct { + bool functional; + rdimm_data_t dimm[DIMMS_PER_MCA]; + + /* + * The following fields are read and/or calculated from SPD obtained + * from DIMMs, but they are here because we can only set them per + * MCA/port/channel and not per DIMM. All units are clock cycles, + * absolute time values are rarely used. + */ + uint16_t nfaw; + uint16_t nras; + uint16_t nrfc; + uint16_t nrfc_dlr; // nRFC for Different Logical Rank (3DS only) + uint8_t cl; + uint8_t nccd_l; + uint8_t nwtr_s; + uint8_t nwtr_l; + uint8_t nrcd; + uint8_t nrp; + uint8_t nwr; + uint8_t nrrd_s; + uint8_t nrrd_l; +} mca_data_t; + +typedef struct { + bool functional; + mca_data_t mca[MCA_PER_MCS]; +} mcs_data_t; + +typedef struct { + /* Do we need 'bool functional' here as well? */ + mcs_data_t mcs[MCS_PER_PROC]; + + /* + * Unclear whether we can have different speeds between MCSs. + * Documentation says we can, but ring ID in 13.3 is sent per MCBIST. + * ATTR_MSS_FREQ is defined for SYSTEM target type, implying only one + * speed for whole platform. + * + * FIXME: maybe these should be in mcs_data_t and 13.3 should send + * a second Ring ID for the second MCS. How to test it? + */ + uint16_t speed; // MT/s + /* + * These depend just on memory frequency (and specification), and even + * though they describe DRAM/DIMM/MCA settings, there is no need to have + * multiple copies of identical data. + */ + uint16_t nrefi; // 7.8 us in normal temperature range (0-85 deg Celsius) + uint8_t cwl; + uint8_t nrtp; // max(4 nCK, 7.5 ns) = 7.5 ns for every supported speed +} mcbist_data_t; + +extern mcbist_data_t mem_data[MAX_CHIPS]; +static const chiplet_id_t mcs_ids[MCS_PER_PROC] = {MC01_CHIPLET_ID, MC23_CHIPLET_ID}; + +/* + * All time conversion functions assume that both MCSs have the same frequency. + * Change it if proven otherwise by adding a second argument - memory speed or + * MCS index. + * + * These functions should not be used before setting mem_data.speed to a valid + * non-0 value. + */ +static inline uint64_t tck_in_ps(uint8_t chip) +{ + /* + * Speed is in MT/s, we need to divide it by 2 to get MHz. + * tCK(avg) should be rounded down to the next valid speed bin, which + * corresponds to value obtained by using standardized MT/s values. + */ + return 1000000 / (mem_data[chip].speed / 2); +} + +static inline uint64_t ps_to_nck(uint8_t chip, uint64_t ps) +{ + /* Algorithm taken from JEDEC Standard No. 21-C */ + return ((ps * 1000 / tck_in_ps(chip)) + 974) / 1000; +} + +static inline uint64_t mtb_ftb_to_nck(uint8_t chip, uint64_t mtb, int8_t ftb) +{ + /* ftb is signed (always byte?) 
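(per the DDR4 SPD definition, MTB = 125 ps and FTB = 1 ps, which is where mtb * 125 + ftb comes from)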
*/ + return ps_to_nck(chip, mtb * 125 + ftb); +} + +static inline uint64_t ns_to_nck(uint8_t chip, uint64_t ns) +{ + return ps_to_nck(chip, ns * PSEC_PER_NSEC); +} + +static inline uint64_t nck_to_ps(uint8_t chip, uint64_t nck) +{ + return nck * tck_in_ps(chip); +} + +/* + * To be used in delays, so always round up. + * + * Microsecond is the best precision exposed by coreboot API. tCK is somewhere + * around 1 ns, so most smaller delays will be rounded up to 1 us. For better + * resolution we would have to read TBR (Time Base Register) directly. + */ +static inline uint64_t nck_to_us(uint8_t chip, uint64_t nck) +{ + return (nck_to_ps(chip, nck) + PSEC_PER_USEC - 1) / PSEC_PER_USEC; +} + +static inline void delay_nck(uint8_t chip, uint64_t nck) +{ + udelay(nck_to_us(chip, nck)); +} + +/* TODO: consider non-RMW variants */ +static inline void mca_and_or(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom, + uint64_t and, uint64_t or) +{ + /* + * Indirect registers have different stride than the direct ones in + * general, except for (only?) direct PHY registers. + */ + unsigned mul = (scom & PPC_BIT(0) || + (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; + scom_and_or_for_chiplet(chip, mcs, scom + mca * mul, and, or); +} + +static inline void dp_mca_and_or(uint8_t chip, chiplet_id_t mcs, int dp, int mca, + uint64_t scom, uint64_t and, uint64_t or) +{ + mca_and_or(chip, mcs, mca, scom + dp * 0x40000000000, and, or); +} + +static inline uint64_t mca_read(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom) +{ + /* Indirect registers have different stride than the direct ones in + * general, except for (only?) direct PHY registers. */ + unsigned mul = (scom & PPC_BIT(0) || + (scom & 0xFFFFF000) == 0x07011000) ? 0x400 : 0x40; + return read_scom_for_chiplet(chip, mcs, scom + mca * mul); +} + +static inline void mca_write(uint8_t chip, chiplet_id_t mcs, int mca, uint64_t scom, + uint64_t val) +{ + /* Indirect registers have different stride than the direct ones in + * general, except for (only?) direct PHY registers. */ + unsigned mul = (scom & PPC_BIT(0) || + (scom & 0xFFFFF000) == 0x07011000) ? 
0x400 : 0x40; + write_scom_for_chiplet(chip, mcs, scom + mca * mul, val); +} +static inline uint64_t dp_mca_read(uint8_t chip, chiplet_id_t mcs, int dp, int mca, + uint64_t scom) +{ + return mca_read(chip, mcs, mca, scom + dp * 0x40000000000); +} + +enum rank_selection { + NO_RANKS = 0, + DIMM0_RANK0 = 1 << 0, + DIMM0_RANK1 = 1 << 1, + DIMM0_ALL_RANKS = DIMM0_RANK0 | DIMM0_RANK1, + DIMM1_RANK0 = 1 << 2, + DIMM1_RANK1 = 1 << 3, + DIMM1_ALL_RANKS = DIMM1_RANK0 | DIMM1_RANK1, + BOTH_DIMMS_1R = DIMM0_RANK0 | DIMM1_RANK0, + BOTH_DIMMS_2R = DIMM0_ALL_RANKS | DIMM1_ALL_RANKS +}; + +enum cal_config { + CAL_WR_LEVEL = PPC_BIT(48), + CAL_INITIAL_PAT_WR = PPC_BIT(49), + CAL_DQS_ALIGN = PPC_BIT(50), + CAL_RDCLK_ALIGN = PPC_BIT(51), + CAL_READ_CTR = PPC_BIT(52), + CAL_WRITE_CTR = PPC_BIT(53), + CAL_INITIAL_COARSE_WR = PPC_BIT(54), + CAL_COARSE_RD = PPC_BIT(55), + CAL_CUSTOM_RD = PPC_BIT(56), + CAL_CUSTOM_WR = PPC_BIT(57) +}; + +void ccs_add_instruction(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn, + uint8_t cke, uint16_t idles); +void ccs_add_mrs(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, + int mirror, uint16_t idles); +void ccs_phy_hw_step(uint8_t chip, chiplet_id_t id, int mca_i, int rp, enum cal_config conf, + uint64_t step_cycles); +void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i); + +static inline enum ddr4_mr5_rtt_park vpd_to_rtt_park(uint8_t vpd) +{ + /* Fun fact: this is 240/vpd with bit order reversed */ + switch (vpd) { + case 34: + return DDR4_MR5_RTT_PARK_RZQ_7; + case 40: + return DDR4_MR5_RTT_PARK_RZQ_6; + case 48: + return DDR4_MR5_RTT_PARK_RZQ_5; + case 60: + return DDR4_MR5_RTT_PARK_RZQ_4; + case 80: + return DDR4_MR5_RTT_PARK_RZQ_3; + case 120: + return DDR4_MR5_RTT_PARK_RZQ_2; + case 240: + return DDR4_MR5_RTT_PARK_RZQ_1; + default: + return DDR4_MR5_RTT_PARK_OFF; + } +} + +static inline enum ddr4_mr2_rtt_wr vpd_to_rtt_wr(uint8_t vpd) +{ + switch (vpd) { + case 0: + return DDR4_MR2_RTT_WR_OFF; + case 80: + return DDR4_MR2_RTT_WR_RZQ_3; + case 120: + return DDR4_MR2_RTT_WR_RZQ_2; + case 240: + return DDR4_MR2_RTT_WR_RZQ_1; + default: + /* High-Z is 1 in VPD */ + return DDR4_MR2_RTT_WR_HI_Z; + } +} + +static inline enum ddr4_mr1_rtt_nom vpd_to_rtt_nom(uint8_t vpd) +{ + /* Fun fact: this is 240/vpd with bit order reversed */ + switch (vpd) { + case 34: + return DDR4_MR1_RTT_NOM_RZQ_7; + case 40: + return DDR4_MR1_RTT_NOM_RZQ_6; + case 48: + return DDR4_MR1_RTT_NOM_RZQ_5; + case 60: + return DDR4_MR1_RTT_NOM_RZQ_4; + case 80: + return DDR4_MR1_RTT_NOM_RZQ_3; + case 120: + return DDR4_MR1_RTT_NOM_RZQ_2; + case 240: + return DDR4_MR1_RTT_NOM_RZQ_1; + default: + return DDR4_MR1_RTT_NOM_OFF; + } +} + +void istep_13_2(uint8_t chips); +void istep_13_3(uint8_t chips); +void istep_13_4(uint8_t chips); +void istep_13_6(uint8_t chips); +void istep_13_8(uint8_t chips); // TODO: takes MSS data from 7.4 +void istep_13_9(uint8_t chips); +void istep_13_10(uint8_t chips); +void istep_13_11(uint8_t chips); +void istep_13_13(uint8_t chips); diff --git a/src/include/cpu/power/istep_14.h b/src/include/cpu/power/istep_14.h new file mode 100644 index 00000000000..877114f930b --- /dev/null +++ b/src/include/cpu/power/istep_14.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP_14_H +#define CPU_PPC64_ISTEP_14_H + +#include + +struct pci_info; + +void istep_14_1(uint8_t chips); +void istep_14_2(uint8_t chips); +void istep_14_3(uint8_t chips, const struct pci_info *pci_info); +void istep_14_5(uint8_t chips); + +#endif /* 
CPU_PPC64_ISTEP_14_H */ diff --git a/src/include/cpu/power/istep_18.h b/src/include/cpu/power/istep_18.h new file mode 100644 index 00000000000..bc43d2fc0e9 --- /dev/null +++ b/src/include/cpu/power/istep_18.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP18_H +#define CPU_PPC64_ISTEP18_H + +#include + +void istep_18_11(uint8_t chips, uint8_t *mdmt); +void istep_18_12(uint8_t chips, uint8_t mdmt); + +#endif /* CPU_PPC64_ISTEP18_H */ diff --git a/src/include/cpu/power/istep_8.h b/src/include/cpu/power/istep_8.h new file mode 100644 index 00000000000..4eaef81a9f8 --- /dev/null +++ b/src/include/cpu/power/istep_8.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP8_H +#define CPU_PPC64_ISTEP8_H + +#include + +void istep_8_1(uint8_t chips); +void istep_8_2(uint8_t chips); +void istep_8_3(uint8_t chips); +void istep_8_4(uint8_t chips); +void istep_8_9(uint8_t chips); +void istep_8_10(uint8_t chips); +void istep_8_11(uint8_t chips); + +#endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/include/cpu/power/istep_9.h b/src/include/cpu/power/istep_9.h new file mode 100644 index 00000000000..ef5c3484072 --- /dev/null +++ b/src/include/cpu/power/istep_9.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ISTEP9_H +#define CPU_PPC64_ISTEP9_H + +#include + +void istep_9_2(uint8_t chips); +void istep_9_4(uint8_t chips); +void istep_9_6(uint8_t chips); +void istep_9_7(uint8_t chips); + +#endif /* CPU_PPC64_ISTEP8_H */ diff --git a/src/include/cpu/power/memd.h b/src/include/cpu/power/memd.h new file mode 100644 index 00000000000..9c26b4bc5f6 --- /dev/null +++ b/src/include/cpu/power/memd.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_MEMD_H +#define CPU_PPC64_MEMD_H + +struct region_device; + +void memd_device_init(void); + +void memd_device_unmount(void); + +const struct region_device *memd_device_ro(void); + +#endif /* CPU_PPC64_MEMD_H */ diff --git a/src/include/cpu/power/mvpd.h b/src/include/cpu/power/mvpd.h new file mode 100644 index 00000000000..7da12a673c7 --- /dev/null +++ b/src/include/cpu/power/mvpd.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_MVPD_H +#define CPU_PPC64_MVPD_H + +#include +#include + +/* Single bucket within #V keyword of version 3 */ +struct voltage_data { + uint16_t freq; // MHz + uint16_t vdd_voltage; + uint16_t idd_current; + uint16_t vcs_voltage; + uint16_t ics_current; +} __attribute__((__packed__)); + +/* Single bucket within #V keyword of version 3 */ +struct voltage_bucket_data { + uint8_t id; + + struct voltage_data nominal; + struct voltage_data powersave; + struct voltage_data turbo; + struct voltage_data ultra_turbo; + struct voltage_data powerbus; + + uint16_t sort_power_normal; + uint16_t sort_power_turbo; + + uint8_t reserved[6]; +} __attribute__((__packed__)); + +#define VOLTAGE_DATA_VERSION 3 +#define VOLTAGE_BUCKET_COUNT 6 + +/* #V of LRP[0-5] in MVPD */ +struct voltage_kwd { + uint8_t version; + uint8_t pnp[3]; + struct voltage_bucket_data buckets[VOLTAGE_BUCKET_COUNT]; +} __attribute__((__packed__)); + +struct region_device; + +void mvpd_device_init(void); + +void mvpd_device_unmount(void); + +const struct region_device *mvpd_device_ro(void); + +/* Reads #V of one of LRP records (mind that there is only one buffer) */ +const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp); + +/* Sets pg[0] and pg[1] to partial good values for MC01_CHIPLET_ID and + * 
MC23_CHIPLET_ID respectively */ +void mvpd_get_mcs_pg(uint8_t chip, uint16_t *pg); + +/* Builds bitmask of functional cores based on Partial Good vector stored in PG + * keyword of CP00 record */ +uint64_t mvpd_get_available_cores(uint8_t chip); + +/* Finds a specific keyword in MVPD partition and extracts it. *size is updated + * to reflect needed or used space in the buffer. */ +bool mvpd_extract_keyword(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t *buf, uint32_t *size); + +/* Finds a specific ring in MVPD partition and extracts it */ +bool mvpd_extract_ring(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t chiplet_id, + uint8_t even_odd, uint16_t ring_id, + uint8_t *buf, uint32_t buf_size); + +#endif /* CPU_PPC64_MVPD_H */ diff --git a/src/include/cpu/power/occ.h b/src/include/cpu/power/occ.h new file mode 100644 index 00000000000..cd16a2a2b89 --- /dev/null +++ b/src/include/cpu/power/occ.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_OCC_H +#define CPU_PPC64_OCC_H + +#include +#include + +struct homer_st; + +void clear_occ_special_wakeups(uint8_t chip, uint64_t cores); +void special_occ_wakeup_disable(uint8_t chip, uint64_t cores); +void occ_start_from_mem(uint8_t chip); +/* Moves OCC to active state */ +void activate_occ(uint8_t chip, struct homer_st *homer); + +void pm_occ_fir_init(uint8_t chip); +void pm_pba_fir_init(uint8_t chip); + +#endif /* CPU_PPC64_OCC_H */ diff --git a/src/include/cpu/power/powerbus.h b/src/include/cpu/power/powerbus.h new file mode 100644 index 00000000000..1c4298fdbce --- /dev/null +++ b/src/include/cpu/power/powerbus.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_POWERBUS_H +#define CPU_PPC64_POWERBUS_H + +#include + +enum FABRIC_CORE_FLOOR_RATIO +{ + FABRIC_CORE_FLOOR_RATIO_RATIO_8_8 = 0x0, + FABRIC_CORE_FLOOR_RATIO_RATIO_7_8 = 0x1, + FABRIC_CORE_FLOOR_RATIO_RATIO_6_8 = 0x2, + FABRIC_CORE_FLOOR_RATIO_RATIO_5_8 = 0x3, + FABRIC_CORE_FLOOR_RATIO_RATIO_4_8 = 0x4, + FABRIC_CORE_FLOOR_RATIO_RATIO_2_8 = 0x5, +}; + +enum FABRIC_CORE_CEILING_RATIO +{ + FABRIC_CORE_CEILING_RATIO_RATIO_8_8 = 0x0, + FABRIC_CORE_CEILING_RATIO_RATIO_7_8 = 0x1, + FABRIC_CORE_CEILING_RATIO_RATIO_6_8 = 0x2, + FABRIC_CORE_CEILING_RATIO_RATIO_5_8 = 0x3, + FABRIC_CORE_CEILING_RATIO_RATIO_4_8 = 0x4, + FABRIC_CORE_CEILING_RATIO_RATIO_2_8 = 0x5, +}; + +#define NUM_EPSILON_READ_TIERS 3 +#define NUM_EPSILON_WRITE_TIERS 2 + +/* Description of PowerBus configuration */ +struct powerbus_cfg +{ + /* Data computed from #V of LRP0 in MVPD, is MHz */ + uint32_t freq_core_floor; + uint32_t freq_core_ceiling; + uint32_t fabric_freq; + + /* Derived from data above */ + enum FABRIC_CORE_FLOOR_RATIO core_floor_ratio; + enum FABRIC_CORE_CEILING_RATIO core_ceiling_ratio; + + /* Derived from all data above */ + /* ATTR_PROC_EPS_READ_CYCLES_T* */ + uint32_t eps_r[NUM_EPSILON_READ_TIERS]; + /* ATTR_PROC_EPS_WRITE_CYCLES_T* */ + uint32_t eps_w[NUM_EPSILON_WRITE_TIERS]; +}; + +const struct powerbus_cfg *powerbus_cfg(uint8_t chip); + +#endif // CPU_PPC64_POWERBUS_H diff --git a/src/include/cpu/power/proc.h b/src/include/cpu/power/proc.h new file mode 100644 index 00000000000..1af8ee48ebb --- /dev/null +++ b/src/include/cpu/power/proc.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_PROC_H +#define __SOC_IBM_POWER9_PROC_H + +#include // PPC_BIT(), PPC_BITMASK() + +/* Maximum number of chips supported by the code. 
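Chips here are whole processor packages (sockets), not cores; cores per chip are covered by MAX_CORES_PER_CHIP below.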
*/ +#define MAX_CHIPS 2 + +_Static_assert(CONFIG_MAX_CPUS > 0, "At least one CPUs is needed"); +_Static_assert(CONFIG_MAX_CPUS <= MAX_CHIPS, "Too many CPUs requested"); + +#define MAX_CORES_PER_CHIP 24 +#define MAX_CORES_PER_EX 2 +#define MAX_QUADS_PER_CHIP (MAX_CORES_PER_CHIP / 4) +#define MAX_CMES_PER_CHIP (MAX_CORES_PER_CHIP / MAX_CORES_PER_EX) + +#define MCS_PER_PROC 2 +#define MCA_PER_MCS 2 +#define MCA_PER_PROC (MCA_PER_MCS * MCS_PER_PROC) +#define DIMMS_PER_MCA 2 +#define DIMMS_PER_MCS (DIMMS_PER_MCA * MCA_PER_MCS) +#define DIMMS_PER_PROC (DIMMS_PER_MCS * MCS_PER_PROC) + +#define I2C_BUSES_PER_CPU 4 +#define SPD_I2C_BUS 3 +#define FSI_I2C_BUS 8 // one bus to the second CPU + +/* cores is a 64-bit map of functional cores of a single chip */ +#define IS_EC_FUNCTIONAL(ec, cores) (!!((cores) & PPC_BIT(ec))) +#define IS_EX_FUNCTIONAL(ex, cores) (!!((cores) & PPC_BITMASK(2 * (ex), 2 * (ex) + 1))) +#define IS_EQ_FUNCTIONAL(eq, cores) (!!((cores) & PPC_BITMASK(4 * (eq), 4 * (eq) + 3))) + +/* Frequency of XBus for Nimbus */ +#define FREQ_X_MHZ 2000 + +/* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + * + * "nm" means non-mirrored. + */ +#define PROC_BASE_ADDR(chip, msel) ( \ + PPC_PLACE(0x0, 8, 5) | /* system ID */ \ + PPC_PLACE(msel, 13, 2) | /* msel (nm = 0b00/01, m = 0b10, mmio = 0b11) */ \ + PPC_PLACE(chip, 15, 4) | /* group ID */ \ + PPC_PLACE(0x0, 19, 3) /* chip ID */ \ + ) + +#endif /* __SOC_IBM_POWER9_PROC_H */ diff --git a/src/include/cpu/power/rom_media.h b/src/include/cpu/power/rom_media.h new file mode 100644 index 00000000000..630d7129100 --- /dev/null +++ b/src/include/cpu/power/rom_media.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_ROM_MEDIA_H +#define CPU_PPC64_ROM_MEDIA_H + +struct mmap_helper_region_device; + +void mount_part_from_pnor(const char *part_name, + struct mmap_helper_region_device *mdev); + +#endif // CPU_PPC64_ROM_MEDIA_H diff --git a/src/include/cpu/power/scom.h b/src/include/cpu/power/scom.h index f5354c83148..19594782342 100644 --- a/src/include/cpu/power/scom.h +++ b/src/include/cpu/power/scom.h @@ -29,14 +29,9 @@ // F - Sat Offset (6 bits) [58:63] // Higher bits specify indirect address -#define XSCOM_ADDR_IND_FLAG PPC_BIT(0) -#define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) -#define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) - #ifndef __ASSEMBLER__ #include #include -#include // TODO: these are probably specific to POWER9 typedef enum { @@ -88,75 +83,116 @@ typedef enum { EC23_CHIPLET_ID = 0x37 ///< Core23 chiplet (Quad5, EX11, C1) } chiplet_id_t; -void reset_scom_engine(void); - -uint64_t read_scom_direct(uint64_t reg_address); -void write_scom_direct(uint64_t reg_address, uint64_t data); - -uint64_t read_scom_indirect(uint64_t reg_address); -void write_scom_indirect(uint64_t reg_address, uint64_t data); - -static inline void write_scom(uint64_t addr, uint64_t data) +static const chiplet_id_t mcs_to_nest[] = { - if (addr & XSCOM_ADDR_IND_FLAG) - write_scom_indirect(addr, data); - else - write_scom_direct(addr, data); -} - -static inline uint64_t read_scom(uint64_t addr) + [MC01_CHIPLET_ID] = N3_CHIPLET_ID, + [MC23_CHIPLET_ID] = N1_CHIPLET_ID, +}; + +/* + * Usage of SCOM engines: + * - CPU0: + * - always XSCOM + * - CPU1++: + * - FSI SCOM before and during 8.4 (not needed, not implemented) + * - SBEIO SCOM after 8.4, before XSCOM is enabled in 10.1 + * - XSCOM after 10.1 + * + * Only romstage has to ever use anything else than 
XSCOM - bootblock doesn't + * access secondary CPUs at all and ramstage can use XSCOM from the beginning. + * SCOM dispatcher code is thus not compiled for stages other than romstage and + * assembly label is used to alias high-level functions directly to XSCOM engine + * implementation. + */ + +#if ENV_ROMSTAGE +void switch_secondary_scom_to_xscom(void); + +void write_scom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_scom(uint8_t chip, uint64_t addr); +#else +void write_scom(uint8_t chip, uint64_t addr, uint64_t data) asm("write_xscom"); +uint64_t read_scom(uint8_t chip, uint64_t addr) asm("read_xscom"); +#endif + +#if CONFIG(DEBUG_SCOM) && !defined(SKIP_SCOM_DEBUG) +#include + +#define write_scom(c, x, y) \ +({ \ + uint8_t __cw = (c); \ + uint64_t __xw = (x); \ + uint64_t __yw = (y); \ + printk(BIOS_SPEW, "SCOM W P%d %016llX %016llX\n", __cw, __xw, __yw); \ + write_scom(__cw, __xw, __yw); \ +}) + +#define read_scom(c, x) \ +({ \ + uint8_t __cr = (c); \ + uint64_t __xr = (x); \ + uint64_t __yr = read_scom(__cr, __xr); \ + printk(BIOS_SPEW, "SCOM R P%d %016llX %016llX\n", __cr, __xr, __yr); \ + __yr; \ +}) + +#endif + +static inline void scom_and_or(uint8_t chip, uint64_t addr, uint64_t and, uint64_t or) { - if (addr & XSCOM_ADDR_IND_FLAG) - return read_scom_indirect(addr); - else - return read_scom_direct(addr); + uint64_t data = read_scom(chip, addr); + write_scom(chip, addr, (data & and) | or); } -static inline void scom_and_or(uint64_t addr, uint64_t and, uint64_t or) +static inline void scom_and(uint8_t chip, int64_t addr, uint64_t and) { - uint64_t data = read_scom(addr); - write_scom(addr, (data & and) | or); + scom_and_or(chip, addr, and, 0); } -static inline void scom_and(uint64_t addr, uint64_t and) +static inline void scom_or(uint8_t chip, uint64_t addr, uint64_t or) { - scom_and_or(addr, and, 0); + scom_and_or(chip, addr, ~0, or); } -static inline void scom_or(uint64_t addr, uint64_t or) +static inline void write_scom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t data) { - scom_and_or(addr, ~0, or); + addr &= ~PPC_BITMASK(34,39); + addr |= ((chiplet & 0x3F) << 24); + write_scom(chip, addr, data); } -static inline void write_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t data) +static inline uint64_t read_scom_for_chiplet(uint8_t chip, chiplet_id_t chiplet, uint64_t addr) { - addr &= ~PPC_BITMASK(34, 39); + addr &= ~PPC_BITMASK(34,39); addr |= ((chiplet & 0x3F) << 24); - write_scom(addr, data); + return read_scom(chip, addr); } -static inline uint64_t read_scom_for_chiplet(chiplet_id_t chiplet, uint64_t addr) +static inline void scom_and_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t and, uint64_t or) { - addr &= ~PPC_BITMASK(34, 39); - addr |= ((chiplet & 0x3F) << 24); - return read_scom(addr); + uint64_t data = read_scom_for_chiplet(chip, chiplet, addr); + write_scom_for_chiplet(chip, chiplet, addr, (data & and) | or); } -static inline void scom_and_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, - uint64_t and, uint64_t or) +static inline void scom_and_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, uint64_t and) { - uint64_t data = read_scom_for_chiplet(chiplet, addr); - write_scom_for_chiplet(chiplet, addr, (data & and) | or); + scom_and_or_for_chiplet(chip, chiplet, addr, and, 0); } -static inline void scom_and_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t and) +static inline void scom_or_for_chiplet(uint8_t chip, chiplet_id_t chiplet, + uint64_t addr, 
uint64_t or) { - scom_and_or_for_chiplet(chiplet, addr, and, 0); + scom_and_or_for_chiplet(chip, chiplet, addr, ~0, or); } -static inline void scom_or_for_chiplet(chiplet_id_t chiplet, uint64_t addr, uint64_t or) +static inline uint8_t get_dd(void) { - scom_and_or_for_chiplet(chiplet, addr, ~0, or); + uint64_t val = read_scom(0, 0xF000F); + val = ((val >> 52) & 0x0F) | ((val >> 56) & 0xF0); + return (uint8_t) val; } #endif /* __ASSEMBLER__ */ diff --git a/src/include/cpu/power/spr.h b/src/include/cpu/power/spr.h index 300147d7dda..2d36a02a5f7 100644 --- a/src/include/cpu/power/spr.h +++ b/src/include/cpu/power/spr.h @@ -5,18 +5,41 @@ #include // PPC_BIT() -#define SPR_TB 0x10C +#define SPR_DEC 22 +#define SPR_DEC_IMPLEMENTED_BITS 56 +#define SPR_DEC_LONGEST_TIME ((1ull << (SPR_DEC_IMPLEMENTED_BITS - 1)) - 1) -#define SPR_PVR 0x11F +#define SPR_SRR0 26 +#define SPR_SRR1 27 + +#define SPR_DAWR 180 +#define SPR_CIABR 187 +#define SPR_DAWRX 188 +#define SPR_TB 268 + +#define SPR_PVR 287 #define SPR_PVR_REV_MASK (PPC_BITMASK(52, 55) | PPC_BITMASK(60, 63)) #define SPR_PVR_REV(maj, min) (PPC_SHIFT((maj), 55) | PPC_SHIFT((min), 63)) -#define SPR_HSPRG0 0x130 -#define SPR_HSPRG1 0x131 +#define SPR_HSPRG0 304 +#define SPR_HSPRG1 305 -#define SPR_HRMOR 0x139 +#define SPR_HDEC 310 +#define SPR_HRMOR 313 -#define SPR_HMER 0x150 +#define SPR_LPCR 318 +#define SPR_LPCR_HVEE PPC_BIT(17) +#define SPR_LPCR_LD PPC_BIT(46) +#define SPR_LPCR_HDEE PPC_BIT(48) +#define SPR_LPCR_EEE PPC_BIT(49) +#define SPR_LPCR_DEE PPC_BIT(50) +#define SPR_LPCR_OEE PPC_BIT(51) +#define SPR_LPCR_HEIC PPC_BIT(59) +#define SPR_LPCR_HVICE PPC_BIT(62) +#define SPR_LPCR_HDICE PPC_BIT(63) + +#define SPR_HMER 336 +#define SPR_HMEER 337 /* Bits in HMER/HMEER */ #define SPR_HMER_MALFUNCTION_ALERT PPC_BIT(0) #define SPR_HMER_PROC_RECV_DONE PPC_BIT(2) @@ -34,6 +57,11 @@ #define SPR_HMER_XSCOM_STATUS PPC_BITMASK(21, 23) #define SPR_HMER_XSCOM_OCCUPIED PPC_BIT(23) +#define SPR_PTCR 464 +#define SPR_PSSCR 855 +#define SPR_PMCR 884 +#define SPR_PIR 1023 + #ifndef __ASSEMBLER__ #include @@ -66,9 +94,9 @@ static inline uint64_t read_msr(void) return val; } -static inline uint64_t pvr_revision(void) +static inline void write_msr(uint64_t val) { - return read_spr(SPR_PVR) & SPR_PVR_REV_MASK; + asm volatile("mtmsrd %0" :: "r"(val) : "memory"); } #endif /* __ASSEMBLER__ */ diff --git a/src/include/cpu/power/vpd.h b/src/include/cpu/power/vpd.h new file mode 100644 index 00000000000..461e142325f --- /dev/null +++ b/src/include/cpu/power/vpd.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_VPD_H +#define CPU_PPC64_VPD_H + +#include +#include + +#define VPD_RECORD_NAME_LEN 4 +#define VPD_RECORD_SIZE_LEN 2 +#define VPD_KWD_NAME_LEN 2 + +void vpd_pnor_main(void); + +/* Finds a keyword by its name. Retrieves its size too. Returns NULL on + * failure. */ +const uint8_t *vpd_find_kwd(const uint8_t *record, const char *record_name, + const char *kwd_name, size_t *size); + +#endif /* CPU_PPC64_VPD_H */ diff --git a/src/include/cpu/power/vpd_data.h b/src/include/cpu/power/vpd_data.h new file mode 100644 index 00000000000..20fd8f3bb47 --- /dev/null +++ b/src/include/cpu/power/vpd_data.h @@ -0,0 +1,149 @@ +#ifndef CPU_PPC64_VPD_DATA_H +#define CPU_PPC64_VPD_DATA_H + +/* Memory rotator data */ + +/* FIXME: these can be updated by MVPD in istep 7.5. Values below (from MEMD) + * are different than in documentation. 
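(MEMD is the memory VPD partition in PNOR; see memd.h for the accessors used to read it)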
*/ +extern uint8_t ATTR_MSS_VPD_MR_TSYS_ADR[4]; +extern uint8_t ATTR_MSS_VPD_MR_TSYS_DATA[4]; + +/* This data is the same for all configurations */ +extern uint8_t ATTR_MSS_VPD_MR_DPHY_GPO; +extern uint8_t ATTR_MSS_VPD_MR_DPHY_RLO; +extern uint8_t ATTR_MSS_VPD_MR_DPHY_WLO; +extern uint8_t ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET; + +/* + * 43 tables for 43 signals. These probably are platform specific so in the + * final version we should read this from VPD partition. Hardcoding it will make + * one less possible fault point. + * + * Also, VPD layout may change. Right npw Talos uses first version of layout, + * but there is a newer version with one additional field __in the middle__ of + * the structure. + * + * Order: + * - J0 - PROC 0 MCS 0, 1 DIMM, 1866 MT/s + * - J1 - PROC 0 MCS 1, 1 DIMM, 1866 MT/s + * - J2 - PROC 1 MCS 0, 1 DIMM, 1866 MT/s + * - J3 - PROC 1 MCS 1, 1 DIMM, 1866 MT/s + * - J4 - PROC 0 MCS 0, 2 DIMMs, 1866 MT/s + * - J5 + * - J6 + * - J7 - PROC 1 MCS 1, 2 DIMMs, 1866 MT/s + * - J8 - PROC 0 MCS 0, 1 DIMM, 2133 MT/s + * - J9 + * - JA + * - JB - PROC 1 MCS 1, 1 DIMM, 2133 MT/s + * - JC - PROC 0 MCS 0, 2 DIMMs, 2133 MT/s + * - JD + * - JE + * - JF - PROC 1 MCS 1, 2 DIMMs, 2133 MT/s + * - JG - PROC 0 MCS 0, 1 DIMM, 2400 MT/s + * - JH + * - JI + * - JJ - PROC 1 MCS 1, 1 DIMM, 2400 MT/s + * - JK - PROC 0 MCS 0, 2 DIMMs, 2400 MT/s + * - JL + * - JM + * - JN - PROC 1 MCS 1, 2 DIMMs, 2400 MT/s + * - JO - PROC 0 MCS 0, 1 DIMM, 2666 MT/s + * - JP + * - JQ + * - JR - PROC 1 MCS 1, 1 DIMM, 2666 MT/s + * + * 2 DIMMs, 2666 MT/s is not supported (this is ensured by prepare_dimm_data()). + */ +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[28][MCA_PER_MCS]; +extern uint8_t 
ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[28][MCA_PER_MCS]; +extern uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[28][MCA_PER_MCS]; + +/* End of rotator data */ + +/* Memory terminator data */ + +/* + * VPD has per rank settings, but both ranks (if present) are the same. Order: + * - 1R in DIMM0 and no DIMM1 + * - 1R in both DIMMs + * - 2R in DIMM0 and no DIMM1 + * - 2R in both DIMMs + */ +extern uint32_t ATTR_MSS_VPD_MT_VREF_MC_RD[4]; +extern uint8_t ATTR_MSS_VPD_MT_VREF_DRAM_WR[4]; +extern uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_PARK[4]; +extern uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_WR[4]; +extern uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_NOM[4]; + +/* + * Warning: this is not a 1:1 copy from VPD. + * + * VPD uses uint8_t [2][2][4] table, indexed as [MCA][DIMM][RANK]. It tries to + * be generic, but for RDIMMs only 2 ranks are supported. This format also + * allows for different settings across MCAs, but in Talos they are identical. + * + * Tables below are uint8_t [4][2][2], indexed as [rank config.][DIMM][RANK]. + * + * There are 4 rank configurations, see comments in ATTR_MSS_VPD_MT_VREF_MC_RD. + */ +extern uint8_t ATTR_MSS_VPD_MT_ODT_RD[4][2][2]; +extern uint8_t ATTR_MSS_VPD_MT_ODT_WR[4][2][2]; + +/* This data is the same for all configurations */ +extern uint8_t ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS; +extern uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP; +extern uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN; +extern uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP; +extern uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP; +extern uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID; +extern uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS; +extern uint8_t ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS; +extern uint8_t ATTR_MSS_VPD_MT_PREAMBLE; +extern uint16_t ATTR_MSS_VPD_MT_WINDAGE_RD_CTR; + +#endif /* CPU_PPC64_VPD_DATA_H */ diff --git a/src/include/device/dram/ddr4.h b/src/include/device/dram/ddr4.h index ee7a1ea5f87..b185b36601f 100644 --- a/src/include/device/dram/ddr4.h +++ b/src/include/device/dram/ddr4.h @@ -74,4 +74,302 @@ enum cb_err spd_add_smbios17_ddr4(const u8 channel, const u8 slot, */ uint16_t ddr4_speed_mhz_to_reported_mts(uint16_t speed_mhz); +/** + * \brief Representation of an MRS command + * + * This represents an MRS command as seen by the DIMM. 
This is not a memory
+ * address that can be read to generate an MRS command. The mapping of CPU
+ * to memory pins is hardware-dependent.
+ * \n
+ * The idea is to generalize the MRS code, and only need a hardware-specific
+ * function to map the MRS bits to CPU address bits. An MRS command can be
+ * sent like:
+ * @code{.c}
+ *	u32 addr;
+ *	mrs_cmd_t mrs;
+ *	chipset_enable_mrs_command_mode();
+ *	mrs = ddr4_get_mr0(rtt_wr, srt, asr, cwl);
+ *	if (rank_has_mirrored_pins)
+ *		mrs = ddr4_mrs_mirror_pins(mrs);
+ *	addr = chipset_specific_get_mrs_addr(mrs);
+ *	volatile_read(addr);
+ * @endcode
+ *
+ * The MRS representation has the following structure:
+ * - cmd[17:0] = Address pins A[17:0]
+ * - cmd[21:20] = Bank address BA[1:0]
+ * - cmd[23:22] = Bank group BG[1:0]
+ *
+ * Address pins A[16:14] are always low for MRS commands. A17 is reserved for
+ * future use, and cmd[19:18] are left as a placeholder in case they are needed.
+ */
+typedef u32 mrs_cmd_t;
+
+/* Swap A3<->A4, A5<->A6, A7<->A8, A11<->A13, BA0<->BA1, BG0<->BG1 */
+static inline mrs_cmd_t ddr4_mrs_mirror_pins(mrs_cmd_t x)
+{
+	x = (x & 0x5000A8) << 1 |
+	    (x & 0xA00150) >> 1 |
+	    (x & ~0xF001F8);
+	x = (x & 0x000800) << 2 |
+	    (x & 0x002000) >> 2 |
+	    (x & ~0x002800);
+
+	return x;
+}
+
+enum ddr4_mr0_mode {
+	DDR4_MR0_MODE_NORMAL = 0,
+	DDR4_MR0_MODE_TEST = 1,
+};
+enum ddr4_mr0_dll_reset {
+	DDR4_MR0_DLL_RESET_NO = 0,
+	DDR4_MR0_DLL_RESET_YES = 1,
+};
+enum ddr4_mr0_burst_type {
+	DDR4_MR0_BURST_TYPE_SEQUENTIAL = 0,
+	DDR4_MR0_BURST_TYPE_INTERLEAVED = 1,
+};
+enum ddr4_mr0_burst_length {
+	DDR4_MR0_BURST_LENGTH_FIXED_8 = 0,
+	DDR4_MR0_BURST_LENGTH_OTF = 1,
+	DDR4_MR0_BURST_LENGTH_FIXED_4 = 2,
+};
+
+mrs_cmd_t ddr4_get_mr0(u8 write_recovery,
+		       enum ddr4_mr0_dll_reset dll_reset,
+		       enum ddr4_mr0_mode mode,
+		       u8 cas,
+		       enum ddr4_mr0_burst_type burst_type,
+		       enum ddr4_mr0_burst_length burst_length);
+
+enum ddr4_mr1_qoff {
+	DDR4_MR1_QOFF_ENABLE = 0,
+	DDR4_MR1_QOFF_DISABLE = 1,
+};
+enum ddr4_mr1_tqds {
+	DDR4_MR1_TQDS_DISABLE = 0,
+	DDR4_MR1_TQDS_ENABLE = 1,
+};
+enum ddr4_mr1_rtt_nom {
+	DDR4_MR1_RTT_NOM_OFF = 0,
+	DDR4_MR1_RTT_NOM_RZQ_4 = 1,
+	DDR4_MR1_RTT_NOM_RZQ_2 = 2,
+	DDR4_MR1_RTT_NOM_RZQ_6 = 3,
+	DDR4_MR1_RTT_NOM_RZQ_1 = 4,
+	DDR4_MR1_RTT_NOM_RZQ_5 = 5,
+	DDR4_MR1_RTT_NOM_RZQ_3 = 6,
+	DDR4_MR1_RTT_NOM_RZQ_7 = 7,
+};
+enum ddr4_mr1_write_leveling {
+	DDR4_MR1_WRLVL_DISABLE = 0,
+	DDR4_MR1_WRLVL_ENABLE = 1,
+};
+enum ddr4_mr1_additive_latency {
+	DDR4_MR1_AL_DISABLE = 0,
+	DDR4_MR1_AL_CL_MINUS_1 = 1,
+	DDR4_MR1_AL_CL_MINUS_2 = 2,
+};
+enum ddr4_mr1_odimp {
+	DDR4_MR1_ODIMP_RZQ_7 = 0,
+	DDR4_MR1_ODIMP_RZQ_5 = 1,
+};
+enum ddr4_mr1_dll {
+	DDR4_MR1_DLL_DISABLE = 0,
+	DDR4_MR1_DLL_ENABLE = 1,
+};
+
+mrs_cmd_t ddr4_get_mr1(enum ddr4_mr1_qoff qoff,
+		       enum ddr4_mr1_tqds tqds,
+		       enum ddr4_mr1_rtt_nom rtt_nom,
+		       enum ddr4_mr1_write_leveling write_leveling,
+		       enum ddr4_mr1_odimp output_drive_impedance,
+		       enum ddr4_mr1_additive_latency additive_latency,
+		       enum ddr4_mr1_dll dll_enable);
+
+enum ddr4_mr2_wr_crc {
+	DDR4_MR2_WR_CRC_DISABLE = 0,
+	DDR4_MR2_WR_CRC_ENABLE = 1,
+};
+enum ddr4_mr2_rtt_wr {
+	DDR4_MR2_RTT_WR_OFF = 0,
+	DDR4_MR2_RTT_WR_RZQ_2 = 1,
+	DDR4_MR2_RTT_WR_RZQ_1 = 2,
+	DDR4_MR2_RTT_WR_HI_Z = 3,
+	DDR4_MR2_RTT_WR_RZQ_3 = 4,
+};
+enum ddr4_mr2_lp_asr {
+	DDR4_MR2_ASR_MANUAL_NORMAL_RANGE = 0,
+	DDR4_MR2_ASR_MANUAL_REDUCED_RANGE = 1,
+	DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE = 2,
+	DDR4_MR2_ASR_AUTO = 3,
+};
+
+mrs_cmd_t ddr4_get_mr2(enum ddr4_mr2_wr_crc wr_crc,
+		       enum ddr4_mr2_rtt_wr rtt_wr,
+		       enum ddr4_mr2_lp_asr self_refresh, u8
cwl); + +enum ddr4_mr3_mpr_read_format { + DDR4_MR3_MPR_SERIAL = 0, + DDR4_MR3_MPR_PARALLEL = 1, + DDR4_MR3_MPR_STAGGERED = 2, +}; +enum ddr4_mr3_wr_cmd_lat_crc_dm { + DDR4_MR3_CRC_DM_4 = 0, + DDR4_MR3_CRC_DM_5 = 1, + DDR4_MR3_CRC_DM_6 = 2, +}; +enum ddr4_mr3_fine_gran_ref { + DDR4_MR3_FINE_GRAN_REF_NORMAL = 0, + DDR4_MR3_FINE_GRAN_REF_FIXED_2 = 1, + DDR4_MR3_FINE_GRAN_REF_FIXED_4 = 2, + /* Two reserved values */ + DDR4_MR3_FINE_GRAN_REF_OTF_2 = 5, + DDR4_MR3_FINE_GRAN_REF_OTF_4 = 6, +}; +enum ddr4_mr3_temp_sensor_readout { + DDR4_MR3_TEMP_SENSOR_DISABLE = 0, + DDR4_MR3_TEMP_SENSOR_ENABLE = 1, +}; +enum ddr4_mr3_pda { + DDR4_MR3_PDA_DISABLE = 0, + DDR4_MR3_PDA_ENABLE = 1, +}; +enum ddr4_mr3_geardown_mode { + DDR4_MR3_GEARDOWN_1_2_RATE = 0, + DDR4_MR3_GEARDOWN_1_4_RATE = 1, +}; +enum ddr4_mr3_mpr_operation { + DDR4_MR3_MPR_NORMAL = 0, + DDR4_MR3_MPR_MPR = 1, +}; + +mrs_cmd_t ddr4_get_mr3(enum ddr4_mr3_mpr_read_format mpr_read_format, + enum ddr4_mr3_wr_cmd_lat_crc_dm command_latency_crc_dm, + enum ddr4_mr3_fine_gran_ref fine_refresh, + enum ddr4_mr3_temp_sensor_readout temp_sensor, + enum ddr4_mr3_pda pda, + enum ddr4_mr3_geardown_mode geardown, + enum ddr4_mr3_mpr_operation mpr_operation, + u8 mpr_page); + +enum ddr4_mr4_hppr { + DDR4_MR4_HPPR_DISABLE = 0, + DDR4_MR4_HPPR_ENABLE = 1, +}; +enum ddr4_mr4_wr_preamble { + DDR4_MR4_WR_PREAMBLE_1 = 0, + DDR4_MR4_WR_PREAMBLE_2 = 1, +}; +enum ddr4_mr4_rd_preamble { + DDR4_MR4_RD_PREAMBLE_1 = 0, + DDR4_MR4_RD_PREAMBLE_2 = 1, +}; +enum ddr4_mr4_rd_preamble_training { + DDR4_MR4_RD_PREAMBLE_TRAINING_DISABLE = 0, + DDR4_MR4_RD_PREAMBLE_TRAINING_ENABLE = 1, +}; +enum ddr4_mr4_self_refr_abort { + DDR4_MR4_SELF_REFRESH_ABORT_DISABLE = 0, + DDR4_MR4_SELF_REFRESH_ABORT_ENABLE = 1, +}; +enum ddr4_mr4_cs_to_cmd_latency { + DDR4_MR4_CS_TO_CMD_LAT_DISABLE = 0, + DDR4_MR4_CS_TO_CMD_LAT_3 = 1, + DDR4_MR4_CS_TO_CMD_LAT_4 = 2, + DDR4_MR4_CS_TO_CMD_LAT_5 = 3, + DDR4_MR4_CS_TO_CMD_LAT_6 = 4, + DDR4_MR4_CS_TO_CMD_LAT_8 = 5, +}; +enum ddr4_mr4_sppr { + DDR4_MR4_SPPR_DISABLE = 0, + DDR4_MR4_SPPR_ENABLE = 1, +}; +enum ddr4_mr4_internal_vref_mon { + DDR4_MR4_INTERNAL_VREF_MON_DISABLE = 0, + DDR4_MR4_INTERNAL_VREF_MON_ENABLE = 1, +}; +enum ddr4_mr4_temp_controlled_refr { + DDR4_MR4_TEMP_CONTROLLED_REFR_DISABLE = 0, + DDR4_MR4_TEMP_CONTROLLED_REFR_NORMAL = 2, + DDR4_MR4_TEMP_CONTROLLED_REFR_EXTENDED = 3, +}; +enum ddr4_mr4_max_pd_mode { + DDR4_MR4_MAX_PD_MODE_DISABLE = 0, + DDR4_MR4_MAX_PD_MODE_ENABLE = 1, +}; + +mrs_cmd_t ddr4_get_mr4(enum ddr4_mr4_hppr hppr, + enum ddr4_mr4_wr_preamble wr_preamble, + enum ddr4_mr4_rd_preamble rd_preamble, + enum ddr4_mr4_rd_preamble_training rd_preamble_train, + enum ddr4_mr4_self_refr_abort self_ref_abrt, + enum ddr4_mr4_cs_to_cmd_latency cs2cmd_lat, + enum ddr4_mr4_sppr sppr, + enum ddr4_mr4_internal_vref_mon int_vref_mon, + enum ddr4_mr4_temp_controlled_refr temp_ctrl_ref, + enum ddr4_mr4_max_pd_mode max_pd); + +enum ddr4_mr5_rd_dbi { + DDR4_MR5_RD_DBI_DISABLE = 0, + DDR4_MR5_RD_DBI_ENABLE = 1, +}; +enum ddr4_mr5_wr_dbi { + DDR4_MR5_WR_DBI_DISABLE = 0, + DDR4_MR5_WR_DBI_ENABLE = 1, +}; +enum ddr4_mr5_data_mask { + DDR4_MR5_DATA_MASK_DISABLE = 0, + DDR4_MR5_DATA_MASK_ENABLE = 1, +}; +enum ddr4_mr5_rtt_park { + DDR4_MR5_RTT_PARK_OFF = 0, + DDR4_MR5_RTT_PARK_RZQ_4 = 1, + DDR4_MR5_RTT_PARK_RZQ_2 = 2, + DDR4_MR5_RTT_PARK_RZQ_6 = 3, + DDR4_MR5_RTT_PARK_RZQ_1 = 4, + DDR4_MR5_RTT_PARK_RZQ_5 = 5, + DDR4_MR5_RTT_PARK_RZQ_3 = 6, + DDR4_MR5_RTT_PARK_RZQ_7 = 7, +}; +enum ddr4_mr5_odt_pd { + DDR4_MR5_ODT_PD_ACTIVADED = 0, + 
DDR4_MR5_ODT_PD_DEACTIVADED = 1,
+};
+enum ddr4_mr5_ca_parity_lat {
+	DDR4_MR5_CA_PARITY_LAT_DISABLE = 0,
+	DDR4_MR5_CA_PARITY_LAT_4 = 1, /* 1600-2133 MT/s */
+	DDR4_MR5_CA_PARITY_LAT_5 = 2, /* 2400-2666 MT/s */
+	DDR4_MR5_CA_PARITY_LAT_6 = 3, /* 2933-3200 MT/s */
+	DDR4_MR5_CA_PARITY_LAT_8 = 4, /* RFU */
+};
+
+mrs_cmd_t ddr4_get_mr5(enum ddr4_mr5_rd_dbi rd_dbi,
+		       enum ddr4_mr5_wr_dbi wr_dbi,
+		       enum ddr4_mr5_data_mask dm,
+		       enum ddr4_mr5_rtt_park rtt_park,
+		       enum ddr4_mr5_odt_pd odt_pd,
+		       enum ddr4_mr5_ca_parity_lat pl);
+
+enum ddr4_mr6_vrefdq_training {
+	DDR4_MR6_VREFDQ_TRAINING_DISABLE = 0,
+	DDR4_MR6_VREFDQ_TRAINING_ENABLE = 1,
+};
+enum ddr4_mr6_vrefdq_training_range {
+	DDR4_MR6_VREFDQ_TRAINING_RANGE_1 = 0, /* 60% to 92.50% in 0.65% steps */
+	DDR4_MR6_VREFDQ_TRAINING_RANGE_2 = 1, /* 45% to 77.50% in 0.65% steps */
+};
+
+mrs_cmd_t ddr4_get_mr6(u8 tccd_l,
+		       enum ddr4_mr6_vrefdq_training vrefdq_training,
+		       enum ddr4_mr6_vrefdq_training_range range,
+		       u8 vrefdq_value);
+
+enum ddr4_zqcal_ls {
+	DDR4_ZQCAL_SHORT = 0,
+	DDR4_ZQCAL_LONG = 1,
+};
+
+mrs_cmd_t ddr4_get_zqcal_cmd(enum ddr4_zqcal_ls long_short);
+
 #endif /* DEVICE_DRAM_DDR4L_H */
diff --git a/src/include/device/dram/rcd.h b/src/include/device/dram/rcd.h
new file mode 100644
index 00000000000..86655dd7154
--- /dev/null
+++ b/src/include/device/dram/rcd.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef DEVICE_DRAM_RCD_H
+#define DEVICE_DRAM_RCD_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Maybe these should land in types.h */
+typedef uint32_t le_uint32_t;
+typedef uint16_t le_uint16_t;
+
+enum rcw_idx {
+	VEN_ID_L,
+	VEN_ID_H,
+	DEV_ID_L,
+	DEV_ID_H,
+	REV_ID,
+	RES_05,
+	RES_06,
+	RES_07,
+	F0RC00_01,
+	F0RC02_03,
+	F0RC04_05,
+	F0RC06_07,
+	F0RC08_09,
+	F0RC0A_0B,
+	F0RC0C_0D,
+	F0RC0E_0F,
+	F0RC1x,
+	F0RC2x,
+	F0RC3x,
+	F0RC4x,
+	F0RC5x,
+	F0RC6x,
+	F0RC7x,
+	F0RC8x,
+	F0RC9x,
+	F0RCAx,
+	F0RCBx,
+	F0RCCx,
+	F0RCDx,
+	F0RCEx,
+	F0RCFx,
+	RCW_ALL,		/* Total num of bytes */
+	RCW_ALL_ALIGNED		/* Total num of bytes after aligning to 4B */
+};
+
+_Static_assert(RCW_ALL_ALIGNED % sizeof(uint32_t) == 0,
+	       "RCW_ALL_ALIGNED is not aligned");
+
+int rcd_write_reg(unsigned int bus, uint8_t slave, enum rcw_idx reg,
+		  uint8_t data);
+int rcd_write_32b(unsigned int bus, uint8_t slave, enum rcw_idx reg,
+		  le_uint32_t data);
+
+
+void dump_rcd(unsigned int bus, uint8_t addr);
+
+#endif /* DEVICE_DRAM_RCD_H */
diff --git a/src/include/memlayout.h b/src/include/memlayout.h
index 1e9fca81980..dee5eaf3b6e 100644
--- a/src/include/memlayout.h
+++ b/src/include/memlayout.h
@@ -168,9 +168,9 @@
 	STR(vboot2 work buffer size must be equivalent to \
 		VB2_FIRMWARE_WORKBUF_RECOMMENDED_SIZE! (sz)));
 
-#define TPM_TCPA_LOG(addr, size) \
-	REGION(tpm_tcpa_log, addr, size, 16) \
-	_ = ASSERT(size >= 2K, "tpm tcpa log buffer must be at least 2K!");
+#define TPM_LOG(addr, size) \
+	REGION(tpm_log, addr, size, 16) \
+	_ = ASSERT(size >= 2K, "tpm log buffer must be at least 2K!");
 
 #if ENV_SEPARATE_VERSTAGE
 #define VERSTAGE(addr, sz) \
diff --git a/src/include/spd_bin.h b/src/include/spd_bin.h
index 973eb498a7b..5133cff99bf 100644
--- a/src/include/spd_bin.h
+++ b/src/include/spd_bin.h
@@ -46,6 +46,7 @@
 void print_spd_info(uint8_t spd[]);
 uintptr_t spd_cbfs_map(u8 spd_index);
 void dump_spd_info(struct spd_block *blk);
 void get_spd_smbus(struct spd_block *blk);
+void get_spd_i2c(uint8_t bus, struct spd_block *blk);
 
 /*
  * get_spd_sn returns the SODIMM serial number. It only supports DDR3 and DDR4.
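For reference, here is a minimal sketch of how the MRS helpers declared in `ddr4.h` above are meant to be combined. Only `ddr4_get_mr2()` and `ddr4_mrs_mirror_pins()` come from the patch; `chipset_get_mrs_addr()` stands in for the hardware-specific mapping layer described in the doc comment and is hypothetical:

```c
#include <device/dram/ddr4.h>
#include <device/mmio.h>
#include <types.h>

/* Hypothetical hardware-specific hook: maps MRS bits to a CPU address. */
uintptr_t chipset_get_mrs_addr(mrs_cmd_t cmd);

static void send_mr2(bool pins_mirrored)
{
	/* MR2: no write CRC, RTT_WR = RZQ/2, normal self-refresh range, CWL = 14 */
	mrs_cmd_t mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE,
				     DDR4_MR2_RTT_WR_RZQ_2,
				     DDR4_MR2_ASR_MANUAL_NORMAL_RANGE, 14);

	/* Odd ranks on some DIMMs have their address/bank pins mirrored */
	if (pins_mirrored)
		mrs = ddr4_mrs_mirror_pins(mrs);

	/* Reading the mapped address issues the command on the memory bus */
	read32((void *)chipset_get_mrs_addr(mrs));
}
```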
diff --git a/src/include/symbols.h b/src/include/symbols.h
index ee7c5031fa8..e2623b09f71 100644
--- a/src/include/symbols.h
+++ b/src/include/symbols.h
@@ -37,7 +37,7 @@
 DECLARE_OPTIONAL_REGION(postram_cbfs_cache)
 DECLARE_OPTIONAL_REGION(cbfs_cache)
 DECLARE_REGION(cbfs_mcache)
 DECLARE_REGION(fmap_cache)
-DECLARE_REGION(tpm_tcpa_log)
+DECLARE_REGION(tpm_log)
 
 #if ENV_ROMSTAGE && CONFIG(ASAN_IN_ROMSTAGE)
 DECLARE_REGION(bss)
@@ -77,6 +77,7 @@
 DECLARE_OPTIONAL_REGION(opensbi)
 DECLARE_OPTIONAL_REGION(bl31)
 DECLARE_REGION(transfer_buffer)
 DECLARE_OPTIONAL_REGION(watchdog_tombstone)
+DECLARE_REGION(mvpd_cache)
 
 /* Returns true when pre-RAM symbols are known to the linker.
  * (Does not necessarily mean that the memory is accessible.) */
diff --git a/src/lib/bootmem.c b/src/lib/bootmem.c
index 078f9609325..7722d791866 100644
--- a/src/lib/bootmem.c
+++ b/src/lib/bootmem.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <endian.h>
 #include
 #include
 #include
@@ -103,6 +104,7 @@
 void bootmem_write_memory_table(struct lb_memory *mem)
 {
 	const struct range_entry *r;
 	struct lb_memory_range *lb_r;
+	uint32_t entry_size = le32toh(mem->size);
 
 	lb_r = &mem->map[0];
 
@@ -110,14 +112,16 @@
 	bootmem_dump_ranges();
 
 	memranges_each_entry(r, &bootmem_os) {
-		lb_r->start = range_entry_base(r);
-		lb_r->size = range_entry_size(r);
-		lb_r->type = bootmem_to_lb_tag(range_entry_tag(r));
+		lb_r->start = htole64(range_entry_base(r));
+		lb_r->size = htole64(range_entry_size(r));
+		lb_r->type = htole32(bootmem_to_lb_tag(range_entry_tag(r)));
 
 		lb_r++;
-		mem->size += sizeof(struct lb_memory_range);
+		entry_size += sizeof(struct lb_memory_range);
 	}
 
+	mem->size = htole32(entry_size);
+
 	table_written = 1;
 }
diff --git a/src/lib/cbfs.c b/src/lib/cbfs.c
index e1334f41528..40800005c13 100644
--- a/src/lib/cbfs.c
+++ b/src/lib/cbfs.c
@@ -176,7 +176,7 @@
 	}
 
 	if (tspi_cbfs_measurement(mdata->h.filename, be32toh(mdata->h.type), hash))
-		ERROR("failed to measure '%s' into TCPA log\n", mdata->h.filename);
+		ERROR("failed to measure '%s' into TPM log\n", mdata->h.filename);
 	/* We intentionally continue to boot on measurement errors. */
 }
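The bootmem hunk above shows the convention applied by all of the table writers in this series (the filled-in `<endian.h>` include is an assumption inferred from the conversion calls): fields of structures shared with payloads are kept little-endian in memory, so every store goes through `htoleXX()` and every read-back through `leXXtoh()`; on little-endian hosts both are no-ops. A standalone sketch of the pattern, not patch code:

```c
#include <endian.h>
#include <stdint.h>

struct le_record {
	uint32_t tag;	/* stored little-endian */
	uint32_t size;	/* stored little-endian */
};

/* Grow a record by n bytes without doing arithmetic on LE-encoded values. */
static void record_grow(struct le_record *rec, uint32_t n)
{
	uint32_t size = le32toh(rec->size);	/* LE -> host */
	size += n;				/* host-endian arithmetic */
	rec->size = htole32(size);		/* host -> LE */
}
```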
diff --git a/src/lib/cbmem_console.c b/src/lib/cbmem_console.c
index ad3b99bbba9..7ca5008d45e 100644
--- a/src/lib/cbmem_console.c
+++ b/src/lib/cbmem_console.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <endian.h>
 #include
 #include
 
@@ -57,9 +58,9 @@
 static u8 static_console[STATIC_CONSOLE_SIZE];
 
 static int buffer_valid(struct cbmem_console *cbm_cons_p, u32 total_space)
 {
-	return (cbm_cons_p->cursor & CURSOR_MASK) < cbm_cons_p->size &&
-	       cbm_cons_p->size <= MAX_SIZE &&
-	       cbm_cons_p->size == total_space - sizeof(struct cbmem_console);
+	return (le32toh(cbm_cons_p->cursor) & CURSOR_MASK) < le32toh(cbm_cons_p->size) &&
+	       le32toh(cbm_cons_p->size) <= MAX_SIZE &&
+	       le32toh(cbm_cons_p->size) == total_space - sizeof(struct cbmem_console);
 }
 
 static void init_console_ptr(void *storage, u32 total_space)
@@ -72,7 +73,7 @@
 	}
 
 	if (!buffer_valid(cbm_cons_p, total_space)) {
-		cbm_cons_p->size = total_space - sizeof(struct cbmem_console);
+		cbm_cons_p->size = htole32(total_space - sizeof(struct cbmem_console));
 		cbm_cons_p->cursor = 0;
 	}
 
@@ -102,16 +103,16 @@
 void cbmemc_tx_byte(unsigned char data)
 {
 	if (!current_console || !current_console->size || console_paused)
 		return;
 
-	u32 flags = current_console->cursor & ~CURSOR_MASK;
-	u32 cursor = current_console->cursor & CURSOR_MASK;
+	u32 flags = le32toh(current_console->cursor) & ~CURSOR_MASK;
+	u32 cursor = le32toh(current_console->cursor) & CURSOR_MASK;
 
 	current_console->body[cursor++] = data;
-	if (cursor >= current_console->size) {
+	if (cursor >= le32toh(current_console->size)) {
 		cursor = 0;
 		flags |= OVERFLOW;
 	}
 
-	current_console->cursor = flags | cursor;
+	current_console->cursor = htole32(flags | cursor);
 }
 
 /*
@@ -128,17 +129,17 @@
 	if (!src_cons_p)
 		return;
 
-	if (src_cons_p->cursor & OVERFLOW) {
+	if (le32toh(src_cons_p->cursor) & OVERFLOW) {
 		const char overflow_warning[] = "\n*** Pre-CBMEM " ENV_STRING
 			" console overflowed, log truncated! ***\n";
***\n"; for (c = 0; c < sizeof(overflow_warning) - 1; c++) cbmemc_tx_byte(overflow_warning[c]); - for (c = src_cons_p->cursor & CURSOR_MASK; - c < src_cons_p->size; c++) + for (c = le32toh(src_cons_p->cursor) & CURSOR_MASK; + c < le32toh(src_cons_p->size); c++) cbmemc_tx_byte(src_cons_p->body[c]); } - for (c = 0; c < (src_cons_p->cursor & CURSOR_MASK); c++) + for (c = 0; c < (le32toh(src_cons_p->cursor) & CURSOR_MASK); c++) cbmemc_tx_byte(src_cons_p->body[c]); /* Invalidate the source console, so it will be reinitialized on the @@ -184,9 +185,9 @@ void cbmem_dump_console_to_uart(void) console_index = get_uart_for_console(); uart_init(console_index); - if (current_console->cursor & OVERFLOW) { - for (cursor = current_console->cursor & CURSOR_MASK; - cursor < current_console->size; cursor++) { + if (le32toh(current_console->cursor) & OVERFLOW) { + for (cursor = le32toh(current_console->cursor) & CURSOR_MASK; + cursor < le32toh(current_console->size); cursor++) { if (BIOS_LOG_IS_MARKER(current_console->body[cursor])) continue; if (current_console->body[cursor] == '\n') @@ -194,7 +195,8 @@ void cbmem_dump_console_to_uart(void) uart_tx_byte(console_index, current_console->body[cursor]); } } - for (cursor = 0; cursor < (current_console->cursor & CURSOR_MASK); cursor++) { + for (cursor = 0; cursor < (le32toh(current_console->cursor) & CURSOR_MASK); + cursor++) { if (BIOS_LOG_IS_MARKER(current_console->body[cursor])) continue; if (current_console->body[cursor] == '\n') @@ -212,12 +214,13 @@ void cbmem_dump_console(void) console_paused = true; - if (current_console->cursor & OVERFLOW) - for (cursor = current_console->cursor & CURSOR_MASK; - cursor < current_console->size; cursor++) + if (le32toh(current_console->cursor) & OVERFLOW) + for (cursor = le32toh(current_console->cursor) & CURSOR_MASK; + cursor < le32toh(current_console->size); cursor++) if (!BIOS_LOG_IS_MARKER(current_console->body[cursor])) do_putchar(current_console->body[cursor]); - for (cursor = 0; cursor < (current_console->cursor & CURSOR_MASK); cursor++) + for (cursor = 0; cursor < (le32toh(current_console->cursor) & CURSOR_MASK); + cursor++) if (!BIOS_LOG_IS_MARKER(current_console->body[cursor])) do_putchar(current_console->body[cursor]); diff --git a/src/lib/coreboot_table.c b/src/lib/coreboot_table.c index 2a7ccc5f55c..a818704a986 100644 --- a/src/lib/coreboot_table.c +++ b/src/lib/coreboot_table.c @@ -23,6 +23,7 @@ #include #include #include +#include #if CONFIG(USE_OPTION_TABLE) #include @@ -49,7 +50,7 @@ static struct lb_header *lb_table_init(unsigned long addr) header->signature[1] = 'B'; header->signature[2] = 'I'; header->signature[3] = 'O'; - header->header_bytes = sizeof(*header); + header->header_bytes = htole32(sizeof(*header)); header->header_checksum = 0; header->table_bytes = 0; header->table_checksum = 0; @@ -68,7 +69,7 @@ static struct lb_record *lb_last_record(struct lb_header *header) { struct lb_record *rec; rec = (void *)(((char *)header) + sizeof(*header) - + header->table_bytes); + + le32toh(header->table_bytes)); return rec; } @@ -77,11 +78,12 @@ struct lb_record *lb_new_record(struct lb_header *header) struct lb_record *rec; rec = lb_last_record(header); if (header->table_entries) - header->table_bytes += rec->size; + header->table_bytes = htole32(le32toh(header->table_bytes) + + le32toh(rec->size)); rec = lb_last_record(header); - header->table_entries++; - rec->tag = LB_TAG_UNUSED; - rec->size = sizeof(*rec); + header->table_entries = htole32(le32toh(header->table_entries) + 1); + rec->tag = 
htole32(LB_TAG_UNUSED); + rec->size = htole32(sizeof(*rec)); return rec; } @@ -91,8 +93,8 @@ static struct lb_memory *lb_memory(struct lb_header *header) struct lb_memory *mem; rec = lb_new_record(header); mem = (struct lb_memory *)rec; - mem->tag = LB_TAG_MEMORY; - mem->size = sizeof(*mem); + mem->tag = htole32(LB_TAG_MEMORY); + mem->size = htole32(sizeof(*mem)); return mem; } @@ -102,14 +104,14 @@ void lb_add_serial(struct lb_serial *new_serial, void *data) struct lb_serial *serial; serial = (struct lb_serial *)lb_new_record(header); - serial->tag = LB_TAG_SERIAL; - serial->size = sizeof(*serial); - serial->type = new_serial->type; - serial->baseaddr = new_serial->baseaddr; - serial->baud = new_serial->baud; - serial->regwidth = new_serial->regwidth; - serial->input_hertz = new_serial->input_hertz; - serial->uart_pci_addr = new_serial->uart_pci_addr; + serial->tag = htole32(LB_TAG_SERIAL); + serial->size = htole32(sizeof(*serial)); + serial->type = htole32(new_serial->type); + serial->baseaddr = htole32(new_serial->baseaddr); + serial->baud = htole32(new_serial->baud); + serial->regwidth = htole32(new_serial->regwidth); + serial->input_hertz = htole32(new_serial->input_hertz); + serial->uart_pci_addr = htole32(new_serial->uart_pci_addr); } void lb_add_console(uint16_t consoletype, void *data) @@ -118,14 +120,16 @@ void lb_add_console(uint16_t consoletype, void *data) struct lb_console *console; console = (struct lb_console *)lb_new_record(header); - console->tag = LB_TAG_CONSOLE; - console->size = sizeof(*console); - console->type = consoletype; + console->tag = htole32(LB_TAG_CONSOLE); + console->size = htole32(sizeof(*console)); + console->type = htole16(consoletype); } static void lb_pcie(struct lb_header *header) { - struct lb_pcie pcie = { .tag = LB_TAG_PCIE, .size = sizeof(pcie) }; + struct lb_pcie pcie = { .tag = htole32(LB_TAG_PCIE), + .size = htole32(sizeof(pcie)), + }; if (lb_fill_pcie(&pcie) != CB_SUCCESS) return; @@ -143,8 +147,8 @@ static void lb_framebuffer(struct lb_header *header) framebuffer = (struct lb_framebuffer *)lb_new_record(header); memcpy(framebuffer, &fb, sizeof(*framebuffer)); - framebuffer->tag = LB_TAG_FRAMEBUFFER; - framebuffer->size = sizeof(*framebuffer); + framebuffer->tag = htole32(LB_TAG_FRAMEBUFFER); + framebuffer->size = htole32(sizeof(*framebuffer)); if (CONFIG(BOOTSPLASH)) { uint8_t *fb_ptr = (uint8_t *)(uintptr_t)framebuffer->physical_address; @@ -160,9 +164,9 @@ void lb_add_gpios(struct lb_gpios *gpios, const struct lb_gpio *gpio_table, { size_t table_size = count * sizeof(struct lb_gpio); - memcpy(&gpios->gpios[gpios->count], gpio_table, table_size); - gpios->count += count; - gpios->size += table_size; + memcpy(&gpios->gpios[le32toh(gpios->count)], gpio_table, table_size); + gpios->count = htole32(le32toh(gpios->count) + count); + gpios->size = htole32(le32toh(gpios->size) + table_size); } static void lb_gpios(struct lb_header *header) @@ -171,25 +175,25 @@ static void lb_gpios(struct lb_header *header) struct lb_gpio *g; gpios = (struct lb_gpios *)lb_new_record(header); - gpios->tag = LB_TAG_GPIO; - gpios->size = sizeof(*gpios); + gpios->tag = htole32(LB_TAG_GPIO); + gpios->size = htole32(sizeof(*gpios)); gpios->count = 0; fill_lb_gpios(gpios); printk(BIOS_INFO, "Passing %u GPIOs to payload:\n" " NAME | PORT | POLARITY | VALUE\n", - gpios->count); - for (g = &gpios->gpios[0]; g < &gpios->gpios[gpios->count]; g++) { + le32toh(gpios->count)); + for (g = &gpios->gpios[0]; g < &gpios->gpios[le32toh(gpios->count)]; g++) { printk(BIOS_INFO, "%16.16s 
| ", g->name); if (g->port == -1) printk(BIOS_INFO, " undefined | "); else - printk(BIOS_INFO, "%#.8x | ", g->port); - if (g->polarity == ACTIVE_HIGH) + printk(BIOS_INFO, "%#.8x | ", le32toh(g->port)); + if (g->polarity == htole32(ACTIVE_HIGH)) printk(BIOS_INFO, " high | "); else printk(BIOS_INFO, " low | "); - switch (g->value) { + switch (htole32(g->value)) { case 0: printk(BIOS_INFO, " low\n"); break; @@ -221,14 +225,14 @@ static void lb_boot_media_params(struct lb_header *header) return; bmp = (struct lb_boot_media_params *)lb_new_record(header); - bmp->tag = LB_TAG_BOOT_MEDIA_PARAMS; - bmp->size = sizeof(*bmp); + bmp->tag = htole32(LB_TAG_BOOT_MEDIA_PARAMS); + bmp->size = htole32(sizeof(*bmp)); - bmp->cbfs_offset = region_device_offset(&cbd->rdev); - bmp->cbfs_size = region_device_sz(&cbd->rdev); - bmp->boot_media_size = region_device_sz(boot_dev); + bmp->cbfs_offset = htole64(region_device_offset(&cbd->rdev)); + bmp->cbfs_size = htole64(region_device_sz(&cbd->rdev)); + bmp->boot_media_size = htole64(region_device_sz(boot_dev)); - bmp->fmap_offset = get_fmap_flash_offset(); + bmp->fmap_offset = htole64(get_fmap_flash_offset()); } static void lb_mmc_info(struct lb_header *header) @@ -242,9 +246,9 @@ static void lb_mmc_info(struct lb_header *header) rec = (struct lb_mmc_info *)lb_new_record(header); - rec->tag = LB_TAG_MMC_INFO; - rec->size = sizeof(*rec); - rec->early_cmd1_status = *ms_cbmem; + rec->tag = htole32(LB_TAG_MMC_INFO); + rec->size = htole32(sizeof(*rec)); + rec->early_cmd1_status = htole32(*ms_cbmem); } static void add_cbmem_pointers(struct lb_header *header) @@ -263,7 +267,7 @@ static void add_cbmem_pointers(struct lb_header *header) {CBMEM_ID_ACPI_CNVS, LB_TAG_ACPI_CNVS}, {CBMEM_ID_VPD, LB_TAG_VPD}, {CBMEM_ID_WIFI_CALIBRATION, LB_TAG_WIFI_CALIBRATION}, - {CBMEM_ID_TCPA_LOG, LB_TAG_TCPA_LOG}, + {CBMEM_ID_TPM_CB_LOG, LB_TAG_TPM_CB_LOG}, {CBMEM_ID_FMAP, LB_TAG_FMAP}, {CBMEM_ID_VBOOT_WORKBUF, LB_TAG_VBOOT_WORKBUF}, {CBMEM_ID_TYPE_C_INFO, LB_TAG_TYPE_C_INFO}, @@ -283,23 +287,45 @@ static void add_cbmem_pointers(struct lb_header *header) printk(BIOS_ERR, "No more room in coreboot table!\n"); break; } - cbmem_ref->tag = sid->table_tag; - cbmem_ref->size = sizeof(*cbmem_ref); - cbmem_ref->cbmem_addr = (unsigned long)cbmem_addr; + cbmem_ref->tag = htole32(sid->table_tag); + cbmem_ref->size = htole32(sizeof(*cbmem_ref)); + cbmem_ref->cbmem_addr = htole64((uintptr_t)cbmem_addr); } } +static void lb_tpm_std_log(struct lb_header *header) +{ + struct lb_range *lb_range; + const struct cbmem_entry *entry; + + entry = cbmem_entry_find(CBMEM_ID_TCPA_TCG_LOG); + if (entry == NULL) + entry = cbmem_entry_find(CBMEM_ID_TPM2_TCG_LOG); + if (entry == NULL) + return; /* The section is not present */ + + lb_range = (struct lb_range *)lb_new_record(header); + if (lb_range == NULL) { + printk(BIOS_ERR, "No more room in coreboot table!\n"); + return; + } + lb_range->tag = htole32(LB_TAG_TPM_STD_LOG); + lb_range->size = htole32(sizeof(*lb_range)); + lb_range->range_start = htole64((uintptr_t)cbmem_entry_start(entry)); + lb_range->range_size = htole32(cbmem_entry_size(entry)); +} + static struct lb_mainboard *lb_mainboard(struct lb_header *header) { struct lb_record *rec; struct lb_mainboard *mainboard; rec = lb_new_record(header); mainboard = (struct lb_mainboard *)rec; - mainboard->tag = LB_TAG_MAINBOARD; + mainboard->tag = htole32(LB_TAG_MAINBOARD); - mainboard->size = ALIGN_UP(sizeof(*mainboard) + - strlen(mainboard_vendor) + 1 + - strlen(mainboard_part_number) + 1, 8); + mainboard->size = 
htole32(ALIGN_UP(sizeof(*mainboard) + + strlen(mainboard_vendor) + 1 + + strlen(mainboard_part_number) + 1, 8)); mainboard->vendor_idx = 0; mainboard->part_number_idx = strlen(mainboard_vendor) + 1; @@ -319,23 +345,23 @@ static struct lb_board_config *lb_board_config(struct lb_header *header) rec = lb_new_record(header); config = (struct lb_board_config *)rec; - config->tag = LB_TAG_BOARD_CONFIG; - config->size = sizeof(*config); + config->tag = htole32(LB_TAG_BOARD_CONFIG); + config->size = htole32(sizeof(*config)); const uint64_t fw_config = fw_config_get(); - config->board_id = board_id(); - config->ram_code = ram_code(); - config->sku_id = sku_id(); - config->fw_config = fw_config; - - if (config->board_id != UNDEFINED_STRAPPING_ID) - printk(BIOS_INFO, "Board ID: %d\n", config->board_id); - if (config->ram_code != UNDEFINED_STRAPPING_ID) - printk(BIOS_INFO, "RAM code: %d\n", config->ram_code); - if (config->sku_id != UNDEFINED_STRAPPING_ID) - printk(BIOS_INFO, "SKU ID: %d\n", config->sku_id); - if (fw_config != UNDEFINED_FW_CONFIG) - printk(BIOS_INFO, "FW config: %#" PRIx64 "\n", fw_config); + config->board_id = htole32(board_id()); + config->ram_code = htole32(ram_code()); + config->sku_id = htole32(sku_id()); + config->fw_config = htole64(fw_config); + + if (config->board_id != htole32(UNDEFINED_STRAPPING_ID)) + printk(BIOS_INFO, "Board ID: %d\n", htole32(config->board_id)); + if (config->ram_code != htole32(UNDEFINED_STRAPPING_ID)) + printk(BIOS_INFO, "RAM code: %d\n", htole32(config->ram_code)); + if (config->sku_id != htole32(UNDEFINED_STRAPPING_ID)) + printk(BIOS_INFO, "SKU ID: %d\n", htole32(config->sku_id)); + if (fw_config != htole64(UNDEFINED_FW_CONFIG)) + printk(BIOS_INFO, "FW config: %#" PRIx64 "\n", htole64(fw_config)); return config; } @@ -347,14 +373,14 @@ static struct cmos_checksum *lb_cmos_checksum(struct lb_header *header) struct cmos_checksum *cmos_checksum; rec = lb_new_record(header); cmos_checksum = (struct cmos_checksum *)rec; - cmos_checksum->tag = LB_TAG_OPTION_CHECKSUM; + cmos_checksum->tag = htole32(LB_TAG_OPTION_CHECKSUM); - cmos_checksum->size = (sizeof(*cmos_checksum)); + cmos_checksum->size = htole32(sizeof(*cmos_checksum)); - cmos_checksum->range_start = LB_CKS_RANGE_START * 8; - cmos_checksum->range_end = (LB_CKS_RANGE_END * 8) + 7; - cmos_checksum->location = LB_CKS_LOC * 8; - cmos_checksum->type = CHECKSUM_PCBIOS; + cmos_checksum->range_start = htole32(LB_CKS_RANGE_START * 8); + cmos_checksum->range_end = htole32((LB_CKS_RANGE_END * 8) + 7); + cmos_checksum->location = htole32(LB_CKS_LOC * 8); + cmos_checksum->type = htole32(CHECKSUM_PCBIOS); return cmos_checksum; } @@ -377,8 +403,8 @@ static void lb_strings(struct lb_header *header) size_t len; rec = (struct lb_string *)lb_new_record(header); len = strlen(strings[i].string); - rec->tag = strings[i].tag; - rec->size = ALIGN_UP(sizeof(*rec) + len + 1, 8); + rec->tag = htole32(strings[i].tag); + rec->size = htole32(ALIGN_UP(sizeof(*rec) + len + 1, 8)); memcpy(rec->string, strings[i].string, len+1); } @@ -388,9 +414,9 @@ static void lb_record_version_timestamp(struct lb_header *header) { struct lb_timestamp *rec; rec = (struct lb_timestamp *)lb_new_record(header); - rec->tag = LB_TAG_VERSION_TIMESTAMP; - rec->size = sizeof(*rec); - rec->timestamp = coreboot_version_timestamp; + rec->tag = htole32(LB_TAG_VERSION_TIMESTAMP); + rec->size = htole32(sizeof(*rec)); + rec->timestamp = htole32(coreboot_version_timestamp); } void __weak lb_board(struct lb_header *header) { /* NOOP */ } @@ -410,9 +436,9 @@ static 
struct lb_forward *lb_forward(struct lb_header *header, struct lb_forward *forward; rec = lb_new_record(header); forward = (struct lb_forward *)rec; - forward->tag = LB_TAG_FORWARD; - forward->size = sizeof(*forward); - forward->forward = (uint64_t)(unsigned long)next_header; + forward->tag = htole32(LB_TAG_FORWARD); + forward->size = htole32(sizeof(*forward)); + forward->forward = htole64((uint64_t)(unsigned long)next_header); return forward; } @@ -421,17 +447,19 @@ static unsigned long lb_table_fini(struct lb_header *head) struct lb_record *rec, *first_rec; rec = lb_last_record(head); if (head->table_entries) - head->table_bytes += rec->size; + head->table_bytes = htole32(le32toh(head->table_bytes) + + le32toh(rec->size)); first_rec = lb_first_record(head); + /* compute_ip_checksum() returns checksum in correct endianness */ head->table_checksum = compute_ip_checksum(first_rec, - head->table_bytes); + le32toh(head->table_bytes)); head->header_checksum = 0; head->header_checksum = compute_ip_checksum(head, sizeof(*head)); printk(BIOS_DEBUG, "Wrote coreboot table at: %p, 0x%x bytes, checksum %x\n", - head, head->table_bytes, head->table_checksum); - return (unsigned long)rec + rec->size; + head, le32toh(head->table_bytes), le32toh(head->table_checksum)); + return (unsigned long)rec + le32toh(rec->size); } static void lb_add_acpi_rsdp(struct lb_header *head) @@ -439,9 +467,9 @@ static void lb_add_acpi_rsdp(struct lb_header *head) struct lb_acpi_rsdp *acpi_rsdp; struct lb_record *rec = lb_new_record(head); acpi_rsdp = (struct lb_acpi_rsdp *)rec; - acpi_rsdp->tag = LB_TAG_ACPI_RSDP; - acpi_rsdp->size = sizeof(*acpi_rsdp); - acpi_rsdp->rsdp_pointer = get_coreboot_rsdp(); + acpi_rsdp->tag = htole32(LB_TAG_ACPI_RSDP); + acpi_rsdp->size = htole32(sizeof(*acpi_rsdp)); + acpi_rsdp->rsdp_pointer = htole64(get_coreboot_rsdp()); } size_t write_coreboot_forwarding_table(uintptr_t entry, uintptr_t target) @@ -475,7 +503,7 @@ static uintptr_t write_coreboot_table(uintptr_t rom_table_end) /* Copy the option config table, it's already a * lb_record... 
*/
-		memcpy(rec_dest, option_table, option_table->size);
+		memcpy(rec_dest, option_table, le32toh(option_table->size));
 
 		/* Create CMOS checksum entry in coreboot table */
 		lb_cmos_checksum(head);
 	} else {
@@ -488,6 +516,9 @@
 	/* Serialize resource map into mem table types (LB_MEM_*) */
 	bootmem_write_memory_table(lb_memory(head));
 
+	/* Record a reference to the spec-defined TPM log (TPM 1.2 or 2.0 format) */
+	lb_tpm_std_log(head);
+
 	/* Record our motherboard */
 	lb_mainboard(head);
 
diff --git a/src/lib/imd_cbmem.c b/src/lib/imd_cbmem.c
index a855cf18b3f..3af2effc185 100644
--- a/src/lib/imd_cbmem.c
+++ b/src/lib/imd_cbmem.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include <endian.h>
 #include
 #include
 #include
@@ -242,10 +243,10 @@
 			continue;
 
 		lbe = (struct lb_cbmem_entry *)lb_new_record(header);
-		lbe->tag = LB_TAG_CBMEM_ENTRY;
-		lbe->size = sizeof(*lbe);
-		lbe->address = (uintptr_t)imd_entry_at(&imd, e);
-		lbe->entry_size = imd_entry_size(e);
-		lbe->id = id;
+		lbe->tag = htole32(LB_TAG_CBMEM_ENTRY);
+		lbe->size = htole32(sizeof(*lbe));
+		lbe->address = htole64((uintptr_t)imd_entry_at(&imd, e));
+		lbe->entry_size = htole32(imd_entry_size(e));
+		lbe->id = htole32(id);
 	}
 }
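It is worth spelling out the pitfall the timestamp sync code below avoids: a value that is already little-endian must not be passed through an LE-storing path again, or big-endian hosts would byte-swap it twice. A small self-contained sketch of the principle (illustrative, not patch code):

```c
#include <endian.h>
#include <stdint.h>

/* Storing path: converts a host-endian value to LE, analogous to what
 * timestamp_add_table_entry() does in the hunk below. */
static uint64_t store_le64(uint64_t host_val)
{
	return htole64(host_val);
}

static uint64_t copy_le64(uint64_t le_val)
{
	/*
	 * Wrong: store_le64(le_val) would swap an already-LE value a second
	 * time on big-endian hosts. Convert back to host endianness first so
	 * the storing path swaps it exactly once.
	 */
	return store_le64(le64toh(le_val));
}
```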
diff --git a/src/lib/timestamp.c b/src/lib/timestamp.c
index c7e1c0cf4ce..3dad8cdb658 100644
--- a/src/lib/timestamp.c
+++ b/src/lib/timestamp.c
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <endian.h>
 #include
 #include
 #include
@@ -18,11 +19,11 @@
 static struct timestamp_table *glob_ts_table;
 
 static void timestamp_cache_init(struct timestamp_table *ts_cache,
				 uint64_t base)
 {
-	ts_cache->num_entries = 0;
-	ts_cache->base_time = base;
-	ts_cache->max_entries = (REGION_SIZE(timestamp) -
+	ts_cache->num_entries = htole32(0);
+	ts_cache->base_time = htole64(base);
+	ts_cache->max_entries = htole16((REGION_SIZE(timestamp) -
 		offsetof(struct timestamp_table, entries))
-		/ sizeof(struct timestamp_entry);
+		/ sizeof(struct timestamp_entry));
 }
 
 static struct timestamp_table *timestamp_cache_get(void)
@@ -52,9 +53,9 @@
 	if (!tst)
 		return NULL;
 
-	tst->base_time = 0;
-	tst->max_entries = MAX_TIMESTAMPS;
-	tst->num_entries = 0;
+	tst->base_time = htole64(0);
+	tst->max_entries = htole16(MAX_TIMESTAMPS);
+	tst->num_entries = htole32(0);
 
 	return tst;
 }
@@ -105,15 +106,17 @@
				      enum timestamp_id id, int64_t ts_time)
 {
 	struct timestamp_entry *tse;
+	uint32_t num_entries = le32toh(ts_table->num_entries);
 
-	if (ts_table->num_entries >= ts_table->max_entries)
+	if (num_entries >= le16toh(ts_table->max_entries))
 		return;
 
-	tse = &ts_table->entries[ts_table->num_entries++];
-	tse->entry_id = id;
-	tse->entry_stamp = ts_time;
+	tse = &ts_table->entries[num_entries++];
+	ts_table->num_entries = htole32(num_entries);
+	tse->entry_id = htole32(id);
+	tse->entry_stamp = htole64(ts_time);
 
-	if (ts_table->num_entries == ts_table->max_entries)
+	if (num_entries == le16toh(ts_table->max_entries))
		printk(BIOS_ERR, "Timestamp table full\n");
 }
 
@@ -131,7 +134,7 @@
 		return;
 	}
 
-	ts_time -= ts_table->base_time;
+	ts_time -= le64toh(ts_table->base_time);
 	timestamp_add_table_entry(ts_table, id, ts_time);
 
 	if (CONFIG(TIMESTAMPS_ON_CONSOLE))
@@ -196,14 +199,18 @@
 	/* Inherit cache base_time. */
 	ts_cbmem_table->base_time = ts_cache_table->base_time;
 
-	for (i = 0; i < ts_cache_table->num_entries; i++) {
+	for (i = 0; i < le32toh(ts_cache_table->num_entries); i++) {
 		struct timestamp_entry *tse = &ts_cache_table->entries[i];
-		timestamp_add_table_entry(ts_cbmem_table, tse->entry_id,
-					  tse->entry_stamp);
+		/* timestamp_add_table_entry() converts its inputs to LE, but
+		 * the cached entries are already LE. Convert them back to host
+		 * endianness here, so they end up written to cbmem as LE
+		 * exactly once instead of being byte-swapped twice.
+		 */
+		timestamp_add_table_entry(ts_cbmem_table, le32toh(tse->entry_id),
+					  le64toh(tse->entry_stamp));
 	}
 
 	/* Cache no longer required. */
-	ts_cache_table->num_entries = 0;
+	ts_cache_table->num_entries = htole32(0);
 }
 
 static void timestamp_reinit(int is_recovery)
@@ -233,7 +240,7 @@
 
 	/* Seed the timestamp tick frequency in ENV_PAYLOAD_LOADER. */
 	if (ENV_PAYLOAD_LOADER)
-		ts_cbmem_table->tick_freq_mhz = timestamp_tick_freq_mhz();
+		ts_cbmem_table->tick_freq_mhz = htole16(timestamp_tick_freq_mhz());
 
 	timestamp_table_set(ts_cbmem_table);
 }
@@ -241,6 +248,7 @@
 void timestamp_rescale_table(uint16_t N, uint16_t M)
 {
 	uint32_t i;
+	uint64_t base_time;
 	struct timestamp_table *ts_table;
 
 	if (!timestamp_should_run())
@@ -257,12 +265,18 @@
 		return;
 	}
 
-	ts_table->base_time /= M;
-	ts_table->base_time *= N;
-	for (i = 0; i < ts_table->num_entries; i++) {
+	base_time = le64toh(ts_table->base_time);
+	base_time /= M;
+	base_time *= N;
+	ts_table->base_time = htole64(base_time);
+
+	for (i = 0; i < le32toh(ts_table->num_entries); i++) {
+		int64_t entry_stamp;
 		struct timestamp_entry *tse = &ts_table->entries[i];
-		tse->entry_stamp /= M;
-		tse->entry_stamp *= N;
+		entry_stamp = le64toh(tse->entry_stamp);
+		entry_stamp /= M;
+		entry_stamp *= N;
+		tse->entry_stamp = htole64(entry_stamp);
 	}
 }
 
@@ -274,9 +288,9 @@
 {
 	struct timestamp_table *ts = timestamp_table_get();
 
-	if (ts == NULL || ts->tick_freq_mhz == 0)
+	if (ts == NULL || le16toh(ts->tick_freq_mhz) == 0)
 		return 0;
-	return (timestamp_get() - ts->base_time) / ts->tick_freq_mhz;
+	return (timestamp_get() - le64toh(ts->base_time)) / le16toh(ts->tick_freq_mhz);
 }
 
 CBMEM_READY_HOOK(timestamp_reinit);
diff --git a/src/mainboard/emulation/qemu-power8/Makefile.inc b/src/mainboard/emulation/qemu-power8/Makefile.inc
index 40119839232..c1484b96bcd 100644
--- a/src/mainboard/emulation/qemu-power8/Makefile.inc
+++ b/src/mainboard/emulation/qemu-power8/Makefile.inc
@@ -2,8 +2,11 @@
 
 bootblock-y += bootblock.c
 bootblock-y += uart.c
+bootblock-y += rom_media.c
 romstage-y += cbmem.c
 romstage-y += romstage.c
 ramstage-y += timer.c
 romstage-y += uart.c
+romstage-y += rom_media.c
 ramstage-y += uart.c
+ramstage-y += rom_media.c
diff --git a/src/mainboard/emulation/qemu-power8/memlayout.ld b/src/mainboard/emulation/qemu-power8/memlayout.ld
index 66f2c7577b3..993ee5fadfe 100644
--- a/src/mainboard/emulation/qemu-power8/memlayout.ld
+++ b/src/mainboard/emulation/qemu-power8/memlayout.ld
@@ -10,9 +10,19 @@ SECTIONS
 	DRAM_START(0x0)
 	BOOTBLOCK(0x100, 64K)
 	ROMSTAGE(0x20000, 128K)
+
+#if !ENV_RAMSTAGE
 	STACK(0x40000, 0x3ff00)
+#endif
+
 	PRERAM_CBMEM_CONSOLE(0x80000, 8K)
 	FMAP_CACHE(0x82000, 2K)
 	CBFS_MCACHE(0x82800, 8K)
+
+#if !ENV_RAMSTAGE
 	RAMSTAGE(0x100000, 16M)
+#else
+	STACK(0xf8040000, 0x3ff00)
+	RAMSTAGE(0xf8100000, 16M)
+#endif
 }
diff --git a/src/arch/ppc64/rom_media.c b/src/mainboard/emulation/qemu-power8/rom_media.c
similarity index 100% rename from src/arch/ppc64/rom_media.c rename to src/mainboard/emulation/qemu-power8/rom_media.c diff --git a/src/mainboard/emulation/qemu-power9/Makefile.inc b/src/mainboard/emulation/qemu-power9/Makefile.inc index ace00a75b78..a71c20fe1aa 100644 --- a/src/mainboard/emulation/qemu-power9/Makefile.inc +++ b/src/mainboard/emulation/qemu-power9/Makefile.inc @@ -1,6 +1,10 @@ ## SPDX-License-Identifier: GPL-2.0-only +bootblock-y += rom_media.c + romstage-y += cbmem.c romstage-y += romstage.c +romstage-y += rom_media.c ramstage-y += ramstage.c +ramstage-y += rom_media.c diff --git a/src/mainboard/emulation/qemu-power9/memlayout.ld b/src/mainboard/emulation/qemu-power9/memlayout.ld index 8209433020e..5aca5408b10 100644 --- a/src/mainboard/emulation/qemu-power9/memlayout.ld +++ b/src/mainboard/emulation/qemu-power9/memlayout.ld @@ -8,27 +8,70 @@ SECTIONS { DRAM_START(0x0) - BOOTBLOCK(0, 32K) + /* + * On POWER, 0 is wherever HRMOR points to rather than physical DRAM start. + * HRMOR is ORed with address, not added to it, meaning that memory space + * overlaps after 2^(least significant set bit of HRMOR). This becomes + * chaotic when nonconsecutive bits are set... + * + * Two and a half possible cases: + * 0. bootblock started with QEMU in hb-mode + * - NIA = 0x10 (bug?) + * - HRMOR = 0x08000000 (128M) + * - no physical memory to enable/train, everything accessible from start + * 1. bootblock loaded by HBBL + * - NIA = 0 + * - HRMOR = 0xF8000000 (4G - 128M) + * - initialized L3 = 0x400000 (4M) + * - top address before RAM = 0xF8400000 + * 2. bootblock in SEEPROM, loaded by SBE + * - NIA = 0x3000 (placeholder for int. vectors) + * - HRMOR = 0xF8200000 (4G - 128 M + 2 M) + * - initialized L3 = 0x8000 (bootblock/HBBL size = 32K) + * - no way 32K will be enough, must initialize more L3 in bootblock + * - HRMOR still applies, so memory overlaps every 2M + * + * Common subset (assuming 2. initializes as much memory as possible) is + * 0xF8200000-0xF8400000. 2M should be more than enough for pre-RAM code, + * but it isn't enough to load ramstage. We could implement postcar stage, + * but KISS: initialize L3 from _ebootblock to 0xF8980000: up to 9.5M into + * cache, leaving bottom 2M (0xF8000000-0xF8200000) either uninitialized + * (when started from SEEPROM) or just unused for anything but bootblock + * (loaded by HBBL). Last 0.5M of L3 cache is left for interrupt vectors + * normally located at address 0. + * + * Set HRMOR to 0 before jumping to C code in bootblock and forget it even + * exists. + * + * For QEMU s/0xF8/0x08/ in above description but code remains the same. + * L3 initialization is unnecessary in this case but won't break anything. + * + * TODO: there is a structure with SBE->HBBL data at 0 in 2nd option. It + * holds some useful data like XSCOM BAR and LPC BAR. If, for any reason, + * these addresses are different than default, they should be used instead + * of predefined values. + */ - ROMSTAGE(0x1f00000, 1M) - -#if !ENV_RAMSTAGE - STACK(0x2000000, 32K) +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + BOOTBLOCK( 0x08000000, 32K) +#else + BOOTBLOCK( 0x08203000, 20K) #endif - FMAP_CACHE(0x2108000, 4K) - CBFS_MCACHE(0x2109000, 8K) - TIMESTAMP(0x210b000, 4K) - CBFS_CACHE(0x210c000, 512K) - PRERAM_CBMEM_CONSOLE(0x218c000, 128K) - - /* By default all memory addresses are affected by the value of HRMOR - * (Hypervisor Real Mode Offset Register) which is ORed to them. HRMOR - * has initial value of 0x8000000 in QEMU and is changed to 0 in - * ramstage. 
This means that before ramstage 0 actually points to
-	 * 0x8000000. */
-#if ENV_RAMSTAGE
-	STACK(0xa000000, 32K)
-#endif
-	RAMSTAGE(0xa008000, 1M)
+	STACK(               0x08208000, 32K)
+	PRERAM_CBMEM_CONSOLE(0x08210000, 128K)
+	FMAP_CACHE(          0x08230000, 4K)
+	CBFS_MCACHE(         0x08231000, 8K)
+	TIMESTAMP(           0x08233000, 4K)
+
+	ROMSTAGE(            0x08240000, 256K)
+
+	/*
+	 * bootblock_crt0.S assumes this is the last part of L3, leaving at
+	 * least 0.5M for interrupt vectors because of cache associativity. If
+	 * more CBFS_CACHE is needed, split this into pre-/postram caches.
+	 */
+	CBFS_CACHE(          0x08280000, 7M)
+
+	RAMSTAGE(            0x09000000, 2M)
 }
diff --git a/src/mainboard/emulation/qemu-power9/rom_media.c b/src/mainboard/emulation/qemu-power9/rom_media.c
new file mode 100644
index 00000000000..2fd47669a80
--- /dev/null
+++ b/src/mainboard/emulation/qemu-power9/rom_media.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include
+#include
+
+static const struct mem_region_device boot_dev =
+	MEM_REGION_DEV_RO_INIT(FLASH_BASE_ADDR, CONFIG_ROM_SIZE);
+
+const struct region_device *boot_device_ro(void)
+{
+	return &boot_dev.rdev;
+}
diff --git a/src/mainboard/ocp/deltalake/ipmi.c b/src/mainboard/ocp/deltalake/ipmi.c
index 25a5a27cba6..1695d73589c 100644
--- a/src/mainboard/ocp/deltalake/ipmi.c
+++ b/src/mainboard/ocp/deltalake/ipmi.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/src/mainboard/ocp/deltalake/romstage.c b/src/mainboard/ocp/deltalake/romstage.c
index 05a7188da41..2efed1ca84d 100644
--- a/src/mainboard/ocp/deltalake/romstage.c
+++ b/src/mainboard/ocp/deltalake/romstage.c
@@ -2,7 +2,7 @@
 
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -188,7 +188,7 @@
 	/* Since it's the first IPMI command, it's better to run get BMC selftest result first */
-	if (ipmi_kcs_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) {
+	if (ipmi_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) {
 		ipmi_set_post_start(CONFIG_BMC_KCS_BASE);
 		init_frb2_wdt();
 	}
diff --git a/src/mainboard/ocp/tiogapass/ipmi.c b/src/mainboard/ocp/tiogapass/ipmi.c
index 74f96fe36c5..e97341b2b67 100644
--- a/src/mainboard/ocp/tiogapass/ipmi.c
+++ b/src/mainboard/ocp/tiogapass/ipmi.c
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 
 #include
-#include
+#include
 #include
 #include
 #include
diff --git a/src/mainboard/ocp/tiogapass/romstage.c b/src/mainboard/ocp/tiogapass/romstage.c
index 20c74660ced..842e977d3e1 100644
--- a/src/mainboard/ocp/tiogapass/romstage.c
+++ b/src/mainboard/ocp/tiogapass/romstage.c
@@ -2,7 +2,7 @@
 
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -54,7 +54,7 @@ static void mainboard_config_iio(FSPM_UPD *mupd)
 void mainboard_memory_init_params(FSPM_UPD *mupd)
 {
 	/* It's better to run get BMC selftest result first */
-	if (ipmi_kcs_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) {
+	if (ipmi_premem_init(CONFIG_BMC_KCS_BASE, 0) == CB_SUCCESS) {
 		ipmi_set_post_start(CONFIG_BMC_KCS_BASE);
 		init_frb2_wdt();
 	}
diff --git a/src/mainboard/raptor-cs/Kconfig b/src/mainboard/raptor-cs/Kconfig
new file mode 100644
index 00000000000..7f916bbaf37
--- /dev/null
+++ b/src/mainboard/raptor-cs/Kconfig
@@ -0,0 +1,15 @@
+if VENDOR_RAPTOR_CS
+
+choice
+	prompt "Mainboard model"
+
+source "src/mainboard/raptor-cs/*/Kconfig.name"
+
+endchoice
+
+source "src/mainboard/raptor-cs/*/Kconfig"
+
+config MAINBOARD_VENDOR
+	default "Raptor Computing Systems"
+
+endif #
VENDOR_RAPTOR_CS diff --git a/src/mainboard/raptor-cs/Kconfig.name b/src/mainboard/raptor-cs/Kconfig.name new file mode 100644 index 00000000000..7f9d8d6b9bd --- /dev/null +++ b/src/mainboard/raptor-cs/Kconfig.name @@ -0,0 +1,2 @@ +config VENDOR_RAPTOR_CS + bool "Raptor Computing Systems" diff --git a/src/mainboard/raptor-cs/talos-2/1-cpu.dts b/src/mainboard/raptor-cs/talos-2/1-cpu.dts new file mode 100644 index 00000000000..6573c7925cc --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/1-cpu.dts @@ -0,0 +1,589 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* This is a base DT common to 1- and 2-CPU configurations */ + +/dts-v1/; + +/ { + #address-cells = <0x02>; + #size-cells = <0x02>; + compatible = "ibm,powernv\0ibm,p9-openbmc\0rcs,talos"; + nest-frequency = <0x00 0x6f38e680>; + vendor = "IBM"; + ibm,sw-checkstop-fir = <0x5012000 0x1f>; + model = "T2P9D01 REV 1.00"; /* TODO: unhardcode */ + /* TODO: system-id = "A1234567" */ + model-name = "rcs,talos"; + + bmc { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,ast2500,openbmc"; + + sensors { + #address-cells = <0x01>; + #size-cells = <0x00>; + + sensor@1 { + compatible = "ibm,ipmi-sensor"; + reg = <0x01>; + ipmi-sensor-type = <0x22>; + }; + + sensor@2 { + compatible = "ibm,ipmi-sensor"; + reg = <0x02>; + ipmi-sensor-type = <0x0f>; + }; + + sensor@3 { + compatible = "ibm,ipmi-sensor"; + reg = <0x03>; + ipmi-sensor-type = <0x07>; + ibm,chip-id = <0x00>; + }; + + sensor@6 { + compatible = "ibm,ipmi-sensor"; + reg = <0x06>; + ipmi-sensor-type = <0x01>; + ibm,chip-id = <0x00>; + }; + + sensor@8 { + compatible = "ibm,ipmi-sensor"; + reg = <0x08>; + ipmi-sensor-type = <0x07>; + ibm,chip-id = <0x00>; + }; + + sensor@a { + compatible = "ibm,ipmi-sensor"; + reg = <0x0a>; + ipmi-sensor-type = <0xc1>; + }; + + sensor@8b { + compatible = "ibm,ipmi-sensor"; + reg = <0x8b>; + ipmi-sensor-type = <0xc3>; + }; + + sensor@8c { + compatible = "ibm,ipmi-sensor"; + reg = <0x8c>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@8d { + compatible = "ibm,ipmi-sensor"; + reg = <0x8d>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@8e { + compatible = "ibm,ipmi-sensor"; + reg = <0x8e>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@8f { + compatible = "ibm,ipmi-sensor"; + reg = <0x8f>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@90 { + compatible = "ibm,ipmi-sensor"; + reg = <0x90>; + ipmi-sensor-type = <0x12>; + }; + + sensor@91 { + compatible = "ibm,ipmi-sensor"; + reg = <0x91>; + ipmi-sensor-type = <0x1f>; + }; + + sensor@92 { + compatible = "ibm,ipmi-sensor"; + reg = <0x92>; + ipmi-sensor-type = <0xc4>; + }; + + sensor@93 { + compatible = "ibm,ipmi-sensor"; + reg = <0x93>; + ipmi-sensor-type = <0xc7>; + }; + + sensor@94 { + compatible = "ibm,ipmi-sensor"; + reg = <0x94>; + ipmi-sensor-type = <0xc2>; + }; + + sensor@95 { + compatible = "ibm,ipmi-sensor"; + reg = <0x95>; + ipmi-sensor-type = <0xca>; + }; + + sensor@96 { + compatible = "ibm,ipmi-sensor"; + reg = <0x96>; + ipmi-sensor-type = <0xc8>; + }; + + sensor@97 { + compatible = "ibm,ipmi-sensor"; + reg = <0x97>; + ipmi-sensor-type = <0xc6>; + }; + }; + }; + + cpus { + #address-cells = <0x01>; + #size-cells = <0x00>; + }; + + ibm,opal { + + leds { + led-mode = "lightpath"; + }; + + power-mgt { + ibm,enabled-stop-levels = <0xec000000>; + }; + + }; + + ibm,pcie-slots { + #address-cells = <0x02>; + #size-cells = <0x00>; + + root-complex@0,0 { + reg = <0x00 0x00>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + 
ibm,slot-label = "SLOT3"; + + pluggable { + mrw-slot-id = <0x03>; + lane-mask = <0xffff>; + ibm,slot-label = "SLOT3"; + }; + }; + + root-complex@0,1 { + reg = <0x00 0x01>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0xff00>; + lanes-reversed = <0x1111>; + + builtin { + }; + }; + + root-complex@0,2 { + reg = <0x00 0x02>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0xff>; + + builtin { + }; + }; + + root-complex@0,3 { + reg = <0x00 0x03>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + ibm,slot-label = "SLOT1"; + + pluggable { + mrw-slot-id = <0x01>; + lane-mask = <0xff00>; + ibm,slot-label = "SLOT1"; + }; + }; + + root-complex@0,4 { + reg = <0x00 0x04>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0xf0>; + + switch-up@10b5,8725 { + reg = <0x10b5 0x8725>; + #address-cells = <0x01>; + #size-cells = <0x00>; + upstream-port = <0x00>; + ibm,pluggable; + + down-port@a { + compatible = "ibm,pcie-port"; + reg = <0x0a>; + ibm,pluggable; + ibm,slot-label = "GPU0"; + + builtin { + ibm,slot-label = "GPU0"; + }; + }; + + down-port@b { + compatible = "ibm,pcie-port"; + reg = <0x0b>; + ibm,pluggable; + ibm,slot-label = "GPU1"; + + builtin { + ibm,slot-label = "GPU1"; + }; + }; + + down-port@c { + compatible = "ibm,pcie-port"; + reg = <0x0c>; + ibm,pluggable; + ibm,slot-label = "GPU2"; + + builtin { + ibm,slot-label = "GPU2"; + }; + }; + }; + }; + + root-complex@0,5 { + reg = <0x00 0x05>; + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port\0ibm,pcie-root-port"; + ibm,chip-id = <0x00>; + lane-mask = <0x0f>; + + builtin { + }; + }; + }; + + lpcm-opb@6030000000000 { + #address-cells = <0x01>; + #size-cells = <0x01>; + compatible = "ibm,power9-lpcm-opb\0simple-bus"; + reg = <0x60300 0x00 0x01 0x00>; + ibm,chip-id = <0x00>; + ranges = <0x00 0x60300 0x00 0x80000000 0x80000000 0x60300 0x80000000 0x80000000>; + + lpc-controller@c0012000 { + compatible = "ibm,power9-lpc-controller"; + reg = <0xc0012000 0x100>; + }; + + lpc@0 { + #address-cells = <0x02>; + #size-cells = <0x01>; + compatible = "ibm,power9-lpc\0ibm,power8-lpc"; + ranges = <0x00 0x00 0xe0000000 0x10000000 0x01 0x00 0xd0010000 0x10000 0x03 0x00 0xf0000000 0x10000000>; + + serial@i3f8 { + reg = <0x01 0x3f8 0x01>; + compatible = "ns16550"; + current-speed = <0x1c200>; + clock-frequency = <0x1c2000>; + interrupts = <0x04>; + device_type = "serial"; + }; + }; + + opb-arbiter@c0011000 { + compatible = "ibm,power9-lpcm-opb-arbiter"; + reg = <0xc0011000 0x08>; + }; + + opb-master@c0010000 { + compatible = "ibm,power9-lpcm-opb-master"; + reg = <0xc0010000 0x60>; + }; + }; + + xscom@603fc00000000 { + ibm,chip-id = <0x00>; + ibm,proc-chip-id = <0x00>; + #address-cells = <0x01>; + #size-cells = <0x01>; + scom-controller; + compatible = "ibm,xscom\0ibm,power9-xscom"; + reg = <0x603fc 0x00 0x08 0x00>; + bus-frequency = <0x00 0x1bce39a0>; + primary; + + chiptod@40000 { + reg = <0x40000 0x34>; + compatible = "ibm,power-chiptod\0ibm,power9-chiptod"; + primary; + }; + + i2cm@a1000 { + reg = <0xa1000 0x1000>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + #size-cells = <0x00>; + #address-cells = <0x01>; + chip-engine# = <0x01>; + clock-frequency = <0x6f38e68>; + + 
i2c-bus@0 { + reg = <0x00>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0xf4240>; + + eeprom@50 { + reg = <0x50>; + link-id = <0x02>; + compatible = "atmel,24c512"; + label = "module-vpd"; + }; + }; + + i2c-bus@2 { + reg = <0x02>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0xf4240>; + + eeprom@50 { + reg = <0x50>; + link-id = <0x04>; + compatible = "atmel,24c128"; + label = "module-vpd"; + }; + }; + }; + + i2cm@a2000 { + reg = <0xa2000 0x1000>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + #size-cells = <0x00>; + #address-cells = <0x01>; + chip-engine# = <0x02>; + clock-frequency = <0x6f38e68>; + + i2c-bus@0 { + reg = <0x00>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0x61a80>; + timeout-ms = <2000>; + + eeprom@50 { + reg = <0x50>; + link-id = <0x06>; + compatible = "atmel,24c128"; + label = "module-vpd"; + }; + }; + }; + + i2cm@a3000 { + reg = <0xa3000 0x1000>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + #size-cells = <0x00>; + #address-cells = <0x01>; + chip-engine# = <0x03>; + clock-frequency = <0x6f38e68>; + + i2c-bus@0 { + reg = <0x00>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0x61a80>; + + eeprom@50 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x12>; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x0c>; + reg = <0x51>; + }; + + eeprom@52 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x01>; + reg = <0x52>; + }; + + eeprom@53 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x03>; + reg = <0x53>; + }; + }; + + i2c-bus@1 { + reg = <0x01>; + #size-cells = <0x00>; + #address-cells = <0x01>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + bus-frequency = <0x61a80>; + + eeprom@54 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x0d>; + reg = <0x54>; + }; + + eeprom@55 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x0e>; + reg = <0x55>; + }; + + eeprom@56 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10>; + reg = <0x56>; + }; + + eeprom@57 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x00>; + reg = <0x57>; + }; + }; + }; + + nmmu@5012c40 { + compatible = "ibm,power9-nest-mmu"; + reg = <0x5012c40 0x20>; + }; + + nx@2010000 { + compatible = "ibm,power9-nx"; + reg = <0x2010000 0x4000>; + }; + + pbcq@4010c00 { + reg = <0x4010c00 0x100 0xd010800 0x200>; + compatible = "ibm,power9-pbcq"; + ibm,pec-index = <0x00>; + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,hub-id = <0x00>; + + stack@0 { + reg = <0x00>; + ibm,phb-index = <0x00>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + }; + + pbcq@4011000 { + reg = <0x4011000 0x100 0xe010800 0x200>; + compatible = "ibm,power9-pbcq"; + ibm,pec-index = <0x01>; + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,hub-id = <0x00>; + + stack@0 { + reg = <0x00>; + ibm,phb-index = <0x01>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 
0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + + stack@1 { + reg = <0x01>; + ibm,phb-index = <0x02>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + }; + + pbcq@4011400 { + reg = <0x4011400 0x100 0xf010800 0x200>; + compatible = "ibm,power9-pbcq"; + ibm,pec-index = <0x02>; + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,hub-id = <0x00>; + + stack@0 { + reg = <0x00>; + ibm,phb-index = <0x03>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + + stack@1 { + reg = <0x01>; + ibm,phb-index = <0x04>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + + stack@2 { + reg = <0x02>; + ibm,phb-index = <0x05>; + compatible = "ibm,power9-phb-stack"; + status = "okay"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + }; + }; + + psihb@5012900 { + reg = <0x5012900 0x100>; + compatible = "ibm,power9-psihb-x\0ibm,psihb-x"; + }; + + vas@3011800 { + reg = <0x3011800 0x300>; + compatible = "ibm,power9-vas-x"; + ibm,vas-id = <0x00>; + }; + + xive@5013000 { + reg = <0x5013000 0x300>; + compatible = "ibm,power9-xive-x"; + force-assign-bars; + }; + }; +}; diff --git a/src/mainboard/raptor-cs/talos-2/2-cpus.dts b/src/mainboard/raptor-cs/talos-2/2-cpus.dts new file mode 100644 index 00000000000..d22660f23f3 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/2-cpus.dts @@ -0,0 +1,372 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* This file adds nodes specific to systems with 2 CPUs */ + +/dts-v1/; + +/include/ "1-cpu.dts" + +/ { + bmc { + sensors { + sensor@4 { + compatible = "ibm,ipmi-sensor"; + ibm,chip-id = <0x08>; + ipmi-sensor-type = <0x07>; + reg = <0x04>; + }; + + sensor@7 { + compatible = "ibm,ipmi-sensor"; + ibm,chip-id = <0x08>; + ipmi-sensor-type = <0x01>; + reg = <0x07>; + }; + + sensor@9 { + compatible = "ibm,ipmi-sensor"; + ibm,chip-id = <0x08>; + ipmi-sensor-type = <0x07>; + reg = <0x09>; + }; + }; + }; + + ibm,pcie-slots { + root-complex@8,0 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + ibm,slot-label = "SLOT2"; + reg = <0x08 0x00>; + + pluggable { + ibm,slot-label = "SLOT2"; + lane-mask = <0xffff>; + max-power = <0x00>; + mrw-slot-id = <0x02>; + }; + }; + + root-complex@8,3 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + ibm,slot-label = "SLOT1"; + reg = <0x08 0x03>; + + pluggable { + ibm,slot-label = "SLOT1"; + lane-mask = <0xff00>; + max-power = <0x00>; + mrw-slot-id = <0x01>; + }; + }; + + root-complex@8,4 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = "ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + ibm,slot-label = "SLOT0"; + reg = <0x08 0x04>; + + pluggable { + ibm,slot-label = "SLOT0"; + lane-mask = <0xf0>; + max-power = <0x00>; + }; + }; + + root-complex@8,5 { + #address-cells = <0x02>; + #size-cells = <0x00>; + compatible = 
"ibm,pcie-port", "ibm,pcie-root-port"; + ibm,chip-id = <0x08>; + lane-mask = <0x0f>; + reg = <0x08 0x05>; + + switch-up@10b5,8725 { + #address-cells = <0x01>; + #size-cells = <0x00>; + ibm,pluggable; + reg = <0x10b5 0x8725>; + upstream-port = <0x01>; + + down-port@4 { + compatible = "ibm,pcie-port"; + ibm,pluggable; + ibm,slot-label = "GPU3"; + reg = <0x04>; + + builtin { + ibm,slot-label = "GPU3"; + }; + }; + + down-port@5 { + compatible = "ibm,pcie-port"; + ibm,pluggable; + ibm,slot-label = "GPU4"; + reg = <0x05>; + + builtin { + ibm,slot-label = "GPU4"; + }; + }; + + down-port@d { + compatible = "ibm,pcie-port"; + ibm,pluggable; + ibm,slot-label = "GPU5"; + reg = <0x0d>; + + builtin { + ibm,slot-label = "GPU5"; + }; + }; + }; + }; + }; + + xscom@623fc00000000 { + #address-cells = <0x01>; + #size-cells = <0x01>; + bus-frequency = <0x00 0x1bce39a0>; + compatible = "ibm,xscom\0ibm,power9-xscom"; + ibm,chip-id = <0x08>; + ibm,proc-chip-id = <0x01>; + reg = <0x623fc 0x00 0x08 0x00>; + scom-controller; + + chiptod@40000 { + reg = <0x40000 0x34>; + compatible = "ibm,power-chiptod\0ibm,power9-chiptod"; + secondary; + }; + + i2cm@a1000 { + #address-cells = <0x01>; + #size-cells = <0x00>; + chip-engine# = <0x01>; + clock-frequency = <0x6f38e68>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + reg = <0xa1000 0x1000>; + + i2c-bus@0 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0xf4240>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x00>; + + eeprom@50 { + compatible = "atmel,24c512"; + label = "module-vpd"; + link-id = <0x10000>; + reg = <0x50>; + }; + }; + + i2c-bus@2 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0xf4240>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x02>; + + eeprom@50 { + compatible = "atmel,24c128"; + label = "module-vpd"; + link-id = <0x1000b>; + reg = <0x50>; + }; + }; + }; + + i2cm@a3000 { + #address-cells = <0x01>; + #size-cells = <0x00>; + chip-engine# = <0x03>; + clock-frequency = <0x6f38e68>; + compatible = "ibm,power8-i2cm\0ibm,power9-i2cm"; + reg = <0xa3000 0x1000>; + + i2c-bus@0 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0x61a80>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x00>; + + eeprom@50 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10011>; + reg = <0x50>; + }; + + eeprom@51 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10009>; + reg = <0x51>; + }; + + eeprom@52 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10001>; + reg = <0x52>; + }; + + eeprom@53 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x10002>; + reg = <0x53>; + }; + }; + + i2c-bus@1 { + #address-cells = <0x01>; + #size-cells = <0x00>; + bus-frequency = <0x61a80>; + compatible = "ibm,opal-i2c\0ibm,power8-i2c-port\0ibm,power9-i2c-port"; + reg = <0x01>; + + eeprom@54 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000a>; + reg = <0x54>; + }; + + eeprom@55 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000c>; + reg = <0x55>; + }; + + eeprom@56 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000e>; + reg = <0x56>; + }; + + eeprom@57 { + compatible = "spd"; + label = "dimm-spd"; + link-id = <0x1000f>; + reg = <0x57>; + }; + }; + }; + + nmmu@5012c40 { + compatible = "ibm,power9-nest-mmu"; + reg = <0x5012c40 0x20>; + }; + + nx@2010000 { + compatible = "ibm,power9-nx"; + reg = <0x2010000 0x4000>; + }; + + 
pbcq@4010c00 { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,power9-pbcq"; + ibm,hub-id = <0x01>; + ibm,pec-index = <0x00>; + reg = <0x4010c00 0x100 0xd010800 0x200>; + + stack@0 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x00>; + reg = <0x00>; + status = "okay"; + }; + }; + + pbcq@4011000 { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,power9-pbcq"; + ibm,hub-id = <0x01>; + ibm,pec-index = <0x01>; + reg = <0x4011000 0x100 0xe010800 0x200>; + + stack@0 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x01>; + reg = <0x00>; + status = "okay"; + }; + + stack@1 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x02>; + reg = <0x01>; + status = "okay"; + }; + }; + + pbcq@4011400 { + #address-cells = <0x01>; + #size-cells = <0x00>; + compatible = "ibm,power9-pbcq"; + ibm,hub-id = <0x01>; + ibm,pec-index = <0x02>; + reg = <0x4011400 0x100 0xf010800 0x200>; + + stack@0 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x03>; + reg = <0x00>; + status = "okay"; + }; + + stack@1 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x04>; + reg = <0x01>; + status = "disabled"; + }; + + stack@2 { + compatible = "ibm,power9-phb-stack"; + ibm,lane-eq = <0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x54545454 0x77777777 0x77777777 0x77777777 0x77777777>; + ibm,phb-index = <0x05>; + reg = <0x02>; + status = "disabled"; + }; + }; + + psihb@5012900 { + reg = <0x5012900 0x100>; + compatible = "ibm,power9-psihb-x\0ibm,psihb-x"; + }; + + vas@3011800 { + reg = <0x3011800 0x300>; + compatible = "ibm,power9-vas-x"; + ibm,vas-id = <0x01>; + }; + + xive@5013000 { + reg = <0x5013000 0x300>; + compatible = "ibm,power9-xive-x"; + force-assign-bars; + }; + }; +}; diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig b/src/mainboard/raptor-cs/talos-2/Kconfig new file mode 100644 index 00000000000..e4fd4d3aeba --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/Kconfig @@ -0,0 +1,92 @@ +## SPDX-License-Identifier: GPL-2.0-only + +if BOARD_RAPTOR_CS_TALOS_2 + +config BOOTBLOCK_IN_SEEPROM + bool "Bootblock in SEEPROM" + default n + help + Enable this option to build the bootblock as a separate image (i.e. + not part of coreboot.rom) that can be written to the SEEPROM + directly. 
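+
+# With BOOTBLOCK_IN_SEEPROM enabled, FMDFILE below switches to
+# board-bootblock-in-seeprom.fmd, which has no BOOTBLOCK region in flash,
+# and memlayout.ld links the bootblock at the address the SBE loads it to.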
+ +config BOARD_SPECIFIC_OPTIONS + def_bool y + select CPU_POWER9 + select CPU_IBM_POWER9 + select BOARD_ROMSIZE_KB_1024 + select ARCH_BOOTBLOCK_PPC64 + select ARCH_VERSTAGE_PPC64 + select ARCH_ROMSTAGE_PPC64 + select ARCH_RAMSTAGE_PPC64 + select SUPERIO_ASPEED_AST2400 + select BOOT_DEVICE_NOT_SPI_FLASH + select MISSING_BOARD_RESET + select HAVE_DEBUG_RAM_SETUP + select IPMI_BT + select IPMI_BT_ROMSTAGE + select FLATTENED_DEVICE_TREE + +config TALOS_2_INFINEON_TPM_1 + bool "I2C TPM1 chip compatible with SLB9635TT" + default n + select I2C_TPM + select MAINBOARD_HAS_TPM1 + select TPM_MEASURED_BOOT + select TPM_LOG_TPM2 + +config DRIVER_TPM_I2C_BUS + hex + default 0x02 + depends on TALOS_2_INFINEON_TPM_1 + +config DRIVER_TPM_I2C_ADDR + hex "I2C TPM chip address" + default 0x20 + depends on TALOS_2_INFINEON_TPM_1 + +config MEMLAYOUT_LD_FILE + string + default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/memlayout.ld" + +config FMDFILE + string + default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/board-bootblock-in-seeprom.fmd" if BOOTBLOCK_IN_SEEPROM + default "src/mainboard/\$(CONFIG_MAINBOARD_DIR)/board.fmd" if !BOOTBLOCK_IN_SEEPROM + + +config MAINBOARD_DIR + string + default "raptor-cs/talos-2" + +config HEAP_SIZE + hex + default 0x200000 + +config MAINBOARD_PART_NUMBER + string + default "Talos II" + +# I'm not sure how to deal with 2 CPUs with regard to DIMM count, but that's +# a problem for later. +config DIMM_MAX + int + default 8 + +config DIMM_SPD_SIZE + int + default 512 + +config MAX_CPUS + int "Maximum number of CPUs to use (1-2)" + default 2 + +config MAINBOARD_VENDOR + string + default "Raptor CS" + +config DRAM_SIZE_MB + int + default 32768 + +endif # BOARD_RAPTOR_CS_TALOS_2 diff --git a/src/mainboard/raptor-cs/talos-2/Kconfig.name b/src/mainboard/raptor-cs/talos-2/Kconfig.name new file mode 100644 index 00000000000..8a75417628d --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/Kconfig.name @@ -0,0 +1,2 @@ +config BOARD_RAPTOR_CS_TALOS_2 + bool "Talos II" diff --git a/src/mainboard/raptor-cs/talos-2/Makefile.inc b/src/mainboard/raptor-cs/talos-2/Makefile.inc new file mode 100644 index 00000000000..d55179f8d44 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/Makefile.inc @@ -0,0 +1,3 @@ +## SPDX-License-Identifier: GPL-2.0-only + +romstage-y += cbmem.c diff --git a/src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd b/src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd new file mode 100644 index 00000000000..8884751120f --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/board-bootblock-in-seeprom.fmd @@ -0,0 +1,15 @@ +# layout for firmware when flash address space matches used address layout +# +-------------+ <-- 0 +# | unspecified | +# +-------------+ <-- BIOS_BASE +# | FMAP | +# +-------------+ <-- BIOS_BASE + FMAP_SIZE +# | CBFS | +# +-------------+ <-- ROM_SIZE + +FLASH@0 CONFIG_ROM_SIZE { + BIOS@0 CONFIG_ROM_SIZE { + FMAP 0x200 + COREBOOT(CBFS) + } +} diff --git a/src/mainboard/raptor-cs/talos-2/board.fmd b/src/mainboard/raptor-cs/talos-2/board.fmd new file mode 100644 index 00000000000..462cdea7512 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/board.fmd @@ -0,0 +1,18 @@ +# layout for firmware when flash address space matches used address layout +# +-------------+ <-- 0 +# | unspecified | +# +-------------+ <-- BIOS_BASE +# | bootblock | +# +-------------+ <-- BIOS_BASE + 128K +# | FMAP | +# +-------------+ <-- BIOS_BASE + 128K + FMAP_SIZE +# | CBFS | +# +-------------+ <-- ROM_SIZE + +FLASH@0 CONFIG_ROM_SIZE { + BIOS@0 CONFIG_ROM_SIZE { 
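+ # The first 128K of BIOS holds the bootblock; the 0x200-byte FMAP and
+ # CBFS fill the remainder, matching the diagram above.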
+ BOOTBLOCK 128K + FMAP 0x200 + COREBOOT(CBFS) + } +} diff --git a/src/mainboard/raptor-cs/talos-2/board_info.txt b/src/mainboard/raptor-cs/talos-2/board_info.txt new file mode 100644 index 00000000000..aa2269185be --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/board_info.txt @@ -0,0 +1,2 @@ +Board name: Raptor CS Talos II +Category: desktop diff --git a/src/mainboard/raptor-cs/talos-2/cbmem.c b/src/mainboard/raptor-cs/talos-2/cbmem.c new file mode 100644 index 00000000000..15c20f8de42 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/cbmem.c @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +void *cbmem_top_chipset(void) +{ + /* Top of cbmem is at lowest usable DRAM address below 4GiB. */ + /* For now, last 1M of 4G */ + void *ptr = (void *) ((1ULL << 32) - 1048576); + return ptr; +} diff --git a/src/mainboard/raptor-cs/talos-2/devicetree.cb b/src/mainboard/raptor-cs/talos-2/devicetree.cb new file mode 100644 index 00000000000..eec041b2f88 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/devicetree.cb @@ -0,0 +1,9 @@ +## SPDX-License-Identifier: GPL-2.0-only + +chip soc/ibm/power9 + device cpu_cluster 0 on end + + chip drivers/ipmi + device pnp e4.0 on end # IPMI BT + end +end diff --git a/src/mainboard/raptor-cs/talos-2/mainboard.c b/src/mainboard/raptor-cs/talos-2/mainboard.c new file mode 100644 index 00000000000..a1a35f08339 --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/mainboard.c @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +static void mainboard_enable(struct device *dev) +{ + if (!dev) + die("No dev0; die\n"); +} + +struct chip_operations mainboard_ops = { + .enable_dev = mainboard_enable, +}; diff --git a/src/mainboard/raptor-cs/talos-2/memlayout.ld b/src/mainboard/raptor-cs/talos-2/memlayout.ld new file mode 100644 index 00000000000..749a920964f --- /dev/null +++ b/src/mainboard/raptor-cs/talos-2/memlayout.ld @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include + +SECTIONS +{ + DRAM_START(0x0) + + /* + * On POWER, 0 is wherever HRMOR points to rather than physical DRAM start. + * HRMOR is ORed with address, not added to it, meaning that memory space + * overlaps after 2^(least significant set bit of HRMOR). This becomes + * chaotic when nonconsecutive bits are set... + * + * Two and a half possible cases: + * 0. bootblock started with QEMU in hb-mode + * - NIA = 0x10 (bug?) + * - HRMOR = 0x08000000 (128M) + * - no physical memory to enable/train, everything accessible from start + * 1. bootblock loaded by HBBL + * - NIA = 0 + * - HRMOR = 0xF8000000 (4G - 128M) + * - initialized L3 = 0x400000 (4M) + * - top address before RAM = 0xF8400000 + * 2. bootblock in SEEPROM, loaded by SBE + * - NIA = 0x3000 (placeholder for int. vectors) + * - HRMOR = 0xF8200000 (4G - 128 M + 2 M) + * - initialized L3 = 0x8000 (bootblock/HBBL size = 32K) + * - no way 32K will be enough, must initialize more L3 in bootblock + * - HRMOR still applies, so memory overlaps every 2M + * + * Common subset (assuming 2. initializes as much memory as possible) is + * 0xF8200000-0xF8400000. 2M should be more than enough for pre-RAM code, + * but it isn't enough to load ramstage. We could implement postcar stage, + * but KISS: initialize L3 from _ebootblock to 0xF8980000: up to 9.5M into + * cache, leaving bottom 2M (0xF8000000-0xF8200000) either uninitialized + * (when started from SEEPROM) or just unused for anything but bootblock + * (loaded by HBBL). 
Last 0.5M of L3 cache is left for interrupt vectors + * normally located at address 0. + * + * Set HRMOR to 0 before jumping to C code in bootblock and forget it even + * exists. + * + * For QEMU s/0xF8/0x08/ in above description but code remains the same. + * L3 initialization is unnecessary in this case but won't break anything. + * + * TODO: there is a structure with SBE->HBBL data at 0 in 2nd option. It + * holds some useful data like XSCOM BAR and LPC BAR. If, for any reason, + * these addresses are different than default, they should be used instead + * of predefined values. + */ + +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + BOOTBLOCK( 0xF8000000, 32K) +#else + BOOTBLOCK( 0xF8203000, 20K) +#endif + + STACK( 0xF8208000, 32K) + PRERAM_CBMEM_CONSOLE(0xF8210000, 128K) + FMAP_CACHE( 0xF8230000, 4K) + CBFS_MCACHE( 0xF8231000, 8K) + TIMESTAMP( 0xF8233000, 4K) + TPM_LOG( 0xF8234000, 2K) + + ROMSTAGE( 0xF8240000, 256K) + + REGION(mvpd_cache, 0xF8280000, 128K, 8) + + /* + * bootblock_crt0.S assumes this is the last part of L3, leaving for + * interrupt vectors at least 0.5M because of cache associativity. If + * more CBFS_CACHE is needed, split this into pre-/postram caches. + */ + CBFS_CACHE( 0xF8380000, 6M) + + RAMSTAGE( 0xF9000000, 3M) +} diff --git a/src/security/tpm/Kconfig b/src/security/tpm/Kconfig index fc339a2b3af..7f2aa9c5fce 100644 --- a/src/security/tpm/Kconfig +++ b/src/security/tpm/Kconfig @@ -94,6 +94,48 @@ config TPM_MEASURED_BOOT help Enables measured boot (experimental) +choice + prompt "TPM event log format" + depends on TPM_MEASURED_BOOT + default TPM_LOG_TPM1 if TPM1 + default TPM_LOG_TPM2 if TPM2 + +config TPM_LOG_CB + bool "coreboot's custom format" + help + Custom coreboot-specific format of the log derived from TPM1 log format. +config TPM_LOG_TPM1 + bool "TPM 1.2 format" + depends on TPM1 + help + Log per TPM 1.2 specification. + See "TCG PC Client Specific Implementation Specification for Conventional BIOS". +config TPM_LOG_TPM2 + bool "TPM 2.0 format" + depends on TPM2 || TPM1 + help + Log per TPM 2.0 specification. + See "TCG PC Client Platform Firmware Profile Specification". 
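+
+# With the defaults above, a TPM2 platform ends up with TPM_LOG_TPM2=y and,
+# via the choice below, TPM_HASH_SHA256=y.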
+ +endchoice + +choice + prompt "TPM2 hashing algorithm" + depends on TPM_MEASURED_BOOT && TPM_LOG_TPM2 + default TPM_HASH_SHA1 if TPM1 + default TPM_HASH_SHA256 if TPM2 + +config TPM_HASH_SHA1 + bool "SHA1" +config TPM_HASH_SHA256 + bool "SHA256" +config TPM_HASH_SHA384 + bool "SHA384" +config TPM_HASH_SHA512 + bool "SHA512" + +endchoice + config TPM_MEASURED_BOOT_INIT_BOOTBLOCK bool depends on TPM_MEASURED_BOOT && !VBOOT diff --git a/src/security/tpm/Makefile.inc b/src/security/tpm/Makefile.inc index 8f633a89bf7..ae06cb0ea6c 100644 --- a/src/security/tpm/Makefile.inc +++ b/src/security/tpm/Makefile.inc @@ -55,10 +55,22 @@ romstage-y += tspi/crtm.c ramstage-y += tspi/crtm.c postcar-y += tspi/crtm.c -ramstage-y += tspi/log.c -romstage-y += tspi/log.c -verstage-y += tspi/log.c -postcar-y += tspi/log.c -bootblock-y += tspi/log.c +ramstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c +romstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c +verstage-$(CONFIG_TPM_LOG_CB) += tspi/log.c +postcar-$(CONFIG_TPM_LOG_CB) += tspi/log.c +bootblock-$(CONFIG_TPM_LOG_CB) += tspi/log.c + +ramstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +romstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +verstage-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +postcar-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c +bootblock-$(CONFIG_TPM_LOG_TPM1) += tspi/log-tpm1.c + +ramstage-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +romstage-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +verstage-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +postcar-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c +bootblock-$(CONFIG_TPM_LOG_TPM2) += tspi/log-tpm2.c endif # CONFIG_TPM_MEASURED_BOOT diff --git a/src/security/tpm/tpm1_log_serialized.h b/src/security/tpm/tpm1_log_serialized.h new file mode 100644 index 00000000000..a8b7a60798a --- /dev/null +++ b/src/security/tpm/tpm1_log_serialized.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef __TPM1_LOG_SERIALIZED_H__ +#define __TPM1_LOG_SERIALIZED_H__ + +#include +#include +#include + +#define TPM_1_LOG_DIGEST_MAX_LENGTH 20 +#define TPM_1_LOG_DATA_MAX_LENGTH 50 + +#define TPM_1_LOG_VI_MAGIC 0x31544243 /* "CBT1" in LE */ +#define TPM_1_LOG_VI_MAJOR 1 +#define TPM_1_LOG_VI_MINOR 0 + +struct tpm_1_log_entry { + uint32_t pcr; + uint32_t event_type; + uint8_t digest[TPM_1_LOG_DIGEST_MAX_LENGTH]; + uint32_t data_length; + uint8_t data[TPM_1_LOG_DATA_MAX_LENGTH]; +} __packed; + +struct tpm_1_vendor { + uint8_t reserved; + uint8_t version_major; + uint8_t version_minor; + uint32_t magic; + uint16_t max_entries; + uint16_t num_entries; + uint32_t entry_size; +} __packed; + +struct tpm_1_log_table { + /* The first entry of the log is inlined and describes the log itself */ + uint32_t pcr; + uint32_t event_type; + uint8_t digest[TPM_1_LOG_DIGEST_MAX_LENGTH]; + uint32_t spec_id_size; + struct spec_id_event_data spec_id; + struct tpm_1_vendor vendor; + + struct tpm_1_log_entry entries[0]; /* Variable number of entries */ +} __packed; + +#endif diff --git a/src/security/tpm/tpm2_log_serialized.h b/src/security/tpm/tpm2_log_serialized.h new file mode 100644 index 00000000000..2b4e43c6354 --- /dev/null +++ b/src/security/tpm/tpm2_log_serialized.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef __TPM2_LOG_SERIALIZED_H__ +#define __TPM2_LOG_SERIALIZED_H__ + +#include + +#define TPM_20_SPEC_ID_EVENT_SIGNATURE "Spec ID Event03" +#define TPM_20_LOG_DATA_MAX_LENGTH 50 + +#define TPM_20_LOG_VI_MAGIC 0x32544243 /* "CBT2" in LE */ +#define TPM_20_LOG_VI_MAJOR 1 +#define TPM_20_LOG_VI_MINOR 0 + +/* + * TPM2.0 log 
entries can't be generally represented as C structures due to + * varying number of digests and their sizes. However, it works as long as + * we're using only a single kind of digest. + */ +#if CONFIG(TPM_LOG_TPM2) +# if CONFIG(TPM_HASH_SHA1) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA1_DIGEST_SIZE +# endif +# if CONFIG(TPM_HASH_SHA256) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA256_DIGEST_SIZE +# endif +# if CONFIG(TPM_HASH_SHA384) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA384_DIGEST_SIZE +# endif +# if CONFIG(TPM_HASH_SHA512) +# define TPM_20_LOG_DIGEST_MAX_LENGTH SHA512_DIGEST_SIZE +# endif + +# ifndef TPM_20_LOG_DIGEST_MAX_LENGTH +# error "Misconfiguration: failed to determine TPM hashing algorithm" +# endif +#else +# define TPM_20_LOG_DIGEST_MAX_LENGTH 1 /* To avoid compilation error */ +#endif + +/* TCG_PCR_EVENT2 */ +struct tpm_2_log_entry { + uint32_t pcr; + uint32_t event_type; + uint32_t digest_count; /* Always 1 in current implementation */ + uint16_t digest_type; + uint8_t digest[TPM_20_LOG_DIGEST_MAX_LENGTH]; + uint32_t data_length; + uint8_t data[TPM_20_LOG_DATA_MAX_LENGTH]; +} __packed; + +struct tpm_2_vendor { + uint8_t reserved; + uint8_t version_major; + uint8_t version_minor; + uint32_t magic; + uint16_t max_entries; + uint16_t num_entries; + uint32_t entry_size; +} __packed; + +struct tpm_2_log_table { + struct tcg_efi_spec_id_event header; /* TCG_PCR_EVENT actually */ + struct tpm_digest_sizes digest_sizes[1]; + uint8_t vendor_info_size; + struct tpm_2_vendor vendor; + struct tpm_2_log_entry entries[0]; /* Variable number of entries */ +} __packed; + +#endif diff --git a/src/security/tpm/tspi.h b/src/security/tpm/tspi.h index 7157b4d7303..33f363cab6c 100644 --- a/src/security/tpm/tspi.h +++ b/src/security/tpm/tspi.h @@ -3,43 +3,130 @@ #ifndef TSPI_H_ #define TSPI_H_ +#include +#include +#include #include -#include #include #include #define TPM_PCR_MAX_LEN 64 #define HASH_DATA_CHUNK_SIZE 1024 +#define MAX_TPM_LOG_ENTRIES 50 +/* Assumption of 2K TCPA log size reserved for CAR/SRAM */ +#define MAX_PRERAM_TPM_LOG_ENTRIES 15 /** * Get the pointer to the single instance of global - * tcpa log data, and initialize it when necessary + * TPM log data, and initialize it when necessary */ -struct tcpa_table *tcpa_log_init(void); +void *tpm_log_init(void); /** - * Clears the pre-RAM tcpa log data and initializes + * Get the pointer to the single CBMEM instance of global + * TPM log data, and initialize it when necessary + */ +static inline void *tpm_log_cbmem_init(void) +{ + if (CONFIG(TPM_LOG_CB)) + return tpm_cb_log_cbmem_init(); + if (CONFIG(TPM_LOG_TPM1)) + return tpm1_log_cbmem_init(); + if (CONFIG(TPM_LOG_TPM2)) + return tpm2_log_cbmem_init(); + return NULL; +} + +/** + * Clears the pre-RAM TPM log data and initializes + * any content with default values */ -void tcpa_preram_log_clear(void); +static inline void tpm_preram_log_clear(void) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_preram_log_clear(); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_preram_log_clear(); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_preram_log_clear(); +} + +/** + * Retrieves number of entries currently stored in the log. 
+ */ +static inline uint16_t tpm_log_get_size(const void *log_table) +{ + if (CONFIG(TPM_LOG_CB)) + return tpm_cb_log_get_size(log_table); + if (CONFIG(TPM_LOG_TPM1)) + return tpm1_log_get_size(log_table); + if (CONFIG(TPM_LOG_TPM2)) + return tpm2_log_get_size(log_table); + return 0; +} + +/** + * Copies data from pre-RAM TPM log to CBMEM (RAM) log + */ +static inline void tpm_log_copy_entries(const void *from, void *to) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_log_copy_entries(from, to); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_log_copy_entries(from, to); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_log_copy_entries(from, to); +} + +/** + * Retrieves an entry from a log. Returns non-zero on invalid index or error. + */ +static inline int tpm_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) +{ + if (CONFIG(TPM_LOG_CB)) + return tpm_cb_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); + if (CONFIG(TPM_LOG_TPM1)) + return tpm1_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); + if (CONFIG(TPM_LOG_TPM2)) + return tpm2_log_get(entry_idx, pcr, digest_data, digest_algo, event_name); + return 1; +} /** - * Add table entry for cbmem TCPA log. + * Add table entry for cbmem TPM log. * @param name Name of the hashed data * @param pcr PCR used to extend hashed data * @param diget_algo sets the digest algorithm * @param digest sets the hash extended into the tpm * @param digest_len the length of the digest */ -void tcpa_log_add_table_entry(const char *name, const uint32_t pcr, - enum vb2_hash_algorithm digest_algo, - const uint8_t *digest, - const size_t digest_len); +static inline void tpm_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); +} /** - * Dump TCPA log entries on console + * Dump TPM log entries on console */ -void tcpa_log_dump(void *unused); +static inline void tpm_log_dump(void *unused) +{ + if (CONFIG(TPM_LOG_CB)) + tpm_cb_log_dump(); + else if (CONFIG(TPM_LOG_TPM1)) + tpm1_log_dump(); + else if (CONFIG(TPM_LOG_TPM2)) + tpm2_log_dump(); +} /** * Ask vboot for a digest and extend a TPM PCR with it. diff --git a/src/security/tpm/tspi/crtm.c b/src/security/tpm/tspi/crtm.c index 24133d9f9fc..a7efcf21453 100644 --- a/src/security/tpm/tspi/crtm.c +++ b/src/security/tpm/tspi/crtm.c @@ -2,15 +2,17 @@ #include #include +#include #include +#include #include "crtm.h" #include -static int tcpa_log_initialized; -static inline int tcpa_log_available(void) +static int tpm_log_initialized; +static inline int tpm_log_available(void) { if (ENV_BOOTBLOCK) - return tcpa_log_initialized; + return tpm_log_initialized; return 1; } @@ -33,10 +35,10 @@ static inline int tcpa_log_available(void) */ static uint32_t tspi_init_crtm(void) { - /* Initialize TCPA PRERAM log. */ - if (!tcpa_log_available()) { - tcpa_preram_log_clear(); - tcpa_log_initialized = 1; + /* Initialize TPM PRERAM log. 
*/ + if (!tpm_log_available()) { + tpm_preram_log_clear(); + tpm_log_initialized = 1; } else { printk(BIOS_WARNING, "TSPI: CRTM already initialized!\n"); return VB2_SUCCESS; @@ -109,9 +111,9 @@ static bool is_runtime_data(const char *name) uint32_t tspi_cbfs_measurement(const char *name, uint32_t type, const struct vb2_hash *hash) { uint32_t pcr_index; - char tcpa_metadata[TCPA_PCR_HASH_NAME]; + char tpm_log_metadata[TPM_CB_LOG_PCR_HASH_NAME]; - if (!tcpa_log_available()) { + if (!tpm_log_available()) { if (tspi_init_crtm() != VB2_SUCCESS) { printk(BIOS_WARNING, "Initializing CRTM failed!\n"); @@ -142,45 +144,85 @@ uint32_t tspi_cbfs_measurement(const char *name, uint32_t type, const struct vb2 break; } - snprintf(tcpa_metadata, TCPA_PCR_HASH_NAME, "CBFS: %s", name); + snprintf(tpm_log_metadata, TPM_CB_LOG_PCR_HASH_NAME, "CBFS: %s", name); return tpm_extend_pcr(pcr_index, hash->algo, hash->raw, vb2_digest_size(hash->algo), - tcpa_metadata); + tpm_log_metadata); +} + +void *tpm_log_init(void) +{ + static void *tclt; + + /* We are dealing here with pre CBMEM environment. + * If cbmem isn't available use CAR or SRAM */ + if (!cbmem_possibly_online() && + !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) + return _tpm_log; + else if (ENV_CREATES_CBMEM + && !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) { + tclt = tpm_log_cbmem_init(); + if (!tclt) + return _tpm_log; + } else { + tclt = tpm_log_cbmem_init(); + } + + return tclt; } int tspi_measure_cache_to_pcr(void) { int i; - struct tcpa_table *tclt = tcpa_log_init(); + int pcr; + const char *event_name; + const uint8_t *digest_data; + enum vb2_hash_algorithm digest_algo; /* This means the table is empty. */ - if (!tcpa_log_available()) + if (!tpm_log_available()) return VB2_SUCCESS; - if (!tclt) { - printk(BIOS_WARNING, "TCPA: Log non-existent!\n"); + if (tpm_log_init() == NULL) { + printk(BIOS_WARNING, "TPM LOG: log non-existent!\n"); return VB2_ERROR_UNKNOWN; } - printk(BIOS_DEBUG, "TPM: Write digests cached in TCPA log to PCR\n"); - for (i = 0; i < tclt->num_entries; i++) { - struct tcpa_entry *tce = &tclt->entries[i]; - if (tce) { - printk(BIOS_DEBUG, "TPM: Write digest for" - " %s into PCR %d\n", - tce->name, tce->pcr); - int result = tlcl_extend(tce->pcr, - tce->digest, - NULL); - if (result != TPM_SUCCESS) { - printk(BIOS_ERR, "TPM: Writing digest" - " of %s into PCR failed with error" - " %d\n", - tce->name, result); - return VB2_ERROR_UNKNOWN; - } + printk(BIOS_DEBUG, "TPM: Write digests cached in TPM log to PCR\n"); + i = 0; + while (!tpm_log_get(i++, &pcr, &digest_data, &digest_algo, &event_name)) { + printk(BIOS_DEBUG, "TPM: Write digest for %s into PCR %d\n", event_name, pcr); + int result = tlcl_extend(pcr, digest_data, digest_algo); + if (result != TPM_SUCCESS) { + printk(BIOS_ERR, + "TPM: Writing digest of %s into PCR failed with error %d\n", + event_name, result); + return VB2_ERROR_UNKNOWN; } } return VB2_SUCCESS; } + +#if !CONFIG(VBOOT_RETURN_FROM_VERSTAGE) +static void recover_tpm_log(int is_recovery) +{ + const void *preram_log = _tpm_log; + void *ram_log = tpm_log_cbmem_init(); + + if (tpm_log_get_size(preram_log) > MAX_PRERAM_TPM_LOG_ENTRIES) { + printk(BIOS_WARNING, "TPM LOG: pre-RAM log is too full, possible corruption\n"); + return; + } + + if (ram_log == NULL) { + printk(BIOS_WARNING, "TPM LOG: CBMEM not available, something went wrong\n"); + return; + } + + tpm_log_copy_entries(_tpm_log, ram_log); +} +CBMEM_CREATION_HOOK(recover_tpm_log); +#endif + +BOOT_STATE_INIT_ENTRY(BS_PAYLOAD_BOOT, BS_ON_ENTRY, tpm_log_dump, NULL); diff --git 
a/src/security/tpm/tspi/crtm.h b/src/security/tpm/tspi/crtm.h index bd5bc5785d0..ffa4867594c 100644 --- a/src/security/tpm/tspi/crtm.h +++ b/src/security/tpm/tspi/crtm.h @@ -16,10 +16,37 @@ */ #define TPM_RUNTIME_DATA_PCR 3 -#define TPM_MEASURE_ALGO (CONFIG(TPM1) ? VB2_HASH_SHA1 : VB2_HASH_SHA256) +#if CONFIG(TPM_LOG_CB) && CONFIG(TPM1) +# define TPM_MEASURE_ALGO VB2_HASH_SHA1 +#elif CONFIG(TPM_LOG_CB) && CONFIG(TPM2) +# define TPM_MEASURE_ALGO VB2_HASH_SHA256 +#elif CONFIG(TPM_LOG_TPM1) +# define TPM_MEASURE_ALGO VB2_HASH_SHA1 +#elif CONFIG(TPM_LOG_TPM2) +# if CONFIG(TPM_HASH_SHA1) +# define TPM_MEASURE_ALGO VB2_HASH_SHA1 +# endif +# if CONFIG(TPM_HASH_SHA256) +# define TPM_MEASURE_ALGO VB2_HASH_SHA256 +# endif +# if CONFIG(TPM_HASH_SHA384) +# define TPM_MEASURE_ALGO VB2_HASH_SHA384 +# endif +# if CONFIG(TPM_HASH_SHA512) +# define TPM_MEASURE_ALGO VB2_HASH_SHA512 +# endif +#endif + +#if !defined(TPM_MEASURE_ALGO) +# if !CONFIG(TPM_MEASURED_BOOT) +# define TPM_MEASURE_ALGO VB2_HASH_INVALID +# else +# error "Misconfiguration: failed to determine TPM hashing algorithm" +# endif +#endif /** - * Measure digests cached in TCPA log entries into PCRs + * Measure digests cached in TPM log entries into PCRs */ int tspi_measure_cache_to_pcr(void); diff --git a/src/security/tpm/tspi/log-tpm1.c b/src/security/tpm/tspi/log-tpm1.c new file mode 100644 index 00000000000..52944263044 --- /dev/null +++ b/src/security/tpm/tspi/log-tpm1.c @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Unlike log.c this implements TPM log according to TPM1.2 specification + * rather than using coreboot-specific log format. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +void *tpm1_log_cbmem_init(void) +{ + static struct tpm_1_log_table *tclt; + if (tclt) + return tclt; + + if (cbmem_possibly_online()) { + size_t tpm_log_len; + struct spec_id_event_data *hdr; + + tclt = cbmem_find(CBMEM_ID_TCPA_TCG_LOG); + if (tclt) + return tclt; + + tpm_log_len = sizeof(*tclt) + MAX_TPM_LOG_ENTRIES * sizeof(tclt->entries[0]); + tclt = cbmem_add(CBMEM_ID_TCPA_TCG_LOG, tpm_log_len); + if (!tclt) + return NULL; + + memset(tclt, 0, sizeof(*tclt)); + hdr = &tclt->spec_id; + + /* Fill in first "header" entry. 
*/ + tclt->event_type = htole32(EV_NO_ACTION); + tclt->spec_id_size = htole32(sizeof(tclt->spec_id) + sizeof(tclt->vendor)); + strcpy(hdr->signature, TCPA_SPEC_ID_EVENT_SIGNATURE); + hdr->platform_class = htole32(0x00); // client platform + hdr->spec_version_minor = 0x02; + hdr->spec_version_major = 0x01; + hdr->spec_errata = 0x01; + hdr->vendor_info_size = sizeof(tclt->vendor); + + tclt->vendor.reserved = 0; + tclt->vendor.version_major = TPM_1_LOG_VI_MAJOR; + tclt->vendor.version_minor = TPM_1_LOG_VI_MINOR; + tclt->vendor.magic = htole32(TPM_1_LOG_VI_MAGIC); + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); + tclt->vendor.entry_size = htole32(sizeof(tclt->entries[0])); + } + + return tclt; +} + +void tpm1_log_dump(void) +{ + int i, j; + struct tpm_1_log_table *tclt; + + tclt = tpm_log_init(); + if (!tclt) + return; + + printk(BIOS_INFO, "coreboot TPM 1.2 measurements:\n\n"); + for (i = 0; i < le16toh(tclt->vendor.num_entries); i++) { + struct tpm_1_log_entry *tce = &tclt->entries[i]; + + printk(BIOS_INFO, " PCR-%u ", le32toh(tce->pcr)); + + for (j = 0; j < TPM_1_LOG_DIGEST_MAX_LENGTH; j++) + printk(BIOS_INFO, "%02x", tce->digest[j]); + + printk(BIOS_INFO, " %s [%s]\n", "SHA1", (char *)tce->data); + } + printk(BIOS_INFO, "\n"); +} + +void tpm1_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) +{ + struct tpm_1_log_table *tclt; + struct tpm_1_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) { + printk(BIOS_WARNING, "TPM LOG: non-existent!\n"); + return; + } + + if (!name) { + printk(BIOS_WARNING, "TPM LOG: entry name not set\n"); + return; + } + + if (digest_algo != VB2_HASH_SHA1) { + printk(BIOS_WARNING, "TPM LOG: unsupported hash algorithm\n"); + return; + } + + if (le16toh(tclt->vendor.num_entries) >= le16toh(tclt->vendor.max_entries)) { + printk(BIOS_WARNING, "TPM LOG: log table is full\n"); + return; + } + + tce = &tclt->entries[le16toh(tclt->vendor.num_entries)]; + tclt->vendor.num_entries = htole16(le16toh(tclt->vendor.num_entries) + 1); + + tce->pcr = htole32(pcr); + tce->event_type = htole32(EV_ACTION); + + memcpy(tce->digest, digest, digest_len); + + tce->data_length = htole32(TPM_1_LOG_DATA_MAX_LENGTH); + strncpy((char *)tce->data, name, sizeof(tce->data) - 1); + tce->data[sizeof(tce->data) - 1] = '\0'; +} + +void tpm1_preram_log_clear(void) +{ + printk(BIOS_INFO, "TPM LOG: clearing the log\n"); + /* + * Pre-RAM log is only for internal use and isn't exported anywhere, hence its header + * is not initialized. 
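+	 * (The CBMEM instance created by tpm1_log_cbmem_init() is the one
+	 * that gets a fully populated header.)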
+ */ + struct tpm_1_log_table *tclt = (struct tpm_1_log_table *)_tpm_log; + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); +} + +int tpm1_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) +{ + struct tpm_1_log_table *tclt; + struct tpm_1_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) + return 1; + + if (entry_idx < 0 || entry_idx >= le16toh(tclt->vendor.num_entries)) + return 1; + + tce = &tclt->entries[entry_idx]; + + *pcr = le32toh(tce->pcr); + *digest_data = tce->digest; + *digest_algo = VB2_HASH_SHA1; + *event_name = (char *)tce->data; + return 0; +} + +uint16_t tpm1_log_get_size(const void *log_table) +{ + const struct tpm_1_log_table *tclt = log_table; + return le16toh(tclt->vendor.num_entries); +} + +void tpm1_log_copy_entries(const void *from, void *to) +{ + const struct tpm_1_log_table *from_log = from; + struct tpm_1_log_table *to_log = to; + int i; + + for (i = 0; i < le16toh(from_log->vendor.num_entries); i++) { + struct tpm_1_log_entry *tce = + &to_log->entries[le16toh(to_log->vendor.num_entries)]; + memcpy(tce, &from_log->entries[i], sizeof(*tce)); + + to_log->vendor.num_entries = htole16(le16toh(to_log->vendor.num_entries) + 1); + } +} diff --git a/src/security/tpm/tspi/log-tpm2.c b/src/security/tpm/tspi/log-tpm2.c new file mode 100644 index 00000000000..897ccedbff3 --- /dev/null +++ b/src/security/tpm/tspi/log-tpm2.c @@ -0,0 +1,230 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Unlike log.c this implements TPM log according to TPM2.0 specification + * rather than using coreboot-specific log format. + * + * First entry is in TPM1.2 format and serves as a header, the rest are in + * a newer (agile) format which supports SHA256 and multiple hashes, but we + * store only one hash. + * + * This is defined in "TCG EFI Protocol Specification". 
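+ *
+ * With SHA256 selected, each tpm_2_log_entry is laid out as: 4-byte PCR,
+ * 4-byte event type, 4-byte digest count (always 1 here), 2-byte digest
+ * type, 32-byte digest, 4-byte data length and up to 50 bytes of data.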
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static uint16_t tpmalg_from_vb2_hash(enum vb2_hash_algorithm hash_type) +{ + switch (hash_type) { + case VB2_HASH_SHA1: + return TPM2_ALG_SHA1; + case VB2_HASH_SHA256: + return TPM2_ALG_SHA256; + case VB2_HASH_SHA384: + return TPM2_ALG_SHA384; + case VB2_HASH_SHA512: + return TPM2_ALG_SHA512; + + default: + return 0xFF; + } +} + +void *tpm2_log_cbmem_init(void) +{ + static struct tpm_2_log_table *tclt; + if (tclt) + return tclt; + + if (cbmem_possibly_online()) { + size_t tpm_log_len; + struct tcg_efi_spec_id_event *hdr; + + tclt = cbmem_find(CBMEM_ID_TPM2_TCG_LOG); + if (tclt) + return tclt; + + tpm_log_len = sizeof(struct tpm_2_log_table) + + MAX_TPM_LOG_ENTRIES * sizeof(struct tpm_2_log_entry); + tclt = cbmem_add(CBMEM_ID_TPM2_TCG_LOG, tpm_log_len); + if (!tclt) + return NULL; + + memset(tclt, 0, tpm_log_len); + hdr = &tclt->header; + + hdr->event_type = htole32(EV_NO_ACTION); + hdr->event_size = htole32(33 + sizeof(tclt->vendor)); + strcpy((char *)hdr->signature, TPM_20_SPEC_ID_EVENT_SIGNATURE); + hdr->platform_class = htole32(0x00); // client platform + hdr->spec_version_minor = 0x00; + hdr->spec_version_major = 0x02; + hdr->spec_errata = 0x00; + hdr->uintn_size = 0x02; // 64-bit UINT + hdr->num_of_algorithms = htole32(1); + hdr->digest_sizes[0].alg_id = htole16(tpmalg_from_vb2_hash(TPM_MEASURE_ALGO)); + hdr->digest_sizes[0].digest_size = htole16(vb2_digest_size(TPM_MEASURE_ALGO)); + + tclt->vendor_info_size = sizeof(tclt->vendor); + tclt->vendor.reserved = 0; + tclt->vendor.version_major = TPM_20_LOG_VI_MAJOR; + tclt->vendor.version_minor = TPM_20_LOG_VI_MINOR; + tclt->vendor.magic = htole32(TPM_20_LOG_VI_MAGIC); + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); + tclt->vendor.entry_size = htole32(sizeof(struct tpm_2_log_entry)); + } + + return tclt; +} + +void tpm2_log_dump(void) +{ + int i, j; + struct tpm_2_log_table *tclt; + int hash_size; + const char *alg_name; + + tclt = tpm_log_init(); + if (!tclt) + return; + + hash_size = vb2_digest_size(TPM_MEASURE_ALGO); + alg_name = vb2_get_hash_algorithm_name(TPM_MEASURE_ALGO); + + printk(BIOS_INFO, "coreboot TPM 2.0 measurements:\n\n"); + for (i = 0; i < le16toh(tclt->vendor.num_entries); i++) { + struct tpm_2_log_entry *tce = &tclt->entries[i]; + + printk(BIOS_INFO, " PCR-%u ", le32toh(tce->pcr)); + + for (j = 0; j < hash_size; j++) + printk(BIOS_INFO, "%02x", tce->digest[j]); + + printk(BIOS_INFO, " %s [%s]\n", alg_name, tce->data); + } + printk(BIOS_INFO, "\n"); +} + +void tpm2_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) +{ + struct tpm_2_log_table *tclt; + struct tpm_2_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) { + printk(BIOS_WARNING, "TPM LOG: non-existent!\n"); + return; + } + + if (!name) { + printk(BIOS_WARNING, "TPM LOG: entry name not set\n"); + return; + } + + if (digest_algo != TPM_MEASURE_ALGO) { + printk(BIOS_WARNING, "TPM LOG: digest is of unsupported type: %s\n", + vb2_get_hash_algorithm_name(digest_algo)); + return; + } + + if (digest_len != vb2_digest_size(TPM_MEASURE_ALGO)) { + printk(BIOS_WARNING, "TPM LOG: digest has invalid length: %d\n", + (int)digest_len); + return; + } + + if (le16toh(tclt->vendor.num_entries) >= le16toh(tclt->vendor.max_entries)) { + printk(BIOS_WARNING, "TPM LOG: log table is full\n"); + return; + } + + tce = 
&tclt->entries[le16toh(tclt->vendor.num_entries)]; + tclt->vendor.num_entries = htole16(le16toh(tclt->vendor.num_entries) + 1); + + tce->pcr = htole32(pcr); + tce->event_type = htole32(EV_ACTION); + + tce->digest_count = htole32(1); + tce->digest_type = htole16(tpmalg_from_vb2_hash(TPM_MEASURE_ALGO)); + memcpy(tce->digest, digest, vb2_digest_size(TPM_MEASURE_ALGO)); + + tce->data_length = htole32(sizeof(tce->data)); + strncpy((char *)tce->data, name, sizeof(tce->data) - 1); + tce->data[sizeof(tce->data) - 1] = '\0'; +} + +int tpm2_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) +{ + struct tpm_2_log_table *tclt; + struct tpm_2_log_entry *tce; + + tclt = tpm_log_init(); + if (!tclt) + return 1; + + if (entry_idx < 0 || entry_idx >= le16toh(tclt->vendor.num_entries)) + return 1; + + tce = &tclt->entries[entry_idx]; + + *pcr = le32toh(tce->pcr); + *digest_data = tce->digest; + *digest_algo = TPM_MEASURE_ALGO; /* We validate algorithm on addition */ + *event_name = (char *)tce->data; + return 0; +} + +uint16_t tpm2_log_get_size(const void *log_table) +{ + const struct tpm_2_log_table *tclt = log_table; + return le16toh(tclt->vendor.num_entries); +} + +void tpm2_preram_log_clear(void) +{ + printk(BIOS_INFO, "TPM LOG: clearing the log\n"); + /* + * Pre-RAM log is only for internal use and isn't exported anywhere, hence its header + * is not initialized. + */ + struct tpm_2_log_table *tclt = (struct tpm_2_log_table *)_tpm_log; + tclt->vendor.max_entries = htole16(MAX_TPM_LOG_ENTRIES); + tclt->vendor.num_entries = htole16(0); +} + +void tpm2_log_copy_entries(const void *from, void *to) +{ + const struct tpm_2_log_table *from_log = from; + struct tpm_2_log_table *to_log = to; + int i; + + for (i = 0; i < le16toh(from_log->vendor.num_entries); i++) { + struct tpm_2_log_entry *tce = + &to_log->entries[le16toh(to_log->vendor.num_entries)]; + to_log->vendor.num_entries = htole16(le16toh(to_log->vendor.num_entries) + 1); + + tce->pcr = from_log->entries[i].pcr; + tce->event_type = from_log->entries[i].event_type; + + tce->digest_count = from_log->entries[i].digest_count; + tce->digest_type = from_log->entries[i].digest_type; + memcpy(tce->digest, from_log->entries[i].digest, sizeof(tce->digest)); + + tce->data_length = from_log->entries[i].data_length; + memcpy(tce->data, from_log->entries[i].data, sizeof(tce->data)); + } +} diff --git a/src/security/tpm/tspi/log.c b/src/security/tpm/tspi/log.c index 296cb2d1082..9a8fde347b9 100644 --- a/src/security/tpm/tspi/log.c +++ b/src/security/tpm/tspi/log.c @@ -1,28 +1,28 @@ /* SPDX-License-Identifier: GPL-2.0-only */ #include +#include #include #include #include #include #include -#include #include -static struct tcpa_table *tcpa_cbmem_init(void) +void *tpm_cb_log_cbmem_init(void) { - static struct tcpa_table *tclt; + static struct tpm_cb_log_table *tclt; if (tclt) return tclt; if (cbmem_possibly_online()) { - tclt = cbmem_find(CBMEM_ID_TCPA_LOG); + tclt = cbmem_find(CBMEM_ID_TPM_CB_LOG); if (!tclt) { - size_t tcpa_log_len = sizeof(struct tcpa_table) + - MAX_TCPA_LOG_ENTRIES * sizeof(struct tcpa_entry); - tclt = cbmem_add(CBMEM_ID_TCPA_LOG, tcpa_log_len); + size_t tpm_log_len = sizeof(struct tpm_cb_log_table) + + MAX_TPM_LOG_ENTRIES * sizeof(struct tpm_cb_log_entry); + tclt = cbmem_add(CBMEM_ID_TPM_CB_LOG, tpm_log_len); if (tclt) { - tclt->max_entries = MAX_TCPA_LOG_ENTRIES; + tclt->max_entries = MAX_TPM_LOG_ENTRIES; tclt->num_entries = 0; } } @@ -30,39 +30,18 @@ static struct 
tcpa_table *tcpa_cbmem_init(void) return tclt; } -struct tcpa_table *tcpa_log_init(void) -{ - static struct tcpa_table *tclt; - - /* We are dealing here with pre CBMEM environment. - * If cbmem isn't available use CAR or SRAM */ - if (!cbmem_possibly_online() && - !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) - return (struct tcpa_table *)_tpm_tcpa_log; - else if (ENV_CREATES_CBMEM - && !CONFIG(VBOOT_RETURN_FROM_VERSTAGE)) { - tclt = tcpa_cbmem_init(); - if (!tclt) - return (struct tcpa_table *)_tpm_tcpa_log; - } else { - tclt = tcpa_cbmem_init(); - } - - return tclt; -} - -void tcpa_log_dump(void *unused) +void tpm_cb_log_dump(void) { int i, j; - struct tcpa_table *tclt; + struct tpm_cb_log_table *tclt; - tclt = tcpa_log_init(); + tclt = tpm_log_init(); if (!tclt) return; - printk(BIOS_INFO, "coreboot TCPA measurements:\n\n"); + printk(BIOS_INFO, "coreboot TPM log measurements:\n\n"); for (i = 0; i < tclt->num_entries; i++) { - struct tcpa_entry *tce = &tclt->entries[i]; + struct tpm_cb_log_entry *tce = &tclt->entries[i]; if (tce) { printk(BIOS_INFO, " PCR-%u ", tce->pcr); @@ -76,85 +55,107 @@ void tcpa_log_dump(void *unused) printk(BIOS_INFO, "\n"); } -void tcpa_log_add_table_entry(const char *name, const uint32_t pcr, - enum vb2_hash_algorithm digest_algo, - const uint8_t *digest, - const size_t digest_len) +void tpm_cb_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len) { - struct tcpa_table *tclt = tcpa_log_init(); + struct tpm_cb_log_table *tclt = tpm_log_init(); if (!tclt) { - printk(BIOS_WARNING, "TCPA: Log non-existent!\n"); + printk(BIOS_WARNING, "TPM LOG: Log non-existent!\n"); return; } if (tclt->num_entries >= tclt->max_entries) { - printk(BIOS_WARNING, "TCPA: TCPA log table is full\n"); + printk(BIOS_WARNING, "TPM LOG: log table is full\n"); return; } if (!name) { - printk(BIOS_WARNING, "TCPA: TCPA entry name not set\n"); + printk(BIOS_WARNING, "TPM LOG: entry name not set\n"); return; } - struct tcpa_entry *tce = &tclt->entries[tclt->num_entries++]; - strncpy(tce->name, name, TCPA_PCR_HASH_NAME - 1); + struct tpm_cb_log_entry *tce = &tclt->entries[tclt->num_entries++]; + strncpy(tce->name, name, TPM_CB_LOG_PCR_HASH_NAME - 1); tce->pcr = pcr; - if (digest_len > TCPA_DIGEST_MAX_LENGTH) { - printk(BIOS_WARNING, "TCPA: PCR digest too long for TCPA log entry\n"); + if (digest_len > TPM_CB_LOG_DIGEST_MAX_LENGTH) { + printk(BIOS_WARNING, "TPM LOG: PCR digest too long for log entry\n"); return; } strncpy(tce->digest_type, - vb2_get_hash_algorithm_name(digest_algo), - TCPA_PCR_HASH_LEN - 1); + vb2_get_hash_algorithm_name(digest_algo), + TPM_CB_LOG_PCR_HASH_LEN - 1); tce->digest_length = digest_len; memcpy(tce->digest, digest, tce->digest_length); } -void tcpa_preram_log_clear(void) +void tpm_cb_preram_log_clear(void) { - printk(BIOS_INFO, "TCPA: Clearing coreboot TCPA log\n"); - struct tcpa_table *tclt = (struct tcpa_table *)_tpm_tcpa_log; - tclt->max_entries = MAX_TCPA_LOG_ENTRIES; + printk(BIOS_INFO, "TPM LOG: clearing preram log\n"); + struct tpm_cb_log_table *tclt = (struct tpm_cb_log_table *)_tpm_log; + tclt->max_entries = MAX_TPM_LOG_ENTRIES; tclt->num_entries = 0; } -#if !CONFIG(VBOOT_RETURN_FROM_VERSTAGE) -static void recover_tcpa_log(int is_recovery) +int tpm_cb_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name) { - struct tcpa_table *preram_log = (struct tcpa_table *)_tpm_tcpa_log; - struct tcpa_table *ram_log = 
NULL; - int i; + struct tpm_cb_log_table *tclt; + struct tpm_cb_log_entry *tce; + enum vb2_hash_algorithm algo; - if (preram_log->num_entries > MAX_PRERAM_TCPA_LOG_ENTRIES) { - printk(BIOS_WARNING, "TCPA: Pre-RAM TCPA log is too full, possible corruption\n"); - return; - } + tclt = tpm_log_init(); + if (!tclt) + return 1; - ram_log = tcpa_cbmem_init(); - if (!ram_log) { - printk(BIOS_WARNING, "TCPA: CBMEM not available something went wrong\n"); - return; + if (entry_idx < 0 || entry_idx >= tclt->num_entries) + return 1; + + tce = &tclt->entries[entry_idx]; + + *pcr = tce->pcr; + *digest_data = tce->digest; + *event_name = tce->name; + + *digest_algo = VB2_HASH_INVALID; + for (algo = VB2_HASH_INVALID; algo != VB2_HASH_ALG_COUNT; ++algo) { + if (strcmp(tce->digest_type, vb2_hash_names[algo]) == 0) { + *digest_algo = algo; + break; + } } + return 0; +} - for (i = 0; i < preram_log->num_entries; i++) { - struct tcpa_entry *tce = &ram_log->entries[ram_log->num_entries++]; - strncpy(tce->name, preram_log->entries[i].name, TCPA_PCR_HASH_NAME - 1); - tce->pcr = preram_log->entries[i].pcr; +uint16_t tpm_cb_log_get_size(const void *log_table) +{ + const struct tpm_cb_log_table *tclt = log_table; + return tclt->num_entries; +} - if (preram_log->entries[i].digest_length > TCPA_DIGEST_MAX_LENGTH) { - printk(BIOS_WARNING, "TCPA: PCR digest too long for TCPA log entry\n"); +void tpm_cb_log_copy_entries(const void *from, void *to) +{ + const struct tpm_cb_log_table *from_log = from; + struct tpm_cb_log_table *to_log = to; + int i; + + for (i = 0; i < from_log->num_entries; i++) { + struct tpm_cb_log_entry *tce = &to_log->entries[to_log->num_entries++]; + strncpy(tce->name, from_log->entries[i].name, TPM_CB_LOG_PCR_HASH_NAME - 1); + tce->pcr = from_log->entries[i].pcr; + + if (from_log->entries[i].digest_length > TPM_CB_LOG_DIGEST_MAX_LENGTH) { + printk(BIOS_WARNING, "TPM LOG: PCR digest too long for log entry\n"); return; } - strncpy(tce->digest_type, preram_log->entries[i].digest_type, TCPA_PCR_HASH_LEN - 1); - tce->digest_length = MIN(preram_log->entries[i].digest_length, TCPA_DIGEST_MAX_LENGTH); - memcpy(tce->digest, preram_log->entries[i].digest, tce->digest_length); + strncpy(tce->digest_type, from_log->entries[i].digest_type, + TPM_CB_LOG_PCR_HASH_LEN - 1); + tce->digest_length = MIN(from_log->entries[i].digest_length, + TPM_CB_LOG_DIGEST_MAX_LENGTH); + memcpy(tce->digest, from_log->entries[i].digest, tce->digest_length); } } -CBMEM_CREATION_HOOK(recover_tcpa_log); -#endif - -BOOT_STATE_INIT_ENTRY(BS_PAYLOAD_BOOT, BS_ON_ENTRY, tcpa_log_dump, NULL); diff --git a/src/security/tpm/tspi/logs.h b/src/security/tpm/tspi/logs.h new file mode 100644 index 00000000000..2d802f0bc5f --- /dev/null +++ b/src/security/tpm/tspi/logs.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LOGS_H_ +#define LOGS_H_ + +#include +#include + +/* coreboot-specific TPM log format */ + +void *tpm_cb_log_init(void); +void *tpm_cb_log_cbmem_init(void); +void tpm_cb_preram_log_clear(void); +uint16_t tpm_cb_log_get_size(const void *log_table); +void tpm_cb_log_copy_entries(const void *from, void *to); +int tpm_cb_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); +void tpm_cb_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); +void tpm_cb_log_dump(void); + +/* TPM 1.2 log format */ + +void *tpm1_log_init(void); +void 
*tpm1_log_cbmem_init(void); +void tpm1_preram_log_clear(void); +uint16_t tpm1_log_get_size(const void *log_table); +void tpm1_log_copy_entries(const void *from, void *to); +int tpm1_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); +void tpm1_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); +void tpm1_log_dump(void); + +/* TPM 2.0 log format */ + +void *tpm2_log_init(void); +void *tpm2_log_cbmem_init(void); +void tpm2_preram_log_clear(void); +uint16_t tpm2_log_get_size(const void *log_table); +void tpm2_log_copy_entries(const void *from, void *to); +int tpm2_log_get(int entry_idx, int *pcr, const uint8_t **digest_data, + enum vb2_hash_algorithm *digest_algo, const char **event_name); +void tpm2_log_add_table_entry(const char *name, const uint32_t pcr, + enum vb2_hash_algorithm digest_algo, + const uint8_t *digest, + const size_t digest_len); +void tpm2_log_dump(void); + +#endif /* LOGS_H_ */ diff --git a/src/security/tpm/tspi/tspi.c b/src/security/tpm/tspi/tspi.c index 891f9153272..758b1a6558c 100644 --- a/src/security/tpm/tspi/tspi.c +++ b/src/security/tpm/tspi/tspi.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -233,7 +234,7 @@ uint32_t tpm_extend_pcr(int pcr, enum vb2_hash_algorithm digest_algo, } printk(BIOS_DEBUG, "TPM: Extending digest for `%s` into PCR %d\n", name, pcr); - result = tlcl_extend(pcr, digest, NULL); + result = tlcl_extend(pcr, digest, digest_algo); if (result != TPM_SUCCESS) { printk(BIOS_ERR, "TPM: Extending hash for `%s` into PCR %d failed.\n", name, pcr); @@ -242,8 +243,7 @@ uint32_t tpm_extend_pcr(int pcr, enum vb2_hash_algorithm digest_algo, } if (CONFIG(TPM_MEASURED_BOOT)) - tcpa_log_add_table_entry(name, pcr, digest_algo, - digest, digest_len); + tpm_log_add_table_entry(name, pcr, digest_algo, digest, digest_len); printk(BIOS_DEBUG, "TPM: Digest of `%s` to PCR %d %s\n", name, pcr, tspi_tpm_is_setup() ? "measured" : "logged"); @@ -252,6 +252,10 @@ uint32_t tpm_extend_pcr(int pcr, enum vb2_hash_algorithm digest_algo, } #if CONFIG(VBOOT_LIB) +#pragma GCC diagnostic push +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic ignored "-Wstack-usage=" +#endif uint32_t tpm_measure_region(const struct region_device *rdev, uint8_t pcr, const char *rname) { @@ -293,4 +297,5 @@ uint32_t tpm_measure_region(const struct region_device *rdev, uint8_t pcr, } return tpm_extend_pcr(pcr, TPM_MEASURE_ALGO, digest, digest_len, rname); } +#pragma GCC diagnostic pop #endif /* VBOOT_LIB */ diff --git a/src/security/tpm/tss.h b/src/security/tpm/tss.h index f68e1f47bab..a85503d7fdf 100644 --- a/src/security/tpm/tss.h +++ b/src/security/tpm/tss.h @@ -10,6 +10,7 @@ #define TSS_H_ #include +#include #include #include @@ -187,8 +188,8 @@ uint32_t tlcl_lock_nv_write(uint32_t index); /** * Perform a TPM_Extend. */ -uint32_t tlcl_extend(int pcr_num, const uint8_t *in_digest, - uint8_t *out_digest); +uint32_t tlcl_extend(int pcr_num, const uint8_t *digest_data, + enum vb2_hash_algorithm digest_algo); /** * Disable platform hierarchy. Specific to TPM2. The TPM error code is returned. 
diff --git a/src/security/tpm/tss/tcg-1.2/tss.c b/src/security/tpm/tss/tcg-1.2/tss.c index 52bc2722b25..6b79aabe871 100644 --- a/src/security/tpm/tss/tcg-1.2/tss.c +++ b/src/security/tpm/tss/tcg-1.2/tss.c @@ -331,25 +331,20 @@ uint32_t tlcl_set_global_lock(void) return tlcl_write(TPM_NV_INDEX0, NULL, 0); } -uint32_t tlcl_extend(int pcr_num, const uint8_t *in_digest, - uint8_t *out_digest) +uint32_t tlcl_extend(int pcr_num, const uint8_t *digest_data, + enum vb2_hash_algorithm digest_algo) { struct s_tpm_extend_cmd cmd; uint8_t response[kTpmResponseHeaderLength + kPcrDigestLength]; - uint32_t result; + + if (digest_algo != VB2_HASH_SHA1) + return TPM_E_INVALID_ARG; memcpy(&cmd, &tpm_extend_cmd, sizeof(cmd)); to_tpm_uint32(cmd.buffer + tpm_extend_cmd.pcrNum, pcr_num); - memcpy(cmd.buffer + cmd.inDigest, in_digest, kPcrDigestLength); - - result = tlcl_send_receive(cmd.buffer, response, sizeof(response)); - if (result != TPM_SUCCESS) - return result; + memcpy(cmd.buffer + cmd.inDigest, digest_data, kPcrDigestLength); - if (out_digest) - memcpy(out_digest, response + kTpmResponseHeaderLength, - kPcrDigestLength); - return result; + return tlcl_send_receive(cmd.buffer, response, sizeof(response)); } uint32_t tlcl_get_permissions(uint32_t index, uint32_t *permissions) diff --git a/src/security/tpm/tss/tcg-2.0/tss.c b/src/security/tpm/tss/tcg-2.0/tss.c index 8c9d12f7b08..06b0d6b8f92 100644 --- a/src/security/tpm/tss/tcg-2.0/tss.c +++ b/src/security/tpm/tss/tcg-2.0/tss.c @@ -118,21 +118,40 @@ uint32_t tlcl_assert_physical_presence(void) return TPM_SUCCESS; } -/* - * The caller will provide the digest in a 32 byte buffer, let's consider it a - * sha256 digest. - */ -uint32_t tlcl_extend(int pcr_num, const uint8_t *in_digest, - uint8_t *out_digest) +static TPM_ALG_ID tpmalg_from_vb2_hash(enum vb2_hash_algorithm hash_type) +{ + switch (hash_type) { + case VB2_HASH_SHA1: + return TPM_ALG_SHA1; + case VB2_HASH_SHA256: + return TPM_ALG_SHA256; + case VB2_HASH_SHA384: + return TPM_ALG_SHA384; + case VB2_HASH_SHA512: + return TPM_ALG_SHA512; + + default: + return TPM_ALG_ERROR; + } +} + +uint32_t tlcl_extend(int pcr_num, const uint8_t *digest_data, + enum vb2_hash_algorithm digest_type) { struct tpm2_pcr_extend_cmd pcr_ext_cmd; struct tpm2_response *response; + TPM_ALG_ID alg; + + alg = tpmalg_from_vb2_hash(digest_type); + if (alg == TPM_ALG_ERROR) + return TPM_E_HASH_ERROR; pcr_ext_cmd.pcrHandle = HR_PCR + pcr_num; pcr_ext_cmd.digests.count = 1; - pcr_ext_cmd.digests.digests[0].hashAlg = TPM_ALG_SHA256; - memcpy(pcr_ext_cmd.digests.digests[0].digest.sha256, in_digest, - sizeof(pcr_ext_cmd.digests.digests[0].digest.sha256)); + pcr_ext_cmd.digests.digests[0].hashAlg = alg; + /* Always copying to sha512 as it's the largest one */ + memcpy(pcr_ext_cmd.digests.digests[0].digest.sha512, digest_data, + vb2_digest_size(digest_type)); response = tpm_process_command(TPM2_PCR_Extend, &pcr_ext_cmd); diff --git a/src/soc/cavium/cn81xx/memlayout.ld b/src/soc/cavium/cn81xx/memlayout.ld index 0257b23ae3c..41f091415ec 100644 --- a/src/soc/cavium/cn81xx/memlayout.ld +++ b/src/soc/cavium/cn81xx/memlayout.ld @@ -23,7 +23,7 @@ SECTIONS BOOTBLOCK(BOOTROM_OFFSET + 0x20000, 56K) CBFS_MCACHE(BOOTROM_OFFSET + 0x2e000, 8K) VBOOT2_WORK(BOOTROM_OFFSET + 0x30000, 12K) - TPM_TCPA_LOG(BOOTROM_OFFSET + 0x33000, 2K) + TPM_LOG(BOOTROM_OFFSET + 0x33000, 2K) VERSTAGE(BOOTROM_OFFSET + 0x33800, 50K) ROMSTAGE(BOOTROM_OFFSET + 0x40000, 256K) diff --git a/src/soc/ibm/Kconfig b/src/soc/ibm/Kconfig new file mode 100644 index 
00000000000..40a09dc19f0
--- /dev/null
+++ b/src/soc/ibm/Kconfig
@@ -0,0 +1 @@
+source "src/soc/ibm/*/Kconfig"
diff --git a/src/soc/ibm/power9/Kconfig b/src/soc/ibm/power9/Kconfig
new file mode 100644
index 00000000000..6ab6aae4cc0
--- /dev/null
+++ b/src/soc/ibm/power9/Kconfig
@@ -0,0 +1,25 @@
+config CPU_IBM_POWER9
+	bool
+	help
+	  This SoC is a minimal template that works on the POWER9
+	  Talos II platform.
+
+if CPU_IBM_POWER9
+
+# Doing delays with monotonic timers is suboptimal on this platform
+config GENERIC_UDELAY
+	bool
+	default n
+
+config SKIP_INITIAL_ECC_SCRUB
+	bool "Skip initial ECC scrubbing"
+	default n
+	help
+	  Enabling this option will skip the initial ECC scrubbing that is
+	  normally done right after memory is written with an initial pattern.
+	  This is not the same as clearing memory after init; ECC memory always
+	  has to be filled with some value in order to make ECC correct.
+	  Selecting this option almost halves the time needed for memory
+	  initialization (istep 14.1).
+
+endif
diff --git a/src/soc/ibm/power9/Makefile.inc b/src/soc/ibm/power9/Makefile.inc
new file mode 100644
index 00000000000..ce6978f9cd9
--- /dev/null
+++ b/src/soc/ibm/power9/Makefile.inc
@@ -0,0 +1,83 @@
+## SPDX-License-Identifier: GPL-2.0-only
+
+ifeq ($(CONFIG_CPU_IBM_POWER9),y)
+
+bootblock-y += bootblock.c
+bootblock-y += rom_media.c
+romstage-y += rom_media.c
+romstage-y += romstage.c
+romstage-y += mvpd.c
+romstage-y += vpd.c
+romstage-y += powerbus.c
+romstage-y += istep_8_1.c
+romstage-y += istep_8_2.c
+romstage-y += istep_8_3.c
+romstage-y += istep_8_4.c
+romstage-y += istep_8_9.c
+romstage-y += istep_8_10.c
+romstage-y += istep_8_11.c
+romstage-y += istep_9_2.c
+romstage-y += istep_9_4.c
+romstage-y += istep_9_6.c
+romstage-y += istep_9_7.c
+romstage-y += istep_10_1.c
+romstage-y += istep_10_6.c
+romstage-y += istep_10_10.c
+romstage-y += istep_10_12.c
+romstage-y += istep_10_13.c
+romstage-y += istep_13_2.c
+romstage-y += istep_13_3.c
+romstage-y += istep_13_4.c
+romstage-y += istep_13_6.c
+romstage-y += istep_13_8.c
+romstage-y += istep_13_9.c
+romstage-y += istep_13_10.c
+romstage-y += istep_13_11.c
+romstage-y += istep_13_13.c
+romstage-y += istep_14_1.c
+romstage-y += istep_14_2.c
+romstage-y += istep_14_3.c
+romstage-y += istep_14_5.c
+romstage-y += i2c.c
+romstage-y += ccs.c
+romstage-y += mcbist.c
+romstage-y += timer.c
+romstage-y += fsi.c
+romstage-y += sbeio.c
+romstage-y += xscom.c
+ramstage-y += chip.c
+ramstage-y += homer.c
+ramstage-y += rom_media.c
+ramstage-y += timer.c
+ramstage-y += istep_18_11.c
+ramstage-y += istep_18_12.c
+ramstage-y += mvpd.c
+ramstage-y += vpd.c
+ramstage-y += tor.c
+ramstage-y += powerbus.c
+ramstage-y += pstates.c
+ramstage-y += xive.c
+ramstage-y += int_vectors.S
+ramstage-y += i2c.c
+ramstage-y += occ.c
+ramstage-y += fsi.c
+ramstage-y += xscom.c
+
+MB_DIR = src/mainboard/$(MAINBOARDDIR)
+ONECPU_DTB = 1-cpu.dtb
+TWOCPU_DTB = 2-cpus.dtb
+
+$(obj)/%.dtb: $(MB_DIR)/%.dts
+	dtc -I dts -O dtb -o $@ -i $(MB_DIR) $<
+
+$(obj)/$(TWOCPU_DTB): $(obj)/$(ONECPU_DTB)
+
+cbfs-files-y += $(ONECPU_DTB)
+$(ONECPU_DTB)-file := $(obj)/$(ONECPU_DTB)
+$(ONECPU_DTB)-type := raw
+
+cbfs-files-y += $(TWOCPU_DTB)
+$(TWOCPU_DTB)-file := $(obj)/$(TWOCPU_DTB)
+$(TWOCPU_DTB)-type := raw
+
+endif
diff --git a/src/soc/ibm/power9/bootblock.c b/src/soc/ibm/power9/bootblock.c
new file mode 100644
index 00000000000..86217285b9a
--- /dev/null
+++ b/src/soc/ibm/power9/bootblock.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include
+
+void bootblock_soc_early_init(void)
+{
+}
diff --git a/src/soc/ibm/power9/ccs.c b/src/soc/ibm/power9/ccs.c
new file mode 100644
index 00000000000..be7603db5ca
--- /dev/null
+++ b/src/soc/ibm/power9/ccs.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/* Debug is too slow here, hits timeouts */
+#define SKIP_SCOM_DEBUG
+
+#include
+#include
+#include
+
+#include "istep_13_scom.h"
+
+static inline uint64_t reverse_bits(uint64_t x)
+{
+	x = swab64(x);				// reverse bytes
+	x = (x & 0xF0F0F0F0F0F0F0F0) >> 4 |
+	    (x & 0x0F0F0F0F0F0F0F0F) << 4;	// reverse nibbles in bytes
+	x = (x & 0x1111111111111111) << 3 |
+	    (x & 0x2222222222222222) << 1 |
+	    (x & 0x4444444444444444) >> 1 |
+	    (x & 0x8888888888888888) >> 3;	// reverse bits in nibbles
+
+	return x;
+}
+
+/* 32 total, but last one is reserved for ccs_execute() */
+#define MAX_CCS_INSTR 31
+
+static unsigned instr;
+static uint64_t total_cycles;
+
+/* TODO: 4R, CID? */
+void ccs_add_instruction(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, uint8_t csn,
+			 uint8_t cke, uint16_t idles)
+{
+	/*
+	 * CCS_INST_ARR0_n layout (bits from MRS):
+	 * [0-13]  A0-A13
+	 * [14]    A17
+	 * [15]    BG1
+	 * [17-18] BA0-1
+	 * [19]    BG0
+	 * [21]    A16
+	 * [22]    A15
+	 * [23]    A14
+	 */
+	uint64_t mrs64 = (reverse_bits(mrs) & PPC_BITMASK(0, 13)) |	/* A0-A13 */
+			 PPC_PLACE(mrs >> 14, 23, 1) |	/* A14 */
+			 PPC_PLACE(mrs >> 15, 22, 1) |	/* A15 */
+			 PPC_PLACE(mrs >> 16, 21, 1) |	/* A16 */
+			 PPC_PLACE(mrs >> 17, 14, 1) |	/* A17 */
+			 PPC_PLACE(mrs >> 20, 17, 1) |	/* BA0 */
+			 PPC_PLACE(mrs >> 21, 18, 1) |	/* BA1 */
+			 PPC_PLACE(mrs >> 22, 19, 1) |	/* BG0 */
+			 PPC_PLACE(mrs >> 23, 15, 1);	/* BG1 */
+
+	/* MC01.MCBIST.CCS.CCS_INST_ARR0_n
+	   [all] 0
+	   // "ACT is high. It's a no-care in the spec but it seems to raise
+	   // questions when people look at the trace, so lets set it high."
+	   [20] CCS_INST_ARR0_00_CCS_DDR_ACTN = 1
+	   // "CKE is high Note: P8 set all 4 of these high - not sure if that's
+	   // correct. BRS"
+	   [24-27] CCS_INST_ARR0_00_CCS_DDR_CKE = cke
+	   [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = csn[0:1]
+	   [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = csn[2:3]
+	*/
+	write_scom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr,
+			       mrs64 | PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) |
+			       PPC_PLACE(cke, CCS_INST_ARR0_00_CCS_DDR_CKE,
+					 CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) |
+			       PPC_PLACE(csn >> 2, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1,
+					 CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) |
+			       PPC_PLACE(csn, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3,
+					 CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN));
+
+	/* MC01.MCBIST.CCS.CCS_INST_ARR1_n
+	   [all] 0
+	   [0-15]  CCS_INST_ARR1_00_IDLES = idles
+	   [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1
+	*/
+	write_scom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr,
+			       PPC_PLACE(idles, CCS_INST_ARR1_00_IDLES,
+					 CCS_INST_ARR1_00_IDLES_LEN) |
+			       PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD,
+					 CCS_INST_ARR1_00_GOTO_CMD_LEN));
+
+	/*
+	 * For the last instruction in the stream we could decrease it by one (final
+	 * DES added in ccs_execute()), but subtracting it would take longer than
+	 * that one cycle, so leave it.
+	 */
+	total_cycles += idles;
+	instr++;
+
+	if (instr >= MAX_CCS_INSTR) {
+		/* Maybe call ccs_execute() here? Would need mca_i... */
+		die("CCS instructions overflowed\n");
+	}
+}
+
+/* This isn't useful for anything but calibration steps, do we want it?
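+   (Kept for now: when a CCS run times out, the per-DP16 and PHY error
+   registers dumped below are the only clue as to which lane or rank failed,
+   so the dump is cheap insurance during calibration bring-up.)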
*/ +static void dump_cal_errors(uint8_t chip, chiplet_id_t id, int mca_i) +{ + /* Stop CCS so it won't mess up with the values */ + write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + + int dp; + + for (dp = 0; dp < 5; dp++) { + printk(RAM_SPEW, "DP %d\n", dp); + printk(RAM_SPEW, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_ERROR0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_STATUS0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS2\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR1\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); + } + + printk(RAM_SPEW, "%#16.16llx - APB_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - RC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - SEQ_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - WC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - PC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_ERROR\n", + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); + + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_STATUS\n", + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + + printk(RAM_SPEW, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", + mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); + + die("CCS execution timeout\n"); +} + +void ccs_execute(uint8_t chip, chiplet_id_t id, int mca_i) +{ + uint64_t poll_timeout; + long time; + + /* + * Polling parameters: initial delay is total_cycles/8, no delay between + * polls (coreboot API checks in a busy loop, but there is nothing else to + * do than wait), poll count is whatever it takes to get to total_cycles + * times 4 just in case (won't hurt unless calibration fails anyway). + */ + if (total_cycles < 8) + total_cycles = 8; + poll_timeout = nck_to_us(chip, (total_cycles * 7 * 4) / 8); + + write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_STOP)); + time = wait_us(1, !(read_scom_for_chiplet(chip, id, CCS_STATQ) & + PPC_BIT(CCS_STATQ_CCS_IP))); + + /* Is it always as described below (CKE, CSN) or is it a copy of last instr? */ + /* Final DES - CCS does not wait for IDLES for the last command before + * clearing IP (in progress) bit, so we must use one separate DES + * instruction at the end. 
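+	 * (DES is the DDR4 "device deselect" encoding: CKE held high with all
+	 * CS_n deasserted, which is exactly what the pattern below programs.)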
+	   MC01.MCBIST.CCS.CCS_INST_ARR0_n
+	   [all]   0
+	   [20]    CCS_INST_ARR0_00_CCS_DDR_ACTN = 1
+	   [24-27] CCS_INST_ARR0_00_CCS_DDR_CKE = 0xf
+	   [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = 3
+	   [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = 3
+	   MC01.MCBIST.CCS.CCS_INST_ARR1_n
+	   [all] 0
+	   [58]  CCS_INST_ARR1_00_CCS_END = 1
+	*/
+	write_scom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr,
+			       PPC_BIT(CCS_INST_ARR0_00_CCS_DDR_ACTN) |
+			       PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE,
+					 CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) |
+			       PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1,
+					 CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) |
+			       PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3,
+					 CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN));
+	write_scom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr,
+			       PPC_BIT(CCS_INST_ARR1_00_CCS_END));
+
+	/* Select ports
+	   MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ
+	   // Broadcast mode is not supported, set only one bit at a time
+	   [2-5] MCB_CNTLQ_MCBCNTL_PORT_SEL = bitmap with MCA index
+	*/
+	scom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~PPC_BITMASK(2, 5), PPC_BIT(2 + mca_i));
+
+	/* Let's go */
+	write_scom_for_chiplet(chip, id, CCS_CNTLQ, PPC_BIT(CCS_CNTLQ_CCS_START));
+
+	/* With microsecond resolution we are probably wasting a lot of time here. */
+	delay_nck(chip, total_cycles / 8);
+
+	/* timeout(50*10ns):
+	     if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ[0] (CCS_STATQ_CCS_IP) != 1: break
+	     delay(10ns)
+	   if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ != 0x40..00: report failure // only [1] set, others 0
+	*/
+	time = wait_us(poll_timeout, (udelay(1), !(read_scom_for_chiplet(chip, id, CCS_STATQ) &
+						   PPC_BIT(CCS_STATQ_CCS_IP))));
+
+	/* This isn't useful for anything but calibration steps, do we want it? */
+	if (!time)
+		dump_cal_errors(chip, id, mca_i);
+
+	printk(BIOS_DEBUG, "CCS took %lld us (%lld us timeout), %d instruction(s)\n",
+	       time + nck_to_us(chip, total_cycles / 8),
+	       poll_timeout + nck_to_us(chip, total_cycles / 8), instr);
+
+	if (read_scom_for_chiplet(chip, id, CCS_STATQ) != PPC_BIT(CCS_STATQ_CCS_DONE))
+		die("(%#16.16llx) CCS execution error\n",
+		    read_scom_for_chiplet(chip, id, CCS_STATQ));
+
+	instr = 0;
+	total_cycles = 0;
+
+	/* LRDIMM only */
+	// cleanup_from_execute();
+}
+
+/*
+ * Constant to invert A3-A9, A11, A13, BA0-1, BG0-1. This also changes BG1 to 1,
+ * which automatically selects B-side. Note that A17 is not included here.
+ */
+static const mrs_cmd_t invert = 0xF02BF8;
+
+/*
+ * Procedure for sending MRS through CCS
+ *
+ * We need to remember two things here:
+ * - RDIMM has A-side and B-side, some address bits are inverted for B-side;
+ *   side is selected by DBG1 (when mirroring is enabled DBG0 is used for odd
+ *   ranks to select side, instead of DBG1)
+ * - odd ranks may or may not have mirrored lines, depending on SPD[136].
+ *
+ * Because of those two reasons we cannot simply repeat MRS data for all sides
+ * and ranks, we have to do some juggling instead. Inverting is easy, we just
+ * have to XOR with an appropriate mask (special case for A17, it is not
+ * inverted if it isn't used). Mirroring will require manual bit manipulations.
+ *
+ * There are no signals that are mirrored but not inverted, which means that
+ * the order of those operations doesn't matter.
+ */
+/* TODO: add support for A17. For now it is blocked in initial SPD parsing.
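+ * (As a cross-check of the invert constant above: A3-A9 = 0x0003F8,
+ * A11 = 0x000800, A13 = 0x002000, BA0-1 = 0x300000, BG0-1 = 0xC00000;
+ * ORed together these give 0xF02BF8.)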
*/ +void ccs_add_mrs(uint8_t chip, chiplet_id_t id, mrs_cmd_t mrs, enum rank_selection ranks, + int mirror, uint16_t idles) +{ + if (ranks & DIMM0_RANK0) { + /* DIMM 0, rank 0, side A */ + /* + * "Not sure if we can get tricky here and only delay after the b-side MR. + * The question is whether the delay is needed/assumed by the register or is + * purely a DRAM mandated delay. We know we can't go wrong having both + * delays but if we can ever confirm that we only need one we can fix this. + * BRS" + */ + ccs_add_instruction(chip, id, mrs, 0x7, 0xF, idles); + + /* DIMM 0, rank 0, side B - invert A3-A9, A11, A13, A17 (TODO), BA0-1, BG0-1 */ + ccs_add_instruction(chip, id, mrs ^ invert, 0x7, 0xF, idles); + } + + if (ranks & DIMM0_RANK1) { + /* DIMM 0, rank 1, side A, mirror if needed */ + if (mirror) + mrs = ddr4_mrs_mirror_pins(mrs); + + ccs_add_instruction(chip, id, mrs, 0xB, 0xF, idles); + + /* DIMM 0, rank 1, side B - MRS is already mirrored, just invert it */ + ccs_add_instruction(chip, id, mrs ^ invert, 0xB, 0xF, idles); + } + + if (ranks & DIMM1_RANK0) { + /* DIMM 1, rank 0, side A */ + ccs_add_instruction(chip, id, mrs, 0xD, 0xF, idles); + + /* DIMM 1, rank 0, side B - invert A3-A9, A11, A13, A17 (TODO), BA0-1, BG0-1 */ + ccs_add_instruction(chip, id, mrs ^ invert, 0xD, 0xF, idles); + } + + if (ranks & DIMM1_RANK1) { + /* DIMM 1, rank 1, side A, mirror if needed */ + if (mirror) + mrs = ddr4_mrs_mirror_pins(mrs); + + ccs_add_instruction(chip, id, mrs, 0xE, 0xF, idles); + + /* DIMM 1, rank 1, side B - MRS is already mirrored, just invert it */ + ccs_add_instruction(chip, id, mrs ^ invert, 0xE, 0xF, idles); + } +} + +void ccs_phy_hw_step(uint8_t chip, chiplet_id_t id, int mca_i, int rp, enum cal_config conf, + uint64_t step_cycles) +{ + /* MC01.MCBIST.CCS.CCS_INST_ARR0_n + [all] 0 + // "CKE is high Note: P8 set all 4 of these high - not sure if that's correct. BRS" + [24-27] CCS_INST_ARR0_00_CCS_DDR_CKE = 0xf + [32-33] CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 = 3 // Not used by the engine for calibration? + [36-37] CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 = 3 // Not used by the engine for calibration? 
+ [56-59] CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE = 0xc + */ + write_scom_for_chiplet(chip, id, CCS_INST_ARR0_00 + instr, + PPC_PLACE(0xF, CCS_INST_ARR0_00_CCS_DDR_CKE, + CCS_INST_ARR0_00_CCS_DDR_CKE_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_0_1, + CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN) | + PPC_PLACE(3, CCS_INST_ARR0_00_CCS_DDR_CSN_2_3, + CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN) | + PPC_PLACE(0xC, CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE, + CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN)); + + /* MC01.MCBIST.CCS.CCS_INST_ARR1_n + [all] 0 + [53-56] CCS_INST_ARR1_00_DDR_CAL_RANK = rp + [57] CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE = 1 + [59-63] CCS_INST_ARR1_00_GOTO_CMD = instr + 1 + */ + write_scom_for_chiplet(chip, id, CCS_INST_ARR1_00 + instr, + PPC_PLACE(rp, CCS_INST_ARR1_00_DDR_CAL_RANK, + CCS_INST_ARR1_00_DDR_CAL_RANK_LEN) | + PPC_BIT(CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE) | + PPC_PLACE(instr + 1, CCS_INST_ARR1_00_GOTO_CMD, + CCS_INST_ARR1_00_GOTO_CMD_LEN)); + + total_cycles += step_cycles; + instr++; + + /* Setup calibration config + IOM0.DDRPHY_PC_INIT_CAL_CONFIG0_P0 + [48-57] i_cal_config // cal_config is already encoded, don't shift + [58] ABORT_ON_CAL_ERROR = 0 + [60+rp] ENA_RANK_PAIR = 1 // So, rp must be [0-3] + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG0_P0, + ~(PPC_BITMASK(48, 58) | PPC_BITMASK(60, 63)), + conf | PPC_BIT(ENA_RANK_PAIR_MSB + rp)); + + ccs_execute(chip, id, mca_i); +} diff --git a/src/soc/ibm/power9/chip.c b/src/soc/ibm/power9/chip.c new file mode 100644 index 00000000000..f1c18fcafe2 --- /dev/null +++ b/src/soc/ibm/power9/chip.c @@ -0,0 +1,983 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // xzalloc +#include + + +#include "istep_13_scom.h" +#include "chip.h" +#include "fsi.h" +#include "pci.h" + +/* + * Chip ID, not sure why it's shifted (group id + chip id?). + * Might be specific to group pump mode. + */ +#define CHIP_ID(chip) ((chip) << 3) + +struct mem_map { + struct memranges mem; + struct memranges reserved; +}; + +/* Copy of data put together by the romstage */ +mcbist_data_t mem_data[MAX_CHIPS]; + +#define SIZE_MASK PPC_BITMASK(13,23) +#define SIZE_SHIFT (63 - 23) +#define BASE_MASK PPC_BITMASK(24,47) +#define BASE_SHIFT (63 - 47) + +/* Values in registers are in 4GB units, ram_resource_kb() expects kilobytes. */ +#define CONVERT_4GB_TO_KB(x) ((x) << 22) + +static inline unsigned long base_k(uint64_t reg) +{ + return CONVERT_4GB_TO_KB((reg & BASE_MASK) >> BASE_SHIFT); +} + +static inline unsigned long size_k(uint64_t reg) +{ + return CONVERT_4GB_TO_KB(((reg & SIZE_MASK) >> SIZE_SHIFT) + 1); +} + +static uint64_t nominal_freq[MAX_CHIPS]; + +/* + * These are various definitions of the page sizes and segment sizes supported + * by the MMU. Values are the same as dumped from original firmware, comments + * are copied from Hostboot for POWER8. Compared to POWER8, POWER9 doesn't have + * 1M entries in segment page sizes. 
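+ * The values are log2 of the supported sizes: page_sizes 0xC/0x10/0x18/0x22
+ * decode to 4K/64K/16M/16G, and segment_sizes 0x1C/0x28 decode to 256M/1T
+ * (0xFFFFFFFF appears to mark an unused slot).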
+ */ +static uint32_t page_sizes[4] = { 0xC, 0x10, 0x18, 0x22 }; +static uint32_t segment_sizes[4] = { 0x1C, 0x28, 0xFFFFFFFF, 0xFFFFFFFF }; +static uint32_t segment_page_sizes[] = +{ + 12, 0x0, 3, /* 4k SLB page size, L,LP = 0,x1, 3 page size encodings */ + 12, 0x0, /* 4K PTE page size, L,LP = 0,x0 */ + 16, 0x7, /* 64K PTE page size, L,LP = 1,x7 */ + 24, 0x38, /* 16M PTE page size, L,LP = 1,x38 */ + 16, 0x110, 2, /* 64K SLB page size, L,LP = 1,x1, 2 page size encodings*/ + 16, 0x1, /* 64K PTE page size, L,LP = 1,x1 */ + 24, 0x8, /* 16M PTE page size, L,LP = 1,x8 */ + 24, 0x100, 1, /* 16M SLB page size, L,LP = 1,x0, 1 page size encoding */ + 24, 0x0, /* 16M PTE page size, L,LP = 1,x0 */ + 34, 0x120, 1, /* 16G SLB page size, L,LP = 1,x2, 1 page size encoding */ + 34, 0x3 /* 16G PTE page size, L,LP = 1,x3 */ +}; +static uint32_t radix_AP_enc[4] = { 0x0C, 0xA0000010, 0x20000015, 0x4000001E }; + +/* + * Dumped from Hostboot, might need reviewing. Comment in + * skiboot/external/mambo/skiboot.tcl says that PAPR defines up to byte 63 (plus + * 2 bytes for header), but the newest version I found describes only up to byte + * number 23 (Revision 2.9_pre7 from June 11, 2020). + */ +static uint8_t pa_features[] = +{ + 64, 0, /* Header: size and format, respectively */ + 0xF6, 0x3F, 0xC7, 0xC0, 0x80, 0xD0, 0x80, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 +}; + +static void dt_assign_new_phandle(struct device_tree *tree, + struct device_tree_node *node) +{ + struct device_tree_property *prop; + uint32_t phandle; + + list_for_each(prop, node->properties, list_node) { + if (!strcmp("phandle", prop->prop.name)) { + /* Node already has phandle set, keep it */ + return; + } + } + + phandle = ++tree->max_phandle; + node->phandle = phandle; + dt_add_u32_prop(node, "phandle", phandle); +} + +static void dt_fill_all_phandles(struct device_tree *tree, + struct device_tree_node *node) +{ + struct device_tree_node *child; + + dt_assign_new_phandle(tree, node); + + list_for_each(child, node->children, list_node) + dt_fill_all_phandles(tree, child); +} + +static void fill_l3_node(struct device_tree *tree, + struct device_tree_node *node, uint32_t pir) +{ + dt_assign_new_phandle(tree, node); + dt_add_u32_prop(node, "reg", pir); + dt_add_string_prop(node, "device_type", "cache"); + dt_add_bin_prop(node, "cache-unified", NULL, 0); + dt_add_string_prop(node, "status", "okay"); + + /* POWER9 Processor User's Manual, 7.3 */ + dt_add_u32_prop(node, "d-cache-size", 10 * MiB); + dt_add_u32_prop(node, "d-cache-sets", 8); /* Per Hostboot. Why not 20? */ + dt_add_u32_prop(node, "i-cache-size", 10 * MiB); + dt_add_u32_prop(node, "i-cache-sets", 8); /* Per Hostboot. Why not 20? 
*/ +} + +static void fill_l2_node(struct device_tree *tree, + struct device_tree_node *node, uint32_t pir, + uint32_t next_lvl_phandle) +{ + dt_assign_new_phandle(tree, node); + /* This is not a typo, "l2-cache" points to the node of L3 cache */ + dt_add_u32_prop(node, "l2-cache", next_lvl_phandle); + dt_add_u32_prop(node, "reg", pir); + dt_add_string_prop(node, "device_type", "cache"); + dt_add_bin_prop(node, "cache-unified", NULL, 0); + dt_add_string_prop(node, "status", "okay"); + + /* POWER9 Processor User's Manual, 6.1 */ + dt_add_u32_prop(node, "d-cache-size", 512 * KiB); + dt_add_u32_prop(node, "d-cache-sets", 8); + dt_add_u32_prop(node, "i-cache-size", 512 * KiB); + dt_add_u32_prop(node, "i-cache-sets", 8); +} + +static void fill_cpu_node(struct device_tree *tree, + struct device_tree_node *node, uint8_t chip, uint32_t pir, + uint32_t next_lvl_phandle) +{ + /* Mandatory/standard properties */ + dt_assign_new_phandle(tree, node); + dt_add_string_prop(node, "device_type", "cpu"); + dt_add_bin_prop(node, "64-bit", NULL, 0); + dt_add_bin_prop(node, "32-64-bridge", NULL, 0); + dt_add_bin_prop(node, "graphics", NULL, 0); + dt_add_bin_prop(node, "general-purpose", NULL, 0); + dt_add_u32_prop(node, "l2-cache", next_lvl_phandle); + + /* + * The "status" property indicate whether the core is functional. It's + * a string containing "okay" for a good core or "bad" for a non-functional + * one. You can also just omit the non-functional ones from the DT + */ + dt_add_string_prop(node, "status", "okay"); + + /* + * This is the same value as the PIR of thread 0 of that core + * (ie same as the @xx part of the node name) + */ + dt_add_u32_prop(node, "reg", pir); + dt_add_u32_prop(node, "ibm,pir", pir); + + /* Chip ID of this core */ + dt_add_u32_prop(node, "ibm,chip-id", CHIP_ID(chip)); + + /* + * Interrupt server numbers (aka HW processor numbers) of all threads + * on that core. This should have 4 numbers and the first one should + * have the same value as the above ibm,pir and reg properties + */ + uint32_t int_srvrs[4] = {pir, pir+1, pir+2, pir+3}; + /* + * This will be added to actual FDT later, so local array on stack can't + * be used. + */ + void *int_srvrs_ptr = xmalloc(sizeof(int_srvrs)); + memcpy(int_srvrs_ptr, int_srvrs, sizeof(int_srvrs)); + dt_add_bin_prop(node, "ibm,ppc-interrupt-server#s", int_srvrs_ptr, + sizeof(int_srvrs)); + + /* + * This is the "architected processor version" as defined in PAPR. + */ + dt_add_u32_prop(node, "cpu-version", read_spr(SPR_PVR)); + + /* + * Page sizes and segment sizes supported by the MMU. + */ + dt_add_bin_prop(node, "ibm,processor-page-sizes", &page_sizes, + sizeof(page_sizes)); + dt_add_bin_prop(node, "ibm,processor-segment-sizes", &segment_sizes, + sizeof(segment_sizes)); + dt_add_bin_prop(node, "ibm,segment-page-sizes", &segment_page_sizes, + sizeof(segment_page_sizes)); + dt_add_bin_prop(node, "ibm,processor-radix-AP-encodings", &radix_AP_enc, + sizeof(radix_AP_enc)); + + dt_add_bin_prop(node, "ibm,pa-features", &pa_features, + sizeof(pa_features)); + + /* SLB size, use as-is */ + dt_add_u32_prop(node, "ibm,slb-size", 0x20); + + /* VSX support, use as-is */ + dt_add_u32_prop(node, "ibm,vmx", 0x2); + + /* DFP support, use as-is */ + dt_add_u32_prop(node, "ibm,dfp", 0x2); + + /* PURR/SPURR support, use as-is */ + dt_add_u32_prop(node, "ibm,purr", 0x1); + dt_add_u32_prop(node, "ibm,spurr", 0x1); + + /* + * Old-style core clock frequency. Only create this property if the + * frequency fits in a 32-bit number. Do not create it if it doesn't. 
+ */ + if ((nominal_freq[chip] >> 32) == 0) + dt_add_u32_prop(node, "clock-frequency", nominal_freq[chip]); + + /* + * Mandatory: 64-bit version of the core clock frequency, always create + * this property. + */ + dt_add_u64_prop(node, "ibm,extended-clock-frequency", nominal_freq[chip]); + + /* Timebase freq has a fixed value, always use that */ + dt_add_u32_prop(node, "timebase-frequency", 512 * MHz); + /* extended-timebase-frequency will be deprecated at some point */ + dt_add_u64_prop(node, "ibm,extended-timebase-frequency", 512 * MHz); + + /* Use as-is, values dumped from booted system */ + dt_add_u32_prop(node, "reservation-granule-size", 0x80); + dt_add_u64_prop(node, "performance-monitor", 1); + /* POWER9 Processor User's Manual, 2.3.1 */ + dt_add_u32_prop(node, "i-cache-size", 32 * KiB); + dt_add_u32_prop(node, "i-cache-sets", 8); + dt_add_u32_prop(node, "i-cache-block-size", 128); + dt_add_u32_prop(node, "i-cache-line-size", 128); // Makes Linux happier + dt_add_u32_prop(node, "i-tlb-size", 0); + dt_add_u32_prop(node, "i-tlb-sets", 0); + /* POWER9 Processor User's Manual, 2.3.5 */ + dt_add_u32_prop(node, "d-cache-size", 32 * KiB); + dt_add_u32_prop(node, "d-cache-sets", 8); + dt_add_u32_prop(node, "d-cache-block-size", 128); + dt_add_u32_prop(node, "d-cache-line-size", 128); // Makes Linux happier + /* POWER9 Processor User's Manual, 2.3.7 */ + dt_add_u32_prop(node, "d-tlb-size", 1024); + dt_add_u32_prop(node, "d-tlb-sets", 4); + dt_add_u32_prop(node, "tlb-size", 1024); + dt_add_u32_prop(node, "tlb-sets", 4); +} + +static void add_memory_node(struct device_tree *tree, uint8_t chip, uint64_t start, + uint64_t len) +{ + struct device_tree_node *node; + /* /memory@0123456789abcdef - 24 characters + null byte */ + char path[26] = {}; + + union {uint32_t u32[2]; uint64_t u64;} addr = { .u64 = start }; + union {uint32_t u32[2]; uint64_t u64;} size = { .u64 = len }; + + snprintf(path, sizeof(path), "/memory@%llx", addr.u64); + node = dt_find_node_by_path(tree, path, NULL, NULL, 1); + + dt_add_string_prop(node, "device_type", (char *)"memory"); + /* Use 2 cells each for address and size. This assumes BE. */ + dt_add_reg_prop(node, &addr.u64, &size.u64, 1, 2, 2); + + /* Don't know why the value needs to be shifted (group id + chip id?) */ + dt_add_u32_prop(node, "ibm,chip-id", chip << 3); +} + +static bool build_memory_map(const struct range_entry *r, void *arg) +{ + struct mem_map *map = arg; + + /* + * Kernel likes its available memory areas at least 1MB + * aligned, let's trim the regions such that unaligned padding + * is added to reserved memory. 
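+	 * Worked example (illustrative numbers, not from real hardware): a
+	 * BM_MEM_RAM range [0x0FF80000, 0x20040000) is trimmed to
+	 * [0x10000000, 0x20000000), and the 512 KiB head plus 256 KiB tail are
+	 * re-inserted as BM_MEM_RESERVED:
+	 *
+	 *   new_start = ALIGN_UP(0x0FF80000, 1 * MiB);   // 0x10000000
+	 *   new_end   = ALIGN_DOWN(0x20040000, 1 * MiB); // 0x20000000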
+	 */
+	if (range_entry_tag(r) == BM_MEM_RAM) {
+		uint64_t new_start = ALIGN_UP(range_entry_base(r), 1 * MiB);
+		uint64_t new_end = ALIGN_DOWN(range_entry_end(r), 1 * MiB);
+
+		if (new_start != range_entry_base(r))
+			memranges_insert(&map->reserved, range_entry_base(r),
+					 new_start - range_entry_base(r), BM_MEM_RESERVED);
+
+		if (new_start != new_end)
+			memranges_insert(&map->mem, new_start, new_end - new_start, BM_MEM_RAM);
+
+		if (new_end != range_entry_end(r))
+			memranges_insert(&map->reserved, new_end, range_entry_end(r) - new_end,
+					 BM_MEM_RESERVED);
+	} else {
+		memranges_insert(&map->reserved, range_entry_base(r), range_entry_size(r),
+				 BM_MEM_RESERVED);
+	}
+
+	return true;
+}
+
+static void add_reserved_memory_node(struct device_tree *tree, uint64_t start, uint64_t size)
+{
+	struct device_tree_node *node;
+	char path[45];
+
+	snprintf(path, sizeof(path), "/reserved-memory/coreboot@%llx", start);
+	node = dt_find_node_by_path(tree, path, NULL, NULL, 1);
+	/* Use 2 cells each for address and size */
+	dt_add_reg_prop(node, &start, &size, 1, 2, 2);
+}
+
+static void add_memory_nodes(struct device_tree *tree)
+{
+	struct mem_map map;
+	const struct range_entry *r;
+
+	memranges_init_empty(&map.mem, NULL, 0);
+	memranges_init_empty(&map.reserved, NULL, 0);
+
+	bootmem_walk_os_mem(build_memory_map, &map);
+
+	memranges_each_entry(r, &map.mem) {
+		/*
+		 * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP,
+		 * when chip ID is actually a group ID and "chip ID" field is zero.
+		 */
+		uint8_t chip = (range_entry_base(r) >> 45) & 0xF;
+
+		add_memory_node(tree, chip, range_entry_base(r), range_entry_size(r));
+	}
+
+	/* Create a properly initialized /reserved-memory/ node */
+	(void)dt_init_reserved_memory_node(tree);
+
+	memranges_each_entry(r, &map.reserved) {
+		struct device_tree_reserve_map_entry *entry = xzalloc(sizeof(*entry));
+		entry->start = range_entry_base(r);
+		entry->size = range_entry_size(r);
+
+		add_reserved_memory_node(tree, entry->start, entry->size);
+
+		list_insert_after(&entry->list_node, &tree->reserve_map);
+	}
+
+	memranges_teardown(&map.mem);
+	memranges_teardown(&map.reserved);
+}
+
+/* Checks whether the DT contains at least one root complex for the given chip */
+static bool dt_contains_pcie(struct device_tree *tree, uint8_t chip_id)
+{
+	int phb;
+
+	/* See comment before pec0_lane_cfg global variable in istep_10_10.c */
+	for (phb = 0; phb < MAX_PHB_PER_PROC; phb++) {
+		struct device_tree_node *node;
+
+		char path[40];
+		snprintf(path, sizeof(path), "/ibm,pcie-slots/root-complex@%d,%d", chip_id,
+			 phb);
+
+		node = dt_find_node_by_path(tree, path, NULL, NULL, 0);
+		if (node != NULL)
+			return true;
+	}
+
+	return false;
+}
+
+/* Checks input device tree for sanity and dies on failure */
+static void validate_dt(struct device_tree *tree, uint8_t chips)
+{
+	struct device_tree_node *xscom;
+	bool found_pcie;
+
+	/* Find xscom node, halt if not found */
+	/* TODO: is the address always the same?
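+	 * (0x603fc00000000 is the XSCOM base of the first chip; the check
+	 * below pairs it with 0x623fc00000000 for the second chip, so only
+	 * the chip-select bits of the address differ.)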
 */
+	xscom = dt_find_node_by_path(tree, "/xscom@603fc00000000", NULL, NULL, 0);
+	if (xscom == NULL)
+		die("No 'xscom' node for chip#0 in device tree!\n");
+
+	/* Check for xscom node of the second CPU (assuming group pump mode) */
+	xscom = dt_find_node_by_path(tree, "/xscom@623fc00000000", NULL, NULL, 0);
+	if (chips & 0x2) {
+		if (xscom == NULL)
+			die("No 'xscom' node for chip#1 in device tree!\n");
+	} else {
+		if (xscom != NULL)
+			die("Found 'xscom' node for missing chip#1 in device tree!\n");
+	}
+
+	if (!dt_contains_pcie(tree, /*chip_id=*/0))
+		die("No 'root-complex' nodes for chip#0 in device tree!\n");
+
+	/* Check for root complex nodes of the second CPU (assuming group pump mode) */
+	found_pcie = dt_contains_pcie(tree, /*chip_id=*/CHIP_ID(1));
+	if (chips & 0x2) {
+		if (!found_pcie)
+			die("No 'root-complex' node for chip#1 in device tree!\n");
+	} else {
+		if (found_pcie)
+			die("Found 'root-complex' node for missing chip#1 in device tree!\n");
+	}
+}
+
+/* Mind that this function creates nodes without chip ID, but some types of sensors need it */
+static void add_sensor_node(struct device_tree *tree, uint8_t number, uint8_t ipmi_type)
+{
+	char path[32];
+	struct device_tree_node *node;
+
+	snprintf(path, sizeof(path), "/bmc/sensors/sensor@%x", number);
+	node = dt_find_node_by_path(tree, path, NULL, NULL, 1);
+
+	dt_add_string_prop(node, "compatible", "ibm,ipmi-sensor");
+	dt_add_u32_prop(node, "reg", number);
+	dt_add_u32_prop(node, "ipmi-sensor-type", ipmi_type);
+}
+
+static void add_dimm_sensor_nodes(struct device_tree *tree, uint8_t chips)
+{
+	enum {
+		/* Base numbers for sensor ids */
+		DIMM_STATE_BASE = 0x0B,
+		DIMM_TEMP_BASE = 0x1B,
+
+		/* IPMI sensor types */
+		STATE_IPMI_SENSOR = 0x0C,
+		TEMP_IPMI_SENSOR = 0x01,
+	};
+
+	int dimm_i;
+	for (dimm_i = 0; dimm_i < MAX_CHIPS * DIMMS_PER_PROC; dimm_i++) {
+		int chip = dimm_i / DIMMS_PER_PROC;
+		int mcs = (dimm_i % DIMMS_PER_PROC) / DIMMS_PER_MCS;
+		int mca = (dimm_i % DIMMS_PER_MCS) / DIMMS_PER_MCA;
+		int dimm = dimm_i % DIMMS_PER_MCA;
+
+		if (!(chips & (1 << chip)))
+			continue;
+		if (!mem_data[chip].mcs[mcs].mca[mca].dimm[dimm].present)
+			continue;
+
+		add_sensor_node(tree, DIMM_STATE_BASE + dimm_i, STATE_IPMI_SENSOR);
+		add_sensor_node(tree, DIMM_TEMP_BASE + dimm_i, TEMP_IPMI_SENSOR);
+	}
+}
+
+/* Add coreboot tables and CBMEM information to the device tree */
+static void add_cb_fdt_data(struct device_tree *tree)
+{
+	u32 addr_cells = 1, size_cells = 1;
+	u64 reg_addrs[2], reg_sizes[2];
+	void *baseptr = NULL;
+	size_t size = 0;
+
+	static const char *firmware_path[] = {"firmware", NULL};
+	struct device_tree_node *firmware_node = dt_find_node(tree->root,
+		firmware_path, &addr_cells, &size_cells, 1);
+
+	/* Need to add 'ranges' to the intermediate node to make 'reg' work. */
+	dt_add_bin_prop(firmware_node, "ranges", NULL, 0);
+
+	static const char *coreboot_path[] = {"coreboot", NULL};
+	struct device_tree_node *coreboot_node = dt_find_node(firmware_node,
+		coreboot_path, &addr_cells, &size_cells, 1);
+
+	dt_add_string_prop(coreboot_node, "compatible", "coreboot");
+
+	/* Fetch CB tables from cbmem */
+	void *cbtable = cbmem_find(CBMEM_ID_CBTABLE);
+	if (!cbtable) {
+		printk(BIOS_WARNING, "FIT: No coreboot table found!\n");
+		return;
+	}
+
+	/* First 'reg' address range is the coreboot table. */
+	const struct lb_header *header = cbtable;
+	reg_addrs[0] = (uintptr_t)header;
+	reg_sizes[0] = header->header_bytes + header->table_bytes;
+
+	/* Second is the CBMEM area (which usually includes the coreboot table).
*/ + cbmem_get_region(&baseptr, &size); + if (!baseptr || size == 0) { + printk(BIOS_WARNING, "FIT: CBMEM pointer/size not found!\n"); + return; + } + + reg_addrs[1] = (uintptr_t)baseptr; + reg_sizes[1] = size; + + dt_add_reg_prop(coreboot_node, reg_addrs, reg_sizes, 2, addr_cells, size_cells); +} + +static void add_tpm_node(struct device_tree *tree) +{ +#if CONFIG(TALOS_2_INFINEON_TPM_1) + uint32_t xscom_base = 0xA0000 | (CONFIG_DRIVER_TPM_I2C_BUS << 12); + uint8_t port = (CONFIG_DRIVER_TPM_I2C_ADDR & 0x80 ? 1 : 0); + uint8_t addr = (CONFIG_DRIVER_TPM_I2C_ADDR & 0x7F); + + struct device_tree_node *tpm; + struct device_tree_node *sb; + char path[64]; + + /* TODO: is the XSCOM address always the same? */ + snprintf(path, sizeof(path), "/xscom@603fc00000000/i2cm@%x/i2c-bus@%x/tpm@%x", + xscom_base, port, addr); + + tpm = dt_find_node_by_path(tree, path, NULL, NULL, 1); + + dt_add_string_prop(tpm, "compatible", "infineon,slb9645tt"); + dt_add_u32_prop(tpm, "reg", addr); + +#if CONFIG(TPM_MEASURED_BOOT) + const struct cbmem_entry *evtlog; + + evtlog = cbmem_entry_find(CBMEM_ID_TPM2_TCG_LOG); + if (evtlog == NULL) + die("TPM events log is missing from CBMEM!"); + + dt_add_u64_prop(tpm, "ibm,sml-base", (uintptr_t)cbmem_entry_start(evtlog)); + dt_add_u32_prop(tpm, "ibm,sml-size", cbmem_entry_size(evtlog)); + + /* Not hard-coding into DTS-file in case will need to store key hash here */ + sb = dt_find_node_by_path(tree, "/ibm,secureboot", NULL, NULL, 1); + dt_add_string_prop(sb, "compatible", "ibm,secureboot-v1-softrom"); + dt_add_string_prop(sb, "hash-algo", "sha512"); + dt_add_u32_prop(sb, "trusted-enabled", 1); +#endif +#endif +} + +/* + * Device tree passed to Skiboot has to have phandles set either for all nodes + * or none at all. Because relative phandles are set for cpu->l2_cache->l3_cache + * chain, only first option is possible. + */ +static int dt_platform_update(struct device_tree *tree, uint8_t chips) +{ + struct device_tree_node *cpus; + + validate_dt(tree, chips); + + add_memory_nodes(tree); + add_dimm_sensor_nodes(tree, chips); + add_cb_fdt_data(tree); + add_tpm_node(tree); + + /* Find "cpus" node */ + cpus = dt_find_node_by_path(tree, "/cpus", NULL, NULL, 0); + if (cpus == NULL) + die("No 'cpus' node in device tree!\n"); + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + uint64_t cores; + + if (!(chips & (1 << chip))) + continue; + + cores = read_scom(chip, 0x0006C090); + assert(cores != 0); + + for (int core_id = 0; core_id < MAX_CORES_PER_CHIP; core_id++) { + if (!IS_EC_FUNCTIONAL(core_id, cores)) + continue; + + /* + * Not sure who is the original author of this comment, it is + * duplicated in Hostboot and Skiboot, and now also here. It + * lacks one important piece of information: PIR is PIR value + * of thread 0 of _first_ core in pair, both for L2 and L3. + */ + /* + * Cache nodes. Those are siblings of the processor nodes under /cpus + * and represent the various level of caches. + * + * The unit address (and reg property) is mostly free-for-all as long as + * there is no collisions. On HDAT machines we use the following + * encoding which I encourage you to also follow to limit surprises: + * + * L2 : (0x20 << 24) | PIR (PIR is PIR value of thread 0 of core) + * L3 : (0x30 << 24) | PIR + * L3.5 : (0x35 << 24) | PIR + * + * In addition, each cache points to the next level cache via its + * own "l2-cache" (or "next-level-cache") property, so the core node + * points to the L2, the L2 points to the L3 etc... 
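+			 * Worked example (illustrative): chip 1, core 5 gives
+			 * pir = 0x814, so the nodes become
+			 * /cpus/l2-cache@20000810 and /cpus/l3-cache@30000810;
+			 * cores 4 and 5 share them because (pir & ~7) masks
+			 * the PIR down to thread 0 of the even core of the pair.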
+ */ + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. + */ + uint32_t pir = PPC_PLACE(chip, 49, 4) | PPC_PLACE(core_id, 57, 5); + uint32_t l2_pir = (0x20 << 24) | (pir & ~7); + uint32_t l3_pir = (0x30 << 24) | (pir & ~7); + /* "/cpus/l?-cache@12345678" -> 23 characters + terminator */ + char l2_path[24]; + char l3_path[24]; + snprintf(l2_path, sizeof(l2_path), "/cpus/l%d-cache@%x", 2, l2_pir); + snprintf(l3_path, sizeof(l3_path), "/cpus/l%d-cache@%x", 3, l3_pir); + + /* + * 21 for "/cpus/PowerPC,POWER9@", 4 for PIR just in case (2nd CPU), + * 1 for terminator + */ + char cpu_path[26]; + snprintf(cpu_path, sizeof(cpu_path), "/cpus/PowerPC,POWER9@%x", pir); + + struct device_tree_node *l2_node = + dt_find_node_by_path(tree, l2_path, NULL, NULL, 1); + struct device_tree_node *l3_node = + dt_find_node_by_path(tree, l3_path, NULL, NULL, 1); + struct device_tree_node *cpu_node = + dt_find_node_by_path(tree, cpu_path, NULL, NULL, 1); + + /* + * Cache nodes may already be created if this is the second active + * core in a pair. If L3 node doesn't exist, L2 also doesn't - they + * are created at the same time, no need to test both. + */ + if (!l3_node->phandle) { + fill_l3_node(tree, l3_node, l3_pir); + fill_l2_node(tree, l2_node, l2_pir, l3_node->phandle); + } + + fill_cpu_node(tree, cpu_node, chip, pir, l2_node->phandle); + } + } + + dt_fill_all_phandles(tree, tree->root); + + return 0; +} + +static void rng_init(uint8_t chips) +{ + uint8_t chip; + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + /* + * RNG is allowed to run for M cycles (M = enough time to complete init; + * recommend 1 second of time). + * + * The only thing that ensures this is delay between istep 10.13 and now. + * 14.1 is the most time-consuming istep, its duration depends on the amount + * of installed RAM under the bigger of MCBISTs (i.e. sides of CPU on the + * board). This is more than enough in Hostboot. + * + * TODO: test if this is enough for coreboot with initial ECC scrubbing + * skipped, low amount of RAM and no debug output. + */ + /* NX.PBI.PBI_RNG.NX_RNG_CFG + * [0-9] FAIL_REG - abort if any of these bits is set + * [17] BIST_COMPLETE - should be 1 at this point + */ + uint64_t rng_status = read_scom(chip, 0x020110E0); + assert(rng_status & PPC_BIT(17)); + while (!((rng_status = read_scom(chip, 0x020110E0)) & PPC_BIT(17))); + + if (rng_status & PPC_BITMASK(0, 9)) + die("RNG initialization failed, NX_RNG_CFG = %#16.16llx\n", rng_status); + + /* + * Hostboot sets 'enable' bit again even though it was already set. + * Following that behavior just in case. + */ + write_scom(chip, 0x020110E0, rng_status | PPC_BIT(63)); + + /* + * This would be the place to set BARs, but it is done as part of quad SCOM + * restore. + */ + + /* Lock NX RNG configuration */ + scom_or(chip, 0x00010005, PPC_BIT(9)); + } +} + +static void activate_secondary_threads(uint8_t chip) +{ + enum { DOORBELL_MSG_TYPE = 0x0000000028000000 }; + + uint8_t i; + + /* Read OCC CCSR written by the code earlier */ + const uint64_t functional_cores = read_scom(chip, 0x0006C090); + + /* + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and "chip ID" field is zero. 
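+	 * (Worked example, illustrative: chip 1, core 5, thread 2 yields
+	 * msg = 0x28000000 | 0x800 | (5 << 2) | 2 = 0x28000816 for msgsnd.)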
+	 */
+	const uint64_t chip_msg = DOORBELL_MSG_TYPE | PPC_PLACE(chip, 49, 4);
+
+	/* Find and process the first core in a separate loop to slightly
+	 * simplify processing of all the other cores by removing a conditional */
+	for (i = 0; i < MAX_CORES_PER_CHIP; ++i) {
+		uint8_t thread;
+		uint64_t core_msg;
+
+		if (!IS_EC_FUNCTIONAL(i, functional_cores))
+			continue;
+
+		/* Message value for thread 0 of the current core */
+		core_msg = chip_msg | (i << 2);
+
+		/* Skip sending doorbell to the current thread of the current core */
+		for (thread = (chip == 0 ? 1 : 0); thread < 4; ++thread) {
+			register uint64_t msg = core_msg | thread;
+			asm volatile("msgsnd %0" :: "r" (msg));
+		}
+
+		break;
+	}
+
+	for (++i; i < MAX_CORES_PER_CHIP; ++i) {
+		uint8_t thread;
+		uint64_t core_msg;
+
+		if (!IS_EC_FUNCTIONAL(i, functional_cores))
+			continue;
+
+		/* Message value for thread 0 of the i-th core */
+		core_msg = chip_msg | (i << 2);
+
+		for (thread = 0; thread < 4; ++thread) {
+			register uint64_t msg = core_msg | thread;
+			asm volatile("msgsnd %0" :: "r" (msg));
+		}
+	}
+}
+
+static void enable_soc_dev(struct device *dev)
+{
+	int chip, idx = 0;
+	unsigned long reserved_size, homers_size, occ_area, top = 0;
+	uint8_t chips = fsi_get_present_chips();
+	uint8_t tod_mdmt;
+
+	for (chip = 0; chip < MAX_CHIPS; chip++) {
+		int mcs_i;
+
+		if (!(chips & (1 << chip)))
+			continue;
+
+		for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) {
+			uint64_t reg;
+			chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]];
+
+			/* These registers are undocumented, see istep 14.5. */
+			/* MCS_MCFGP */
+			reg = read_scom_for_chiplet(chip, nest, 0x0501080A);
+			if (reg & PPC_BIT(0)) {
+				uint64_t end = base_k(reg) + size_k(reg);
+				ram_resource_kb(dev, idx++, base_k(reg), size_k(reg));
+				if (end > top)
+					top = end;
+			}
+
+			/* MCS_MCFGPM */
+			reg = read_scom_for_chiplet(chip, nest, 0x0501080C);
+			if (reg & PPC_BIT(0)) {
+				uint64_t end = base_k(reg) + size_k(reg);
+				ram_resource_kb(dev, idx++, base_k(reg), size_k(reg));
+				if (end > top)
+					top = end;
+			}
+		}
+	}
+
+	/*
+	 * Reserve top 8M (OCC common area) + 4M (HOMER).
+	 *
+	 * 8M + (4M per CPU), hostboot always reserves 8M + 8 * 4M.
+	 */
+	homers_size = 4*1024 * __builtin_popcount(chips);
+	reserved_size = 8*1024 + homers_size;
+	top -= reserved_size;
+	reserved_ram_resource_kb(dev, idx++, top, reserved_size);
+
+	/*
+	 * Assumption: OCC boots successfully or coreboot die()s, booting in safe
+	 * mode without runtime power management is not supported.
+	 */
+	occ_area = top + homers_size;
+	build_homer_image((void *)(top * 1024), (void *)(occ_area * 1024), nominal_freq);
+
+	rng_init(chips);
+	istep_18_11(chips, &tod_mdmt);
+	istep_18_12(chips, tod_mdmt);
+
+	/*
+	 * We have to disable FSP special wakeups on all cores, but to do so
+	 * the core has to be powered up. It would be enough to start just one
+	 * thread on each core, but this makes hand-off to payload much more
+	 * complicated. To keep things simple, start each thread now. They will
+	 * stay in STOP 1 until platform_prog_run() tells them to start the
+	 * payload.
+	 */
+	for (chip = 0; chip < MAX_CHIPS; chip++) {
+		if (chips & (1 << chip))
+			activate_secondary_threads(chip);
+	}
+
+	/*
+	 * Give some time for cores to actually wake up. 3800 us is "estimated
+	 * target and subject to change" latency for wake-up from STOP 11
+	 * according to POWER9 Processor User's Manual, but this proved to be
+	 * not enough in tests. 5 ms worked each time, using twice as much to
+	 * be safe.
+ */ + mdelay(10); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + const uint64_t cores = read_scom(chip, 0x0006C090); + + for (uint8_t core = 0; core < MAX_CORES_PER_CHIP; ++core) { + if (!IS_EC_FUNCTIONAL(core, cores)) + continue; + + /* Enable auto special wakeup for functional cores. */ + write_scom_for_chiplet(chip, EP00_CHIPLET_ID + core/4, + 0x1001203B + 0x400 * ((core/2) % 2), + PPC_BIT(12 + (core % 2))); + + /* De-assert FSP special wakeup before activate_occ(). */ + scom_and_for_chiplet(chip, EC00_CHIPLET_ID + core, 0x200F010B, + ~PPC_BIT(0)); + } + } + + printk(BIOS_DEBUG, "Activating OCC...\n"); + activate_occ(chips, (void *)(top * 1024)); + printk(BIOS_DEBUG, "Done activating OCC\n"); +} + +static void *load_fdt(const char *dtb_file, uint8_t chips) +{ + void *fdt; + void *fdt_rom; + struct device_tree *tree; + + fdt_rom = cbfs_map(dtb_file, NULL); + if (fdt_rom == NULL) + die("Unable to load %s from CBFS\n", dtb_file); + + tree = fdt_unflatten(fdt_rom); + + dt_platform_update(tree, chips); + + fdt = malloc(dt_flat_size(tree)); + if (fdt == NULL) + die("Unable to allocate memory for flat device tree\n"); + + dt_flatten(tree, fdt); + return fdt; +} + +extern struct prog *__payload; + +void platform_prog_run(struct prog *prog) +{ + uint8_t chips = fsi_get_present_chips(); + + void *fdt; + const char *dtb_file; + + assert(chips == 0x01 || chips == 0x03); + + dtb_file = (chips == 0x01 ? "1-cpu.dtb" : "2-cpus.dtb"); + fdt = load_fdt(dtb_file, chips); + + /* See asm/head.S in skiboot where fdt_entry starts at offset 0x10 */ + prog_set_entry(prog, prog_start(prog) + 0x10, fdt); + + /* + * Clear SMS_ATN aka EVT_ATN in BT_CTRL - Block Transfer IPMI protocol + * + * BMC sends event telling us that HIOMAP (access to flash, either real or + * emulated, through LPC) daemon has been started. This sets the mentioned bit. + * Skiboot enables interrupts, but because those are triggered on 0->1 + * transition and bit is already set, they do not arrive. + * + * While we're at it, clear read and write pointers, in case circular buffer + * rolls over. + */ + if (ipmi_bt_clear(CONFIG_BMC_BT_BASE)) + die("ipmi_bt_clear() has failed.\n"); + + /* + * Now that the payload and its interrupt vectors are already loaded + * let secondary threads jump into payload. The order of jumping into + * Skiboot doesn't matter, as long as the thread that lands as first + * has FDT in %r3. 
+	 */
+	__payload = prog;
+	for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
+		if (chips & (1 << chip))
+			activate_secondary_threads(chip);
+	}
+}
+
+struct chip_operations soc_ibm_power9_ops = {
+	CHIP_NAME("POWER9")
+	.enable_dev = enable_soc_dev,
+};
+
+/* Restores global mem_data variable from cbmem */
+static void restore_mem_data(int is_recovery)
+{
+	const struct cbmem_entry *entry;
+	uint8_t *data;
+	int dimm_i;
+
+	(void)is_recovery; /* unused */
+
+	entry = cbmem_entry_find(CBMEM_ID_MEMINFO);
+	if (entry == NULL)
+		die("Failed to find mem_data entry in CBMEM in ramstage!");
+
+	/* Layout: mem_data itself then SPD data of each dimm which has it */
+	data = cbmem_entry_start(entry);
+
+	memcpy(&mem_data, data, sizeof(mem_data));
+	data += sizeof(mem_data);
+
+	for (dimm_i = 0; dimm_i < MAX_CHIPS * DIMMS_PER_PROC; dimm_i++) {
+		int chip = dimm_i / DIMMS_PER_PROC;
+		int mcs = (dimm_i % DIMMS_PER_PROC) / DIMMS_PER_MCS;
+		int mca = (dimm_i % DIMMS_PER_MCS) / DIMMS_PER_MCA;
+		int dimm = dimm_i % DIMMS_PER_MCA;
+
+		rdimm_data_t *dimm_data = &mem_data[chip].mcs[mcs].mca[mca].dimm[dimm];
+		if (dimm_data->spd == NULL)
+			continue;
+
+		/* We're not deleting the entry so this is valid */
+		dimm_data->spd = data;
+		data += CONFIG_DIMM_SPD_SIZE;
+	}
+}
+CBMEM_READY_HOOK(restore_mem_data);
diff --git a/src/soc/ibm/power9/chip.h b/src/soc/ibm/power9/chip.h
new file mode 100644
index 00000000000..89ab3a9733f
--- /dev/null
+++ b/src/soc/ibm/power9/chip.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SOC_IBM_POWER9_CHIP_H
+#define __SOC_IBM_POWER9_CHIP_H
+
+struct soc_ibm_power9_config {
+};
+
+void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_freq[]);
+
+#endif /* __SOC_IBM_POWER9_CHIP_H */
diff --git a/src/soc/ibm/power9/fsi.c b/src/soc/ibm/power9/fsi.c
new file mode 100644
index 00000000000..b19ec0850be
--- /dev/null
+++ b/src/soc/ibm/power9/fsi.c
@@ -0,0 +1,408 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/* FSI is used to read MVPD, logging of which takes too much time */
+#define SKIP_SCOM_DEBUG
+
+#include "fsi.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Some of the code relies on the fact that we're only interested in the current
+ * CPU (its MFSI) and the other CPU on port #1. Nothing is actually connected to
+ * any other ports, so using chip==0 for MFSI of CPU and chip==1 (port #1) and
+ * not being able to work with port #0 is OK. Getting rid of this requires
+ * passing both chip and port around (possibly encoded into a single variable),
+ * which is unnecessary otherwise.
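+ * In practice read_fsi(0, addr) hits the local master's registers directly,
+ * while read_fsi(1, addr) is rebased by fsi_op() onto MFSI port #1
+ * (addr |= MFSI_PORT_0 * 2) before going out on the OPB.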
+ */ + +enum { + MAX_SLAVE_PORTS = 8, + + FSI2OPB_OFFSET_0 = 0x00020000, // SCOM address for FSI interactions + + MFSI_CONTROL_REG = 0x003400, // MFSI Control Register + + OPB_REG_CMD = 0x0000, // Command Register + OPB_REG_STAT = 0x0001, // Status Register + OPB_REG_RES = 0x0004, // Reset Register + + /* FSI Control Registers */ + FSI_MMODE_000 = 0x000, + FSI_MDLYR_004 = 0x004, + FSI_MLEVP0_018 = 0x018, + FSI_MSENP0_018 = 0x018, + FSI_MCENP0_020 = 0x020, + FSI_MSIEP0_030 = 0x030, + FSI_MAEB_070 = 0x070, // MREFP0 + FSI_MRESP0_0D0 = 0x0D0, + FSI_MESRB0_1D0 = 0x1D0, + FSI_MECTRL_2E0 = 0x2E0, + + /* FSI2PIB Engine (SCOM) */ + FSI2PIB_ENGINE = 0x001000, + FSI2PIB_RESET = FSI2PIB_ENGINE | 0x18, // see CFAM 1006 + FSI2PIB_COMPMASK = FSI2PIB_ENGINE | 0x30, // see CFAM 100C + FSI2PIB_TRUEMASK = FSI2PIB_ENGINE | 0x34, // see CFAM 100D + + /* MFSI Ports (512KB for each of 8 slaves) */ + MFSI_PORT_0 = 0x080000, + + /* FSI Slave Register */ + SLAVE_REGS = 0x000800, + SMODE_00 = SLAVE_REGS | 0x00, + SLRES_34 = SLAVE_REGS | 0x34, + + /* Bitmasks for OPB status register */ + OPB_STAT_ANYERR = 0x8000000000000000, // 0 is Any error + OPB_STAT_ERR_OPB = 0x7FEC000000000000, // 1:10,12:13 are OPB errors + OPB_STAT_ERRACK = 0x0010000000000000, // 11 is OPB errAck + OPB_STAT_READ_VALID = 0x0002000000000000, // 14 is the Valid Read bit + OPB_STAT_BUSY = 0x0001000000000000, // 15 is the Busy bit + OPB_STAT_ERR_CMFSI = 0x0000FC0000000000, // 16:21 are cMFSI errors + OPB_STAT_ERR_MFSI = 0x000000FC00000000, // 24:29 are MFSI errors + + OPB_STAT_NON_MFSI_ERR = (OPB_STAT_ERR_OPB | + OPB_STAT_ERRACK | + OPB_STAT_ANYERR), + OPB_STAT_ERR_ANY = (OPB_STAT_NON_MFSI_ERR | + OPB_STAT_ERR_CMFSI | + OPB_STAT_ERR_MFSI), +}; + +void fsi_reset_pib2opb(uint8_t chip) +{ + write_scom(chip, FSI2OPB_OFFSET_0 | OPB_REG_RES, 0x8000000000000000); + write_scom(chip, FSI2OPB_OFFSET_0 | OPB_REG_STAT, 0x8000000000000000); +} + +static void cleanup_port_maeb_error(uint8_t port) +{ + /* See comment at the top of the file */ + const uint8_t master_chip = 0; + const uint8_t slave_chip = port; + + uint32_t compmask; + uint32_t truemask; + + /* + * Reset the bridge to clear up the residual errors + * 0=Bridge: General reset + */ + write_fsi(master_chip, MFSI_CONTROL_REG | FSI_MESRB0_1D0, 0x80000000); + + /* + * Perform error reset on Centaur FSI slave: write 0x4000000 to addr=834 + * + * Hostboot does this unconditionally, even though not all Power9 models + * have Centaur chips. Kept here just in case. 
+ */ + write_fsi(slave_chip, SLRES_34, 0x4000000); + + /* Need to save/restore the true/comp masks or the FSP will get annoyed */ + compmask = read_fsi(slave_chip, FSI2PIB_COMPMASK); + truemask = read_fsi(slave_chip, FSI2PIB_TRUEMASK); + + /* Then, write arbitrary data to 1018 (putcfam 1006) to reset any + * pending FSI2PIB errors */ + write_fsi(slave_chip, FSI2PIB_RESET, 0xFFFFFFFF); + + /* Restore the true/comp masks */ + write_fsi(slave_chip, FSI2PIB_COMPMASK, compmask); + write_fsi(slave_chip, FSI2PIB_TRUEMASK, truemask); +} + +static void init_fsi_port(uint8_t port) +{ + /* See comment at the top of the file */ + const uint8_t master_chip = 0; + const uint8_t slave_chip = port; + + uint8_t port_bit = (0x80 >> port); + + /* Write the port enable (enables clocks for FSI link) */ + write_fsi(master_chip, MFSI_CONTROL_REG | FSI_MSENP0_018, (uint32_t)port_bit << 24); + + /* Hostboot reads FSI_MESRB0_1D0 and does nothing to it, skipped here + * with the assumption that it has no effect */ + + /* + * Send the BREAK command to all slaves on this port (target slave0) + * part of FSI definition, write magic string into address zero. + */ + write_fsi(slave_chip, 0x00, 0xC0DE0000); + + if (read_fsi(master_chip, MFSI_CONTROL_REG | FSI_MAEB_070) != 0) { + /* Alternative is to pretend this slave doesn't exist */ + die("Detected MAEB error on FSI port #%d.\n", port); + } + + /* + * Setup the FSI slave to enable HW recovery, lbus ratio + * 2= Enable HW error recovery (bit 2) + * 6:7= Slave ID: 3 (default) + * 8:11= Echo delay: 0xF (default) + * 12:15= Send delay cycles: 0xF + * 20:23= Local bus ratio: 0x1 + */ + write_fsi(slave_chip, SMODE_00, 0x23FF0100); + + /* Wait for a little bit to be sure everything is done */ + udelay(1000); // 1ms + + /* + * Reset the port to clear up any previous error state (using idec reg + * as arbitrary address for lookups). Note, initial cfam reset should + * have cleaned up everything but this makes sure we're in a consistent + * state. + */ + cleanup_port_maeb_error(port); +} + +static void basic_master_init(void) +{ + const uint8_t chip = 0; + const uint32_t ctrl_reg = MFSI_CONTROL_REG; + + uint64_t tmp; + + /* Cleanup any initial error states */ + fsi_reset_pib2opb(chip); + + /* Ensure we don't have any errors before we even start */ + tmp = read_scom(0, FSI2OPB_OFFSET_0 | OPB_REG_STAT); + if (tmp & OPB_STAT_NON_MFSI_ERR) + die("Unclearable errors on MFSI initialization: 0x%016llx\n", tmp); + + /* + * Setup clock ratios and some error checking + * 1= Enable hardware error recovery + * 3= Enable parity checking + * 4:13= FSI clock ratio 0 is 1:1 + * 14:23= FSI clock ratio 1 is 4:1 + */ + write_fsi(chip, ctrl_reg | FSI_MMODE_000, 0x50040400); + + /* + * Setup error control reg to do nothing + * 16= Enable OPB_errAck [=1] + * 18= Freeze FSI port on FSI/OPB bridge error [=0] + */ + write_fsi(chip, ctrl_reg | FSI_MECTRL_2E0, 0x00008000); + + /* + * Note that this actually resets 4 ports twice rather than 8 ports + * once: OR makes 0x01XX equivalent to 0x00XX due to 0xD being + * 0b00001101 and ORing 0b00000001 to it changes nothing. Hostboot does + * it this way... 
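+	 * (Concretely: FSI_MRESP0_0D0 | (port * 4) for ports 4-7 yields
+	 * 0x0D0|0x10 through 0x0D0|0x1C, but bit 0x10 is already set in 0x0D0,
+	 * so those writes alias onto the registers of ports 0-3.)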
+ */ + for (uint8_t port = 0; port < MAX_SLAVE_PORTS; port++) { + /* + * 0= Port: General reset + * 1= Port: Error reset + * 2= General reset to all bridges + * 3= General reset to all port controllers + * 4= Reset all FSI Master control registers + * 5= Reset parity error source latch + */ + write_fsi(chip, ctrl_reg | FSI_MRESP0_0D0 | (port * 4), 0xFC000000); + } + + /* Wait a little bit to be sure the reset is done */ + udelay(1000); // 1ms delay + + /* + * Setup error control reg for regular use + * (somehow this is the same as "to do nothing", a bug in Hostboot?) + * 16= Enable OPB_errAck [=1] + * 18= Freeze FSI port on FSI/OPB bridge error [=0] + */ + write_fsi(chip, ctrl_reg | FSI_MECTRL_2E0, 0x00008000); + + /* + * Set MMODE reg to enable HW recovery, parity checking, setup clock + * ratio + * 1= Enable hardware error recovery + * 3= Enable parity checking + * 4:13= FSI clock ratio 0 is 1:1 + * 14:23= FSI clock ratio 1 is 4:1 + */ + tmp = 0x50040400; + /* + * Setup timeout so that: + * code(10ms) > masterproc (0.9ms) > remote fsi master (0.8ms) + */ + tmp |= 0x00000010; // 26:27= Timeout (b01) = 0.9ms + write_fsi(chip, ctrl_reg | FSI_MMODE_000, tmp); +} + +static void basic_slave_init(void) +{ + const uint8_t chip = 0; + const uint32_t ctrl_reg = MFSI_CONTROL_REG; + + uint64_t tmp; + + /* Clear FSI Slave Interrupt on ports 0-7 */ + write_fsi(chip, ctrl_reg | FSI_MSIEP0_030, 0x00000000); + + /* + * Set the delay rates: + * 0:3,8:11= Echo delay cycles is 15 + * 4:7,12:15= Send delay cycles is 15 + */ + write_fsi(chip, ctrl_reg | FSI_MDLYR_004, 0xFFFF0000); + + /* Enable the ports */ + write_fsi(chip, ctrl_reg | FSI_MSENP0_018, 0xFF000000); + + udelay(1000); // 1ms + + /* Clear the port enable */ + write_fsi(chip, ctrl_reg | FSI_MCENP0_020, 0xFF000000); + + /* + * Reset all bridges and ports (again?). + * Line above is from Hostboot. Actually this seems to reset only port + * 0 and with a bit different mask (0xFC000000 above). 
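+	 * 0xF0000000 keeps bits 0-3 (port 0 general/error reset and the two
+	 * "all bridges"/"all port controllers" resets) but drops bits 4-5
+	 * (master control register and parity latch resets) present in the
+	 * mask used above.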
+ */ + write_fsi(chip, ctrl_reg | FSI_MRESP0_0D0, 0xF0000000); + + /* Wait a little bit to be sure reset is done */ + udelay(1000); // 1ms + + /* Note: not enabling IPOLL because hotplug is not supported */ + + /* Turn off Legacy mode */ + tmp = read_fsi(chip, ctrl_reg | FSI_MMODE_000); + tmp &= ~0x00000040; // bit 25: clock/4 mode + write_fsi(chip, ctrl_reg | FSI_MMODE_000, tmp); +} + +void fsi_init(void) +{ + uint8_t chips; + + basic_master_init(); + basic_slave_init(); + + chips = fsi_get_present_chips(); + if (chips & 0x2) + init_fsi_port(/*port=*/1); + + fsi_i2c_init(chips); +} + +uint8_t fsi_get_present_chips(void) +{ + const uint8_t chip = 0; + const uint32_t ctrl_reg = MFSI_CONTROL_REG; + + uint8_t chips; + uint8_t present_slaves; + + present_slaves = (read_fsi(chip, ctrl_reg | FSI_MLEVP0_018) >> 24); + + /* First CPU is always there (it executes this code) */ + chips = 0x01; + /* Status of the second CPU (connected to port #1) */ + chips |= ((present_slaves & 0x40) >> 5); + + return chips & ((1 << CONFIG_MAX_CPUS) - 1); +} + +/* Polls OPB dying on error or timeout */ +static inline uint64_t poll_opb(uint8_t chip) +{ + enum { + MAX_WAIT_LOOPS = 1000, + TIMEOUT_STEP_US = 10, + }; + + const uint64_t stat_addr = FSI2OPB_OFFSET_0 | OPB_REG_STAT; + + int i; + uint64_t tmp; + + uint64_t err_mask; + + /* MFSI are irrelevant for access to the chip we're running on, only + * OPB bits are of interest */ + err_mask = OPB_STAT_NON_MFSI_ERR; + if (chip == 1) { + /* Second CPU is routed through MFSI of the first CPU */ + err_mask |= OPB_STAT_ERR_MFSI; + } + + /* Timeout after 10ms, check every 10us, supposedly there is hardware + * timeout after 1ms */ + tmp = read_scom(0, stat_addr); + for (i = 0; (tmp & OPB_STAT_BUSY) && !(tmp & err_mask) && i < MAX_WAIT_LOOPS; i++) { + udelay(TIMEOUT_STEP_US); + tmp = read_scom(0, stat_addr); + } + + if (tmp & err_mask) + die("Detected an error while polling OPB for chip #%d: 0x%016llx\n", chip, tmp); + + if (i == MAX_WAIT_LOOPS) { + die("Timed out while polling OPB for chip #%d, last response: 0x%016llx\n", + chip, tmp); + } + + return tmp; +} + +uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_t size) +{ + enum { + WRITE_NOT_READ = PPC_BIT(0), + SIZE_1B = PPC_PLACE(0, 1, 2), + SIZE_4B = PPC_PLACE(3, 1, 2), + }; + + uint64_t cmd; + uint64_t response; + + assert(size == 1 || size == 4); + + /* See comment at the top of the file */ + if (chip != 0) { + const uint8_t port = chip; + addr |= MFSI_PORT_0 * (port + 1); + } + + /* Make sure there are no other ops running before we start. The + * function will die on error, so not handling return value. */ + (void)poll_opb(chip); + + /* + * Register is mentioned in the docs, but contains mostly reserved + * fields. This is what can be decoded from code: + * [0] WRITE_NOT_READ = 1 for write, 0 for read + * [1-2] size = 3 // 0b00 - 1B; 0b01 - 2B; 0b11 - 4B + * [3-31] FSI address = addr // FSI spec says address is 23 bits + * [32-63] data to write = data // don't care for read + */ + cmd = (size == 4 ? SIZE_4B : SIZE_1B) | PPC_PLACE(addr, 3, 29) | data; + if (!is_read) + cmd |= WRITE_NOT_READ; + + write_scom(0, FSI2OPB_OFFSET_0 | OPB_REG_CMD, cmd); + + /* Poll for complete and get the data back. 
*/
+	response = poll_opb(chip);
+
+	/* A write operation is done if poll_opb hasn't died */
+	if (!is_read)
+		return 0;
+
+	if (!(response & OPB_STAT_READ_VALID))
+		die("FSI read has failed.\n");
+	return (response & 0xFFFFFFFF);
+}
diff --git a/src/soc/ibm/power9/fsi.h b/src/soc/ibm/power9/fsi.h
new file mode 100644
index 00000000000..9b872e04160
--- /dev/null
+++ b/src/soc/ibm/power9/fsi.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SOC_IBM_POWER9_FSI_H
+#define __SOC_IBM_POWER9_FSI_H
+
+/* The original header names were lost in transcription; <console/console.h>
+ * (for printk) and <types.h> (for the fixed-width and bool types) are what
+ * this file needs, so they are assumed here. */
+#include <console/console.h>
+#include <types.h>
+
+/* Base FSI address for registers of a FSI I2C master */
+#define I2C_FSI_MASTER_BASE_ADDR 0x01800
+
+void fsi_init(void);
+
+/* Returns mask of available CPU chips (either 0x01 or 0x03) */
+uint8_t fsi_get_present_chips(void);
+
+void fsi_i2c_init(uint8_t chips);
+
+void fsi_reset_pib2opb(uint8_t chip);
+
+/* This isn't meant to be used directly, see below for interface of this unit */
+uint32_t fsi_op(uint8_t chip, uint32_t addr, uint32_t data, bool is_read, uint8_t size);
+
+/* FSI-functions operate on byte addresses */
+
+static inline uint32_t read_fsi(uint8_t chip, uint32_t addr)
+{
+	uint32_t data = fsi_op(chip, addr, /*data=*/0, /*is_read=*/true, /*size=*/4);
+#ifdef DEBUG_FSI
+	printk(BIOS_DEBUG, "read_fsi(%d, 0x%08x) = 0x%08x\n", chip, addr, data);
+#endif
+	return data;
+}
+
+static inline void write_fsi(uint8_t chip, uint32_t addr, uint32_t data)
+{
+#ifdef DEBUG_FSI
+	printk(BIOS_DEBUG, "write_fsi(%d, 0x%08x) = 0x%08x\n", chip, addr, data);
+#endif
+	(void)fsi_op(chip, addr, data, /*is_read=*/false, /*size=*/4);
+}
+
+/* CFAM-functions are FSI-functions that operate on 4-byte word addresses */
+
+static inline uint32_t cfam_addr_to_fsi(uint32_t cfam)
+{
+	/*
+	 * Such masks allow overlapping of two components after address
+	 * translation (real engine mask is probably 0xF100), but let's be in
+	 * sync with Hostboot on this to play it safe.
+	 */
+	const uint32_t CFAM_ADDRESS_MASK = 0x1FF;
+	const uint32_t CFAM_ENGINE_OFFSET_MASK = 0xFE00;
+
+	/*
+	 * Address needs to be multiplied by 4 because CFAM register addresses
+	 * are word offsets but FSI addresses are byte offsets. Address
+	 * modification needs to preserve the engine's offset in the top byte.
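+	 *
+	 * Worked example: CFAM 0x1006 -> ((0x1006 & 0x1FF) * 4) | (0x1006 & 0xFE00)
+	 * = 0x18 | 0x1000 = 0x1018, which is exactly FSI2PIB_RESET ("see CFAM
+	 * 1006") in fsi.c.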
+ */ + return ((cfam & CFAM_ADDRESS_MASK) * 4) | (cfam & CFAM_ENGINE_OFFSET_MASK); +} + +static inline uint32_t read_cfam(uint8_t chip, uint32_t addr) +{ + uint32_t data = read_fsi(chip, cfam_addr_to_fsi(addr)); +#ifdef DEBUG_FSI + printk(BIOS_DEBUG, "read_cfam(%d, 0x%08x) = 0x%08x\n", chip, addr, data); +#endif + return data; +} + +static inline void write_cfam(uint8_t chip, uint32_t addr, uint32_t data) +{ + write_fsi(chip, cfam_addr_to_fsi(addr), data); +} + +/* Operations on FSI I2C registers */ + +static inline void write_fsi_i2c(uint8_t chip, uint8_t reg, uint32_t data, uint8_t size) +{ + uint32_t addr = I2C_FSI_MASTER_BASE_ADDR + reg * 4; + fsi_op(chip, addr, data, /*is_read=*/false, size); +} + +static inline uint32_t read_fsi_i2c(uint8_t chip, uint8_t reg, uint8_t size) +{ + uint32_t addr = I2C_FSI_MASTER_BASE_ADDR + reg * 4; + return fsi_op(chip, addr, /*data=*/0, /*is_read=*/true, size); +} + +#endif /* __SOC_IBM_POWER9_FSI_H */ diff --git a/src/soc/ibm/power9/homer.c b/src/soc/ibm/power9/homer.c new file mode 100644 index 00000000000..2ad897caa98 --- /dev/null +++ b/src/soc/ibm/power9/homer.c @@ -0,0 +1,2678 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // memset, memcpy +#include +#include +#include + +#include "chip.h" +#include "homer.h" +#include "fsi.h" +#include "ops.h" +#include "tor.h" +#include "xip.h" + +#define L2_EPS_DIVIDER 1 +#define L3_EPS_DIVIDER 1 + +#define EX_L2_RD_EPS_REG 0x10010810 +#define EX_L2_WR_EPS_REG 0x10010811 +#define EX_L3_RD_EPS_REG 0x10011829 +#define EX_L3_WR_EPS_REG 0x1001182A +#define EX_DRAM_REF_REG 0x1001180F +#define EX_0_NCU_DARN_BAR_REG 0x10011011 + +#define ODD_EVEN_EX_POS 0x00000400 + +#define MAX_EQ_SCOM_ENTRIES 31 +#define MAX_L2_SCOM_ENTRIES 16 +#define MAX_L3_SCOM_ENTRIES 16 + +#define QUAD_BIT_POS 24 + +/* Subsections of STOP image that contain SCOM entries */ +enum scom_section { + STOP_SECTION_CORE_SCOM, + STOP_SECTION_EQ_SCOM, + STOP_SECTION_L2, + STOP_SECTION_L3, +}; + +#define INIT_CONFIG_VALUE 0x8000000C09800000ull +#define QPMR_PROC_CONFIG_POS 0xBFC18 + +/* Undocumented */ +#define PU_OCB_OCI_OCCFLG2_CLEAR 0x0006C18B +#define PU_PBAXCFG_SCOM 0x00068021 + +/* Host configuration information passed from host to OCC */ +struct occ_host_config { + uint32_t version; // Version of this structure + + uint32_t nest_freq; // For computation of timebase frequency + + /* + * Interrupt type to the host: + * - 0x00000000 = FSI2HOST Mailbox + * - 0x00000001 = OCC interrupt line through PSIHB complex + */ + uint32_t interrupt_type; + + uint32_t is_fir_master; // If this OCC is the FIR master + + /* FIR collection configuration data needed by FIR Master OCC in the + * event of a checkstop */ + uint8_t firdataConfig[3072]; + + uint32_t is_smf_mode; // Whether SMF mode is enabled +}; + +/* Bit positions for various chiplets in host configuration vector */ +enum { + MCS_POS = 1, + MBA_POS = 9, // This is actually MCA_POS + MEM_BUF_POS = 17, + XBUS_POS = 25, + PHB_POS = 30, + CAPP_POS = 37, + OBUS_POS = 41, + ABUS_POS = 41, + NVLINK_POS = 45, + + OBUS_BRICK_0_POS = 0, + OBUS_BRICK_1_POS = 1, + OBUS_BRICK_2_POS = 2, + OBUS_BRICK_9_POS = 9, + OBUS_BRICK_10_POS = 10, + OBUS_BRICK_11_POS = 11, +}; + +struct ring_data { + void *rings_buf; + void *work_buf1; + void *work_buf2; + void *work_buf3; + uint32_t rings_buf_size; + uint32_t work_buf1_size; + uint32_t work_buf2_size; + uint32_t work_buf3_size; +}; + +struct 
cme_cmn_ring_list { + uint16_t ring[8]; // In order: EC_FUNC, EC_GPTR, EC_TIME, EC_MODE, EC_ABST, 3 reserved + uint8_t payload[]; +}; + +struct cme_inst_ring_list { + uint16_t ring[4]; // In order: EC_REPR0, EC_REPR1, 2 reserved + uint8_t payload[]; +}; + +struct sgpe_cmn_ring_list { + // See the list in layout_cmn_rings_for_sgpe() skipping EQ_ANA_BNDY, 3 reserved + uint16_t ring[64]; + uint8_t payload[]; +}; + +struct sgpe_inst_ring_list { + /* For each quad, in order: + EQ_REPR0, EX0_L3_REPR, EX1_L3_REPR, EX0_L2_REPR, EX1_L2_REPR, + EX0_L3_REFR_REPR, EX1_L3_REFR_REPR, EX0_L3_REFR_TIME, + EX1_L3_REFR_TIME, 3 reserved. */ + uint16_t ring[MAX_QUADS_PER_CHIP][12]; + + uint8_t payload[]; +}; + +struct scom_entry_t { + uint32_t hdr; + uint32_t address; + uint64_t data; +}; + +struct stop_cache_section_t { + struct scom_entry_t non_cache_area[MAX_EQ_SCOM_ENTRIES]; + struct scom_entry_t l2_cache_area[MAX_L2_SCOM_ENTRIES]; + struct scom_entry_t l3_cache_area[MAX_L3_SCOM_ENTRIES]; +}; + +enum scom_operation { + SCOM_APPEND, + SCOM_REPLACE +}; + +enum operation_type { + COPY, + FIND +}; + +static size_t copy_section(void *dst, struct xip_section *section, void *base, + uint8_t dd, enum operation_type op) +{ + if (!section->dd_support) { + if (op == COPY) { + memcpy(dst, base + section->offset, section->size); + } else { + *(void **)dst = base + section->offset; + } + return section->size; + } + + struct dd_container *cont = base + section->offset; + int i; + + assert(cont->magic == DD_CONTAINER_MAGIC); + for (i = 0; i < cont->num; i++) { + if (cont->blocks[i].dd == dd) { + if (op == COPY) { + memcpy(dst, (void *)cont + cont->blocks[i].offset, + cont->blocks[i].size); + } else { + *(void **)dst = (void *)cont + cont->blocks[i].offset; + } + return cont->blocks[i].size; + } + } + + die("XIP: Can't find container for DD=%x\n", dd); +} + +static void build_sgpe(struct homer_st *homer, struct xip_sgpe_header *sgpe, + uint8_t dd) +{ + struct sgpe_img_header *hdr; + size_t size; + + assert(sgpe->magic == XIP_MAGIC_SGPE); + + /* SGPE header */ + size = copy_section(&homer->qpmr.sgpe.header, &sgpe->qpmr, sgpe, dd, COPY); + assert(size <= sizeof(struct qpmr_header)); + + /* + * 0xFFF00000 (SRAM base) + 4k (IPC) + 60k (GPE0) + 64k (GPE1) + 50k (PGPE) + * + 2k (aux) + 2k (shared) = 0xFFF2D800 + * + * WARNING: I have no idea if this is constant or depends on SGPE version. 
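+	 * (In hex: 0x1000 + 0xF000 + 0x10000 + 0xC800 + 0x800 + 0x800 = 0x2D800,
+	 * matching the offset asserted below.)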
+ */ + assert(homer->qpmr.sgpe.header.sram_region_start == 0xFFF2D800); + assert(homer->qpmr.sgpe.header.sram_region_size == SGPE_SRAM_IMG_SIZE); + /* + * Apart from these the only filled fields (same values for all DDs) are: + * - magic ("XIP SGPE") + * - build_date + * - build_ver + * - img_offset (0x0a00, overwritten with the same value later by code) + * - img_len (0xbf64, ~49kB, overwritten with the same value later by code) + */ + + /* SGPE L1 bootloader */ + size = copy_section(&homer->qpmr.sgpe.l1_bootloader, &sgpe->l1_bootloader, + sgpe, dd, COPY); + homer->qpmr.sgpe.header.l1_offset = offsetof(struct qpmr_st, + sgpe.l1_bootloader); + assert(size <= GPE_BOOTLOADER_SIZE); + + /* SGPE L2 bootloader */ + size = copy_section(&homer->qpmr.sgpe.l2_bootloader, &sgpe->l2_bootloader, + sgpe, dd, COPY); + homer->qpmr.sgpe.header.l2_offset = offsetof(struct qpmr_st, + sgpe.l2_bootloader); + homer->qpmr.sgpe.header.l2_len = size; + assert(size <= GPE_BOOTLOADER_SIZE); + + /* SGPE HCODE */ + size = copy_section(&homer->qpmr.sgpe.sram_image, &sgpe->hcode, sgpe, dd, + COPY); + homer->qpmr.sgpe.header.img_offset = offsetof(struct qpmr_st, + sgpe.sram_image); + homer->qpmr.sgpe.header.img_len = size; + assert(size <= SGPE_SRAM_IMG_SIZE); + assert(size > (INT_VECTOR_SIZE + sizeof(struct sgpe_img_header))); + + /* Cache SCOM region */ + homer->qpmr.sgpe.header.scom_offset = + offsetof(struct qpmr_st, cache_scom_region); + homer->qpmr.sgpe.header.scom_len = CACHE_SCOM_REGION_SIZE; + + /* Update SRAM image header */ + hdr = (struct sgpe_img_header *) + &homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + hdr->ivpr_addr = homer->qpmr.sgpe.header.sram_region_start; + hdr->cmn_ring_occ_offset = homer->qpmr.sgpe.header.img_len; + hdr->cmn_ring_ovrd_occ_offset = 0; + hdr->spec_ring_occ_offset = 0; + hdr->scom_offset = 0; + /* Nest frequency divided by 64. */ + hdr->timebase_hz = (1866 * MHz) / 64; + + /* SGPE auxiliary functions */ + /* + * TODO: check if it is really enabled. This comes from hostboot attributes, + * but I don't know if/where those are set. + */ + hdr->aux_control = 1 << 24; + /* + * 0x80000000 (HOMER in OCI PBA memory space) + 1M (QPMR offset) + * + 512k (offset to aux) + * + * This probably is full address and not offset. + */ + hdr->aux_offset = 0x80000000 + offsetof(struct homer_st, qpmr.aux); + hdr->aux_len = CACHE_SCOM_AUX_SIZE; + homer->qpmr.sgpe.header.enable_24x7_ima = 1; + /* Here offset is relative to QPMR */ + homer->qpmr.sgpe.header.aux_offset = offsetof(struct qpmr_st, aux); + homer->qpmr.sgpe.header.aux_len = CACHE_SCOM_AUX_SIZE; +} + +static const uint32_t _SMF = 0x5F534D46; // "_SMF" + +static const uint32_t init_cpureg_template[] = { + 0x63000000, /* ori %r24, %r0, 0 */ /* |= spr, key for lookup */ + 0x7C000278, /* xor %r0, %r0, %r0 */ + 0x64000000, /* oris %r0, %r0, 0 */ /* |= val >> 48 */ + 0x60000000, /* ori %r0, %r0, 0 */ /* |= (val >> 32) & 0x0000FFFF */ + 0x780007C6, /* rldicr %r0, %r0, 32, 31 */ + 0x64000000, /* oris %r0, %r0, 0 */ /* |= (val >> 16) & 0x0000FFFF */ + 0x60000000, /* ori %r0, %r0, 0 */ /* |= val & 0x0000FFFF */ + 0x7C0003A6, /* mtspr 0, %r0 */ /* |= spr, encoded */ +}; + +/* + * These SPRs are not described in PowerISA 3.0B. + * MSR is not SPR, but self restore code treats it this way. 
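+ * (Note that 0x7D0 lies outside the 10-bit SPR number space, which ends at
+ * 0x3FF, so this pseudo-SPR cannot collide with a real one.)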
+ */
+#define SPR_USPRG0	0x1F0
+#define SPR_USPRG1	0x1F1
+#define SPR_URMOR	0x1F9
+#define SPR_SMFCTRL	0x1FF
+#define SPR_LDBAR	0x352
+#define SPR_HID		0x3F0
+#define SPR_MSR		0x7D0
+
+static void add_init_cpureg_entry(uint32_t *base, uint16_t spr, uint64_t val,
+				  int init)
+{
+	while ((*base != (init_cpureg_template[0] | spr)) && *base != BLR_OP)
+		base++;
+
+	/* Must change next instruction from attn to blr when adding new entry. */
+	if (*base == BLR_OP)
+		*(base + ARRAY_SIZE(init_cpureg_template)) = BLR_OP;
+
+	memcpy(base, init_cpureg_template, sizeof(init_cpureg_template));
+	base[0] |= spr;
+
+	if (init) {
+		base[1] = SKIP_SPR_REST_INST;
+	} else {
+		base[2] |= (val >> 48) & 0xFFFF;
+		base[3] |= (val >> 32) & 0xFFFF;
+		base[5] |= (val >> 16) & 0xFFFF;
+		base[6] |= val & 0xFFFF;
+	}
+
+	/* A few exceptions are handled differently. */
+	if (spr == SPR_MSR) {
+		base[7] = MR_R0_TO_R21_OP;
+	} else if (spr == SPR_HRMOR) {
+		base[7] = MR_R0_TO_R10_OP;
+	} else if (spr == SPR_URMOR) {
+		base[7] = MR_R0_TO_R9_OP;
+	} else {
+		base[7] |= ((spr & 0x1F) << 16) | ((spr & 0x3E0) << 6);
+	}
+}
+
+static const uint32_t init_save_self_template[] = {
+	0x60000000,	/* ori %r0, %r0, 0 */		/* |= i */
+	0x3BFF0020,	/* addi %r31, %r31, 0x20 */
+	0x60000000,	/* nop (ori %r0, %r0, 0) */
+};
+
+/* Honestly, I have no idea why saving uses a different key than restoring... */
+static void add_init_save_self_entry(uint32_t **ptr, int i)
+{
+	memcpy(*ptr, init_save_self_template, sizeof(init_save_self_template));
+	**ptr |= i;
+	*ptr += ARRAY_SIZE(init_save_self_template);
+}
+
+static const uint16_t thread_sprs[] = {
+	SPR_CIABR,
+	SPR_DAWR,
+	SPR_DAWRX,
+	SPR_HSPRG0,
+	SPR_LDBAR,
+	SPR_LPCR,
+	SPR_PSSCR,
+	SPR_MSR,
+	SPR_SMFCTRL,
+	SPR_USPRG0,
+	SPR_USPRG1,
+};
+
+static const uint16_t core_sprs[] = {
+	SPR_HRMOR,
+	SPR_HID,
+	SPR_HMEER,
+	SPR_PMCR,
+	SPR_PTCR,
+	SPR_URMOR,
+};
+
+static void build_self_restore(struct homer_st *homer,
+			       struct xip_restore_header *rest, uint8_t dd,
+			       uint64_t functional_cores)
+{
+	/* Assumptions: SMT4 only, SMF available but disabled. */
+	size_t size;
+	uint32_t *ptr;
+	const uint64_t hrmor = read_spr(SPR_HRMOR);
+	/* See cpu_winkle(), additionally set Hypervisor Doorbell Exit Enable */
+	const uint64_t lpcr =
+		(read_spr(SPR_LPCR)
+		 & ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE))
+		| (SPR_LPCR_HVICE | SPR_LPCR_HVEE | SPR_LPCR_HDEE);
+
+	const uint64_t msr = read_msr();
+	/* Clear en_attn for HID */
+	const uint64_t hid = read_spr(SPR_HID) & ~PPC_BIT(3);
+
+	/*
+	 * Data in XIP has its first 256 bytes zeroed, reserved for the header, so
+	 * even though this is the exe part of the self-restore region, we should
+	 * copy it to the header's address.
+	 */
+	size = copy_section(&homer->cpmr.header, &rest->self, rest, dd, COPY);
+	assert(size > sizeof(struct cpmr_header));
+
+	/* Now, overwrite header. */
+	size = copy_section(&homer->cpmr.header, &rest->cpmr, rest, dd, COPY);
+	assert(size <= sizeof(struct cpmr_header));
+
+	/*
+	 * According to a comment in p9_hcode_image_build.C this is for Nimbus >= DD22.
+	 * Earlier versions do things differently. For now die(), implement if
+	 * needed.
+	 *
+	 * If _SMF doesn't exist:
+	 * - fill memory from (CPMR + 8k + 256) for 192k with ATTN
+	 * - starting from the beginning of that region change instruction at every
+	 *   2k bytes into BLR
+	 *
+	 * If _SMF exists:
+	 * - fill CPMR.core_self_restore with ATTN instructions
+	 * - for every core:
+	 *   - change every thread's restore first instruction (at 0, 512, 1024,
+	 *     1536 bytes) to BLR
+	 *   - change core's restore first instruction (at 3k) to BLR
+	 */
+	if (*(uint32_t *)&homer->cpmr.exe[0x1300 - sizeof(struct cpmr_header)] !=
+	    _SMF)
+		die("No _SMF magic number in self restore region\n");
+
+	ptr = (uint32_t *)homer->cpmr.core_self_restore;
+	for (size = 0; size < (192 * KiB) / sizeof(uint32_t); size++) {
+		ptr[size] = ATTN_OP;
+	}
+
+	/*
+	 * This loop combines three functions from hostboot:
+	 * initSelfRestoreRegion(), initSelfSaveRestoreEntries() and
+	 * applyHcodeGenCpuRegs(). There is an inconsistency in whether they are
+	 * called for all cores or only for functional ones. As far as I can
+	 * tell, cores are woken based on the OCC CCSR register, so nonfunctional
+	 * ones should be skipped and don't need any self-restore code.
+	 */
+	for (int core = 0; core < MAX_CORES_PER_CHIP; core++) {
+		/*
+		 * TODO: test if we can skip both cpureg and save_self for nonfunctional
+		 * cores
+		 */
+		if (!IS_EC_FUNCTIONAL(core, functional_cores))
+			continue;
+
+		struct smf_core_self_restore *csr = &homer->cpmr.core_self_restore[core];
+		uint32_t *csa = csr->core_save_area;
+
+		for (int thread = 0; thread < 4; thread++) {
+			csr->thread_restore_area[thread][0] = BLR_OP;
+			uint32_t *tsa = csr->thread_save_area[thread];
+			*tsa++ = MFLR_R30_OP;
+
+			for (int i = 0; i < ARRAY_SIZE(thread_sprs); i++) {
+				/*
+				 * Hostboot uses a strange calculation for *_save_area keys.
+				 * I don't know if this is only used by hostboot and save/restore
+				 * code generated by it, or if something else (CME?) requires such
+				 * a format. For now leave it as hostboot does it, we can simplify
+				 * this later.
+				 *
+				 * CIABR through MSR:      key = 0..7
+				 * SMFCTRL through USPRG1: key = 1C..1E
+				 */
+				int tsa_key = i;
+				if (i > 7)
+					tsa_key += 0x14;
+
+				if (thread_sprs[i] == SPR_LPCR) {
+					add_init_cpureg_entry(csr->thread_restore_area[thread],
+							      thread_sprs[i], lpcr, 0);
+				} else if (thread_sprs[i] == SPR_MSR && thread == 0) {
+					/* One MSR per core, restored last so must (?) be here */
+					add_init_cpureg_entry(csr->thread_restore_area[thread],
+							      thread_sprs[i], msr, 0);
+				} else {
+					add_init_cpureg_entry(csr->thread_restore_area[thread],
+							      thread_sprs[i], 0, 1);
+				}
+				add_init_save_self_entry(&tsa, tsa_key);
+			}
+
+			*tsa++ = MTLR_R30_OP;
+			*tsa++ = BLR_OP;
+		}
+
+		csr->core_restore_area[0] = BLR_OP;
+		*csa++ = MFLR_R30_OP;
+		for (int i = 0; i < ARRAY_SIZE(core_sprs); i++) {
+			if (core_sprs[i] == SPR_HRMOR || core_sprs[i] == SPR_URMOR) {
+				add_init_cpureg_entry(csr->core_restore_area, core_sprs[i],
+						      hrmor, 0);
+			} else if (core_sprs[i] == SPR_HID) {
+				add_init_cpureg_entry(csr->core_restore_area, core_sprs[i],
+						      hid, 0);
+			} else {
+				add_init_cpureg_entry(csr->core_restore_area, core_sprs[i],
+						      0, 1);
+			}
+			/*
+			 * HID through PTCR: key = 0x15..0x18
+			 * HRMOR and URMOR are skipped.
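+			 * (i runs 1..4 for HID, HMEER, PMCR and PTCR, so
+			 * i + 0x14 yields exactly 0x15..0x18.)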
+			 */
+			if (core_sprs[i] != SPR_HRMOR && core_sprs[i] != SPR_URMOR)
+				add_init_save_self_entry(&csa, i + 0x14);
+		}
+
+		*csa++ = MTLR_R30_OP;
+		*csa++ = BLR_OP;
+	}
+
+	/* Populate CPMR header */
+	homer->cpmr.header.fused_mode_status = 0xAA;	// non-fused
+
+	/* For SMF enabled */
+#if 0
+	homer->cpmr.header.urmor_fix = 1;
+#endif
+
+	homer->cpmr.header.self_restore_ver = 2;
+	homer->cpmr.header.stop_api_ver = 2;
+
+	/*
+	 * WARNING: Hostboot filled a CME header field here with information on
+	 * whether cores are fused or not. However, at this point the CME image is
+	 * not yet loaded, so that field will get overwritten.
+	 */
+}
+
+static void build_cme(struct homer_st *homer, struct xip_cme_header *cme,
+		      uint8_t dd)
+{
+	size_t size;
+	struct cme_img_header *hdr;
+
+	size = copy_section(&homer->cpmr.cme_sram_region, &cme->hcode, cme, dd,
+			    COPY);
+	assert(size <= CME_SRAM_IMG_SIZE);
+	assert(size > (INT_VECTOR_SIZE + sizeof(struct cme_img_header)));
+
+	hdr = (struct cme_img_header *)
+	      &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE];
+
+	hdr->hcode_offset = 0;
+	hdr->hcode_len = size;
+
+	hdr->pstate_region_offset = 0;
+	hdr->pstate_region_len = 0;
+
+	hdr->cpmr_phy_addr = (uint64_t) homer | 2 * MiB;
+	/* With SMF disabled unsecure HOMER is the same as regular one */
+	hdr->unsec_cpmr_phy_addr = hdr->cpmr_phy_addr;
+
+	hdr->common_ring_offset = hdr->hcode_offset + hdr->hcode_len;
+	hdr->common_ring_len = 0;
+
+	hdr->scom_offset = 0;
+	hdr->scom_len = CORE_SCOM_RESTORE_SIZE / MAX_CORES_PER_CHIP / 2;
+
+	hdr->core_spec_ring_offset = 0;
+	hdr->max_spec_ring_len = 0;
+}
+
+static void build_pgpe(struct homer_st *homer, struct xip_pgpe_header *pgpe,
+		       uint8_t dd)
+{
+	size_t size;
+	struct pgpe_img_header *hdr;
+
+	/* PGPE header */
+	size = copy_section(&homer->ppmr.header, &pgpe->ppmr, pgpe, dd, COPY);
+	assert(size <= PPMR_HEADER_SIZE);
+	/*
+	 * 0xFFF00000 (SRAM base) + 4k (IPC) + 60k (GPE0) + 64k (GPE1) = 0xFFF20000
+	 *
+	 * WARNING: I have no idea if this is constant or depends on PGPE version.
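+	 * (In hex: 0x1000 + 0xF000 + 0x10000 = 0x20000, matching the assert
+	 * below.)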
+	 */
+	assert(homer->ppmr.header.sram_region_start == 0xFFF20000);
+	assert(homer->ppmr.header.sram_region_size == PGPE_SRAM_IMG_SIZE
+						      + PGPE_AUX_TASK_SIZE
+						      + PGPE_OCC_SHARED_SRAM_SIZE);
+
+	/* PGPE L1 bootloader */
+	size = copy_section(homer->ppmr.l1_bootloader, &pgpe->l1_bootloader, pgpe,
+			    dd, COPY);
+	assert(size <= GPE_BOOTLOADER_SIZE);
+	homer->ppmr.header.l1_offset = offsetof(struct ppmr_st, l1_bootloader);
+
+	/* PGPE L2 bootloader */
+	size = copy_section(homer->ppmr.l2_bootloader, &pgpe->l2_bootloader, pgpe,
+			    dd, COPY);
+	assert(size <= GPE_BOOTLOADER_SIZE);
+	homer->ppmr.header.l2_offset = offsetof(struct ppmr_st, l2_bootloader);
+	homer->ppmr.header.l2_len = size;
+
+	/* PGPE HCODE */
+	size = copy_section(homer->ppmr.pgpe_sram_img, &pgpe->hcode, pgpe, dd,
+			    COPY);
+	assert(size <= PGPE_SRAM_IMG_SIZE);
+	assert(size > (INT_VECTOR_SIZE + sizeof(struct pgpe_img_header)));
+	homer->ppmr.header.hcode_offset = offsetof(struct ppmr_st, pgpe_sram_img);
+	homer->ppmr.header.hcode_len = size;
+
+	/* PGPE auxiliary task */
+	size = copy_section(homer->ppmr.aux_task, &pgpe->aux_task, pgpe, dd, COPY);
+	assert(size <= PGPE_AUX_TASK_SIZE);
+	homer->ppmr.header.aux_task_offset = offsetof(struct ppmr_st, aux_task);
+	homer->ppmr.header.aux_task_len = size;
+
+	/* 0x80000000 = HOMER in OCI PBA memory space */
+	homer->ppmr.header.doptrace_offset =
+		0x80000000 + offsetof(struct homer_st, ppmr.doptrace);
+	homer->ppmr.header.doptrace_len = PGPE_DOPTRACE_SIZE;
+
+	/* Update SRAM image header */
+	hdr = (struct pgpe_img_header *)
+	      &homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE];
+
+	/* PGPE auxiliary functions */
+	hdr->aux_controls = 1 << 24;
+}
+
+static void pba_slave_setup_runtime_phase(uint8_t chip)
+{
+	enum {
+		OCI_MASTER_ID_GPE2 = 0x2,
+		OCI_MASTER_ID_GPE3 = 0x3,
+		OCI_MASTER_ID_ICU = 0x5,
+		OCI_MASTER_ID_PGPE = OCI_MASTER_ID_GPE2,
+		OCI_MASTER_ID_SGPE = OCI_MASTER_ID_GPE3,
+		OCI_MASTER_ID_MASK_ALL = 0x7,
+
+		PBA_READ_TTYPE_CL_RD_NC = 0x0,		/// Cache line read
+		PBA_WRITE_GATHER_TIMEOUT_2_PULSES = 0x4,
+		PBA_READ_PREFETCH_NONE = 0x1,		/// No prefetch
+		PBA_WRITE_TTYPE_DMA_PR_WR = 0x0,	/// DMA Partial Write
+
+		/* Values for PBA Mode register fields */
+		PBA_OCI_REGION = 0x2,
+		PBA_BCE_OCI_TRANSACTION_64_BYTES = 0x1,
+
+		PU_PBAMODE_SCOM = 0x00068000,
+		PU_PBASLVCTL0_SCOM = 0x00068004,
+		PU_PBASLVCTL1_SCOM = 0x00068005,
+		PU_PBASLVCTL2_SCOM = 0x00068006,
+	};
+
+	uint64_t data;
+
+	/*
+	 * Set the PBA_MODECTL register. It's not yet clear how PBA BCE
+	 * transaction size will affect performance - for now we go with the
+	 * largest size. The HTM marker space is enabled and configured. Slave
+	 * fairness is enabled. The setting 'dis_slvmatch_order' ensures that PBA
+	 * will correctly flush write data before allowing a read of the same
+	 * address from a different master on a different slave. The second write
+	 * buffer is enabled.
+	 */
+
+	data = 0;
+	data |= PPC_PLACE(PBA_OCI_REGION, 16, 2);			// pba_region
+	data |= PPC_PLACE(PBA_BCE_OCI_TRANSACTION_64_BYTES, 21, 2);	// bcde_ocitrans
+	data |= PPC_PLACE(PBA_BCE_OCI_TRANSACTION_64_BYTES, 23, 2);	// bcue_ocitrans
+	data |= PPC_BIT(8);						// en_marker_ack
+	data |= PPC_PLACE(0x7, 18, 3);					// oci_marker_space
+	data |= PPC_BIT(27);						// en_slv_fairness
+	data |= PPC_BIT(10);						// en_second_wrbuf
+
+	write_scom(chip, PU_PBAMODE_SCOM, data);
+
+	/*
+	 * Slave 0 (SGPE STOP). This is a read/write slave in the event that
+	 * the STOP function needs to write to memory.
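+	 *
+	 * (Reminder on the helpers used below: PPC_BIT(n) is a single-bit mask
+	 * in IBM/big-endian numbering, so PPC_BIT(0) == 0x8000000000000000,
+	 * and PPC_PLACE(val, pos, len) places val into the len-bit field
+	 * starting at bit pos in the same numbering.)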
+	 */
+
+	data = 0;
+	data |= PPC_BIT(0);						// enable
+	data |= PPC_PLACE(OCI_MASTER_ID_SGPE, 1, 3);			// mid_match_value
+	data |= PPC_PLACE(OCI_MASTER_ID_MASK_ALL, 5, 3);		// mid_care_mask
+	data |= PPC_PLACE(PBA_READ_TTYPE_CL_RD_NC, 15, 1);		// read_ttype
+	data |= PPC_PLACE(PBA_READ_PREFETCH_NONE, 16, 2);		// read_prefetch_ctl
+	data |= PPC_PLACE(PBA_WRITE_TTYPE_DMA_PR_WR, 8, 3);		// write_ttype
+	data |= PPC_BIT(20);						// buf_alloc_a
+	data |= PPC_BIT(21);						// buf_alloc_b
+	data |= PPC_BIT(22);						// buf_alloc_c
+	data |= PPC_BIT(19);						// buf_alloc_w
+
+	write_scom(chip, PU_PBASLVCTL0_SCOM, data);
+
+	/*
+	 * Slave 1 (GPE 1, PPC405 booting). This is a read/write slave. Write gathering is
+	 * allowed, but with the shortest possible timeout.
+	 */
+
+	data = 0;
+	data |= PPC_BIT(0);						// enable
+	data |= PPC_PLACE(OCI_MASTER_ID_ICU, 1, 3);			// mid_match_value
+	data |= PPC_PLACE(OCI_MASTER_ID_ICU, 5, 3);			// mid_care_mask
+	data |= PPC_PLACE(PBA_READ_TTYPE_CL_RD_NC, 15, 1);		// read_ttype
+	data |= PPC_PLACE(PBA_READ_PREFETCH_NONE, 16, 2);		// read_prefetch_ctl
+	data |= PPC_PLACE(PBA_WRITE_TTYPE_DMA_PR_WR, 8, 3);		// write_ttype
+	data |= PPC_PLACE(PBA_WRITE_GATHER_TIMEOUT_2_PULSES, 25, 3);	// wr_gather_timeout
+	data |= PPC_BIT(20);						// buf_alloc_a
+	data |= PPC_BIT(21);						// buf_alloc_b
+	data |= PPC_BIT(22);						// buf_alloc_c
+	data |= PPC_BIT(19);						// buf_alloc_w
+
+	write_scom(chip, PU_PBASLVCTL1_SCOM, data);
+
+	/*
+	 * Slave 2 (PGPE Boot, Pstates/WOF). This is a read/write slave. Write gathering is
+	 * allowed, but with the shortest possible timeout. This slave is
+	 * effectively disabled soon after IPL.
+	 */
+
+	data = 0;
+	data |= PPC_BIT(0);						// enable
+	data |= PPC_PLACE(OCI_MASTER_ID_PGPE, 1, 3);			// mid_match_value
+	data |= PPC_PLACE(OCI_MASTER_ID_MASK_ALL, 5, 3);		// mid_care_mask
+	data |= PPC_PLACE(PBA_READ_TTYPE_CL_RD_NC, 15, 1);		// read_ttype
+	data |= PPC_PLACE(PBA_READ_PREFETCH_NONE, 16, 2);		// read_prefetch_ctl
+	data |= PPC_PLACE(PBA_WRITE_TTYPE_DMA_PR_WR, 8, 3);		// write_ttype
+	data |= PPC_PLACE(PBA_WRITE_GATHER_TIMEOUT_2_PULSES, 25, 3);	// wr_gather_timeout
+	data |= PPC_BIT(20);						// buf_alloc_a
+	data |= PPC_BIT(21);						// buf_alloc_b
+	data |= PPC_BIT(22);						// buf_alloc_c
+	data |= PPC_BIT(19);						// buf_alloc_w
+
+	write_scom(chip, PU_PBASLVCTL2_SCOM, data);
+
+	/* Slave 3 is not modified by this function, because it is owned by SBE */
+}
+
+static void pba_reset(uint8_t chip)
+{
+	long time;
+	/* Stopping Block Copy Download Engine
+	*0x00068010			// undocumented, PU_BCDE_CTL_SCOM
+	  [all]	0
+	  [0]	1
+	*/
+	write_scom(chip, 0x00068010, PPC_BIT(0));
+
+	/* Stopping Block Copy Upload Engine
+	*0x00068015			// undocumented, PU_BCUE_CTL_SCOM
+	  [all]	0
+	  [0]	1
+	*/
+	write_scom(chip, 0x00068015, PPC_BIT(0));
+
+	/* Polling on, to verify that BCDE & BCUE are indeed stopped
+	timeout(256*256us):
+	*0x00068012			// undocumented, PU_BCDE_STAT_SCOM
+	  [0]	PBA_BC_STAT_RUNNING?
+	*0x00068017			// undocumented, PU_BCUE_STAT_SCOM
+	  [0]	PBA_BC_STAT_RUNNING?
+	if both bits are clear: break
+	*/
+	time = wait_us(256*256,
+		       (((read_scom(chip, 0x00068012) & PPC_BIT(0)) == 0) &&
+			((read_scom(chip, 0x00068017) & PPC_BIT(0)) == 0)));
+
+	if (!time)
+		die("Timed out waiting for stopping of BCDE/BCUE\n");
+
+	/* Clear the BCDE and BCUE stop bits */
+	write_scom(chip, 0x00068010, 0);
+	write_scom(chip, 0x00068015, 0);
+
+	/* Reset each slave and wait for completion
+	timeout(16*1us):
+	  // This write is inside the timeout loop.
I don't know if this will cause slaves to reset + // on each iteration or not, but this is how it is done in hostboot. + *0x00068001 // undocumented, PU_PBASLVRST_SCOM + [all] 0 + [0] 1 // reset? + [1-2] sl + if *0x00068001[4 + sl] == 0: break // 4 + sl: reset in progress? + if *0x00068001[8 + sl]: die() // 8 + sl: busy? + */ + for (int sl = 0; sl < 3; sl++) { // Fourth is owned by SBE, do not reset + time = wait_us(16, + (write_scom(chip, 0x00068001, PPC_BIT(0) | PPC_PLACE(sl, 1, 2)), + (read_scom(chip, 0x00068001) & PPC_BIT(4 + sl)) == 0)); + + if (!time || read_scom(chip, 0x00068001) & PPC_BIT(8 + sl)) + die("Timed out waiting for slave %d reset\n", sl); + } + + /* Reset PBA regs + *0x00068013 // undocumented, PU_BCDE_PBADR_SCOM + *0x00068014 // undocumented, PU_BCDE_OCIBAR_SCOM + *0x00068015 // undocumented, PU_BCUE_CTL_SCOM + *0x00068016 // undocumented, PU_BCUE_SET_SCOM + *0x00068018 // undocumented, PU_BCUE_PBADR_SCOM + *0x00068019 // undocumented, PU_BCUE_OCIBAR_SCOM + *0x00068026 // undocumented, PU_PBAXSHBR0_SCOM + *0x0006802A // undocumented, PU_PBAXSHBR1_SCOM + *0x00068027 // undocumented, PU_PBAXSHCS0_SCOM + *0x0006802B // undocumented, PU_PBAXSHCS1_SCOM + *0x00068004 // undocumented, PU_PBASLVCTL0_SCOM + *0x00068005 // undocumented, PU_PBASLVCTL1_SCOM + *0x00068006 // undocumented, PU_PBASLVCTL2_SCOM + BRIDGE.PBA.PBAFIR // 0x05012840 + BRIDGE.PBA.PBAERRRPT0 // 0x0501284C + [all] 0 + */ + write_scom(chip, 0x00068013, 0); + write_scom(chip, 0x00068014, 0); + write_scom(chip, 0x00068015, 0); + write_scom(chip, 0x00068016, 0); + write_scom(chip, 0x00068018, 0); + write_scom(chip, 0x00068019, 0); + write_scom(chip, 0x00068026, 0); + write_scom(chip, 0x0006802A, 0); + write_scom(chip, 0x00068027, 0); + write_scom(chip, 0x0006802B, 0); + write_scom(chip, 0x00068004, 0); + write_scom(chip, 0x00068005, 0); + write_scom(chip, 0x00068006, 0); + write_scom(chip, 0x05012840, 0); + write_scom(chip, 0x0501284C, 0); + + /* Perform non-zero reset operations + BRIDGE.PBA.PBACFG // 0x0501284B + [all] 0 + [38] PBACFG_CHSW_DIS_GROUP_SCOPE = 1 + */ + write_scom(chip, 0x0501284B, PPC_BIT(38)); + + /* + *0x00068021 // Undocumented, PU_PBAXCFG_SCOM + [all] 0 + [2] 1 // PBAXCFG_SND_RESET? + [3] 1 // PBAXCFG_RCV_RESET? 
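+	  (For reference: PPC_BIT(2) | PPC_BIT(3) == 0x3000000000000000.)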
+ */ + write_scom(chip, PU_PBAXCFG_SCOM, PPC_BIT(2) | PPC_BIT(3)); + + pba_slave_setup_runtime_phase(chip); +} + +static void stop_gpe_init(uint8_t chip, struct homer_st *homer) +{ + /* First check if SGPE_ACTIVE is not set in OCCFLAG register + if (TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1): // 0x0006C08A + TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG (CLEAR) // 0x0006C08B + [all] 0 + [8] 1 // SGPE_ACTIVE, bits in this register are defined by OCC firmware + */ + if (read_scom(chip, 0x0006C08A) & PPC_BIT(8)) { + printk(BIOS_WARNING, "SGPE_ACTIVE is set in OCCFLAG register, clearing it\n"); + write_scom(chip, 0x0006C08B, PPC_BIT(8)); + } + + /* + * Program SGPE IVPR + * ATTR_STOPGPE_BOOT_COPIER_IVPR_OFFSET is set in updateGpeAttributes() in 15.1 + TP.TPCHIP.OCC.OCI.GPE3.GPEIVPR // 0x00066001 + [all] 0 + [0-31] GPEIVPR_IVPR = ATTR_STOPGPE_BOOT_COPIER_IVPR_OFFSET + // Only bits [0-22] are actually defined, meaning IVPR must be aligned to 512B + */ + uint32_t ivpr = 0x80000000 + homer->qpmr.sgpe.header.l1_offset + + offsetof(struct homer_st, qpmr); + write_scom(chip, 0x00066001, PPC_PLACE(ivpr, 0, 32)); + + /* Program XCR to ACTIVATE SGPE + TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 + [all] 0 + [1-3] PPE_XIXCR_XCR = 6 // hard reset + TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 + [all] 0 + [1-3] PPE_XIXCR_XCR = 4 // toggle XSR[TRH] + TP.TPCHIP.OCC.OCI.GPE3.GPENXIXCR // 0x00066010 + [all] 0 + [1-3] PPE_XIXCR_XCR = 2 // resume + */ + write_scom(chip, 0x00066010, PPC_PLACE(6, 1, 3)); + write_scom(chip, 0x00066010, PPC_PLACE(4, 1, 3)); + write_scom(chip, 0x00066010, PPC_PLACE(2, 1, 3)); + + /* + * Now wait for SGPE to not be halted and for the HCode to indicate to be + * active. + * Warning: consts names in hostboot say timeouts are in ms, but code treats + * it as us. With debug output it takes much more than 20us between reads + * (~150us) and passes on 5th pass, which gives ~600us, +/- 150us on 4-core + * CPU (4 active CMEs). + timeout(125*20us): + if ((TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OCCFLG[8] == 1) && // 0x0006C08A + (TP.TPCHIP.OCC.OCI.GPE3.GPEXIXSR[0] == 0)): break // 0x00066021 + */ + long time = wait_us(125*20, ((read_scom(chip, 0x0006C08A) & PPC_BIT(8)) && + !(read_scom(chip, 0x00066021) & PPC_BIT(0)))); + + if (!time) + die("Timeout while waiting for SGPE activation\n"); +} + +static uint64_t get_available_cores(uint8_t chip, int *me) +{ + uint64_t ret = 0; + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { + uint64_t val = read_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0xF0040); + if (val & PPC_BIT(0)) { + printk(BIOS_SPEW, "Core %d is functional%s\n", i, + (val & PPC_BIT(1)) ? "" : " and running"); + + ret |= PPC_BIT(i); + if ((val & PPC_BIT(1)) == 0 && me != NULL) + *me = i; + } + } + return ret; +} + +/* TODO: similar is used in 13.3. Add missing parameters and make it public? 
*/ +static void psu_command(uint8_t flags, long time) +{ + /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG */ + if (read_scom(0, 0x000D0060) & PPC_BIT(0)) + die("MBOX to SBE busy, this should not happen\n"); + + if (read_scom(0, 0x000D0063) & PPC_BIT(0)) { + printk(BIOS_WARNING, "SBE to Host doorbell already active, clearing it\n"); + write_scom(0, 0x000D0064, ~PPC_BIT(0)); + } + + /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ + /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ + /* REQUIRE_RESPONSE, CLASS_CORE_STATE, CMD_CONTROL_DEADMAN_LOOP, flags */ + write_scom(0, 0x000D0050, 0x000001000000D101 | PPC_PLACE(flags, 24, 8)); + + /* TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG */ + write_scom(0, 0x000D0051, time); + + /* Ring the host->SBE doorbell */ + /* TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR */ + write_scom(0, 0x000D0062, PPC_BIT(0)); + + /* Wait for response */ + /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG */ + time = wait_ms(time, read_scom(0, 0x000D0063) & PPC_BIT(0)); + + if (!time) + die("Timed out while waiting for SBE response\n"); + + /* Clear SBE->host doorbell */ + /* TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND */ + write_scom(0, 0x000D0064, ~PPC_BIT(0)); +} + +#define DEADMAN_LOOP_START 0x0001 +#define DEADMAN_LOOP_STOP 0x0002 + +static void block_wakeup_int(int core, int state) +{ + // TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_GPMMR // 0x200F0100 + /* Depending on requested state we write to SCOM1 (CLEAR) or SCOM2 (OR). */ + uint64_t scom = state ? 0x200F0102 : 0x200F0101; + + write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0108, PPC_BIT(1)); + /* Register is documented, but its bits are reserved... */ + write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, scom, PPC_BIT(6)); + + write_scom_for_chiplet(0, EC00_CHIPLET_ID + core, 0x200F0107, PPC_BIT(1)); +} + +struct prog *__payload; + +static void secondary_entry(void) +{ + while (__payload == NULL) + asm volatile("stop" ::: "memory"); + + /* + * arch_prog_run(__payload); + * + * Doing it asm way to avoid using stack and overwriting %lr. + */ + register void *r3 asm ("r3") = __payload; + asm volatile("b arch_prog_run" : "+r"(r3)); +} + +/* + * Some time will be lost between entering and exiting STOP 15, but we don't + * have a way of calculating it. In theory we could read tick count from one of + * the auxiliary chips (SBE, SGPE), but accessing those and converting to the + * frequency of TB may take longer than sleep took. + */ +struct save_state { + uint64_t r1; /* stack */ + uint64_t r2; /* TOC */ + uint64_t msr; + uint64_t nia; + uint64_t tb; + uint64_t lr; + uint64_t bsp_pir; + void *sec_entry; +} sstate; + +static void cpu_winkle(void) +{ + uint64_t lpcr = read_spr(SPR_LPCR); + /* + * Clear {External, Decrementer, Other} Exit Enable and Hypervisor + * Decrementer Interrupt Conditionally Enable + */ + lpcr &= ~(SPR_LPCR_EEE | SPR_LPCR_DEE | SPR_LPCR_OEE | SPR_LPCR_HDICE); + /* + * Set Hypervisor Virtualization Interrupt Conditionally Enable + * and Hypervisor Virtualization Exit Enable + */ + lpcr |= SPR_LPCR_HVICE | SPR_LPCR_HVEE; + write_spr(SPR_LPCR, lpcr); + write_spr(SPR_PSSCR, 0x00000000003F00FF); + sstate.msr = read_msr(); + sstate.bsp_pir = read_spr(SPR_PIR); + + /* + * Not used by current thread, but will be used later by secondary + * threads, may as well set this now. Note that this is OPD address. 
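+	 * (With the ELFv1 ABI a function symbol refers to its Official
+	 * Procedure Descriptor, a triple of entry address, TOC and environment
+	 * pointer, rather than to its first instruction, hence the remark.)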
+ */ + sstate.sec_entry = secondary_entry; + + /* + * Cannot clobber: + * - r1 (stack) - reloaded from sstate + * - r2 (TOC aka PIC register) - reloaded from sstate + * - r3 (address of sstate) - storage duration limited to block below + */ + { + register void *r3 asm ("r3") = &sstate; + asm volatile("std 1, 0(%0)\n" + "std 2, 8(%0)\n" + "mflr 1\n" + "std 1, 40(%0)\n" + "lnia 1\n" + "__tmp_nia:" + "addi 1, 1, wakey - __tmp_nia\n" + "std 1, 24(%0)\n" + "mftb 1\n" + "std 1, 32(%0)\n" /* TB - save as late as possible */ + "sync\n" + "stop\n" + "wakey:\n" + : "+r"(r3) :: + "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", + "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", + "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", + "r28", "r29", "r30", "r31", "memory", "cc"); + } + + /* + * Hostboot restored two additional registers at this point: LPCR and PSSCR. + * + * LPCR was restored from core self-restore region, coreboot won't need to. + * + * PSSCR won't be used before next 'stop' instruction, which won't happen + * before new settings are written by the payload. + */ + + /* + * Timing facilities were lost, this includes DEC register. Because during + * self-restore Large Decrementer was disabled for few instructions, value + * of DEC is trimmed to 32 bits. Restore it to something bigger, otherwise + * interrupt would arrive in ~4 seconds. + */ + write_spr(SPR_DEC, SPR_DEC_LONGEST_TIME); +} + +static void istep_16_1(int this_core) +{ + report_istep(16, 1); + /* + * Wait time 10.5 sec, anything larger than 10737 ms can cause overflow on + * SBE side of the timeout calculations. + */ + long time = 10500; + + /* + * Debugging aid - 0xE40 is Hypervisor Emulation Assistance vector. It is + * taken when processor tries to execute unimplemented instruction. All 0s + * is (and will always be) such an instruction, meaning we will get here + * when processor jumps into uninitialized memory. If this instruction were + * also uninitialized, processor would hit another exception and again jump + * here. This time, however, it would overwrite original HSRR0 value with + * 0xE40. Instruction below is 'b .'. This way HSRR0 will retain its value + * - address of instruction which generated this exception. It can be then + * read with pdbg. + */ + *(volatile uint32_t *)0xE40 = 0x48000000; + + configure_xive(this_core); + + /* + * This will request SBE to wake us up after we enter STOP 15. Hopefully + * we will come back to the place where we were before. + */ + printk(BIOS_DEBUG, "XIVE configured, entering dead man loop\n"); + psu_command(DEADMAN_LOOP_START, time); + + block_wakeup_int(this_core, 1); + + cpu_winkle(); + + /* + * SBE sets this doorbell bit when it finishes its part of STOP 15 wakeup. + * No need to handle the timeout, if it happens, SBE will checkstop the + * system anyway. + */ + wait_us(time, read_scom(0, 0x000D0063) & PPC_BIT(2)); + + write_scom(0, 0x000D0064, ~PPC_BIT(2)); + + /* + * This tells SBE that we were properly awoken. Hostboot uses default + * timeout of 90 seconds, but if SBE doesn't answer in 10 there is no reason + * to believe it will answer at all. 
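+	 * ('time' is still the 10500 ms value used for DEADMAN_LOOP_START at
+	 * the top of this istep.)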
+	 */
+	psu_command(DEADMAN_LOOP_STOP, time);
+
+	// core_checkstop_helper_hwp(..., true)
+	//   p9_core_checkstop_handler(___, true)
+	// core_checkstop_helper_homer()
+	//   p9_stop_save_scom() and others
+}
+
+/* Loads OCC Image from PNOR into HOMER */
+static void load_occ_image_to_homer(struct homer_st *homer)
+{
+	struct mmap_helper_region_device mdev = {0};
+
+	/*
+	 * This will work as long as we don't call mmap(). mmap() calls
+	 * mem_pool_alloc() which doesn't check if mdev->pool is valid or at
+	 * least not NULL.
+	 */
+	mount_part_from_pnor("OCC", &mdev);
+
+	rdev_readat(&mdev.rdev, &homer->occ_host_area, 0, 1 * MiB);
+}
+
+/* Writes information about the host to be read by OCC */
+static void load_host_data_to_homer(uint8_t chip, struct homer_st *homer)
+{
+	enum {
+		OCC_HOST_DATA_VERSION = 0x00000090,
+		USE_PSIHB_COMPLEX = 0x00000001,
+	};
+
+	struct occ_host_config *config_data =
+		(void *)&homer->occ_host_area[HOMER_OFFSET_TO_OCC_HOST_DATA];
+
+	config_data->version = OCC_HOST_DATA_VERSION;
+	config_data->nest_freq = powerbus_cfg(chip)->fabric_freq;
+	config_data->interrupt_type = USE_PSIHB_COMPLEX;
+	config_data->is_fir_master = false;
+	config_data->is_smf_mode = false;
+}
+
+static void load_pm_complex(uint8_t chip, struct homer_st *homer)
+{
+	/*
+	 * Hostboot resets OCC here, but we haven't started it yet, so reset
+	 * shouldn't be necessary.
+	 */
+
+	/* OCC image is pre-loaded for us earlier */
+
+	load_host_data_to_homer(chip, homer);
+}
+
+static void pm_corequad_init(uint8_t chip, uint64_t cores)
+{
+	enum {
+		EQ_QPPM_QPMMR_CLEAR = 0x100F0104,
+		EQ_QPPM_ERR = 0x100F0121,
+		EQ_QPPM_ERRMSK = 0x100F0122,
+		C_CPPM_CPMMR_CLEAR = 0x200F0107,
+		C_CPPM_ERR = 0x200F0121,
+		C_CPPM_CSAR_CLEAR = 0x200F0139,
+		C_CPPM_ERRMSK = 0x200F0122,
+		DOORBELLS_COUNT = 4,
+	};
+
+	const uint64_t CME_DOORBELL_CLEAR[DOORBELLS_COUNT] = {
+		0x200F0191, 0x200F0195, 0x200F0199, 0x200F019D
+	};
+
+	/*
+	 * This is supposed to be stored by pm_corequad_reset() in ATTR_QUAD_PPM_ERRMASK
+	 * and ATTR_CORE_PPM_ERRMASK.
+	 *
+	 * If there was no reset, maybe there is no need to set it?
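+	 * (PPC_PLACE(err_mask, 0, 32) in the writes below simply shifts the
+	 * 32-bit mask into the upper half of the 64-bit SCOM value.)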
+ */ + uint32_t err_mask = 0; + + for (int quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + chiplet_id_t quad_chiplet = EP00_CHIPLET_ID + quad; + + if (!IS_EQ_FUNCTIONAL(quad, cores)) + continue; + + /* + * Setup the Quad PPM Mode Register + * Clear the following bits: + * 0 : Force FSAFE + * 1 - 11 : FSAFE + * 12 : Enable FSAFE on heartbeat loss + * 13 : Enable DROOP protect upon heartbeat loss + * 14 : Enable PFETs upon iVRMs dropout + * 18 - 19 : PCB interrupt + * 20,22,24,26: InterPPM Ivrm/Aclk/Vdata/Dpll enable + */ + write_scom_for_chiplet(chip, quad_chiplet, EQ_QPPM_QPMMR_CLEAR, + PPC_BIT(0) | + PPC_BITMASK(1, 11) | + PPC_BIT(12) | + PPC_BIT(13) | + PPC_BIT(14) | + PPC_BITMASK(18, 19) | + PPC_BIT(20) | + PPC_BIT(22) | + PPC_BIT(24) | + PPC_BIT(26)); + + /* Clear QUAD PPM ERROR Register */ + write_scom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERR, 0); + + /* Restore Quad PPM Error Mask */ + err_mask = 0xFFFFFF00; // from Hostboot's log + write_scom_for_chiplet(chip, quad_chiplet, EQ_QPPM_ERRMSK, + PPC_PLACE(err_mask, 0, 32)); + + for (int core = quad * 4; core < (quad + 1) * 4; ++core) { + chiplet_id_t core_chiplet = EC00_CHIPLET_ID + core; + + /* Clear the Core PPM CME DoorBells */ + for (int i = 0; i < DOORBELLS_COUNT; ++i) { + write_scom_for_chiplet(chip, core_chiplet, + CME_DOORBELL_CLEAR[i], + PPC_BITMASK(0, 63)); + } + + /* + * Setup Core PPM Mode register + * + * Clear the following bits: + * 1 : PPM Write control override + * 11 : Block interrupts + * 12 : PPM response for CME error + * 14 : enable pece + * 15 : cme spwu done dis + * + * Other bits are Init or Reset by STOP Hcode and, thus, not touched + * here: + * 0 : PPM Write control + * 9 : FUSED_CORE_MODE + * 10 : STOP_EXIT_TYPE_SEL + * 13 : WKUP_NOTIFY_SELECT + */ + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_CPMMR_CLEAR, + PPC_BIT(1) | + PPC_BIT(11) | + PPC_BIT(12) | + PPC_BIT(14) | + PPC_BIT(15)); + + /* Clear Core PPM Errors */ + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_ERR, 0); + + /* + * Clear Hcode Error Injection and other CSAR settings: + * 27 : FIT_HCODE_ERROR_INJECT + * 28 : ENABLE_PSTATE_REGISTRATION_INTERLOCK + * 29 : DISABLE_CME_NACK_ON_PROLONGED_DROOP + * 30 : PSTATE_HCODE_ERROR_INJECT + * 31 : STOP_HCODE_ERROR_INJECT + * + * DISABLE_CME_NACK_ON_PROLONGED_DROOP is NOT cleared + * as this is a persistent, characterization setting. 
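+			 * (Hence bit 29 is absent from the clear mask below.)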
+ */ + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_CSAR_CLEAR, + PPC_BIT(27) | + PPC_BIT(28) | + PPC_BIT(30) | + PPC_BIT(31)); + + /* Restore CORE PPM Error Mask */ + err_mask = 0xFFF00000; // from Hostboot's log + write_scom_for_chiplet(chip, core_chiplet, C_CPPM_ERRMSK, + PPC_PLACE(err_mask, 0, 32)); + } + } +} + +static void pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores) +{ + enum { + /* The following constants hold approximate values */ + PGPE_TIMEOUT_MS = 500, + PGPE_POLLTIME_MS = 20, + TIMEOUT_COUNT = PGPE_TIMEOUT_MS / PGPE_POLLTIME_MS, + + EQ_QPPM_QPMMR = 0x100F0103, + + PU_GPE2_PPE_XIXCR = 0x00064010, + PU_GPE2_PPE_XIDBGPRO = 0x00064015, + PU_GPE3_PPE_XIDBGPRO = 0x00066015, + + PU_GPE2_GPEIVPR_SCOM = 0x00064001, + PU_OCB_OCI_OCCS2_SCOM = 0x0006C088, + PU_OCB_OCI_OCCFLG_SCOM2 = 0x0006C08C, + PU_GPE2_GPETSEL_SCOM = 0x00064000, + + /* OCC SCRATCH2 */ + PGPE_ACTIVE = 0, + PGPE_PSTATE_PROTOCOL_ACTIVE = 1, + + /* XSR */ + HALTED_STATE = 0, + + /* XCR */ + RESUME = 2, + TOGGLE_XSR_TRH = 4, + HARD_RESET = 6, + }; + + uint64_t occ_scratch; + /* ATTR_VDD_AVSBUS_BUSNUM */ + uint8_t avsbus_number = 0; + /* ATTR_VDD_AVSBUS_RAIL */ + uint8_t avsbus_rail = 0; + + uint64_t ivpr = 0x80000000 + offsetof(struct homer_st, ppmr.l1_bootloader); + write_scom(chip, PU_GPE2_GPEIVPR_SCOM, ivpr << 32); + + /* Set up the OCC Scratch 2 register before PGPE boot */ + occ_scratch = read_scom(chip, PU_OCB_OCI_OCCS2_SCOM); + occ_scratch &= ~PPC_BIT(PGPE_ACTIVE); + occ_scratch &= ~PPC_BITMASK(27, 31); + occ_scratch |= PPC_PLACE(avsbus_number, 27, 1); + occ_scratch |= PPC_PLACE(avsbus_rail, 28, 4); + write_scom(chip, PU_OCB_OCI_OCCS2_SCOM, occ_scratch); + + write_scom(chip, PU_GPE2_GPETSEL_SCOM, 0x1A00000000000000); + + /* OCCFLG2_PGPE_HCODE_FIT_ERR_INJ | OCCFLG2_PGPE_HCODE_PSTATE_REQ_ERR_INJ */ + write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, 0x1100000000); + + printk(BIOS_DEBUG, "Attempting PGPE activation...\n"); + + write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(HARD_RESET, 1, 3)); + write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(TOGGLE_XSR_TRH, 1, 3)); + write_scom(chip, PU_GPE2_PPE_XIXCR, PPC_PLACE(RESUME, 1, 3)); + + wait_ms(PGPE_POLLTIME_MS * TIMEOUT_COUNT, + (read_scom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) || + (read_scom(chip, PU_GPE2_PPE_XIDBGPRO) & PPC_BIT(HALTED_STATE))); + + if (read_scom(chip, PU_OCB_OCI_OCCS2_SCOM) & PPC_BIT(PGPE_ACTIVE)) + printk(BIOS_DEBUG, "PGPE was activated successfully\n"); + else + die("Failed to activate PGPE\n"); + + OCCPstateParmBlock *oppb = (OCCPstateParmBlock *)homer->ppmr.occ_parm_block; + GlobalPstateParmBlock *gppb = (GlobalPstateParmBlock *) + &homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len]; + + uint32_t safe_mode_freq = oppb->frequency_min_khz / gppb->frequency_step_khz; + + for (int quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + if (!IS_EQ_FUNCTIONAL(quad, cores)) + continue; + + scom_and_or_for_chiplet(chip, EP00_CHIPLET_ID + quad, EQ_QPPM_QPMMR, + ~PPC_BITMASK(1, 11), + PPC_PLACE(safe_mode_freq, 1, 11)); + } +} + +static void pm_pba_init(uint8_t chip) +{ + enum { + PU_PBACFG = 0x0501284B, + PU_PBAFIR = 0x05012840, + + PU_PBACFG_CHSW_DIS_GROUP_SCOPE = 38, + + /* These don't have corresponding attributes */ + PBAX_DATA_TIMEOUT = 0x0, + PBAX_SND_RETRY_COMMIT_OVERCOMMIT = 0x0, + PBAX_SND_RETRY_THRESHOLD = 0x0, + PBAX_SND_TIMEOUT = 0x0, + }; + + uint64_t data = 0; + /* These group and chip IDs aren't affected by pump mode */ + uint8_t attr_pbax_groupid = 0; + uint8_t attr_pbax_chipid = chip; + uint8_t 
attr_pbax_broadcast_vector = 0;
+
+	/* Assuming ATTR_CHIP_EC_FEATURE_HW423589_OPTION1 == true */
+	write_scom(chip, PU_PBACFG, PPC_BIT(PU_PBACFG_CHSW_DIS_GROUP_SCOPE));
+
+	write_scom(chip, PU_PBAFIR, 0);
+
+	data |= PPC_PLACE(attr_pbax_groupid, 4, 4);
+	data |= PPC_PLACE(attr_pbax_chipid, 8, 3);
+	data |= PPC_PLACE(attr_pbax_broadcast_vector, 12, 8);
+	data |= PPC_PLACE(PBAX_DATA_TIMEOUT, 20, 5);
+	data |= PPC_PLACE(PBAX_SND_RETRY_COMMIT_OVERCOMMIT, 27, 1);
+	data |= PPC_PLACE(PBAX_SND_RETRY_THRESHOLD, 28, 8);
+	data |= PPC_PLACE(PBAX_SND_TIMEOUT, 36, 5);
+	write_scom(chip, PU_PBAXCFG_SCOM, data);
+}
+
+static void pm_pstate_gpe_init(uint8_t chip, struct homer_st *homer, uint64_t cores)
+{
+	pstate_gpe_init(chip, homer, cores);
+	pm_pba_init(chip);
+}
+
+/* Generates host configuration vector and updates the value in HOMER */
+static void check_proc_config(uint8_t chip, struct homer_st *homer)
+{
+	uint64_t vector_value = INIT_CONFIG_VALUE;
+	uint64_t *conf_vector = (void *)((uint8_t *)&homer->qpmr + QPMR_PROC_CONFIG_POS);
+
+	int mcs_i = 0;
+
+	for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) {
+		chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]];
+
+		/* MCS_MCFGP and MCS_MCFGPM registers are undocumented, see istep 14.5. */
+		if ((read_scom_for_chiplet(chip, nest, 0x0501080A) & PPC_BIT(0)) ||
+		    (read_scom_for_chiplet(chip, nest, 0x0501080C) & PPC_BIT(0))) {
+			/* Accumulate into vector_value; writing through
+			 * conf_vector here would be clobbered by the final
+			 * store below. */
+			uint8_t pos = MCS_POS + mcs_i;
+			vector_value |= PPC_BIT(pos);
+
+			/* MCS and MBA/MCA seem to have equivalent values */
+			pos = MBA_POS + mcs_i;
+			vector_value |= PPC_BIT(pos);
+		}
+	}
+
+	/* TODO: set configuration bits for XBUS and PHB when their state is available */
+
+	*conf_vector = vector_value;
+}
+
+static void pm_pss_init(uint8_t chip)
+{
+	enum {
+		PU_SPIPSS_ADC_CTRL_REG0 = 0x00070000,
+		PU_SPIPSS_ADC_WDATA_REG = 0x00070010,
+		PU_SPIPSS_P2S_CTRL_REG0 = 0x00070040,
+		PU_SPIPSS_P2S_WDATA_REG = 0x00070050,
+		PU_SPIPSS_100NS_REG = 0x00070028,
+	};
+
+	/*
+	 * 0-5	frame size
+	 * 12-17 in delay
+	 */
+	scom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0,
+		    ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17),
+		    PPC_PLACE(0x20, 0, 6));
+
+	/*
+	 * 0	adc_fsm_enable = 1
+	 * 1	adc_device = 0
+	 * 2	adc_cpol = 0
+	 * 3	adc_cpha = 0
+	 * 4-13	adc_clock_divider = set to 10Mhz
+	 * 14-17 adc_nr_of_frames = 0x10 (for auto 2 mode)
+	 *
+	 * Truncating last value to 4 bits gives 0.
+	 */
+	scom_and_or(chip, PU_SPIPSS_ADC_CTRL_REG0 + 1, ~PPC_BITMASK(0, 17),
+		    PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_PLACE(0, 14, 4));
+
+	/*
+	 * 0-16	inter frame delay
+	 */
+	scom_and(chip, PU_SPIPSS_ADC_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16));
+
+	write_scom(chip, PU_SPIPSS_ADC_WDATA_REG, 0);
+
+	/*
+	 * 0-5	frame size
+	 * 12-17 in delay
+	 */
+	scom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0,
+		    ~PPC_BITMASK(0, 5) & ~PPC_BITMASK(12, 17),
+		    PPC_PLACE(0x20, 0, 6));
+
+	/*
+	 * 0	p2s_fsm_enable = 1
+	 * 1	p2s_device = 0
+	 * 2	p2s_cpol = 0
+	 * 3	p2s_cpha = 0
+	 * 4-13	p2s_clock_divider = set to 10Mhz
+	 * 17	p2s_nr_of_frames = 1 (for auto 2 mode)
+	 */
+	scom_and_or(chip, PU_SPIPSS_P2S_CTRL_REG0 + 1,
+		    ~(PPC_BITMASK(0, 13) | PPC_BIT(17)),
+		    PPC_BIT(0) | PPC_PLACE(10, 4, 10) | PPC_BIT(17));
+
+	/*
+	 * 0-16	inter frame delay
+	 */
+	scom_and(chip, PU_SPIPSS_P2S_CTRL_REG0 + 2, ~PPC_BITMASK(0, 16));
+
+	write_scom(chip, PU_SPIPSS_P2S_WDATA_REG, 0);
+
+	/*
+	 * 0-31	100ns value
+	 * (The and-mask must clear bits 0-31 before placing the new value,
+	 * consistent with the other field updates in this function.)
+	 */
+	scom_and_or(chip, PU_SPIPSS_100NS_REG,
+		    ~PPC_BITMASK(0, 31),
+		    PPC_PLACE(powerbus_cfg(chip)->fabric_freq / 40, 0, 32));
+}
+
+/* Initializes power-management and starts OCC */
+static void start_pm_complex(uint8_t chip, struct homer_st *homer, uint64_t cores)
+{
+	enum { STOP_RECOVERY_TRIGGER_ENABLE = 29 };
+
+	pm_corequad_init(chip, cores);
+	pm_pss_init(chip);
+	pm_occ_fir_init(chip);
+	pm_pba_fir_init(chip);
+	stop_gpe_init(chip, homer);
+	pm_pstate_gpe_init(chip, homer, cores);
+
+	check_proc_config(chip, homer);
+	clear_occ_special_wakeups(chip, cores);
+	special_occ_wakeup_disable(chip, cores);
+	occ_start_from_mem(chip);
+
+	write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(STOP_RECOVERY_TRIGGER_ENABLE));
+}
+
+static void istep_21_1(uint8_t chips, struct homer_st *homers, const uint64_t *cores)
+{
+	for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
+		if (chips & (1 << chip))
+			load_pm_complex(chip, &homers[chip]);
+	}
+
+	printk(BIOS_DEBUG, "Starting PM complex...\n");
+	for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
+		if (chips & (1 << chip))
+			start_pm_complex(chip, &homers[chip], cores[chip]);
+	}
+	printk(BIOS_DEBUG, "Done starting PM complex\n");
+}
+
+/* Extracts rings for a specific Programmable PowerPC-lite Engine */
+static void get_ppe_scan_rings(uint8_t chip, struct xip_hw_header *hw, uint8_t dd,
+			       enum ppe_type ppe, struct ring_data *ring_data)
+{
+	const uint32_t max_rings_buf_size = ring_data->rings_buf_size;
+
+	struct tor_hdr *rings;
+	struct tor_hdr *overlays;
+
+	if (dd < 0x20)
+		die("DD must be at least 0x20!");
+	if (!hw->overlays.dd_support)
+		die("Overlays must support DD!");
+
+	copy_section(&rings, &hw->rings, hw, dd, FIND);
+	copy_section(&overlays, &hw->overlays, hw, dd, FIND);
+
+	if (!tor_access_ring(rings, UNDEFINED_RING_ID, ppe, UNDEFINED_RING_VARIANT,
+			     UNDEFINED_INSTANCE_ID, ring_data->rings_buf,
+			     &ring_data->rings_buf_size, GET_PPE_LEVEL_RINGS))
+		die("Failed to access PPE level rings!");
+
+	assert(ring_data->work_buf1_size == MAX_RING_BUF_SIZE);
+	assert(ring_data->work_buf2_size == MAX_RING_BUF_SIZE);
+	assert(ring_data->work_buf3_size == MAX_RING_BUF_SIZE);
+
+	tor_fetch_and_insert_vpd_rings(chip,
+				       (struct tor_hdr *)ring_data->rings_buf,
+				       &ring_data->rings_buf_size, max_rings_buf_size,
+				       overlays, ppe,
+				       ring_data->work_buf1,
+				       ring_data->work_buf2,
+				       ring_data->work_buf3);
+}
+
+static void layout_cmn_rings_for_cme(struct homer_st *homer,
+				     struct ring_data *ring_data,
+				     enum ring_variant ring_variant,
+				     uint32_t *ring_len)
+{
+	struct cme_cmn_ring_list *tmp =
+		(void
*)&homer->cpmr.cme_sram_region[*ring_len]; + uint8_t *start = (void *)tmp; + uint8_t *payload = tmp->payload; + + uint32_t i = 0; + const enum ring_id ring_ids[] = { EC_FUNC, EC_GPTR, EC_TIME, EC_MODE }; + + for (i = 0; i < ARRAY_SIZE(ring_ids); ++i) { + const enum ring_id id = ring_ids[i]; + + uint32_t ring_size = MAX_RING_BUF_SIZE; + uint8_t *ring_dst = start + ALIGN_UP(payload - start, 8); + + enum ring_variant this_ring_variant = ring_variant; + if (id == EC_GPTR || id == EC_TIME) + this_ring_variant = RV_BASE; + + if (!tor_access_ring(ring_data->rings_buf, id, PT_CME, + this_ring_variant, EC00_CHIPLET_ID, + ring_dst, &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[i] = ring_dst - start; + payload = ring_dst + ALIGN_UP(ring_size, 8); + } + + if (payload != tmp->payload) + *ring_len += payload - start; + + *ring_len = ALIGN_UP(*ring_len, 8); +} + +static void layout_inst_rings_for_cme(struct homer_st *homer, + struct ring_data *ring_data, + uint64_t cores, + enum ring_variant ring_variant, + uint32_t *ring_len) +{ + uint32_t max_ex_len = 0; + + uint32_t ex = 0; + + for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { + uint32_t i = 0; + uint32_t ex_len = 0; + + for (i = 0; i < MAX_CORES_PER_EX; ++i) { + const uint32_t core = ex * MAX_CORES_PER_EX + i; + + uint32_t ring_size = 0; + + if (!IS_EC_FUNCTIONAL(core, cores)) + continue; + + ring_size = ring_data->work_buf1_size; + if (!tor_access_ring(ring_data->rings_buf, EC_REPR, + PT_CME, RV_BASE, + EC00_CHIPLET_ID + core, + ring_data->work_buf1, + &ring_size, GET_RING_DATA)) + continue; + + ex_len += ALIGN_UP(ring_size, 8); + } + + if (ex_len > max_ex_len) + max_ex_len = ex_len; + } + + if (max_ex_len > 0) { + max_ex_len += sizeof(struct cme_inst_ring_list); + max_ex_len = ALIGN_UP(max_ex_len, 32); + } + + for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { + const uint32_t ex_offset = + ex * (max_ex_len + ALIGN_UP(sizeof(LocalPstateParmBlock), 32)); + + uint8_t *start = &homer->cpmr.cme_sram_region[*ring_len + ex_offset]; + struct cme_inst_ring_list *tmp = (void *)start; + uint8_t *payload = tmp->payload; + + uint32_t i = 0; + + for (i = 0; i < MAX_CORES_PER_EX; ++i) { + const uint32_t core = ex * MAX_CORES_PER_EX + i; + + uint32_t ring_size = MAX_RING_BUF_SIZE; + + if (!IS_EC_FUNCTIONAL(core, cores)) + continue; + + if ((payload - start) % 8 != 0) + payload = start + ALIGN_UP(payload - start, 8); + + if (!tor_access_ring(ring_data->rings_buf, EC_REPR, + PT_CME, RV_BASE, + EC00_CHIPLET_ID + core, + payload, + &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[i] = payload - start; + payload += ALIGN_UP(ring_size, 8); + } + } + + *ring_len = max_ex_len; +} + +static void layout_rings_for_cme(struct homer_st *homer, + struct ring_data *ring_data, + uint64_t cores, + enum ring_variant ring_variant) +{ + struct cpmr_header *cpmr_hdr = &homer->cpmr.header; + struct cme_img_header *cme_hdr = (void *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + + uint32_t ring_len = cme_hdr->hcode_offset + cme_hdr->hcode_len; + + assert(cpmr_hdr->magic == CPMR_VDM_PER_QUAD); + + layout_cmn_rings_for_cme(homer, ring_data, ring_variant, &ring_len); + + cme_hdr->common_ring_len = ring_len - (cme_hdr->hcode_offset + cme_hdr->hcode_len); + + // if common ring is empty, force offset to be 0 + if (cme_hdr->common_ring_len == 0) + cme_hdr->common_ring_offset = 0; + + ring_len = ALIGN_UP(ring_len, 32); + + layout_inst_rings_for_cme(homer, ring_data, cores, RV_BASE, &ring_len); + + if (ring_len != 0) { + cme_hdr->max_spec_ring_len = ALIGN_UP(ring_len, 32) / 32; + 
cme_hdr->core_spec_ring_offset = + ALIGN_UP(cme_hdr->common_ring_offset + cme_hdr->common_ring_len, 32) / + 32; + } +} + +static enum ring_id resolve_eq_inex_bucket(uint8_t chip) +{ + switch (powerbus_cfg(chip)->core_floor_ratio) { + case FABRIC_CORE_FLOOR_RATIO_RATIO_8_8: + return EQ_INEX_BUCKET_4; + + case FABRIC_CORE_FLOOR_RATIO_RATIO_7_8: + case FABRIC_CORE_FLOOR_RATIO_RATIO_6_8: + case FABRIC_CORE_FLOOR_RATIO_RATIO_5_8: + case FABRIC_CORE_FLOOR_RATIO_RATIO_4_8: + return EQ_INEX_BUCKET_3; + + case FABRIC_CORE_FLOOR_RATIO_RATIO_2_8: + return EQ_INEX_BUCKET_2; + } + + die("Failed to resolve EQ_INEX_BUCKET_*!\n"); +} + +static void layout_cmn_rings_for_sgpe(uint8_t chip, + struct homer_st *homer, + struct ring_data *ring_data, + enum ring_variant ring_variant) +{ + const enum ring_id ring_ids[] = { + EQ_FURE, EQ_GPTR, EQ_TIME, EQ_INEX, EX_L3_FURE, EX_L3_GPTR, EX_L3_TIME, + EX_L2_MODE, EX_L2_FURE, EX_L2_GPTR, EX_L2_TIME, EX_L3_REFR_FURE, + EX_L3_REFR_GPTR, EQ_ANA_FUNC, EQ_ANA_GPTR, EQ_DPLL_FUNC, EQ_DPLL_GPTR, + EQ_DPLL_MODE, EQ_ANA_BNDY_BUCKET_0, EQ_ANA_BNDY_BUCKET_1, + EQ_ANA_BNDY_BUCKET_2, EQ_ANA_BNDY_BUCKET_3, EQ_ANA_BNDY_BUCKET_4, + EQ_ANA_BNDY_BUCKET_5, EQ_ANA_BNDY_BUCKET_6, EQ_ANA_BNDY_BUCKET_7, + EQ_ANA_BNDY_BUCKET_8, EQ_ANA_BNDY_BUCKET_9, EQ_ANA_BNDY_BUCKET_10, + EQ_ANA_BNDY_BUCKET_11, EQ_ANA_BNDY_BUCKET_12, EQ_ANA_BNDY_BUCKET_13, + EQ_ANA_BNDY_BUCKET_14, EQ_ANA_BNDY_BUCKET_15, EQ_ANA_BNDY_BUCKET_16, + EQ_ANA_BNDY_BUCKET_17, EQ_ANA_BNDY_BUCKET_18, EQ_ANA_BNDY_BUCKET_19, + EQ_ANA_BNDY_BUCKET_20, EQ_ANA_BNDY_BUCKET_21, EQ_ANA_BNDY_BUCKET_22, + EQ_ANA_BNDY_BUCKET_23, EQ_ANA_BNDY_BUCKET_24, EQ_ANA_BNDY_BUCKET_25, + EQ_ANA_BNDY_BUCKET_L3DCC, EQ_ANA_MODE, EQ_ANA_BNDY_BUCKET_26, + EQ_ANA_BNDY_BUCKET_27, EQ_ANA_BNDY_BUCKET_28, EQ_ANA_BNDY_BUCKET_29, + EQ_ANA_BNDY_BUCKET_30, EQ_ANA_BNDY_BUCKET_31, EQ_ANA_BNDY_BUCKET_32, + EQ_ANA_BNDY_BUCKET_33, EQ_ANA_BNDY_BUCKET_34, EQ_ANA_BNDY_BUCKET_35, + EQ_ANA_BNDY_BUCKET_36, EQ_ANA_BNDY_BUCKET_37, EQ_ANA_BNDY_BUCKET_38, + EQ_ANA_BNDY_BUCKET_39, EQ_ANA_BNDY_BUCKET_40, EQ_ANA_BNDY_BUCKET_41 + }; + + const enum ring_id eq_index_bucket_id = resolve_eq_inex_bucket(chip); + + struct qpmr_header *qpmr_hdr = &homer->qpmr.sgpe.header; + struct sgpe_cmn_ring_list *tmp = + (void *)&homer->qpmr.sgpe.sram_image[qpmr_hdr->img_len]; + uint8_t *start = (void *)tmp; + uint8_t *payload = tmp->payload; + + uint32_t i = 0; + + for (i = 0; i < ARRAY_SIZE(ring_ids); ++i) { + enum ring_variant this_ring_variant; + uint32_t ring_size = MAX_RING_BUF_SIZE; + + enum ring_id id = ring_ids[i]; + if (id == EQ_INEX) + id = eq_index_bucket_id; + + this_ring_variant = ring_variant; + if (id == EQ_GPTR || // EQ GPTR + id == EQ_ANA_GPTR || + id == EQ_DPLL_GPTR || + id == EX_L3_GPTR || // EX GPTR + id == EX_L2_GPTR || + id == EX_L3_REFR_GPTR || + id == EQ_TIME || // EQ TIME + id == EX_L3_TIME || // EX TIME + id == EX_L2_TIME) + this_ring_variant = RV_BASE; + + if ((payload - start) % 8 != 0) + payload = start + ALIGN_UP(payload - start, 8); + + if (!tor_access_ring(ring_data->rings_buf, id, PT_SGPE, + this_ring_variant, EP00_CHIPLET_ID, + payload, &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[i] = payload - start; + payload += ALIGN_UP(ring_size, 8); + } + + qpmr_hdr->common_ring_len = payload - start; + qpmr_hdr->common_ring_offset = + offsetof(struct qpmr_st, sgpe.sram_image) + qpmr_hdr->img_len; +} + +static void layout_inst_rings_for_sgpe(struct homer_st *homer, + struct ring_data *ring_data, + uint64_t cores, + enum ring_variant ring_variant) +{ + struct qpmr_header 
*qpmr_hdr = &homer->qpmr.sgpe.header; + uint32_t inst_rings_offset = qpmr_hdr->img_len + qpmr_hdr->common_ring_len; + + uint8_t *start = &homer->qpmr.sgpe.sram_image[inst_rings_offset]; + struct sgpe_inst_ring_list *tmp = (void *)start; + uint8_t *payload = tmp->payload; + + /* It's EQ_REPR and four pairs of EX rings */ + const enum ring_id ring_ids[] = { + EQ_REPR, EX_L3_REPR, EX_L3_REPR, EX_L2_REPR, EX_L2_REPR, + EX_L3_REFR_REPR, EX_L3_REFR_REPR, EX_L3_REFR_TIME, + EX_L3_REFR_TIME + }; + + uint8_t quad = 0; + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + uint8_t i; + + /* Skip non-functional quads */ + if (!IS_EQ_FUNCTIONAL(quad, cores)) + continue; + + for (i = 0; i < ARRAY_SIZE(ring_ids); ++i) { + const enum ring_id id = ring_ids[i]; + + uint32_t ring_size = MAX_RING_BUF_SIZE; + + /* Despite the constant, this is not a SCOM chiplet ID, + it's just used as a base value */ + uint8_t instance_id = EP00_CHIPLET_ID + quad; + if (i != 0) { + instance_id += quad; + if (i % 2 == 0) + ++instance_id; + } + + if ((payload - start) % 8 != 0) + payload = start + ALIGN_UP(payload - start, 8); + + if (!tor_access_ring(ring_data->rings_buf, id, PT_SGPE, + ring_variant, instance_id, + payload, &ring_size, GET_RING_DATA)) + continue; + + tmp->ring[quad][i] = payload - start; + payload += ALIGN_UP(ring_size, 8); + } + } + + qpmr_hdr->spec_ring_offset = qpmr_hdr->common_ring_offset + qpmr_hdr->common_ring_len; + qpmr_hdr->spec_ring_len = payload - start; +}
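The instance_id arithmetic in the inner loop is terse: quad q uses EP00_CHIPLET_ID + q for its EQ_REPR ring, and EP00_CHIPLET_ID + 2q or + 2q + 1 for the first/second EX of each ring pair. A throwaway program to print the mapping; EP00_CHIPLET_ID = 0x10 is an assumption here (the usual POWER9 value), and as the comment above notes it is only a base value, not a real SCOM chiplet ID:

```c
#include <stdio.h>

#define EP00_CHIPLET_ID    0x10	/* assumed value of the base constant */
#define MAX_QUADS_PER_CHIP 6
#define RINGS_PER_QUAD     9	/* EQ_REPR plus four pairs of EX rings */

int main(void)
{
	for (int quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) {
		for (int i = 0; i < RINGS_PER_QUAD; ++i) {
			int instance_id = EP00_CHIPLET_ID + quad;
			if (i != 0) {
				instance_id += quad;	/* two EX instances per quad */
				if (i % 2 == 0)
					++instance_id;	/* second EX of the pair */
			}
			printf("quad %d, ring %d -> instance 0x%02x\n",
			       quad, i, instance_id);
		}
	}
	return 0;
}
```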
+ +static void layout_rings_for_sgpe(uint8_t chip, struct homer_st *homer, + struct ring_data *ring_data, + struct xip_sgpe_header *sgpe, + uint64_t cores, + enum ring_variant ring_variant) +{ + struct qpmr_header *qpmr_hdr = &homer->qpmr.sgpe.header; + struct sgpe_img_header *sgpe_img_hdr = + (void *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + + layout_cmn_rings_for_sgpe(chip, homer, ring_data, ring_variant); + layout_inst_rings_for_sgpe(homer, ring_data, cores, RV_BASE); + + if (qpmr_hdr->common_ring_len == 0) { + /* If quad common rings don't exist, ensure its offset in image + header is zero */ + sgpe_img_hdr->cmn_ring_occ_offset = 0; + } + + if (qpmr_hdr->spec_ring_len > 0) { + sgpe_img_hdr->spec_ring_occ_offset = qpmr_hdr->img_len + + qpmr_hdr->common_ring_len; + sgpe_img_hdr->scom_offset = sgpe_img_hdr->spec_ring_occ_offset + + qpmr_hdr->spec_ring_len; + } +} + +static void stop_save_scom(struct homer_st *homer, uint32_t scom_address, + uint64_t scom_data, enum scom_section section, + enum scom_operation operation) +{ + enum { + STOP_API_VER = 0x00, + SCOM_ENTRY_START = 0xDEADDEAD, + }; + + chiplet_id_t chiplet_id = (scom_address >> 24) & 0x3F; + uint32_t max_scom_restore_entries = 0; + struct stop_cache_section_t *stop_cache_scom = NULL; + struct scom_entry_t *scom_entry = NULL; + struct scom_entry_t *nop_entry = NULL; + struct scom_entry_t *matching_entry = NULL; + struct scom_entry_t *end_entry = NULL; + struct scom_entry_t *entry = NULL; + uint32_t entry_limit = 0; + + if (chiplet_id >= EC00_CHIPLET_ID) { + uint32_t offset = (chiplet_id - EC00_CHIPLET_ID) + * CORE_SCOM_RESTORE_SIZE_PER_CORE; + scom_entry = (struct scom_entry_t *)&homer->cpmr.core_scom[offset]; + max_scom_restore_entries = homer->cpmr.header.core_max_scom_entry; + } else { + uint32_t offset = (chiplet_id - EP00_CHIPLET_ID) + * QUAD_SCOM_RESTORE_SIZE_PER_QUAD; + stop_cache_scom = + (struct stop_cache_section_t *)&homer->qpmr.cache_scom_region[offset]; + max_scom_restore_entries = homer->qpmr.sgpe.header.max_quad_restore_entry; + } + + switch (section) { + case STOP_SECTION_CORE_SCOM: + entry_limit = max_scom_restore_entries; + break; + case STOP_SECTION_EQ_SCOM: + /* Only the EQ path uses the cache SCOM area; check it here so that + core entries don't die on the legitimately NULL pointer */ + if (stop_cache_scom == NULL) + die("Failed to prepare for updating STOP SCOM\n"); + scom_entry = stop_cache_scom->non_cache_area; + entry_limit = MAX_EQ_SCOM_ENTRIES; + break; + default: + die("Unhandled STOP image section.\n"); + break; + } + + for (uint32_t i = 0; i < entry_limit; ++i) { + uint32_t entry_address = scom_entry[i].address; + uint32_t entry_hdr = scom_entry[i].hdr; + + if (entry_address == scom_address && matching_entry == NULL) + matching_entry = &scom_entry[i]; + + if ((entry_address == ORI_OP || entry_address == ATTN_OP || + entry_address == BLR_OP) && nop_entry == NULL) + nop_entry = &scom_entry[i]; + + /* If entry is either 0xDEADDEAD or has SCOM entry limit in LSB of its header, + * the place is already occupied */ + if (entry_hdr == SCOM_ENTRY_START || (entry_hdr & 0x000000FF)) + continue; + + end_entry = &scom_entry[i]; + break; + } + + if (matching_entry == NULL && end_entry == NULL) + die("Failed to find SCOM entry in STOP image.\n"); + + entry = end_entry; + if (operation == SCOM_APPEND && nop_entry != NULL) + entry = nop_entry; + else if (operation == SCOM_REPLACE && matching_entry != NULL) + entry = matching_entry; + + if (entry == NULL) + die("Failed to insert SCOM entry in STOP image.\n"); + + entry->hdr = (0x000000FF & max_scom_restore_entries) + | ((STOP_API_VER & 0x7) << 28); + entry->address = scom_address; + entry->data = scom_data; +} + +static void populate_epsilon_l2_scom_reg(uint8_t chip, struct homer_st *homer) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); + + uint32_t eps_r_t0 = pb_cfg->eps_r[0] / 8 / L2_EPS_DIVIDER + 1; + uint32_t eps_r_t1 = pb_cfg->eps_r[1] / 8 / L2_EPS_DIVIDER + 1; + uint32_t eps_r_t2 = pb_cfg->eps_r[2] / 8 / L2_EPS_DIVIDER + 1; + + uint32_t eps_w_t0 = pb_cfg->eps_w[0] / 8 / L2_EPS_DIVIDER + 1; + uint32_t eps_w_t1 = pb_cfg->eps_w[1] / 8 / L2_EPS_DIVIDER + 1; + + uint64_t eps_r = PPC_PLACE(eps_r_t0, 0, 12) + | PPC_PLACE(eps_r_t1, 12, 12) + | PPC_PLACE(eps_r_t2, 24, 12); + + uint64_t eps_w = PPC_PLACE(eps_w_t0, 0, 12) + | PPC_PLACE(eps_w_t1, 12, 12) + | PPC_PLACE(L2_EPS_DIVIDER, 24, 4); + + uint8_t quad = 0; + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + uint32_t scom_addr; + + /* Create restore entry for epsilon L2 RD register */ + + scom_addr = (EX_L2_RD_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + /* Create restore entry for epsilon L2 WR register */ + + scom_addr = (EX_L2_WR_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + } +}
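populate_epsilon_l2_scom_reg() converts each PowerBus epsilon value to a 12-bit register field as eps / 8 / L2_EPS_DIVIDER + 1 and packs the tiers into one 64-bit SCOM value. A standalone rehearsal of the read-register packing, with invented epsilon inputs (the real ones come from powerbus_cfg()) and a divider of 1 assumed:

```c
#include <stdio.h>
#include <stdint.h>

#define L2_EPS_DIVIDER 1	/* assumed; the real divider is a platform constant */

int main(void)
{
	/* Hypothetical PowerBus read epsilons, tiers 0..2 */
	uint32_t eps_r[3] = { 160, 320, 4800 };
	uint64_t packed = 0;

	for (int t = 0; t < 3; ++t) {
		uint32_t tier = eps_r[t] / 8 / L2_EPS_DIVIDER + 1;
		/* 12-bit fields at big-endian bit offsets 0, 12 and 24,
		 * same as PPC_PLACE(tier, 12 * t, 12) */
		packed |= (uint64_t)tier << (64 - 12 * (t + 1));
		printf("tier %d: %u cycles -> field value %u\n", t, eps_r[t], tier);
	}
	printf("packed register value: 0x%016llx\n", (unsigned long long)packed);
	return 0;
}
```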
+ +static void populate_epsilon_l3_scom_reg(uint8_t chip, struct homer_st *homer) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); + + uint32_t eps_r_t0 = pb_cfg->eps_r[0] / 8 / L3_EPS_DIVIDER + 1; + uint32_t eps_r_t1 = pb_cfg->eps_r[1] / 8 / L3_EPS_DIVIDER + 1; + uint32_t eps_r_t2 = pb_cfg->eps_r[2] / 8 / L3_EPS_DIVIDER + 1; + + uint32_t eps_w_t0 = pb_cfg->eps_w[0] / 8 / L3_EPS_DIVIDER + 1; + uint32_t eps_w_t1 = pb_cfg->eps_w[1] / 8 / L3_EPS_DIVIDER + 1; + + uint64_t eps_r = PPC_PLACE(eps_r_t0, 0, 12) + | PPC_PLACE(eps_r_t1, 12, 12) + | PPC_PLACE(eps_r_t2, 24, 12); + + uint64_t eps_w = PPC_PLACE(eps_w_t0, 0, 12) + | PPC_PLACE(eps_w_t1, 12, 12) + | PPC_PLACE(L3_EPS_DIVIDER, 30, 4); + + uint8_t quad = 0; + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + uint32_t scom_addr; + + /* Create restore entry for epsilon L3 RD register */ + + scom_addr = (EX_L3_RD_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_r, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + /* Create restore entry for epsilon L3 WR register */ + + scom_addr = (EX_L3_WR_EPS_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, eps_w, STOP_SECTION_EQ_SCOM, + SCOM_APPEND); + } +} + +static void populate_l3_refresh_scom_reg(uint8_t chip, struct homer_st *homer, uint8_t dd) +{ + uint64_t refresh_val = 0x2000000000000000ULL; + + uint8_t quad = 0; + + /* ATTR_CHIP_EC_FEATURE_HW408892 === (DD <= 0x20) */ + if (powerbus_cfg(chip)->fabric_freq >= 2000 && dd > 0x20) + refresh_val |= PPC_PLACE(0x2, 8, 4); + + for (quad = 0; quad < MAX_QUADS_PER_CHIP; ++quad) { + /* Create restore entry for L3 Refresh Timer Divider register */ + + uint32_t scom_addr = (EX_DRAM_REF_REG | (quad << QUAD_BIT_POS)); + stop_save_scom(homer, scom_addr, refresh_val, + STOP_SECTION_EQ_SCOM, SCOM_APPEND); + + scom_addr |= ODD_EVEN_EX_POS; + stop_save_scom(homer, scom_addr, refresh_val, + STOP_SECTION_EQ_SCOM, SCOM_APPEND); + } +} + +static void populate_ncu_rng_bar_scom_reg(uint8_t chip, struct homer_st *homer) +{ + enum { NX_RANGE_BAR_ADDR_OFFSET = 0x00000302031D0000 }; + + uint8_t ex = 0; + + uint64_t data = PROC_BASE_ADDR(chip, /*msel=*/0x3) + NX_RANGE_BAR_ADDR_OFFSET; + + for (ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) { + /* Create restore entry for NCU RNG register */ + + uint32_t scom_addr = EX_0_NCU_DARN_BAR_REG + | ((ex / 2) << 24) + | ((ex % 2) ? 0x0400 : 0x0000); + + stop_save_scom(homer, scom_addr, data, STOP_SECTION_EQ_SCOM, SCOM_REPLACE); + } +}
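populate_ncu_rng_bar_scom_reg() derives one SCOM address per EX: the byte above bit 24 selects the quad (EP) chiplet, and 0x0400 selects the odd EX within it. A quick expansion of the address modifier for all twelve CMEs; only the quad/odd-EX part is computed, the EX_0_NCU_DARN_BAR_REG base is left symbolic since its value isn't shown in this patch:

```c
#include <stdio.h>
#include <stdint.h>

#define MAX_CMES_PER_CHIP 12

int main(void)
{
	for (int ex = 0; ex < MAX_CMES_PER_CHIP; ++ex) {
		/* OR-ed into EX_0_NCU_DARN_BAR_REG by the loop above */
		uint32_t mod = ((uint32_t)(ex / 2) << 24)
			     | ((ex % 2) ? 0x0400 : 0x0000);
		printf("EX %2d -> base | 0x%08x (quad %d, %s EX)\n",
		       ex, mod, ex / 2, (ex % 2) ? "odd" : "even");
	}
	return 0;
}
```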
+ +static void update_headers(uint8_t chip, struct homer_st *homer, uint64_t cores) +{ + /* + * Update CPMR Header with Scan Ring details. + * For each entry this function does one of: + * - write a constant value + * - copy a value from another field + * - one or both of the above with arithmetic operations + * Consider writing these fields in previous functions instead. + */ + struct cpmr_header *cpmr_hdr = &homer->cpmr.header; + struct cme_img_header *cme_hdr = (struct cme_img_header *) + &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + cpmr_hdr->img_offset = offsetof(struct cpmr_st, cme_sram_region) / 32; + cpmr_hdr->cme_pstate_offset = offsetof(struct cpmr_st, cme_sram_region) + cme_hdr->pstate_region_offset; + cpmr_hdr->cme_pstate_len = cme_hdr->pstate_region_len; + cpmr_hdr->img_len = cme_hdr->hcode_len; + cpmr_hdr->core_scom_offset = offsetof(struct cpmr_st, core_scom); + cpmr_hdr->core_scom_len = CORE_SCOM_RESTORE_SIZE; // 6k + cpmr_hdr->core_max_scom_entry = 15; + + if (cme_hdr->common_ring_len) { + cpmr_hdr->cme_common_ring_offset = offsetof(struct cpmr_st, cme_sram_region) + + cme_hdr->common_ring_offset; + cpmr_hdr->cme_common_ring_len = cme_hdr->common_ring_len; + } + + if (cme_hdr->max_spec_ring_len) { + cpmr_hdr->core_spec_ring_offset = ALIGN_UP(cpmr_hdr->img_offset * 32 + + cpmr_hdr->img_len + + cpmr_hdr->cme_pstate_len + + cpmr_hdr->cme_common_ring_len, + 32) / 32; + cpmr_hdr->core_spec_ring_len = cme_hdr->max_spec_ring_len; + } + + cme_hdr->custom_length = + ALIGN_UP(cme_hdr->max_spec_ring_len * 32 + sizeof(LocalPstateParmBlock), 32) / 32; + + for (int cme = 0; cme < MAX_CORES_PER_CHIP/2; cme++) { + /* + * CME index/position is the same as EX, however this means that Pstate + * offset is overwritten when there are 2 functional CMEs in one quad. + * Maybe we can use "for each functional quad" instead, but maybe + * 'cme * cme_hdr->custom_length' points to different data, based on + * whether there is one or two functional CMEs (is that even possible?). + */ + if (!IS_EX_FUNCTIONAL(cme, cores)) + continue; + + /* Assuming >= CPMR_2.0 */ + cpmr_hdr->quad_pstate_offset[cme / 2] = cpmr_hdr->core_spec_ring_offset + + cpmr_hdr->core_spec_ring_len + + cme * cme_hdr->custom_length; + } + + /* Updating CME Image header */ + /* Assuming >= CPMR_2.0 */ + cme_hdr->scom_offset = + ALIGN_UP(cme_hdr->pstate_offset * 32 + sizeof(LocalPstateParmBlock), 32) / 32; + + /* The instance ring length added to it is already a multiple of 32 B */ + cme_hdr->scom_len = 512; + + /* Timebase frequency */ + cme_hdr->timebase_hz = powerbus_cfg(chip)->fabric_freq * MHz / 64; + + /* + * Update QPMR Header area in HOMER. + * In Hostboot, qpmrHdr is a copy of the header; Hostboot doesn't operate + * on HOMER directly until now - it fills the following fields in the copy + * and then memcpy()s it to HOMER. Since the BAR is set up in the next + * istep, the intermediate copy doesn't seem necessary. + */ + homer->qpmr.sgpe.header.sram_img_size = + homer->qpmr.sgpe.header.img_len + + homer->qpmr.sgpe.header.common_ring_len + + homer->qpmr.sgpe.header.spec_ring_len; + homer->qpmr.sgpe.header.max_quad_restore_entry = 255; + homer->qpmr.sgpe.header.build_ver = 3; + struct sgpe_img_header *sgpe_hdr = (struct sgpe_img_header *) + &homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + sgpe_hdr->scom_mem_offset = offsetof(struct homer_st, qpmr.cache_scom_region); + + /* Update PPMR Header area in HOMER */ + struct pgpe_img_header *pgpe_hdr = (struct pgpe_img_header *) + &homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE]; + pgpe_hdr->core_throttle_assert_cnt = 0; + pgpe_hdr->core_throttle_deassert_cnt = 0; + pgpe_hdr->ivpr_addr = 0xFFF20000; // OCC_SRAM_PGPE_BASE_ADDR + // = homer->ppmr.header.sram_region_start + pgpe_hdr->gppb_sram_addr = 0; // set by PGPE Hcode (or not?)
+ pgpe_hdr->hcode_len = homer->ppmr.header.hcode_len; + /* FIXME: remove the hardcoded HOMER address in OCI PBA space (it affects + both mem_offset fields below) */ + pgpe_hdr->gppb_mem_offset = 0x80000000 + offsetof(struct homer_st, ppmr) + + homer->ppmr.header.gppb_offset; + pgpe_hdr->gppb_len = homer->ppmr.header.gppb_len; + pgpe_hdr->gen_pstables_mem_offset = 0x80000000 + offsetof(struct homer_st, ppmr) + + homer->ppmr.header.pstables_offset; + pgpe_hdr->gen_pstables_len = homer->ppmr.header.pstables_len; + pgpe_hdr->occ_pstables_sram_addr = 0; + pgpe_hdr->occ_pstables_len = 0; + pgpe_hdr->beacon_addr = 0; + pgpe_hdr->quad_status_addr = 0; + pgpe_hdr->wof_state_address = 0; + pgpe_hdr->wof_values_address = 0; + pgpe_hdr->req_active_quad_address = 0; + pgpe_hdr->wof_table_addr = homer->ppmr.header.wof_table_offset; + pgpe_hdr->wof_table_len = homer->ppmr.header.wof_table_len; + pgpe_hdr->timebase_hz = 1866 * MHz / 64; + pgpe_hdr->doptrace_offset = homer->ppmr.header.doptrace_offset; + pgpe_hdr->doptrace_len = homer->ppmr.header.doptrace_len; + + /* Update magic numbers */ + homer->qpmr.sgpe.header.magic = 0x51504d525f312e30; // QPMR_1.0 + homer->cpmr.header.magic = 0x43504d525f322e30; // CPMR_2.0 + homer->ppmr.header.magic = 0x50504d525f312e30; // PPMR_1.0 + sgpe_hdr->magic = 0x534750455f312e30; // SGPE_1.0 + cme_hdr->magic = 0x434d455f5f312e30; // CME__1.0 + pgpe_hdr->magic = 0x504750455f312e30; // PGPE_1.0 +}
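The magic values written above are just ASCII strings stored big-endian, e.g. 0x51504d525f312e30 is "QPMR_1.0". A quick decoder for all six:

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const uint64_t magics[] = {
		0x51504d525f312e30, /* QPMR_1.0 */
		0x43504d525f322e30, /* CPMR_2.0 */
		0x50504d525f312e30, /* PPMR_1.0 */
		0x534750455f312e30, /* SGPE_1.0 */
		0x434d455f5f312e30, /* CME__1.0 */
		0x504750455f312e30, /* PGPE_1.0 */
	};

	for (size_t i = 0; i < sizeof(magics) / sizeof(magics[0]); ++i) {
		char s[9] = { 0 };
		for (int b = 0; b < 8; ++b)	/* MSB is the first character */
			s[b] = (char)(magics[i] >> (56 - 8 * b));
		printf("0x%016llx = \"%s\"\n", (unsigned long long)magics[i], s);
	}
	return 0;
}
```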
+ +const struct voltage_bucket_data * get_voltage_data(uint8_t chip) +{ + const struct voltage_kwd *voltage = NULL; + const struct voltage_bucket_data *bucket = NULL; + + uint8_t i = 0; + + /* Using LRP0 because frequencies are the same in all LRP records */ + voltage = mvpd_get_voltage_data(chip, /*lrp=*/0); + + for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) { + bucket = &voltage->buckets[i]; + if (bucket->powerbus.freq != 0) + break; + } + + /* The loop leaves bucket pointing at the last entry even when no entry is + valid, so check the frequency, not just the pointer */ + if (bucket == NULL || bucket->powerbus.freq == 0) + die("Failed to find a valid voltage data bucket.\n"); + + return bucket; +} + +static void layout_rings(uint8_t chip, struct homer_st *homer, struct xip_hw_header *hw, + uint8_t dd, uint64_t cores) +{ + static uint8_t rings_buf[300 * KiB]; + + static uint8_t work_buf1[MAX_RING_BUF_SIZE]; + static uint8_t work_buf2[MAX_RING_BUF_SIZE]; + static uint8_t work_buf3[MAX_RING_BUF_SIZE]; + + struct ring_data ring_data = { + .rings_buf = rings_buf, .rings_buf_size = sizeof(rings_buf), + .work_buf1 = work_buf1, .work_buf1_size = sizeof(work_buf1), + .work_buf2 = work_buf2, .work_buf2_size = sizeof(work_buf2), + .work_buf3 = work_buf3, .work_buf3_size = sizeof(work_buf3), + }; + + enum ring_variant ring_variant = (dd < 0x23 ? RV_BASE : RV_RL4); + + get_ppe_scan_rings(chip, hw, dd, PT_CME, &ring_data); + layout_rings_for_cme(homer, &ring_data, cores, ring_variant); + + /* Reset buffer sizes to maximum values before reusing the structure */ + ring_data.rings_buf_size = sizeof(rings_buf); + ring_data.work_buf1_size = sizeof(work_buf1); + ring_data.work_buf2_size = sizeof(work_buf2); + ring_data.work_buf3_size = sizeof(work_buf3); + get_ppe_scan_rings(chip, hw, dd, PT_SGPE, &ring_data); + layout_rings_for_sgpe(chip, homer, &ring_data, + (struct xip_sgpe_header *)((uint8_t *)hw + hw->sgpe.offset), + cores, ring_variant); +} + +/* Set the Fabric System, Group and Chip IDs into SGPE and CME headers */ +static void set_fabric_ids(uint8_t chip, struct homer_st *homer) +{ + struct cme_img_header *cme_hdr = (void *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE]; + struct sgpe_img_header *sgpe_hdr = (void *) + &homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE]; + + /* + * Location IDs have the form: + * 0:3 Group ID (loaded from ATTR_PROC_FABRIC_GROUP_ID) + * 4:6 Chip ID (loaded from ATTR_PROC_FABRIC_CHIP_ID) + * 7 0 + * 8:12 System ID (loaded from ATTR_PROC_FABRIC_SYSTEM_ID) + * 13:15 00 + * + * This is for ATTR_PROC_FABRIC_PUMP_MODE == PUMP_MODE_CHIP_IS_GROUP, + * when chip ID is actually a group ID and the "chip ID" field is zero. + */ + uint16_t location_id = chip << 12; + + cme_hdr->location_id = location_id; + sgpe_hdr->location_id = location_id; + + /* Extended addressing is supported, but it's all zeros for both chips */ + sgpe_hdr->addr_extension = 0; +} + +static void fill_homer_for_chip(uint8_t chip, struct homer_st *homer, struct xip_hw_header *hw, + uint8_t dd, uint64_t cores) +{ + enum { + CME_QM_FLAG_SYS_WOF_ENABLE = 0x1000, + PGPE_FLAG_WOF_ENABLE = 0x1000, + }; + + const OCCPstateParmBlock *oppb = (void *)homer->ppmr.occ_parm_block; + uint16_t qm_mode_flags; + uint16_t pgpe_flags; + + layout_rings(chip, homer, hw, dd, cores); + build_parameter_blocks(chip, homer, cores); + update_headers(chip, homer, cores); + + populate_epsilon_l2_scom_reg(chip, homer); + populate_epsilon_l3_scom_reg(chip, homer); + /* Update L3 Refresh Timer Control SCOM Registers */ + populate_l3_refresh_scom_reg(chip, homer, dd); + /* Populate HOMER with SCOM restore value of NCU RNG BAR SCOM Register */ + populate_ncu_rng_bar_scom_reg(chip, homer); + + /* Update flag fields in image headers */ + + qm_mode_flags = 0xE100; + pgpe_flags = 0xE032; + + if (oppb->wof.wof_enabled) { + qm_mode_flags |= CME_QM_FLAG_SYS_WOF_ENABLE; + pgpe_flags |= PGPE_FLAG_WOF_ENABLE; + } + + ((struct sgpe_img_header *)&homer->qpmr.sgpe.sram_image[INT_VECTOR_SIZE])->reserve_flags = 0x04000000; + ((struct cme_img_header *)&homer->cpmr.cme_sram_region[INT_VECTOR_SIZE])->qm_mode_flags = qm_mode_flags; + ((struct pgpe_img_header *)&homer->ppmr.pgpe_sram_img[INT_VECTOR_SIZE])->flags = pgpe_flags; + + set_fabric_ids(chip, homer); +} + +static void setup_wakeup_mode(uint8_t chip, uint64_t cores) +{ + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { + if (!IS_EC_FUNCTIONAL(i, cores)) + continue; + + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_CORE_REGS.CPPM_CPMMR // 0x200F0106 + // These bits, when set, make core wake up in HV (not UV) + [3] CPPM_CPMMR_RESERVED_2_9 = 1 + [4] CPPM_CPMMR_RESERVED_2_9 = 1 + */ + /* SCOM2 - OR, 0x200F0108 */ + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(3) | PPC_BIT(4)); + } +} + +/* 15.2 set HOMER BAR */ +static void istep_15_2(uint8_t chip, struct homer_st *homer, void *common_occ_area) +{ + write_scom(chip, 0x05012B00,
(uint64_t)homer); + write_scom(chip, 0x05012B04, (4 * MiB - 1) & ~((uint64_t)MiB - 1)); + + write_scom(chip, 0x05012B02, (uint64_t)common_occ_area); + write_scom(chip, 0x05012B06, (8 * MiB - 1) & ~((uint64_t)MiB - 1)); +} + +/* 15.3 establish EX chiplet */ +static void istep_15_3(uint8_t chip, uint64_t cores) +{ + const uint64_t group_mask = PPC_BITMASK(3,5); + + /* Assign multicast groups for cores */ + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { + const chiplet_id_t chiplet = EC00_CHIPLET_ID + i; + + if (!IS_EC_FUNCTIONAL(i, cores)) + continue; + + if ((read_scom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) + scom_and_or_for_chiplet(chip, chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); + + if ((read_scom_for_chiplet(chip, chiplet, 0xF0002) & group_mask) == group_mask) + scom_and_or_for_chiplet(chip, chiplet, 0xF0002, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BIT(5) | PPC_BITMASK(19,21)); + } + + for (int i = 0; i < MAX_QUADS_PER_CHIP; i++) { + const chiplet_id_t chiplet = EP00_CHIPLET_ID + i; + + if (!IS_EQ_FUNCTIONAL(i, cores)) + continue; + + if ((read_scom_for_chiplet(chip, chiplet, 0xF0001) & group_mask) == group_mask) + scom_and_or_for_chiplet(chip, chiplet, 0xF0001, + ~(group_mask | PPC_BITMASK(16,23)), + PPC_BITMASK(19,21)); + } + + /* Writing OCC CCSR */ + write_scom(chip, 0x0006C090, cores); + + /* Writing OCC QCSR */ + uint64_t qcsr = 0; + for (int i = 0; i < MAX_CMES_PER_CHIP; i++) { + if (IS_EX_FUNCTIONAL(i, cores)) + qcsr |= PPC_BIT(i); + } + write_scom(chip, 0x0006C094, qcsr); + + if (chip != 0) { + /* + * PU_OCB_OCI_QSSR_SCOM2 (OR) + * Start no CMEs on slave CPUs (set bit implies stop state). + */ + write_scom(chip, 0x0006C09A, PPC_BITMASK(0, 11) | /* CMEs */ + PPC_BITMASK(14, 19) /* EQs */); + } +} + +/* + * 15.4 start STOP engine + * + * SGPE startup is actually done as part of istep 21.1 after all + * preparations here to not have to restart it there. 
+ */ +static void istep_15_4(uint8_t chip, uint64_t cores) +{ + /* Initialize the PFET controllers */ + for (int i = 0; i < MAX_CORES_PER_CHIP; i++) { + if (IS_EC_FUNCTIONAL(i, cores)) { + // Periodic core quiesce workaround + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_CORE_REGS.CPPM_CPMMR (WOR) // 0x200F0108 + [all] 0 + [2] CPPM_CPMMR_RESERVED_2 = 1 + */ + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F0108, + PPC_BIT(2)); + + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFDLY // 0x200F011B + [all] 0 + [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded + [4-7] PPM_PFDLY_POWUP_DLY = 0x9 + */ + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011B, + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); + /* + TP.TPCHIP.NET.PCBSLEC14.PPMC.PPM_COMMON_REGS.PPM_PFOF // 0x200F011D + [all] 0 + [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 + [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 + */ + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, 0x200F011D, + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); + } + + if ((i % 4) == 0 && IS_EQ_FUNCTIONAL(i/4, cores)) { + /* + TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFDLY // 0x100F011B + [all] 0 + [0-3] PPM_PFDLY_POWDN_DLY = 0x9 // 250ns, converted and encoded + [4-7] PPM_PFDLY_POWUP_DLY = 0x9 + */ + write_scom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011B, + PPC_PLACE(0x9, 0, 4) | PPC_PLACE(0x9, 4, 4)); + /* + TP.TPCHIP.NET.PCBSLEP03.PPMQ.PPM_COMMON_REGS.PPM_PFOF // 0x100F011D + [all] 0 + [0-3] PPM_PFOFF_VDD_VOFF_SEL = 0x8 + [4-7] PPM_PFOFF_VCS_VOFF_SEL = 0x8 + */ + write_scom_for_chiplet(chip, EP00_CHIPLET_ID + i/4, 0x100F011D, + PPC_PLACE(0x8, 0, 4) | PPC_PLACE(0x8, 4, 4)); + } + } + + /* Condition the PBA back to the base boot configuration */ + pba_reset(chip); + + /* + * TODO: this is tested only if (ATTR_VDM_ENABLED || ATTR_IVRM_ENABLED), + * both are set (or not) in 15.1 - p9_pstate_parameter_block(). For now + * assume they are enabled. + */ + /* TP.TPCHIP.TPC.ITR.FMU.KVREF_AND_VMEAS_MODE_STATUS_REG // 0x01020007 + if ([16] == 0): die() + */ + if (!(read_scom(chip, 0x01020007) & PPC_BIT(16))) + die("VDMs/IVRM are enabled but necessary VREF calibration failed\n"); + + /* First mask bit 7 in OIMR and then clear bit 7 in OISR + TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OIMR0 (OR) // 0x0006C006 + [all] 0 + [7] OCB_OCI_OISR0_GPE2_ERROR = 1 + TP.TPCHIP.OCC.OCI.OCB.OCB_OCI_OISR0 (CLEAR) // 0x0006C001 + [all] 0 + [7] OCB_OCI_OISR0_GPE2_ERROR = 1 + */ + write_scom(chip, 0x0006C006, PPC_BIT(7)); + write_scom(chip, 0x0006C001, PPC_BIT(7)); + + /* + * Setup the SGPE Timer Selects + * These hardcoded values are assumed by the SGPE Hcode for setting up + * the FIT and Watchdog values. + TP.TPCHIP.OCC.OCI.GPE3.GPETSEL // 0x00066000 + [all] 0 + [0-3] GPETSEL_FIT_SEL = 0x1 // FIT - fixed interval timer + [4-7] GPETSEL_WATCHDOG_SEL = 0xA + */ + write_scom(chip, 0x00066000, PPC_PLACE(0x1, 0, 4) | PPC_PLACE(0xA, 4, 4)); + + /* Clear error injection bits + *0x0006C18B // Undocumented, PU_OCB_OCI_OCCFLG2_CLEAR + [all] 0 + [30] 1 // OCCFLG2_SGPE_HCODE_STOP_REQ_ERR_INJ + */ + write_scom(chip, PU_OCB_OCI_OCCFLG2_CLEAR, PPC_BIT(30)); +} + +/* + * This logic is for SMF disabled only! + */ +void build_homer_image(void *homer_bar, void *common_occ_area, uint64_t nominal_freq[]) +{ + const uint8_t chips = fsi_get_present_chips(); + + struct mmap_helper_region_device mdev = {0}; + struct homer_st *homer = homer_bar; + uint8_t dd = get_dd(); // XXX: does this need to be chip-specific? 
+ int this_core = -1; + uint64_t cores[MAX_CHIPS] = { + get_available_cores(0, &this_core), + (chips & 0x02) ? get_available_cores(1, NULL) : 0, + }; + struct xip_hw_header *hw = xmalloc(1 * MiB); + uint8_t *hw_addr = (void *)hw; + + if (this_core == -1) + die("Couldn't find an active core\n"); + + printk(BIOS_DEBUG, "DD%2.2x, boot core: %d\n", dd, this_core); + + /* HOMER must be aligned to 4 MiB because CME HRMOR has the 2 MiB bit set */ + if (!IS_ALIGNED((uint64_t) homer_bar, 4 * MiB)) + die("HOMER (%p) is not aligned to 4MB\n", homer_bar); + + memset(homer_bar, 0, 4 * MiB); + + /* + * This will work as long as we don't call mmap(). mmap() calls + * mem_pool_alloc() which doesn't check if mdev->pool is valid or at least + * not NULL. + */ + mount_part_from_pnor("HCODE", &mdev); + rdev_readat(&mdev.rdev, hw, 0, 1 * MiB); + + assert(hw->magic == XIP_MAGIC_HW); + assert(hw->image_size <= 1 * MiB); + + build_sgpe(homer, (struct xip_sgpe_header *)(hw_addr + hw->sgpe.offset), dd); + build_self_restore(homer, (struct xip_restore_header *)(hw_addr + hw->restore.offset), + dd, cores[0]); + build_cme(homer, (struct xip_cme_header *)(hw_addr + hw->cme.offset), dd); + build_pgpe(homer, (struct xip_pgpe_header *)(hw_addr + hw->pgpe.offset), dd); + + load_occ_image_to_homer(homer); + + /* + * Until this point, only the self restore part is CPU specific, so use the + * current state of the first HOMER image as a base for the second one. + */ + if (chips & 0x02) { + struct cme_img_header *hdr; + + memcpy(&homer[1], &homer[0], sizeof(*homer)); + + /* Patch part of data initialized by build_cme() */ + hdr = (struct cme_img_header *)&homer[1].cpmr.cme_sram_region[INT_VECTOR_SIZE]; + hdr->cpmr_phy_addr = (uint64_t)&homer[1] | 2 * MiB; + hdr->unsec_cpmr_phy_addr = hdr->cpmr_phy_addr; + + /* Override data from the other CPU */ + build_self_restore(&homer[1], + (struct xip_restore_header *)(hw_addr + hw->restore.offset), + dd, cores[1]); + } + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + fill_homer_for_chip(chip, &homer[chip], hw, dd, cores[chip]); + nominal_freq[chip] = get_voltage_data(chip)->nominal.freq * MHz; + } + + free(hw); + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + setup_wakeup_mode(chip, cores[chip]); + } + + report_istep(15, 2); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + istep_15_2(chip, &homer[chip], common_occ_area); + } + + report_istep(15, 3); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + istep_15_3(chip, cores[chip]); + } + + report_istep(15, 4); + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + istep_15_4(chip, cores[chip]); + } + + /* Boot OCC here and activate SGPE at the same time */ + istep_21_1(chips, homer, cores); + + istep_16_1(this_core); +}
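The cpmr_phy_addr patch above computes (uint64_t)&homer[1] | 2 * MiB. Since HOMER is 4 MiB aligned (enforced by the die() above), the 2 MiB bit of the base is guaranteed to be clear, so the OR is equivalent to adding the CPMR offset within HOMER. A two-line demonstration, with a made-up base address:

```c
#include <assert.h>
#include <stdint.h>

#define MiB (1024ULL * 1024ULL)

int main(void)
{
	/* Hypothetical, 4 MiB aligned HOMER base address */
	uint64_t homer_base = 0x7C000000;

	assert((homer_base & (4 * MiB - 1)) == 0);
	/* With a 4 MiB aligned base the 2 MiB bit is clear, so OR == ADD */
	assert((homer_base | 2 * MiB) == homer_base + 2 * MiB);
	return 0;
}
```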
diff --git a/src/soc/ibm/power9/homer.h b/src/soc/ibm/power9/homer.h new file mode 100644 index 00000000000..f774f7d92fc --- /dev/null +++ b/src/soc/ibm/power9/homer.h @@ -0,0 +1,333 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_HOMER_H +#define __SOC_IBM_POWER9_HOMER_H + +#include + +#include + +/* All fields are big-endian */ + +#define HOMER_ONE_REGION_SIZE (1 * MiB) + +/* + * OCC complex shares 768 kB SRAM, according to Figure 23-3 + * https://wiki.raptorcs.com/w/images/c/ce/POWER9_um_OpenPOWER_v21_10OCT2019_pub.pdf + */ +#define SGPE_SRAM_IMG_SIZE (74 * KiB) +#define PGPE_SRAM_IMG_SIZE (50 * KiB) + +/* According to the above figure, CMEs have 32 kB SRAM each, how does it fit? */ +#define CME_SRAM_IMG_SIZE (64 * KiB) +#define GPE_BOOTLOADER_SIZE (1 * KiB) + +/* + * This is how CACHE_SCOM_REGION_SIZE is defined in hostboot. On the other hand, + * hostboot defines that a quad has 256 entries, 16 bytes each. This gives 4 kB + * per quad, and there are 6 quads (maximum) on a POWER9 CPU, which gives 24 kB + * total. One of the values is obviously wrong, but because this region is + * immediately followed by a padding it should not overwrite anything important. + * This is one of the reasons to clear the whole HOMER, not just the used parts. + */ +#define CACHE_SCOM_REGION_SIZE (6 * KiB) +#define CACHE_SCOM_REGION_OFFSET (128 * KiB) +#define CACHE_SCOM_AUX_SIZE (64 * KiB) +#define CACHE_SCOM_AUX_OFFSET (512 * KiB) +#define SELF_RESTORE_REGION_SIZE (9 * KiB) +#define CORE_SCOM_RESTORE_SIZE (6 * KiB) +#define CORE_SCOM_RESTORE_OFFSET (256 * KiB) +#define PGPE_AUX_TASK_SIZE (2 * KiB) +#define PGPE_OCC_SHARED_SRAM_SIZE (2 * KiB) +#define PGPE_DOPTRACE_SIZE (64 * KiB) +#define PGPE_DOPTRACE_OFFSET (64 * KiB) +#define OCC_PARAM_BLOCK_REGION_SIZE (16 * KiB) +#define OCC_PARAM_BLOCK_REGION_OFFSET (128 * KiB) +#define PSTATE_OUTPUT_TABLES_SIZE (16 * KiB) +#define OCC_WOF_TABLES_SIZE (256 * KiB) +#define OCC_WOF_TABLES_OFFSET (768 * KiB) +#define PPMR_HEADER_SIZE (1 * KiB) + +#define SCOM_RESTORE_ENTRY_SIZE 16 // 4B header/pad, 4B address, 8B data +#define QUAD_SCOM_RESTORE_REGS_PER_QUAD 256 +#define QUAD_SCOM_RESTORE_SIZE_PER_QUAD \ + (SCOM_RESTORE_ENTRY_SIZE * QUAD_SCOM_RESTORE_REGS_PER_QUAD) +#define CORE_SCOM_RESTORE_REGS_PER_CORE 16 +#define CORE_SCOM_RESTORE_SIZE_PER_CORE \ + (SCOM_RESTORE_ENTRY_SIZE * CORE_SCOM_RESTORE_REGS_PER_CORE) + +/* Offset from HOMER to OCC Host Data Area */ +#define HOMER_OFFSET_TO_OCC_HOST_DATA (768 * KiB) + +/* =================== QPMR =================== */ + +struct qpmr_header { + uint64_t magic; /* "QPMR_1.0" */ + uint32_t l1_offset; + uint32_t reserved; + uint32_t l2_offset; + uint32_t l2_len; + uint32_t build_date; + uint32_t build_ver; + uint64_t reserved_flags; + uint32_t img_offset; + uint32_t img_len; + uint32_t common_ring_offset; + uint32_t common_ring_len; + uint32_t common_ovrd_offset; + uint32_t common_ovrd_len; + uint32_t spec_ring_offset; + uint32_t spec_ring_len; + uint32_t scom_offset; + uint32_t scom_len; + uint32_t aux_offset; + uint32_t aux_len; + uint32_t stop_ffdc_offset; + uint32_t stop_ffdc_len; + uint32_t boot_prog_code; + uint32_t sram_img_size; + uint32_t max_quad_restore_entry; + uint32_t enable_24x7_ima; + uint32_t sram_region_start; + uint32_t sram_region_size; +} __attribute__((packed, aligned(512))); + +/* This header is part of the SRAM image, it starts after interrupt vectors.
*/ +#define INT_VECTOR_SIZE 384 +struct sgpe_img_header { + uint64_t magic; + uint32_t reset_addr; + uint32_t reserve1; + uint32_t ivpr_addr; + uint32_t timebase_hz; + uint32_t build_date; + uint32_t build_ver; + uint32_t reserve_flags; + uint16_t location_id; + uint16_t addr_extension; + uint32_t cmn_ring_occ_offset; + uint32_t cmn_ring_ovrd_occ_offset; + uint32_t spec_ring_occ_offset; + uint32_t scom_offset; + uint32_t scom_mem_offset; + uint32_t scom_mem_len; + uint32_t aux_offset; + uint32_t aux_len; + uint32_t aux_control; + uint32_t reserve4; + uint64_t chtm_mem_cfg; +}; + +struct sgpe_st { + struct qpmr_header header; + uint8_t l1_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t l2_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t sram_image[SGPE_SRAM_IMG_SIZE]; +}; + +check_member(sgpe_st, l1_bootloader, 512); + +struct qpmr_st { + struct sgpe_st sgpe; + uint8_t pad1[CACHE_SCOM_REGION_OFFSET - sizeof(struct sgpe_st)]; + uint8_t cache_scom_region[CACHE_SCOM_REGION_SIZE]; + uint8_t pad2[CACHE_SCOM_AUX_OFFSET + -(CACHE_SCOM_REGION_OFFSET + CACHE_SCOM_REGION_SIZE)]; + uint8_t aux[CACHE_SCOM_AUX_SIZE]; +}; + +check_member(qpmr_st, cache_scom_region, 128 * KiB); +check_member(qpmr_st, aux, 512 * KiB); + +/* =================== CPMR =================== */ + +#define CPMR_VDM_PER_QUAD 0x43504D525F322E30ull + +struct cpmr_header { + uint32_t attn_opcodes[2]; + uint64_t magic; /* "CPMR_2.0" */ + uint32_t build_date; + uint32_t version; + uint8_t reserved_flags[4]; + uint8_t self_restore_ver; + uint8_t stop_api_ver; + uint8_t urmor_fix; + uint8_t fused_mode_status; + uint32_t img_offset; + uint32_t img_len; + uint32_t cme_common_ring_offset; + uint32_t cme_common_ring_len; + uint32_t cme_pstate_offset; + uint32_t cme_pstate_len; + uint32_t core_spec_ring_offset; // = real offset / 32 + uint32_t core_spec_ring_len; // = real length / 32 + uint32_t core_scom_offset; + uint32_t core_scom_len; + uint32_t core_self_restore_offset; + uint32_t core_self_restore_len; + uint32_t core_max_scom_entry; + uint32_t quad_pstate_offset[MAX_QUADS_PER_CHIP]; +} __attribute__((packed, aligned(256))); + +struct smf_core_self_restore { + uint32_t thread_restore_area[4][512 / sizeof(uint32_t)]; + uint32_t thread_save_area[4][256 / sizeof(uint32_t)]; + uint32_t core_restore_area[512 / sizeof(uint32_t)]; + uint32_t core_save_area[512 / sizeof(uint32_t)]; +}; + +/* This header is part of SRAM image, it starts after interrupt vectors. 
*/ +struct cme_img_header { + uint64_t magic; + uint32_t hcode_offset; + uint32_t hcode_len; + uint32_t common_ring_offset; + uint32_t cmn_ring_ovrd_offset; + uint32_t common_ring_len; + uint32_t pstate_region_offset; + uint32_t pstate_region_len; + uint32_t core_spec_ring_offset; // = real offset / 32 + uint32_t max_spec_ring_len; // = real length / 32 + uint32_t scom_offset; // = real offset / 32 + uint32_t scom_len; + uint32_t mode_flags; + uint16_t location_id; + uint16_t qm_mode_flags; + uint32_t timebase_hz; + uint64_t cpmr_phy_addr; + uint64_t unsec_cpmr_phy_addr; + uint32_t pstate_offset; // = real offset / 32 + uint32_t custom_length; // = real length / 32 +}; + +struct cpmr_st { + struct cpmr_header header; + uint8_t exe[SELF_RESTORE_REGION_SIZE - sizeof(struct cpmr_header)]; + struct smf_core_self_restore core_self_restore[MAX_CORES_PER_CHIP]; + uint8_t pad[CORE_SCOM_RESTORE_OFFSET - + (SELF_RESTORE_REGION_SIZE + + MAX_CORES_PER_CHIP * sizeof(struct smf_core_self_restore))]; + uint8_t core_scom[CORE_SCOM_RESTORE_SIZE]; + uint8_t cme_sram_region[CME_SRAM_IMG_SIZE]; +}; + +check_member(cpmr_st, core_self_restore, 9 * KiB); +check_member(cpmr_st, core_scom, 256 * KiB); + +/* =================== PPMR =================== */ + +struct ppmr_header { + uint64_t magic; + uint32_t l1_offset; + uint32_t reserved; + uint32_t l2_offset; + uint32_t l2_len; + uint32_t build_date; + uint32_t build_ver; + uint64_t reserved_flags; + uint32_t hcode_offset; + uint32_t hcode_len; + uint32_t gppb_offset; + uint32_t gppb_len; + uint32_t lppb_offset; + uint32_t lppb_len; + uint32_t oppb_offset; + uint32_t oppb_len; + uint32_t pstables_offset; + uint32_t pstables_len; + uint32_t sram_img_size; + uint32_t boot_prog_code; + uint32_t wof_table_offset; + uint32_t wof_table_len; + uint32_t aux_task_offset; + uint32_t aux_task_len; + uint32_t doptrace_offset; + uint32_t doptrace_len; + uint32_t sram_region_start; + uint32_t sram_region_size; +} __attribute__((packed, aligned(512))); + +/* This header is part of SRAM image, it starts after interrupt vectors. 
*/ +struct pgpe_img_header { + uint64_t magic; + uint32_t sys_reset_addr; + uint32_t shared_sram_addr; + uint32_t ivpr_addr; + uint32_t shared_sram_len; + uint32_t build_date; + uint32_t build_ver; + uint16_t flags; + uint16_t reserve1; + uint32_t timebase_hz; + uint32_t gppb_sram_addr; + uint32_t hcode_len; + uint32_t gppb_mem_offset; + uint32_t gppb_len; + uint32_t gen_pstables_mem_offset; + uint32_t gen_pstables_len; + uint32_t occ_pstables_sram_addr; + uint32_t occ_pstables_len; + uint32_t beacon_addr; + uint32_t quad_status_addr; + uint32_t wof_state_address; + uint32_t req_active_quad_address; + uint32_t wof_table_addr; + uint32_t wof_table_len; + uint32_t core_throttle_assert_cnt; + uint32_t core_throttle_deassert_cnt; + uint32_t aux_controls; + uint32_t optrace_pointer; + uint32_t doptrace_offset; + uint32_t doptrace_len; + uint32_t wof_values_address; +}; + +struct ppmr_st { + struct ppmr_header header; + uint8_t pad0[PPMR_HEADER_SIZE - sizeof(struct ppmr_header)]; + uint8_t l1_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t l2_bootloader[GPE_BOOTLOADER_SIZE]; + uint8_t pgpe_sram_img[PGPE_SRAM_IMG_SIZE]; + uint8_t aux_task[PGPE_AUX_TASK_SIZE]; + uint8_t pad1[PGPE_DOPTRACE_OFFSET - (PPMR_HEADER_SIZE + + GPE_BOOTLOADER_SIZE + GPE_BOOTLOADER_SIZE + + PGPE_SRAM_IMG_SIZE + PGPE_AUX_TASK_SIZE)]; + /* Deep Operational Trace */ + uint8_t doptrace[PGPE_DOPTRACE_SIZE]; + /* + * Two following fields in hostboot have different sizes, but are padded + * to 16kB each anyway. There are two consecutive paddings between + * pstate_table and wof_tables in hostboot. + */ + uint8_t occ_parm_block[OCC_PARAM_BLOCK_REGION_SIZE]; + uint8_t pstate_table[PSTATE_OUTPUT_TABLES_SIZE]; + uint8_t pad2[OCC_WOF_TABLES_OFFSET - (OCC_PARAM_BLOCK_REGION_OFFSET + + OCC_PARAM_BLOCK_REGION_SIZE + PSTATE_OUTPUT_TABLES_SIZE)]; + uint8_t wof_tables[OCC_WOF_TABLES_SIZE]; +}; + +check_member(ppmr_st, occ_parm_block, 128 * KiB); +check_member(ppmr_st, pstate_table, 144 * KiB); +check_member(ppmr_st, wof_tables, 768 * KiB); + +struct homer_st { + uint8_t occ_host_area[HOMER_ONE_REGION_SIZE]; + struct qpmr_st qpmr; + uint8_t pad_qpmr[HOMER_ONE_REGION_SIZE - sizeof(struct qpmr_st)]; + struct cpmr_st cpmr; + uint8_t pad_cpmr[HOMER_ONE_REGION_SIZE - sizeof(struct cpmr_st)]; + struct ppmr_st ppmr; + uint8_t pad_ppmr[HOMER_ONE_REGION_SIZE - sizeof(struct ppmr_st)]; +}; + +check_member(homer_st, qpmr, 1 * MiB); +check_member(homer_st, cpmr, 2 * MiB); +check_member(homer_st, ppmr, 3 * MiB); + +struct voltage_bucket_data; + +void build_parameter_blocks(uint8_t chip, struct homer_st *homer, uint64_t functional_cores); +void configure_xive(int tgt_core); +const struct voltage_bucket_data * get_voltage_data(uint8_t chip); + +#endif /* __SOC_IBM_POWER9_HOMER_H */ diff --git a/src/soc/ibm/power9/i2c.c b/src/soc/ibm/power9/i2c.c new file mode 100644 index 00000000000..af214fb273b --- /dev/null +++ b/src/soc/ibm/power9/i2c.c @@ -0,0 +1,322 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Debugging every access takes too much time */ +#define SKIP_SCOM_DEBUG + +#include +#include +#include +#include +#include + +#include "fsi.h" + +/* Base FSI address for registers of an FSI I2C master */ +#define I2C_HOST_MASTER_BASE_ADDR 0xA0004 + +#define FIFO_REG 0 +#define CMD_REG 1 +#define MODE_REG 2 +#define STATUS_REG 7 +#define RESET_REG 7 +#define RES_ERR_REG 8 + +// CMD register +#define LEN_PLACE(x) PPC_PLACE((x), 16, 16) +#define ADDR_PLACE(x) PPC_PLACE((x), 8, 7) +#define READ_NOT_WRITE 0x0001000000000000 +#define START 0x8000000000000000 
+#define WITH_ADDR 0x4000000000000000 +#define READ_CONT 0x2000000000000000 +#define STOP 0x1000000000000000 + +// STATUS register +#define DATA_REQUEST 0x0200000000000000 +#define CMD_COMPLETE 0x0100000000000000 +#define FIFO_COUNT_FLD 0x0000000F00000000 +#define BUSY 0x0000030000000000 +#define SCL 0x0000080000000000 +#define SDA 0x0000040000000000 +#define UNRECOVERABLE 0xFC80000000000000 + +#define I2C_MAX_FIFO_CAPACITY 8 + +enum i2c_type { + HOST_I2C_CPU0, // I2C via XSCOM (first CPU) + HOST_I2C_CPU1, // I2C via XSCOM (second CPU) + FSI_I2C, // I2C via FSI (second CPU) +}; + +/* Returns -1 on SMBus error, otherwise returns 0 */ +static int get_spd(uint8_t bus, u8 *spd, u8 addr) +{ + /* + * Second half of DIMMs is on the second I2C port. platform_i2c_transfer() + * changes this automatically for SPD and RCD, but not for SPD page select. + * For those commands, set MSB that is later masked out. + */ + uint8_t fix = addr & 0x80; + + if (i2c_read_bytes(bus, addr, 0, spd, SPD_PAGE_LEN) < 0) { + printk(BIOS_INFO, "No memory DIMM at address %02X\n", addr); + return -1; + } + + /* DDR4 SPD is 512 bytes. Switch to page 1 */ + i2c_writeb(bus, SPD_PAGE_1 | fix, 0, 0); + + /* No need to check again if DIMM is present */ + i2c_read_bytes(bus, addr, 0, spd + SPD_PAGE_LEN, SPD_PAGE_LEN); + /* Restore to page 0 */ + i2c_writeb(bus, SPD_PAGE_0 | fix, 0, 0); + + return 0; +} + +static u8 spd_data[MAX_CHIPS][CONFIG_DIMM_MAX][CONFIG_DIMM_SPD_SIZE]; + +void get_spd_i2c(uint8_t bus, struct spd_block *blk) +{ + u8 i; + u8 chip = bus / I2C_BUSES_PER_CPU; + + for (i = 0; i < CONFIG_DIMM_MAX; i++) { + if (blk->addr_map[i] == 0) { + blk->spd_array[i] = NULL; + continue; + } + + if (get_spd(bus, spd_data[chip][i], blk->addr_map[i]) == 0) + blk->spd_array[i] = spd_data[chip][i]; + else + blk->spd_array[i] = NULL; + } + + blk->len = SPD_PAGE_LEN_DDR4; +} + +/* The four functions below use 64-bit address and data as for SCOM and do + * the translation for FSI, which is 32-bit (as is the actual I2C interface). + * They also interpret the address as a register number for FSI I2C. */ + +static void write_i2c(enum i2c_type type, uint64_t addr, uint64_t data) +{ + if (type != FSI_I2C) + write_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr, data); + else + write_fsi_i2c(/*chip=*/1, addr, data >> 32, /*size=*/4); +} + +static uint64_t read_i2c(enum i2c_type type, uint64_t addr) +{ + if (type != FSI_I2C) + return read_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr); + else + return (uint64_t)read_fsi_i2c(/*chip=*/1, addr, /*size=*/4) << 32; +} + +static void write_i2c_byte(enum i2c_type type, uint64_t addr, uint8_t data) +{ + if (type != FSI_I2C) + write_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr, (uint64_t)data << 56); + else + write_fsi_i2c(/*chip=*/1, addr, (uint32_t)data << 24, /*size=*/1); +} + +static uint8_t read_i2c_byte(enum i2c_type type, uint64_t addr) +{ + if (type != FSI_I2C) + return read_scom(type == HOST_I2C_CPU0 ? 0 : 1, addr) >> 56; + else + return read_fsi_i2c(/*chip=*/1, addr, /*size=*/1) >> 24; +} + +/* + * There are 4 buses/engines, but the function accepts bus [0-8] in order to + * allow specifying buses of the second CPU and the FSI I2C bus while still + * following coreboot's prototype for this function. [0-3] are buses of the + * first CPU, [4-7] of the second one (0-3 correspondingly) and 8 is FSI I2C of + * the second CPU.
+ */ +int platform_i2c_transfer(unsigned int bus, struct i2c_msg *segment, + int seg_count) +{ + int i; + uint64_t r; + + enum i2c_type type = HOST_I2C_CPU0; + if (bus >= I2C_BUSES_PER_CPU) { + bus -= I2C_BUSES_PER_CPU; + type = HOST_I2C_CPU1; + } + if (bus >= I2C_BUSES_PER_CPU) { + bus -= I2C_BUSES_PER_CPU; + type = FSI_I2C; + + /* There seems to be only one engine on FSI I2C */ + if (bus != 0) { + printk(BIOS_ERR, "FSI I2C bus out of range (%d)\n", bus); + return -1; + } + } + + if (bus >= I2C_BUSES_PER_CPU) { + printk(BIOS_ERR, "I2C bus out of range (%d)\n", bus); + return -1; + } + + uint32_t base = (type == FSI_I2C ? 0 : I2C_HOST_MASTER_BASE_ADDR | (bus << 12)); + /* Addition is fine, because there will be no carry in bus number bits */ + uint32_t fifo_reg = base + FIFO_REG; + uint32_t cmd_reg = base + CMD_REG; + uint32_t mode_reg = base + MODE_REG; + uint32_t status_reg = base + STATUS_REG; + uint32_t res_err_reg = base + RES_ERR_REG; + + uint64_t clear_err = (type != FSI_I2C ? PPC_BIT(0) : 0); + + /* + * Divisor fields in this register are poorly documented: + * + * Bits SCOM Field Mnemonic: Description + * 0:7 RWX BIT_RATE_DIVISOR_3: Decides the speed on the I2C bus. + * 8:9 RWX BIT_RATE_DIVISOR_3: Decides the speed on the I2C bus. + * 10:15 RWX BIT_RATE_DIVISOR_3: Decides the speed on the I2C bus. + * + * After issuing a fast command (SCOM A3000) they change like this: + * - 100 kHz - previous value is not changed + * - 50 kHz - 0x000B + * - 3400 kHz - 0x005E + * - 400 kHz - 0x0177 + * + * Use the value for 400 kHz as it is the one used by Hostboot. + */ + uint16_t bit_rate_div = 0x0177; // 400kHz by default + if (bus == 2) { + /* + * Skiboot computes the value as: + * + * (((clock-frequency / bus-frequency) - 1) / 4) + * + * Frequencies are specified in the corresponding device tree entries. + * clock-frequency is from the I2C master and bus-frequency is from the + * I2C bus. + * + * At least for the TPM on bus #2 the default value doesn't work + * (it results in a NACK error). + */ + bit_rate_div = 0x0048; + } + + write_i2c(type, res_err_reg, clear_err); + + for (i = 0; i < seg_count; i++) { + unsigned int len; + uint64_t read_not_write, stop, read_cont, port; + + /* Only the read flag is supported for now, implement other flags when needed */ + if (segment[i].flags & ~I2C_M_RD) { + printk(BIOS_ERR, "Unsupported I2C flags (0x%4.4x)\n", segment[i].flags); + return -1; + } + + read_not_write = (segment[i].flags & I2C_M_RD) ? READ_NOT_WRITE : 0; + stop = (i == seg_count - 1) ? STOP : 0; + read_cont = (!stop && !read_not_write) ? READ_CONT : 0; + port = segment[i].slave & 0x80 ?
1 : 0; + + write_i2c(type, mode_reg, + PPC_PLACE(bit_rate_div, 0, 16) | PPC_PLACE(port, 16, 6)); + + write_i2c(type, res_err_reg, clear_err); + write_i2c(type, cmd_reg, + START | stop | WITH_ADDR | read_not_write | read_cont | + ADDR_PLACE(segment[i].slave) | + LEN_PLACE(segment[i].len)); + + for (len = 0; len < segment[i].len; len++) { + r = read_i2c(type, status_reg); + + if (read_not_write) { + /* Read */ + while ((r & (DATA_REQUEST | FIFO_COUNT_FLD)) == 0) { + if (r & UNRECOVERABLE) { + /* This may be DIMM not present so use low verbosity */ + printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); + return -1; + } + r = read_i2c(type, status_reg); + } + + segment[i].buf[len] = read_i2c_byte(type, fifo_reg); + } else { + /* Write */ + while ((r & DATA_REQUEST) == 0) { + if (r & UNRECOVERABLE) { + printk(BIOS_INFO, "I2C transfer failed (0x%16.16llx)\n", r); + return -1; + } + r = read_i2c(type, status_reg); + } + + write_i2c_byte(type, fifo_reg, segment[i].buf[len]); + } + } + + r = read_i2c(type, status_reg); + while ((r & CMD_COMPLETE) == 0) { + if (r & UNRECOVERABLE) { + printk(BIOS_INFO, "I2C transfer failed to complete (0x%16.16llx)\n", r); + return -1; + } + r = read_i2c(type, status_reg); + } + + } + + return 0; +} + +/* Defined in fsi.h */ +void fsi_i2c_init(uint8_t chips) +{ + uint64_t status; + + /* Nothing to do if second CPU isn't present */ + if (!(chips & 0x02)) + return; + + /* + * Sometimes I2C status looks like 0x_____8__ (i.e., SCL is set, but + * not SDA), which indicates I2C hardware is in a messed up state that + * it won't leave on its own. Sending an additional STOP *before* reset + * addresses this and doesn't hurt when I2C isn't broken. + */ + write_i2c(FSI_I2C, CMD_REG, STOP); + + /* Reset I2C */ + write_i2c(FSI_I2C, RESET_REG, 0); + + /* Wait for SCL */ + status = read_i2c(FSI_I2C, STATUS_REG); + while ((status & SCL) == 0) { + if (status & UNRECOVERABLE) + die("Unrecoverable I2C error while waiting for SCL: 0x%016llx\n", + status); + status = read_i2c(FSI_I2C, STATUS_REG); + } + + /* Send STOP command */ + write_i2c(FSI_I2C, CMD_REG, STOP); + + status = read_i2c(FSI_I2C, STATUS_REG); + while ((status & CMD_COMPLETE) == 0) { + if (status & UNRECOVERABLE) + die("Unrecoverable I2C error on STOP: 0x%016llx\n", status); + status = read_i2c(FSI_I2C, STATUS_REG); + } + + if ((status & (SCL | SDA | BUSY)) != (SCL | SDA)) + die("Invalid I2C state after initialization: 0x%016llx\n", status); +} diff --git a/src/soc/ibm/power9/int_vectors.S b/src/soc/ibm/power9/int_vectors.S new file mode 100644 index 00000000000..455d68b71ba --- /dev/null +++ b/src/soc/ibm/power9/int_vectors.S @@ -0,0 +1,172 @@ +## SPDX-License-Identifier: GPL-2.0-only + +/* Load an immediate 64-bit value into a register */ +#define LOAD_IMM64(r, e) \ + lis r,(e)@highest; \ + ori r,r,(e)@higher; \ + rldicr r,r, 32, 31; \ + oris r,r, (e)@h; \ + ori r,r, (e)@l;
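LOAD_IMM64 assembles a 64-bit immediate from 16-bit pieces: lis/ori build the upper word (lis sign-extends), rldicr rotates it into the top half while clearing the low word, and oris/ori fill in the rest. A C model of the sequence, one statement per instruction, handy for convincing yourself that the sign extension from lis is harmless:

```c
#include <assert.h>
#include <stdint.h>

/* Model of LOAD_IMM64 from int_vectors.S */
static uint64_t load_imm64(uint64_t e)
{
	uint64_t r;

	r = (uint64_t)(int64_t)(int16_t)(e >> 48) << 16; /* lis    r, e@highest */
	r |= (e >> 32) & 0xFFFF;                         /* ori    r, r, e@higher */
	r <<= 32;                                        /* rldicr r, r, 32, 31 */
	r |= e & 0xFFFF0000;                             /* oris   r, r, e@h */
	r |= e & 0xFFFF;                                 /* ori    r, r, e@l */
	return r;
}

int main(void)
{
	assert(load_imm64(0x00060302031CDD00ULL) == 0x00060302031CDD00ULL);
	/* Top bit set: lis sign-extends, rldicr's mask discards the garbage */
	assert(load_imm64(0xFFFFFFFFFFFFFFFFULL) == 0xFFFFFFFFFFFFFFFFULL);
	return 0;
}
```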
+ +/* + * Macro to check that code fits in the specified size. For proper behavior + * this requires -Wa,--fatal-warnings. The value is saved into the ".comment" + * section to be discarded during linking. + */ +.macro int_vector_check_size vec, max_size +.pushsection ".comment" +.byte 255 - \max_size + (\vec\()_end - \vec) +.popsection +.endm + +.section ".text", "ax", %progbits +/* + * System reset vector (0x100) + * + * Main thread: + * - reload r1 and r2 from saved state + * - add saved TB value to the current value (self-restore took some time) + * - TB can't be written with one mtspr + * - have to use 3 writes to deal with possible overflow of lower half + * - move NIA and MSR to HSRR0/1 + * - return from hypervisor interrupt + * - due to clobbers in inline assembly in cpu_winkle all other registers are + * reloaded by the compiler + * - contents of vector and floating point registers are lost + * + * Secondary threads: + * - copy MSR from main thread + * - return into stack-less secondary_entry() + * - can't loop in this handler, it will be overwritten by the payload + * - must also load TOC pointer to access global data + */ +.globl sys_reset_int +sys_reset_int: + li %r0, 0 + /* WARNING: this assumes that ramstage is not relocatable */ + LOAD_IMM64(%r3, sstate) + + /* PIR of main thread */ + ld %r4, 48(%r3) + mfspr %r5, 1023 + cmpd %r4, %r5 + bne .secondary + + /* Time Base */ + ld %r2, 32(%r3) + mftb %r4 + add %r4, %r2, %r4 + rldicl %r5, %r4, 32, 32 + mttbl %r0 + mttbu %r5 + mttbl %r4 + + /* Stack */ + ld %r1, 0(%r3) + /* TOC */ + ld %r2, 8(%r3) + /* MSR -> HSRR1 */ + ld %r4, 16(%r3) + mtspr 315, %r4 + /* NIA -> HSRR0 */ + ld %r4, 24(%r3) + mtspr 314, %r4 + /* Link register */ + ld %r4, 40(%r3) + mtlr %r4 + hrfid + +.secondary: + /* MSR -> HSRR1 */ + ld %r4, 16(%r3) + mtspr 315, %r4 + /* NIA -> HSRR0 and TOC pointer */ + ld %r2, 56(%r3) + ld %r4, 0(%r2) + mtspr 314, %r4 + ld %r2, 8(%r2) + /* Link register */ + li %r4, 0x100 + mtlr %r4 + /* Set PSSCR for STOP 1 */ + lis %r0, 0x004f + ori %r0, %r0, 0x0311 + mtspr 855, %r0 + hrfid +.globl sys_reset_int_end +sys_reset_int_end: + +int_vector_check_size sys_reset_int 0x100
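The handler hard-codes byte offsets into sstate. Gathered into a struct for reference; the field names here are invented, only the offsets and widths are dictated by the loads in the assembly above:

```c
#include <stdint.h>

/* A C view of the save area that sys_reset_int indexes by hand */
struct saved_state {
	uint64_t r1;        /*  0: stack pointer */
	uint64_t r2;        /*  8: TOC pointer */
	uint64_t msr;       /* 16: moved to HSRR1 */
	uint64_t nia;       /* 24: moved to HSRR0 */
	uint64_t tb;        /* 32: timebase delta added back after wakeup */
	uint64_t lr;        /* 40: link register */
	uint64_t pir;       /* 48: PIR of the main thread */
	uint64_t sec_entry; /* 56: address of a {NIA, TOC} pair for secondaries */
};

_Static_assert(sizeof(struct saved_state) == 64, "offsets must match the asm");
```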
+ */ +.globl ext_int +ext_int: + mtsprg0 %r3 + mtsprg1 %r4 + LOAD_IMM64(%r3, 0x00060302031CDD00) + /* Mask interrupt */ + ldcix %r4, 0, %r3 + subi %r3, %r3, 0x500 + /* Read state */ + ldcix %r4, 0, %r3 + LOAD_IMM64(%r4, 0x0006030203103000) + /* Send EOI */ + ldcix %r4, 0, %r4 + addi %r3, %r3, 0x400 + /* Unmask interrupt */ + ldcix %r4, 0, %r3 + mfsprg0 %r3 + mfsprg1 %r4 + hrfid +.globl ext_int_end +ext_int_end: + +int_vector_check_size ext_int 0x100 + +/* + * Hypervisor Virtualization vector (0xEA0) + * + * Taken when cores are waken up by deadman loop. The occurrence of the + * interrupt does not cause the exception to cease to exist, it must be + * acknowledged by read16(0x0006020000001830). After that, External Exception + * is still asserted and must also be handled. + * + * There are only 0x20 bytes reserved for this handler, which gives just 8 + * instructions. LOAD_IMM64 would use 5 of those, so don't use it. Bits in + * address are set in a way that makes it possible to use two load instructions + * and one rotate/shift operation. + */ +.globl hyp_virt_int +hyp_virt_int: + mtsprg0 %r3 + li %r3, 0x0602 + rldicr %r3, %r3, 40, 23 + ori %r3, %r3, 0x1830 + lhzcix %r3, 0, %r3 + /* Jump to External Interrupt handler, skipping 'mtsprg0 %r3' */ + ba 0x504 +.globl hyp_virt_int_end +hyp_virt_int_end: + +int_vector_check_size hyp_virt_int 0x20 diff --git a/src/soc/ibm/power9/istep_10_1.c b/src/soc/ibm/power9/istep_10_1.c new file mode 100644 index 00000000000..e88ddddc33f --- /dev/null +++ b/src/soc/ibm/power9/istep_10_1.c @@ -0,0 +1,776 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include + +#include "fsi.h" + +enum build_smp_adu_action { + SWITCH_AB = 1, + SWITCH_CD = 2, + QUIESCE = 4, + RESET_SWITCH = 8 +}; + +enum adu_op { + PB_DIS_OPER, // pbop.disable_all + PMISC_OPER, // pmisc switch + PRE_SWITCH_CD, // do not issue PB command, pre-set for switch CD operation + PRE_SWITCH_AB, // do not issue PB command, pre-set for switch AB operation + POST_SWITCH // do not issue PB command, clear switch CD/AB flags +}; + +enum sbe_memory_access_flags { + SBE_MEM_ACCESS_FLAGS_TARGET_PROC = 0x00000001, + SBE_MEM_ACCESS_FLAGS_PB_DIS_MODE = 0x00000400, + SBE_MEM_ACCESS_FLAGS_SWITCH_MODE = 0x00000800, + SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_CD_MODE = 0x00002000, + SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_AB_MODE = 0x00004000, + SBE_MEM_ACCESS_FLAGS_POST_SWITCH_MODE = 0x00008000, +}; + +enum { + PU_ALTD_ADDR_REG = 0x00090000, + + PU_SND_MODE_REG = 0x00090021, + PU_SND_MODE_REG_PB_STOP = 22, + PU_SND_MODE_REG_ENABLE_PB_SWITCH_AB = 30, + PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD = 31, + + PU_ALTD_CMD_REG = 0x00090001, + PU_ALTD_CMD_REG_FBC_START_OP = 2, + PU_ALTD_CMD_REG_FBC_CLEAR_STATUS = 3, + PU_ALTD_CMD_REG_FBC_RESET_FSM = 4, + PU_ALTD_CMD_REG_FBC_AXTYPE = 6, + PU_ALTD_CMD_REG_FBC_LOCKED = 11, + PU_ALTD_CMD_REG_FBC_SCOPE = 16, + PU_ALTD_CMD_REG_FBC_SCOPE_LEN = 3, + PU_ALTD_CMD_REG_FBC_DROP_PRIORITY = 20, + PU_ALTD_CMD_REG_FBC_OVERWRITE_PBINIT = 22, + PU_ALTD_CMD_REG_FBC_WITH_TM_QUIESCE = 24, + PU_ALTD_CMD_REG_FBC_TTYPE = 25, + PU_ALTD_CMD_REG_FBC_TTYPE_LEN = 7, + PU_ALTD_CMD_REG_FBC_TSIZE = 32, + PU_ALTD_CMD_REG_FBC_TSIZE_LEN = 8, + + ALTD_CMD_TTYPE_PB_OPER = 0x3F, + ALTD_CMD_TTYPE_PMISC_OPER = 0x31, + ALTD_CMD_PMISC_TSIZE_1 = 2, // PMISC SWITCH + ALTD_CMD_SCOPE_SYSTEM = 5, + ALTD_CMD_PB_DIS_OPERATION_TSIZE = 8, + + PU_ALTD_STATUS_REG = 0x00090003, + PU_ALTD_STATUS_REG_FBC_ALTD_BUSY = 0, + PU_ALTD_STATUS_REG_FBC_WAIT_CMD_ARBIT = 1, + PU_ALTD_STATUS_REG_FBC_ADDR_DONE = 
2, + PU_ALTD_STATUS_REG_FBC_DATA_DONE = 3, + PU_ALTD_STATUS_REG_FBC_WAIT_RESP = 4, + PU_ALTD_STATUS_REG_FBC_OVERRUN_ERROR = 5, + PU_ALTD_STATUS_REG_FBC_AUTOINC_ERROR = 6, + PU_ALTD_STATUS_REG_FBC_COMMAND_ERROR = 7, + PU_ALTD_STATUS_REG_FBC_ADDRESS_ERROR = 8, + PU_ALTD_STATUS_REG_FBC_PBINIT_MISSING = 18, + PU_ALTD_STATUS_REG_FBC_ECC_CE = 48, + PU_ALTD_STATUS_REG_FBC_ECC_UE = 49, + PU_ALTD_STATUS_REG_FBC_ECC_SUE = 50, + + PU_ALTD_OPTION_REG = 0x00090002, + PU_ALTD_OPTION_REG_FBC_WITH_PRE_QUIESCE = 23, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT = 28, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT_LEN = 20, + PU_ALTD_OPTION_REG_FBC_WITH_POST_INIT = 51, + PU_ALTD_OPTION_REG_FBC_ALTD_HW397129 = 52, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT = 54, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT_LEN = 10, + + PU_ALTD_DATA_REG = 0x00090004, + + PU_PB_CENT_SM0_PB_CENT_MODE = 0x05011C0A, + + P9_BUILD_SMP_NUM_SHADOWS = 3, + + PU_PB_WEST_SM0_PB_WEST_HP_MODE_CURR = 0x0501180C, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_CURR = 0x05011C0C, + PU_PB_EAST_HP_MODE_CURR = 0x0501200C, + + PU_PB_WEST_SM0_PB_WEST_HP_MODE_NEXT = 0x0501180B, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_NEXT = 0x05011C0B, + PU_PB_EAST_HP_MODE_NEXT = 0x0501200B, + + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_CURR = 0x05011810, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_CURR = 0x05011C10, + PU_PB_EAST_HPX_MODE_CURR = 0x05012010, + + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_NEXT = 0x0501180F, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_NEXT = 0x05011C0F, + PU_PB_EAST_HPX_MODE_NEXT = 0x0501200F, + + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_CURR = 0x0501180E, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_CURR = 0x05011C0E, + PU_PB_EAST_HPA_MODE_CURR = 0x0501200E, + + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_NEXT = 0x0501180D, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_NEXT = 0x05011C0D, + PU_PB_EAST_HPA_MODE_NEXT = 0x0501200D, +}; + +/* HP (HotPlug Mode Register) */ +static const uint64_t PB_HP_MODE_CURR_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HP_MODE_CURR, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_CURR, + PU_PB_EAST_HP_MODE_CURR +}; +static const uint64_t PB_HP_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HP_MODE_NEXT, + PU_PB_CENT_SM0_PB_CENT_HP_MODE_NEXT, + PU_PB_EAST_HP_MODE_NEXT +}; + +/* HPX (Hotplug Mode Register Extension) */ +static const uint64_t PB_HPX_MODE_CURR_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_CURR, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_CURR, + PU_PB_EAST_HPX_MODE_CURR +}; +static const uint64_t PB_HPX_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPX_MODE_NEXT, + PU_PB_CENT_SM0_PB_CENT_HPX_MODE_NEXT, + PU_PB_EAST_HPX_MODE_NEXT +}; + +/* HPA */ +static const uint64_t PB_HPA_MODE_CURR_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_CURR, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_CURR, + PU_PB_EAST_HPA_MODE_CURR +}; +static const uint64_t PB_HPA_MODE_NEXT_SHADOWS[P9_BUILD_SMP_NUM_SHADOWS] = { + PU_PB_WEST_SM0_PB_WEST_HPA_MODE_NEXT, + PU_PB_CENT_SM0_PB_CENT_HPA_MODE_NEXT, + PU_PB_EAST_HPA_MODE_NEXT +}; + +/* + * SCOM registers in this function are not documented. SCOM addresses that start with 0x9 + * are form 1 indirect addresses (bit 3 is set in this case) despite + * documentation ("1.2.2 PCB Address Space" section) not mentioning this form. 
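+ *
+ * Judging by the constants used below, the low word of such an address is
+ * the base SCOM register (e.g. 0x05011C11) and the high word carries the
+ * indirect register offset (e.g. 0x90000CB2); bit 0 (the MSB) marks the
+ * access as indirect and bit 3 selects form 1.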
+ */ +static void p9_fbc_cd_hp1_scom(uint8_t chip, bool is_xbus_active) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); + const uint32_t pb_freq_mhz = pb_cfg->fabric_freq; + + /* Frequency of XBus for Nimbus DD2 */ + const uint32_t xbus_freq_mhz = 2000; + + uint64_t val; + uint64_t tmp; + + val = PPC_PLACE(is_xbus_active ? 0x08 : 0x06, 54, 5) | PPC_PLACE(0x03, 59, 5); + write_scom(chip, 0x90000CB205012011, val); + + tmp = 0; + if (100 * xbus_freq_mhz >= 120 * pb_freq_mhz) + tmp = 0x09; + else if (100 * xbus_freq_mhz >= 100 * pb_freq_mhz) + tmp = 0x0A; + else if (105 * xbus_freq_mhz >= 100 * pb_freq_mhz) + tmp = 0x0B; + else if (125 * xbus_freq_mhz >= 100 * pb_freq_mhz) + tmp = 0x0C; + val = PPC_PLACE(tmp, 54, 5) | PPC_PLACE(3, 59, 5); + write_scom(chip, 0x90000CB305012011, val); + + val = PPC_PLACE(0x10, 51, 5) | PPC_PLACE(2, 58, 2) + | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); + write_scom(chip, 0x90000CDB05011C11, val); + + val = PPC_PLACE(7, 49, 3) | PPC_PLACE(4, 52, 6); + write_scom(chip, 0x90000CF405011C11, val); + + val = PPC_PLACE(0xC, 45, 4) | PPC_PLACE(1, 57, 2); + write_scom(chip, 0x90000D3F05011C11, val); + + val = PPC_PLACE(3, 41, 2) | PPC_PLACE(1, 43, 2) | PPC_PLACE(3, 45, 4) + | PPC_PLACE(0xC0, 49, 8); + write_scom(chip, 0x90000D7805011C11, val); + + val = PPC_PLACE(8, 38, 4) | PPC_PLACE(4, 42, 4) | PPC_PLACE(1, 57, 3) + | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); + write_scom(chip, 0x90000DAA05011C11, val); + + val = PPC_PLACE(4, 36, 3) | PPC_PLACE(0x20, 41, 8) | PPC_BIT(49) | PPC_BIT(51) + | PPC_BIT(52) | PPC_BIT(53) | PPC_BIT(55) | PPC_BIT(56) | PPC_BIT(57) + | PPC_PLACE(is_xbus_active ? 0xF : 0x8, 60, 4); + write_scom(chip, 0x90000DCC05011C11, val); + + val = PPC_PLACE(1, 41, 3) | PPC_PLACE(1, 44, 3) | PPC_PLACE(2, 47, 3) + | PPC_PLACE(3, 50, 3) | PPC_PLACE(5, 53, 3) | PPC_PLACE(5, 57, 3); + write_scom(chip, 0x90000E0605011C11, val); + + val = PPC_PLACE(0x06, 33, 5) | PPC_PLACE(0x0D, 38, 5) | PPC_PLACE(0x1E, 48, 5) + | PPC_PLACE(0x19, 53, 5) | PPC_BIT(63); + write_scom(chip, 0x90000E4305011C11, val); + + val = PPC_PLACE(0x400, 22, 12) | PPC_PLACE(0x400, 34, 12) + | PPC_PLACE(2, 46, 3) | PPC_PLACE(2, 49, 3) | PPC_PLACE(2, 52, 3) + | PPC_PLACE(2, 55, 3) | PPC_PLACE(2, 58, 3) | PPC_PLACE(2, 61, 3); + write_scom(chip, 0x90000EA205011C11, val); + + /* 44 - set because ATTR_CHIP_EC_FEATURE_HW409019 == 1 */ + val = PPC_PLACE(0x0C, 20, 8) | PPC_BIT(44); + write_scom(chip, 0x90000EC705011C11, val); + + val = PPC_PLACE(0x4, 18, 10) | PPC_PLACE(0x141, 28, 12) | PPC_PLACE(0x21B, 40, 12) + | PPC_PLACE(0x30D, 52, 12); + write_scom(chip, 0x90000EE105011C11, val); + + val = PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(2, 31, 3) + | PPC_PLACE(3, 34, 3) | PPC_PLACE(5, 37, 3) | PPC_PLACE(1, 49, 3) + | PPC_PLACE(1, 52, 3) | PPC_PLACE(2, 55, 3) | PPC_PLACE(3, 58, 3) + | PPC_PLACE(5, 61, 3); + write_scom(chip, 0x90000F0505011C11, val); + + val = PPC_PLACE(0x7, 14, 10) | PPC_PLACE(0x5, 24, 10) | PPC_PLACE(0x5, 34, 10) + | PPC_PLACE(0x4, 44, 10) | PPC_PLACE(0x5, 54, 10); + write_scom(chip, 0x90000F2005011C11, val); + + val = PPC_BIT(20) | PPC_PLACE(3, 32, 2) | PPC_PLACE(7, 34, 3) | PPC_PLACE(3, 37, 2) + | PPC_PLACE(1, 41, 1) | PPC_PLACE(1, 42, 1); + if (pb_cfg->core_ceiling_ratio != FABRIC_CORE_CEILING_RATIO_RATIO_8_8) + val |= PPC_PLACE(3, 24, 2) | PPC_PLACE(3, 44, 2); + tmp = (pb_cfg->core_ceiling_ratio == FABRIC_CORE_CEILING_RATIO_RATIO_8_8 ? 
3 : 2); + val |= PPC_PLACE(tmp, 28, 2); + write_scom(chip, 0x90000F4005011811, val); + write_scom(chip, 0x90000F4005012011, val); + + val = PPC_BIT(12) | PPC_PLACE(4, 13, 4) | PPC_PLACE(4, 17, 4) | PPC_PLACE(4, 21, 4) + | PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(1, 31, 3) + | PPC_PLACE(0xFE, 34, 8) | PPC_PLACE(0xFE, 42, 8) | PPC_PLACE(1, 50, 2) + | PPC_PLACE(2, 54, 3) | PPC_PLACE(2, 57, 2) | PPC_BIT(60) | PPC_BIT(61) + | PPC_BIT(63); + write_scom(chip, 0x90000F4D05011C11, val); + + val = PPC_BIT(35) | PPC_PLACE(1, 36, 2) | PPC_PLACE(2, 39, 2) | PPC_BIT(49) + | PPC_PLACE(1, 51, 2); + + if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_2_8) + tmp = 3; + else if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_4_8) + tmp = 2; + else + tmp = 1; + val |= PPC_PLACE(tmp, 41, 2); + + if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_2_8) + tmp = 0; + else if (pb_cfg->core_floor_ratio == FABRIC_CORE_FLOOR_RATIO_RATIO_4_8) + tmp = 3; + else + tmp = 2; + val |= PPC_PLACE(tmp, 44, 2); + + write_scom(chip, 0x90000E6105011811, val); + write_scom(chip, 0x90000E6105012011, val); +} + +/* + * SCOM registers in this function are not documented. SCOM addresses that start with 0x9 + * are form 1 indirect addresses (bit 3 is set in this case) despite + * documentation ("1.2.2 PCB Address Space" section) not mentioning this form. + */ +static void p9_fbc_cd_hp23_scom(uint8_t chip, bool is_xbus_active, int seq) +{ + const uint64_t tmp = (seq == 2); + + uint64_t val; + + val = PPC_PLACE(8, 38, 4) | PPC_PLACE(4, 42, 4) | PPC_PLACE(tmp, 50, 1) + | PPC_PLACE(1, 57, 3) | PPC_PLACE((seq == 2 && is_xbus_active) ? 0xF : 0x8, 60, 4); + write_scom(chip, 0x90000DAA05011C11, val); + + val = PPC_BIT(12) | PPC_PLACE(4, 13, 4) | PPC_PLACE(4, 17, 4) | PPC_PLACE(4, 21, 4) + | PPC_PLACE(1, 25, 3) | PPC_PLACE(1, 28, 3) | PPC_PLACE(1, 31, 3) + | PPC_PLACE(0xFE, 34, 8) | PPC_PLACE(0xFE, 42, 8) | PPC_PLACE(1, 50, 2) + | PPC_PLACE(2, 54, 3) | PPC_PLACE(2, 57, 2) | PPC_PLACE(tmp, 59, 1) + | PPC_PLACE(tmp, 60, 1) | PPC_BIT(61) | PPC_BIT(63); + write_scom(chip, 0x90000F4D05011C11, val); +} + +/* Set action which will occur on fabric pmisc switch command */ +static void p9_adu_coherent_utils_set_switch_action(uint8_t chip, enum adu_op adu_op) +{ + uint64_t mask = PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_AB) + | PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD); + + uint64_t data = 0; + if (adu_op == PRE_SWITCH_AB) + data |= PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_AB); + if (adu_op == PRE_SWITCH_CD) + data |= PPC_BIT(PU_SND_MODE_REG_ENABLE_PB_SWITCH_CD); + + scom_and_or(chip, PU_SND_MODE_REG, ~mask, data); +} + +static void p9_adu_coherent_utils_check_fbc_state(uint8_t chip) +{ + /* PU_PB_CENT_SM0_PB_CENT_MODE_PB_CENT_PBIXXX_INIT */ + if (!(read_scom(chip, PU_PB_CENT_SM0_PB_CENT_MODE) & PPC_BIT(0))) + die("FBC isn't initialized!\n"); + + if (read_scom(chip, PU_SND_MODE_REG) & PPC_BIT(PU_SND_MODE_REG_PB_STOP)) + die("FBC isn't running!\n"); +} + +static void lock_adu(uint8_t chip) +{ + uint64_t data = 0; + + /* Configuring lock manipulation control data buffer to perform lock acquisition */ + data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_LOCKED); + data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_RESET_FSM); + data |= PPC_BIT(PU_ALTD_CMD_REG_FBC_CLEAR_STATUS); + + /* Write ADU command register to attempt lock manipulation */ + write_scom(chip, PU_ALTD_CMD_REG, data); +} + +/* Setup the value for ADU option register to enable quiesce & init around a + * switch operation */ +static void set_quiesce_init(uint8_t chip) +{ + 
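/*
+ * Our reading of the field names: the ADU inserts a delay of
+ * AFTER_QUIESCE_WAIT_COUNT between quiescing the fabric and issuing the
+ * switch, and of BEFORE_INIT_WAIT_COUNT before re-initializing the fabric
+ * afterwards.
+ */
+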
enum { + QUIESCE_SWITCH_WAIT_COUNT = 128, + INIT_SWITCH_WAIT_COUNT = 128, + }; + + uint64_t data = 0; + + /* Setup quiesce */ + data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_WITH_PRE_QUIESCE); + PPC_INSERT(data, QUIESCE_SWITCH_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_AFTER_QUIESCE_WAIT_COUNT_LEN); + + /* Setup post-command init */ + data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_WITH_POST_INIT); + PPC_INSERT(data, INIT_SWITCH_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT, + PU_ALTD_OPTION_REG_FBC_BEFORE_INIT_WAIT_COUNT_LEN); + + /* Setup workaround for HW397129 to re-enable fastpath for DD2 */ + data |= PPC_BIT(PU_ALTD_OPTION_REG_FBC_ALTD_HW397129); + + write_scom(chip, PU_ALTD_OPTION_REG, data); +} + +static void p9_adu_coherent_setup_adu(uint8_t chip, enum adu_op adu_op) +{ + uint64_t cmd = 0x0; + uint32_t ttype = 0; + uint32_t tsize = 0; + + /* Write the address. Not sure if operations we support actually need + * this. */ + write_scom(chip, PU_ALTD_ADDR_REG, 0); + + /* This routine assumes the lock is held by the caller, preserve this + * locked state */ + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_LOCKED); + + if (adu_op == PB_DIS_OPER || adu_op == PMISC_OPER) { + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_START_OP); + + PPC_INSERT(cmd, ALTD_CMD_SCOPE_SYSTEM, + PU_ALTD_CMD_REG_FBC_SCOPE, PU_ALTD_CMD_REG_FBC_SCOPE_LEN); + + /* DROP_PRIORITY = HIGH */ + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_DROP_PRIORITY); + /* AXTYPE = Address only */ + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_AXTYPE); + cmd |= PPC_BIT(PU_ALTD_CMD_REG_FBC_WITH_TM_QUIESCE); + + if (adu_op == PB_DIS_OPER) { + ttype = ALTD_CMD_TTYPE_PB_OPER; + tsize = ALTD_CMD_PB_DIS_OPERATION_TSIZE; + } else { + ttype = ALTD_CMD_TTYPE_PMISC_OPER; + tsize = ALTD_CMD_PMISC_TSIZE_1; + + /* Set quiesce and init around a switch operation in option reg */ + set_quiesce_init(chip); + } + } + + PPC_INSERT(cmd, ttype, PU_ALTD_CMD_REG_FBC_TTYPE, PU_ALTD_CMD_REG_FBC_TTYPE_LEN); + PPC_INSERT(cmd, tsize, PU_ALTD_CMD_REG_FBC_TSIZE, PU_ALTD_CMD_REG_FBC_TSIZE_LEN); + + write_scom(chip, PU_ALTD_CMD_REG, cmd); +} + +static void p9_adu_setup(uint8_t chip, enum adu_op adu_op) +{ + /* Don't generate fabric command, just pre-condition ADU for upcoming switch */ + if (adu_op == PRE_SWITCH_AB || adu_op == PRE_SWITCH_CD || adu_op == POST_SWITCH) { + p9_adu_coherent_utils_set_switch_action(chip, adu_op); + return; + } + + /* Ensure fabric is running */ + p9_adu_coherent_utils_check_fbc_state(chip); + + /* + * Acquire ADU lock to guarantee exclusive use of the ADU resources. + * ADU state machine will be reset/cleared by this routine. 
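+ * (lock_adu() above does this by writing ALTD_CMD with the LOCKED,
+ * RESET_FSM and CLEAR_STATUS bits set.)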
+ */ + lock_adu(chip); + + /* Setup the ADU registers for operation */ + p9_adu_coherent_setup_adu(chip, adu_op); +} + +static void p9_adu_coherent_status_check(uint8_t chip, bool is_addr_only) +{ + int i; + uint64_t status; + + //Check for a successful status 10 times + for (i = 0; i < 10; i++) { + status = read_scom(chip, PU_ALTD_STATUS_REG); + + if (!(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ALTD_BUSY))) + break; + + /* Delay to allow the write/read/other command to finish */ + udelay(1); // actually need only 100ns, so delaying at the bottom + } + + if (!(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ADDR_DONE))) + die("The address portion of ADU operation is not complete!\n"); + if (!is_addr_only && !(status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_DATA_DONE))) + die("The data portion of ADU operation is not complete!\n"); + + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_WAIT_CMD_ARBIT)) + die("ADU is still waiting for command arbitrage!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_WAIT_RESP)) + die("ADU is still waiting for a clean combined response!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_OVERRUN_ERROR)) + die("ADU data overrun!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_AUTOINC_ERROR)) + die("Internal ADU address counter rolled over the 0.5M boundary!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_COMMAND_ERROR)) + die("New ADU command was issued before previous one finished!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ADDRESS_ERROR)) + die("Invalid ADU Address!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_PBINIT_MISSING)) + die("Attempt to start an ADU command without pb_init active!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ECC_CE)) + die("ECC Correctable error from ADU!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ECC_UE)) + die("ECC Uncorrectable error from ADU!\n"); + if (status & PPC_BIT(PU_ALTD_STATUS_REG_FBC_ECC_SUE)) + die("ECC Special Uncorrectable error!\n"); + + if (i == 10) + die("ADU is busy for too long with status: 0x%016llx!\n", status); +} + +static void p9_adu_access(uint8_t chip, enum adu_op adu_op) +{ + const bool is_addr_only = (adu_op == PB_DIS_OPER || adu_op == PMISC_OPER); + + /* Don't generate fabric command */ + if (adu_op == PRE_SWITCH_AB || adu_op == PRE_SWITCH_CD || adu_op == POST_SWITCH) + return; + + if (is_addr_only) { + udelay(10); + } else { + write_scom(chip, PU_ALTD_DATA_REG, 0); + scom_or(chip, PU_ALTD_CMD_REG, PPC_BIT(PU_ALTD_CMD_REG_FBC_START_OP)); + + /* If it's not a cache inhibit operation, we just want to delay + * for a while and then it's done */ + udelay(10); + } + + /* We expect the busy bit to be cleared */ + p9_adu_coherent_status_check(chip, is_addr_only); + + /* If it's the last read/write cleanup the ADU */ + write_scom(chip, PU_ALTD_CMD_REG, 0); +} + +/* We don't write any specific data to ADU, just execute an action on it */ +static void p9_putmemproc(uint8_t chip, uint32_t mem_flags) +{ + enum adu_op adu_op; + + if (mem_flags & SBE_MEM_ACCESS_FLAGS_PB_DIS_MODE) + adu_op = PB_DIS_OPER; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_SWITCH_MODE) + adu_op = PMISC_OPER; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_CD_MODE) + adu_op = PRE_SWITCH_CD; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_AB_MODE) + adu_op = PRE_SWITCH_AB; + else if (mem_flags & SBE_MEM_ACCESS_FLAGS_POST_SWITCH_MODE) + adu_op = POST_SWITCH; + else + die("Invalid ADU putmem flags."); + + p9_adu_setup(chip, adu_op); + p9_adu_access(chip, adu_op); +} + +static void 
p9_build_smp_adu_set_switch_action(uint8_t chip, enum build_smp_adu_action action) +{ + uint32_t flags = SBE_MEM_ACCESS_FLAGS_TARGET_PROC; + + if (action == SWITCH_AB) + flags |= SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_AB_MODE; + else if (action == SWITCH_CD) + flags |= SBE_MEM_ACCESS_FLAGS_PRE_SWITCH_CD_MODE; + else + flags |= SBE_MEM_ACCESS_FLAGS_POST_SWITCH_MODE; + + return p9_putmemproc(chip, flags); +} + +static void p9_build_smp_sequence_adu(uint8_t chips, enum build_smp_adu_action action) +{ + uint32_t flags = SBE_MEM_ACCESS_FLAGS_TARGET_PROC; + + switch (action) { + case SWITCH_AB: + case SWITCH_CD: + flags |= SBE_MEM_ACCESS_FLAGS_SWITCH_MODE; + break; + case QUIESCE: + flags |= SBE_MEM_ACCESS_FLAGS_PB_DIS_MODE; + break; + case RESET_SWITCH: + die("RESET_SWITCH is not a valid ADU action to request\n"); + } + + /* + * Condition for hotplug switch operation. All chips which were not + * quiesced prior to switch AB will need to observe the switch. + */ + if (action != QUIESCE) { + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_build_smp_adu_set_switch_action(chip, action); + } + } + + if (action == SWITCH_CD || action == SWITCH_AB) + p9_putmemproc(/*chip=*/0, flags); + if ((action == SWITCH_CD || action == QUIESCE) && (chips & 0x02)) + p9_putmemproc(/*chip=*/1, flags); + + if (action != QUIESCE) { + /* Operation complete, reset switch controls */ + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_build_smp_adu_set_switch_action(chip, RESET_SWITCH); + } + } +} + +static void p9_fbc_ab_hp_scom(uint8_t chip, bool is_xbus_active) +{ + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); + const uint32_t pb_freq_mhz = pb_cfg->fabric_freq; + + /* Frequency of XBus for Nimbus DD2 */ + const uint32_t xbus_freq_mhz = 2000; + + const bool hw407123 = (get_dd() <= 0x20); + + const bool is_fabric_master = (chip == 0); + const uint8_t attached_chip = (is_xbus_active && chip == 0 ? 
1 : 0); + + const uint64_t cmd_rate_4b_r = ((6 * pb_freq_mhz) % xbus_freq_mhz); + + const uint64_t cmd_rate_d = xbus_freq_mhz; + const uint64_t cmd_rate_4b_n = (6 * pb_freq_mhz); + + for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) { + uint64_t val; + uint64_t tmp; + + /* *_HP_MODE_NEXT */ + + val = read_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i]); + + if (!is_fabric_master) { + val &= ~PPC_BIT(0); // PB_COM_PB_CFG_MASTER_CHIP_NEXT_OFF + val &= ~PPC_BIT(1); // PB_COM_PB_CFG_TM_MASTER_NEXT_OFF + } + + val &= ~PPC_BIT(2); // PB_COM_PB_CFG_CHG_RATE_GP_MASTER_NEXT_OFF + + if (is_fabric_master) + val |= PPC_BIT(3); // PB_COM_PB_CFG_CHG_RATE_SP_MASTER_NEXT_ON + else + val &= ~PPC_BIT(3); // PB_COM_PB_CFG_CHG_RATE_SP_MASTER_NEXT_OFF + + val &= ~PPC_BIT(29); // PB_COM_PB_CFG_HOP_MODE_NEXT_OFF + + write_scom(chip, PB_HP_MODE_NEXT_SHADOWS[i], val); + + /* *_HPX_MODE_NEXT */ + + val = read_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i]); + + PPC_INSERT(val, is_xbus_active, 1, 1); // PB_COM_PB_CFG_LINK_X1_EN_NEXT + PPC_INSERT(val, attached_chip, 19, 3); // PB_COM_PB_CFG_LINK_X1_CHIPID_NEXT_ID + + val |= PPC_BIT(49); // PB_COM_PB_CFG_X_INDIRECT_EN_NEXT_ON + val |= PPC_BIT(50); // PB_COM_PB_CFG_X_GATHER_ENABLE_NEXT_ON + + if (cmd_rate_4b_r != 0 && hw407123) + tmp = (cmd_rate_4b_n / cmd_rate_d) + 3; + else if (cmd_rate_4b_r == 0 && hw407123) + tmp = (cmd_rate_4b_n / cmd_rate_d) + 2; + else if (cmd_rate_4b_r != 0) + tmp = (cmd_rate_4b_n / cmd_rate_d); + else + tmp = (cmd_rate_4b_n / cmd_rate_d) - 1; + PPC_INSERT(val, tmp, 56, 8); + + write_scom(chip, PB_HPX_MODE_NEXT_SHADOWS[i], val); + } +} + +static uint64_t p9_build_smp_get_hp_ab_shadow(uint8_t chip, const uint64_t shadow_regs[]) +{ + uint64_t last_data = 0; + + for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) { + const uint64_t data = read_scom(chip, shadow_regs[i]); + + /* Check consistency of west/center/east register copies while + * reading them */ + if (i != 0 && data != last_data) + die("Values in shadow registers differ!\n"); + + last_data = data; + } + + return last_data; +} + +static void p9_build_smp_set_hp_ab_shadow(uint8_t chip, const uint64_t shadow_regs[], + uint64_t data) +{ + for (uint8_t i = 0; i < P9_BUILD_SMP_NUM_SHADOWS; i++) + write_scom(chip, shadow_regs[i], data); +} + +static void p9_build_smp_copy_hp_ab_next_curr(uint8_t chip) +{ + /* Read NEXT */ + uint64_t hp_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HP_MODE_NEXT_SHADOWS); + uint64_t hpx_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPX_MODE_NEXT_SHADOWS); + uint64_t hpa_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPA_MODE_NEXT_SHADOWS); + + /* Write CURR */ + p9_build_smp_set_hp_ab_shadow(chip, PB_HP_MODE_CURR_SHADOWS, hp_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPX_MODE_CURR_SHADOWS, hpx_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPA_MODE_CURR_SHADOWS, hpa_mode_data); +} + +static void p9_build_smp_copy_hp_ab_curr_next(uint8_t chip) +{ + /* Read CURR */ + uint64_t hp_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HP_MODE_CURR_SHADOWS); + uint64_t hpx_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPX_MODE_CURR_SHADOWS); + uint64_t hpa_mode_data = p9_build_smp_get_hp_ab_shadow(chip, PB_HPA_MODE_CURR_SHADOWS); + + /* Write NEXT */ + p9_build_smp_set_hp_ab_shadow(chip, PB_HP_MODE_NEXT_SHADOWS, hp_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPX_MODE_NEXT_SHADOWS, hpx_mode_data); + p9_build_smp_set_hp_ab_shadow(chip, PB_HPA_MODE_NEXT_SHADOWS, hpa_mode_data); +} + +static void p9_build_smp_set_fbc_ab(uint8_t chips) +{ + 
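/*
+ * The switch AB command makes the fabric promote the NEXT register set to
+ * CURR on every chip that observes it (our understanding, following
+ * Hostboot's p9_build_smp), so the new topology is staged in NEXT first;
+ * CURR is written directly only on chips that are quiesced and therefore
+ * do not see the switch.
+ */
+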
const bool is_xbus_active = (chips == 0x03); + + /* + * quiesce 'slave' fabrics in preparation for joining + * PHASE1 -> quiesce all chips except the chip which is the new fabric master + * PHASE2 -> quiesce all drawers except the drawer containing the new fabric master + */ + p9_build_smp_sequence_adu(chips, QUIESCE); + + /* Program NEXT register set for all chips via initfile */ + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_fbc_ab_hp_scom(chip, is_xbus_active); + } + + /* Program CURR register set only for chips which were just quiesced */ + if (chips & 0x02) + p9_build_smp_copy_hp_ab_next_curr(/*chip=*/1); + + /* + * Issue switch AB reconfiguration from chip designated as new master + * (which is guaranteed to be a master now) + */ + p9_build_smp_sequence_adu(chips, SWITCH_AB); + + /* Reset NEXT register set (copy CURR->NEXT) for all chips */ + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + p9_build_smp_copy_hp_ab_curr_next(chip); + } +} + +static void p9_build_smp(uint8_t chips) +{ + const bool is_xbus_active = (chips == 0x03); + + /* Apply three CD hotplug sequences to each chip to initialize SCOM + * chains */ + for (int seq = 1; seq <= 3; seq++) { + for (int chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + if (seq == 1) + p9_fbc_cd_hp1_scom(chip, is_xbus_active); + else + p9_fbc_cd_hp23_scom(chip, is_xbus_active, seq); + } + + /* Issue switch CD on all chips to force updates to occur */ + p9_build_smp_sequence_adu(chips, SWITCH_CD); + } + + p9_build_smp_set_fbc_ab(chips); +} + +void istep_10_1(uint8_t chips) +{ + report_istep(10,1); + + p9_build_smp(chips); + + if (chips & 0x02) { + switch_secondary_scom_to_xscom(); + + /* Sanity check that XSCOM works for the second CPU */ + if (read_scom(1, 0xF000F) == 0xFFFFFFFFFFFFFFFF) + die("XSCOM doesn't work for the second CPU\n"); + + fsi_reset_pib2opb(/*chip=*/1); + } +} diff --git a/src/soc/ibm/power9/istep_10_10.c b/src/soc/ibm/power9/istep_10_10.c new file mode 100644 index 00000000000..fa966726078 --- /dev/null +++ b/src/soc/ibm/power9/istep_10_10.c @@ -0,0 +1,622 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "pci.h" + +#define MAX_LANE_GROUPS_PER_PEC 4 + +#define NUM_PCIE_LANES 16 +#define NUM_PCS_CONFIG 4 + +/* Enum indicating lane width (units = "number of lanes") */ +enum lane_width { + LANE_WIDTH_NC = 0, + LANE_WIDTH_4X = 4, + LANE_WIDTH_8X = 8, + LANE_WIDTH_16X = 16 +}; + +enum lane_mask { + LANE_MASK_X16 = 0xFFFF, + LANE_MASK_X8_GRP0 = 0xFF00, + LANE_MASK_X8_GRP1 = 0x00FF, + LANE_MASK_X4_GRP0 = 0x00F0, + LANE_MASK_X4_GRP1 = 0x000F, +}; + +/* Enumeration of PHB to PCI MAC mappings */ +enum phb_to_mac { + PHB_X16_MAC_MAP = 0x0000, + PHB_X8_X8_MAC_MAP = 0x0050, + PHB_X8_X4_X4_MAC_MAP = 0x0090, +}; + +/* + * Bit position of the PHB with the largest number a given PEC can use + * (see enum phb_active_mask for bit values). + */ +enum pec_phb_shift { + PEC0_PHB_SHIFT = 7, // PHB0 only + PEC1_PHB_SHIFT = 5, // PHB1 - PHB2 + PEC2_PHB_SHIFT = 2, // PHB3 - PHB5 +}; + +/* + * Struct for each row in PCIE IOP configuration table. + * Used by code to compute the IOP config and PHBs active mask. + */ +struct lane_config_row { + /* + * Grouping of lanes under one IOP. + * Value signifies width of each PCIE lane set (0, 4, 8, or 16). 
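+ * For example, { 8, 0, 4, 4 } describes an x8 + x4 + x4 bifurcation
+ * (three PHBs; compare the last row of pec2_lane_cfg below).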
+ */ + uint8_t lane_set[MAX_LANE_GROUPS_PER_PEC]; // enum lane_width + + /* IOP config value from PCIE IOP configuration table */ + uint8_t lane_config; + + /* PHB active mask (see phb_active_mask enum) */ + uint8_t phb_active; + + uint16_t phb_to_pcie_mac; // enum phb_to_mac +}; + +/* + * Currently there are three PEC config tables for procs with 48 usable PCIE + * lanes. In general, the code accumulates the current configuration of + * the PECs from the MRW and other dynamic information (such as bifurcation) + * then matches that config to one of the rows in the table. Once a match + * is discovered, the PEC config value is pulled from the matching row for + * future use. + * + * Each PEC can control up to 16 lanes: + * - PEC0 can give 16 lanes to PHB0 + * - PEC1 can split 16 lanes between PHB1 & PHB2 + * - PEC2 can split 16 lanes between PHB3, PHB4 & PHB5 + */ +static const struct lane_config_row pec0_lane_cfg[] = { + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X16_MAC_MAP + }, + { + { LANE_WIDTH_16X, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB0_MASK, + PHB_X16_MAC_MAP + }, +}; +static const struct lane_config_row pec1_lane_cfg[] = { + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_8X, LANE_WIDTH_NC }, + 0x00, + PHB1_MASK | PHB2_MASK, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB1_MASK, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_8X, LANE_WIDTH_NC }, + 0x00, + PHB2_MASK, + PHB_X8_X8_MAC_MAP + }, +}; +static const struct lane_config_row pec2_lane_cfg[] = { + { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X16_MAC_MAP + }, + { + { LANE_WIDTH_16X, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB3_MASK, + PHB_X16_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_8X, LANE_WIDTH_NC }, + 0x10, + PHB3_MASK | PHB4_MASK, + PHB_X8_X8_MAC_MAP + }, + { + { LANE_WIDTH_8X, LANE_WIDTH_NC, LANE_WIDTH_4X, LANE_WIDTH_4X }, + 0x20, + PHB3_MASK | PHB4_MASK | PHB5_MASK, + PHB_X8_X4_X4_MAC_MAP + }, +}; + +static const struct lane_config_row *pec_lane_cfgs[] = { + pec0_lane_cfg, + pec1_lane_cfg, + pec2_lane_cfg +}; +static const size_t pec_lane_cfg_sizes[] = { + ARRAY_SIZE(pec0_lane_cfg), + ARRAY_SIZE(pec1_lane_cfg), + ARRAY_SIZE(pec2_lane_cfg) +}; + +/* + * Rows correspond to PEC_PCIE_LANE_MASK_NON_BIFURCATED values in processed + * talos.xml for each processor chip. Values correspond to lane_width + * enumeration. 
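+ * (Strictly speaking, the values are lane_mask constants; they are
+ * converted to lane_width by lane_mask_to_width() below.)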
+ */ +static uint16_t lane_masks[MAX_CHIPS][MAX_PEC_PER_PROC][MAX_LANE_GROUPS_PER_PEC] = { + { + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X8_GRP1, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X4_GRP0, LANE_MASK_X4_GRP1 }, + }, + { + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + { LANE_MASK_X8_GRP0, 0x0, LANE_MASK_X8_GRP1, 0x0 }, + { LANE_MASK_X16, 0x0, 0x0, 0x0 }, + }, +}; + +/* + * PROC_PCIE_IOP_SWAP from processed talos.xml for each PEC of each processor + * chip + */ +static uint8_t pcie_iop_swap[MAX_CHIPS][MAX_PEC_PER_PROC] = { + { 1, 0, 0 }, + { 1, 0, 4 }, +}; + +static const uint64_t RX_VGA_CTRL3_REGISTER[NUM_PCIE_LANES] = { + 0x8000008D0D010C3F, + 0x800000CD0D010C3F, + 0x8000018D0D010C3F, + 0x800001CD0D010C3F, + 0x8000028D0D010C3F, + 0x800002CD0D010C3F, + 0x8000038D0D010C3F, + 0x800003CD0D010C3F, + 0x8000088D0D010C3F, + 0x800008CD0D010C3F, + 0x8000098D0D010C3F, + 0x800009CD0D010C3F, + 0x80000A8D0D010C3F, + 0x80000ACD0D010C3F, + 0x80000B8D0D010C3F, + 0x80000BCD0D010C3F, +}; + +static const uint64_t RX_LOFF_CNTL_REGISTER[NUM_PCIE_LANES] = { + 0x800000A60D010C3F, + 0x800000E60D010C3F, + 0x800001A60D010C3F, + 0x800001E60D010C3F, + 0x800002A60D010C3F, + 0x800002E60D010C3F, + 0x800003A60D010C3F, + 0x800003E60D010C3F, + 0x800008A60D010C3F, + 0x800008E60D010C3F, + 0x800009A60D010C3F, + 0x800009E60D010C3F, + 0x80000AA60D010C3F, + 0x80000AE60D010C3F, + 0x80000BA60D010C3F, + 0x80000BE60D010C3F, +}; + +static enum lane_width lane_mask_to_width(uint16_t mask) +{ + enum lane_width width = LANE_WIDTH_NC; + + if (mask == LANE_MASK_X16) + width = LANE_WIDTH_16X; + else if (mask == LANE_MASK_X8_GRP0 || mask == LANE_MASK_X8_GRP1) + width = LANE_WIDTH_8X; + else if (mask == LANE_MASK_X4_GRP0 || mask == LANE_MASK_X4_GRP1) + width = LANE_WIDTH_4X; + + return width; +} + +static void determine_lane_configs(uint8_t chip, uint8_t *phb_active_mask, + const struct lane_config_row **pec_cfgs) +{ + uint8_t pec = 0; + + *phb_active_mask = 0; + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + uint8_t i; + uint8_t lane_group; + + struct lane_config_row config = { + { LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC, LANE_WIDTH_NC }, + 0x00, + PHB_MASK_NA, + PHB_X16_MAC_MAP, + }; + + /* Transform effective config to match lane config table format */ + for (lane_group = 0; lane_group < MAX_LANE_GROUPS_PER_PEC; ++lane_group) { + const uint16_t mask = lane_masks[chip][pec][lane_group]; + config.lane_set[lane_group] = lane_mask_to_width(mask); + } + + for (i = 0; i < pec_lane_cfg_sizes[pec]; ++i) { + if (memcmp(pec_lane_cfgs[pec][i].lane_set, &config.lane_set, + sizeof(config.lane_set)) == 0) + break; + } + + if (i == pec_lane_cfg_sizes[pec]) + die("Failed to find PCIE IOP configuration for PEC%d\n", pec); + + *phb_active_mask |= pec_lane_cfgs[pec][i].phb_active; + + pec_cfgs[pec] = &pec_lane_cfgs[pec][i]; + + /* + * In the rest of the PCIe-related code the following PEC attributes have these + * values: + * - PEC[ATTR_PROC_PCIE_IOP_CONFIG] := pec_cfgs[pec]->lane_config + * - PEC[ATTR_PROC_PCIE_REFCLOCK_ENABLE] := 1 + * - PEC[ATTR_PROC_PCIE_PCS_SYSTEM_CNTL] := pec_cfgs[pec]->phb_to_pcie_mac + */ + } +} + +static uint64_t pec_val(int pec_id, uint8_t in, + int pec0_s, int pec0_c, + int pec1_s, int pec1_c, + int pec2_s, int pec2_c) +{ + uint64_t out = 0; + + switch (pec_id) { + case 0: + out = PPC_PLACE(in, pec0_s, pec0_c); + break; + case 1: + out = PPC_PLACE(in, pec1_s, pec1_c); + break; + case 2: + out = PPC_PLACE(in, pec2_s, pec2_c); + break; + default: + die("Unknown PEC ID: %d\n", pec_id); + } + + 
return out; +} + +static void phase1(uint8_t chip, const struct lane_config_row **pec_cfgs, + const uint8_t *iovalid_enable) +{ + enum { + PEC_CPLT_CONF1_OR = 0x0D000019, + PEC_CPLT_CTRL0_OR = 0x0D000010, + PEC_CPLT_CONF1_CLEAR = 0x0D000029, + + PEC_PCS_RX_ROT_CNTL_REG = 0x800004820D010C3F, + PEC_PCS_RX_CONFIG_MODE_REG = 0x800004800D010C3F, + PEC_PCS_RX_CDR_GAIN_REG = 0x800004B30D010C3F, + PEC_PCS_RX_SIGDET_CONTROL_REG = 0x800004A70D010C3F, + + PCI_IOP_FIR_ACTION0_REG = 0x0000000000000000ULL, + PCI_IOP_FIR_ACTION1_REG = 0xE000000000000000ULL, + PCI_IOP_FIR_MASK_REG = 0x1FFFFFFFF8000000ULL, + + PEC_FIR_ACTION0_REG = 0x0D010C06, + PEC_FIR_ACTION1_REG = 0x0D010C07, + PEC_FIR_MASK_REG = 0x0D010C03, + + PEC0_IOP_CONFIG_START_BIT = 13, + PEC1_IOP_CONFIG_START_BIT = 14, + PEC2_IOP_CONFIG_START_BIT = 10, + PEC0_IOP_BIT_COUNT = 1, + PEC1_IOP_BIT_COUNT = 2, + PEC2_IOP_BIT_COUNT = 3, + PEC0_IOP_SWAP_START_BIT = 12, + PEC1_IOP_SWAP_START_BIT = 12, + PEC2_IOP_SWAP_START_BIT = 7, + PEC0_IOP_IOVALID_ENABLE_START_BIT = 4, + PEC1_IOP_IOVALID_ENABLE_START_BIT = 4, + PEC2_IOP_IOVALID_ENABLE_START_BIT = 4, + PEC_IOP_IOVALID_ENABLE_STACK0_BIT = 4, + PEC_IOP_IOVALID_ENABLE_STACK1_BIT = 5, + PEC_IOP_IOVALID_ENABLE_STACK2_BIT = 6, + PEC_IOP_REFCLOCK_ENABLE_START_BIT = 32, + PEC_IOP_PMA_RESET_START_BIT = 29, + PEC_IOP_PIPE_RESET_START_BIT = 28, + + PEC_PCS_PCLCK_CNTL_PLLA_REG = 0x8000050F0D010C3F, + PEC_PCS_PCLCK_CNTL_PLLB_REG = 0x8000054F0D010C3F, + PEC_PCS_TX_DCLCK_ROTATOR_REG = 0x800004450D010C3F, + PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG = 0x8000046C0D010C3F, + PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG = 0x8000046D0D010C3F, + PEC_PCS_TX_POWER_SEQ_ENABLE_REG = 0x800004700D010C3F, + + PEC_SCOM0X0B_EDMOD = 52, + + PEC_PCS_RX_VGA_CONTROL1_REG = 0x8000048B0D010C3F, + PEC_PCS_RX_VGA_CONTROL2_REG = 0x8000048C0D010C3F, + PEC_IOP_RX_DFE_FUNC_REGISTER1 = 0x8000049F0D010C3F, + PEC_PCS_SYS_CONTROL_REG = 0x80000C000D010C3F, + + PEC_PCS_M1_CONTROL_REG = 0x80000C010D010C3F, + PEC_PCS_M2_CONTROL_REG = 0x80000C020D010C3F, + PEC_PCS_M3_CONTROL_REG = 0x80000C030D010C3F, + PEC_PCS_M4_CONTROL_REG = 0x80000C040D010C3F, + }; + + uint8_t pec = 0; + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + long time; + uint8_t i; + uint64_t val; + + chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; + + /* ATTR_PROC_PCIE_PCS_RX_CDR_GAIN, from talos.xml */ + uint8_t pcs_cdr_gain[] = { 0x56, 0x47, 0x47, 0x47 }; + /* ATTR_PROC_PCIE_PCS_RX_INIT_GAIN, all zeroes by default */ + uint8_t pcs_init_gain = 0; + /* ATTR_PROC_PCIE_PCS_RX_PK_INIT, all zeroes by default */ + uint8_t pcs_pk_init = 0; + /* ATTR_PROC_PCIE_PCS_RX_SIGDET_LVL, defaults and talos.xml */ + uint8_t pcs_sigdet_lvl = 0x0B; + + uint32_t pcs_config_mode[NUM_PCS_CONFIG] = { 0xA006, 0xA805, 0xB071, 0xB870 }; + + /* Phase1 init step 1 (get VPD, no operation here) */ + + /* Phase1 init step 2a */ + val = pec_val(pec, pec_cfgs[pec]->lane_config, + PEC0_IOP_CONFIG_START_BIT, PEC0_IOP_BIT_COUNT * 2, + PEC1_IOP_CONFIG_START_BIT, PEC1_IOP_BIT_COUNT * 2, + PEC2_IOP_CONFIG_START_BIT, PEC2_IOP_BIT_COUNT * 2); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); + + /* Phase1 init step 2b */ + + val = pec_val(pec, pcie_iop_swap[chip][pec], + PEC0_IOP_SWAP_START_BIT, PEC0_IOP_BIT_COUNT, + PEC1_IOP_SWAP_START_BIT, PEC1_IOP_BIT_COUNT, + PEC2_IOP_SWAP_START_BIT, PEC2_IOP_BIT_COUNT); + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); + + /* Phase1 init step 3a */ + + val = pec_val(pec, iovalid_enable[pec], + PEC0_IOP_IOVALID_ENABLE_START_BIT, PEC0_IOP_BIT_COUNT, + 
PEC1_IOP_IOVALID_ENABLE_START_BIT, PEC1_IOP_BIT_COUNT, + PEC2_IOP_IOVALID_ENABLE_START_BIT, PEC2_IOP_BIT_COUNT); + + /* Set IOVALID for base PHB if PHB2, or PHB4, or PHB5 are set (SW417485) */ + if ((val & PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK1_BIT)) || + (val & PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK2_BIT))) { + val |= PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK0_BIT); + val |= PPC_BIT(PEC_IOP_IOVALID_ENABLE_STACK1_BIT); + } + + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, val); + + /* Phase1 init step 3b (enable clock) */ + /* ATTR_PROC_PCIE_REFCLOCK_ENABLE, all PECs are enabled. */ + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CTRL0_OR, + PPC_BIT(PEC_IOP_REFCLOCK_ENABLE_START_BIT)); + + /* Phase1 init step 4 (PMA reset) */ + + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + udelay(1); /* at least 400ns */ + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_OR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + udelay(1); /* at least 400ns */ + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PMA_RESET_START_BIT)); + + /* + * Poll for PRTREADY status on PLLA and PLLB: + * PEC_IOP_PLLA_VCO_COURSE_CAL_REGISTER1 = 0x800005010D010C3F + * PEC_IOP_PLLB_VCO_COURSE_CAL_REGISTER1 = 0x800005410D010C3F + * PEC_IOP_HSS_PORT_READY_START_BIT = 58 + */ + time = wait_us(40, + (read_scom_for_chiplet(chip, chiplet, 0x800005010D010C3F) & PPC_BIT(58)) || + (read_scom_for_chiplet(chip, chiplet, 0x800005410D010C3F) & PPC_BIT(58))); + if (!time) + die("IOP HSS Port Ready status is not set!"); + + /* Phase1 init step 5 (Set IOP FIR action0) */ + write_scom_for_chiplet(chip, chiplet, PEC_FIR_ACTION0_REG, + PCI_IOP_FIR_ACTION0_REG); + + /* Phase1 init step 6 (Set IOP FIR action1) */ + write_scom_for_chiplet(chip, chiplet, PEC_FIR_ACTION1_REG, + PCI_IOP_FIR_ACTION1_REG); + + /* Phase1 init step 7 (Set IOP FIR mask) */ + write_scom_for_chiplet(chip, chiplet, PEC_FIR_MASK_REG, + PCI_IOP_FIR_MASK_REG); + + /* Phase1 init step 8-11 (Config 0 - 3) */ + + for (i = 0; i < NUM_PCS_CONFIG; ++i) { + uint8_t lane; + + /* RX Config Mode */ + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, + pcs_config_mode[i]); + + /* RX CDR GAIN */ + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_CDR_GAIN_REG, + ~PPC_BITMASK(56, 63), + pcs_cdr_gain[i]); + + for (lane = 0; lane < NUM_PCIE_LANES; ++lane) { + /* RX INITGAIN */ + scom_and_or_for_chiplet(chip, chiplet, + RX_VGA_CTRL3_REGISTER[lane], + ~PPC_BITMASK(48, 52), + PPC_PLACE(pcs_init_gain, 48, 5)); + + /* RX PKINIT */ + scom_and_or_for_chiplet(chip, chiplet, + RX_LOFF_CNTL_REGISTER[lane], + ~PPC_BITMASK(58, 63), + pcs_pk_init); + } + + /* RX SIGDET LVL */ + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_RX_SIGDET_CONTROL_REG, + ~PPC_BITMASK(59, 63), + pcs_sigdet_lvl); + } + + /* + * Phase1 init step 12 (RX Rot Cntl CDR Lookahead Disabled, SSC Disabled) + * + * All these attributes are zero for Nimbus: + * - ATTR_PROC_PCIE_PCS_RX_ROT_CDR_LOOKAHEAD (55) + * - ATTR_PROC_PCIE_PCS_RX_ROT_CDR_SSC (63) + * - ATTR_PROC_PCIE_PCS_RX_ROT_EXTEL (59) + * - ATTR_PROC_PCIE_PCS_RX_ROT_RST_FW (62) + */ + scom_and_for_chiplet(chip, chiplet, PEC_PCS_RX_ROT_CNTL_REG, + ~(PPC_BIT(55) | PPC_BIT(63) | PPC_BIT(59) | PPC_BIT(62))); + + /* Phase1 init step 13 (RX Config Mode Enable External Config Control) */ + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_CONFIG_MODE_REG, 0x8600); + + /* Phase1 init step 14 (PCLCK Control Register - PLLA) */ + /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLA = 0xF8 */ + 
scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLA_REG, + ~PPC_BITMASK(56, 63), + 0xF8); + + /* Phase1 init step 15 (PCLCK Control Register - PLLB) */ + /* ATTR_PROC_PCIE_PCS_PCLCK_CNTL_PLLB = 0xF8 */ + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_PCLCK_CNTL_PLLB_REG, + ~PPC_BITMASK(56, 63), + 0xF8); + + /* Phase1 init step 16 (TX DCLCK Rotator Override) */ + /* ATTR_PROC_PCIE_PCS_TX_DCLCK_ROT = 0x0022 */ + write_scom_for_chiplet(chip, chiplet, PEC_PCS_TX_DCLCK_ROTATOR_REG, 0x0022); + + /* Phase1 init step 17 (TX PCIe Receiver Detect Control Register 1) */ + /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG1 = 0xAA7A */ + write_scom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL1_REG, + 0xAA7A); + + /* Phase1 init step 18 (TX PCIe Receiver Detect Control Register 2) */ + /* ATTR_PROC_PCIE_PCS_TX_PCIE_RECV_DETECT_CNTL_REG2 = 0x2000 */ + write_scom_for_chiplet(chip, chiplet, PEC_PCS_TX_PCIE_REC_DETECT_CNTL2_REG, + 0x2000); + + /* Phase1 init step 19 (TX Power Sequence Enable) */ + /* ATTR_PROC_PCIE_PCS_TX_POWER_SEQ_ENABLE = 0xFF, but field is 7 bits */ + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_TX_POWER_SEQ_ENABLE_REG, + ~PPC_BITMASK(56, 62), + PPC_PLACE(0x7F, 56, 7)); + + /* Phase1 init step 20 (RX VGA Control Register 1) */ + + /* ATTR_PROC_PCIE_PCS_RX_VGA_CNTL_REG1 = 0 */ + val = 0; + + /* ATTR_CHIP_EC_FEATURE_HW414759 = 0, so not setting PEC_SCOM0X0B_EDMOD */ + + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL1_REG, val); + + /* Phase1 init step 21 (RX VGA Control Register 2) */ + /* ATTR_PROC_PCIE_PCS_RX_VGA_CNTL_REG2 = 0 */ + write_scom_for_chiplet(chip, chiplet, PEC_PCS_RX_VGA_CONTROL2_REG, 0); + + /* Phase1 init step 22 (RX DFE Func Control Register 1) */ + /* ATTR_PROC_PCIE_PCS_RX_DFE_FDDC = 1 */ + scom_or_for_chiplet(chip, chiplet, PEC_IOP_RX_DFE_FUNC_REGISTER1, PPC_BIT(50)); + + /* Phase1 init step 23 (PCS System Control) */ + /* ATTR_PROC_PCIE_PCS_SYSTEM_CNTL computed above */ + scom_and_or_for_chiplet(chip, chiplet, PEC_PCS_SYS_CONTROL_REG, + ~PPC_BITMASK(55, 63), + pec_cfgs[pec]->phb_to_pcie_mac); + + /* + * All values in ATTR_PROC_PCIE_PCS_M_CNTL are 0. + * Hostboot has bugs here in that it updates PEC_PCS_M1_CONTROL_REG + * 4 times instead of updating 4 different registers (M1-M4). + */ + + /* Phase1 init step 24 (PCS M1 Control) */ + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M1_CONTROL_REG, + ~PPC_BITMASK(55, 63)); + /* Phase1 init step 25 (PCS M2 Control) */ + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M2_CONTROL_REG, + ~PPC_BITMASK(55, 63)); + /* Phase1 init step 26 (PCS M3 Control) */ + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M3_CONTROL_REG, + ~PPC_BITMASK(55, 63)); + /* Phase1 init step 27 (PCS M4 Control) */ + scom_and_for_chiplet(chip, chiplet, PEC_PCS_M4_CONTROL_REG, + ~PPC_BITMASK(55, 63)); + + /* Delay a minimum of 200ns to allow prior SCOM programming to take effect */ + udelay(1); + + /* Phase1 init step 28 */ + write_scom_for_chiplet(chip, chiplet, PEC_CPLT_CONF1_CLEAR, + PPC_BIT(PEC_IOP_PIPE_RESET_START_BIT)); + + /* + * Delay a minimum of 300ns for reset to complete. + * Inherent delay before deasserting PCS PIPE Reset is enough here. 
+ */ + } +} + +void istep_10_10(uint8_t chips, struct pci_info *pci_info) +{ + report_istep(10, 10); + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + const struct lane_config_row *pec_cfgs[MAX_PEC_PER_PROC] = { NULL }; + + if (!(chips & (1 << chip))) + continue; + + determine_lane_configs(chip, &pci_info[chip].phb_active_mask, pec_cfgs); + + pci_info[chip].iovalid_enable[0] = pec_cfgs[0]->phb_active >> PEC0_PHB_SHIFT; + pci_info[chip].iovalid_enable[1] = pec_cfgs[1]->phb_active >> PEC1_PHB_SHIFT; + pci_info[chip].iovalid_enable[2] = pec_cfgs[2]->phb_active >> PEC2_PHB_SHIFT; + + phase1(chip, pec_cfgs, pci_info[chip].iovalid_enable); + } +} diff --git a/src/soc/ibm/power9/istep_10_12.c b/src/soc/ibm/power9/istep_10_12.c new file mode 100644 index 00000000000..8fe939bf92e --- /dev/null +++ b/src/soc/ibm/power9/istep_10_12.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#include "pci.h" + +/* PCIe only at the moment, Hostboot also updates MC and OBus chiplets too */ +static void enable_ridi(uint8_t chip) +{ + enum { + PERV_NET_CTRL0 = 0x000F0040, + PERV_NET_CTRL0_WOR = 0x000F0042, + }; + + uint8_t pec = 0; + + for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) { + chiplet_id_t chiplet = PCI0_CHIPLET_ID + pec; + + /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ + if (read_scom_for_chiplet(chip, chiplet, PERV_NET_CTRL0) & PPC_BIT(0)) { + /* Enable Receivers, Drivers DI1 & DI2 */ + uint64_t val = 0; + val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 + val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 + val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 + write_scom_for_chiplet(chip, chiplet, PERV_NET_CTRL0_WOR, val); + } + } +} + +void istep_10_12(uint8_t chips) +{ + report_istep(10, 12); + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + enable_ridi(chip); + } +} diff --git a/src/soc/ibm/power9/istep_10_13.c b/src/soc/ibm/power9/istep_10_13.c new file mode 100644 index 00000000000..e961697ace7 --- /dev/null +++ b/src/soc/ibm/power9/istep_10_13.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +/* + * 10.13 host_rng_bist: Trigger Built In Self Test for RNG + * + * a) p9_rng_init_phase1.C + * - Trigger the Random Number Generator Built In Self Test (BIST). 
Results + * are checked later in step 16 when RNG is secured + */ + +static void host_rng_bist(uint8_t chip) +{ + /* Assume DD2.0 or newer */ + + /* PU_NX_RNG_CFG + [44] COND_STARTUP_TEST_FAIL + */ + if (read_scom_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0) & PPC_BIT(44)) + die("RNG Conditioner startup test failed\n"); + + /* PU_NX_RNG_ST0 + [0-1] REPTEST_MATCH_TH = 0x1 (3 repeated numbers) + [7-8] ADAPTEST_SAMPLE_SIZE = 0x2 (8b wide sample) + [9-11] ADAPTEST_WINDOW_SIZE = 0x1 (512 size) + [12-23] ADAPTEST_RRN_RNG0_MATCH_TH = 0x32 (50; Assuming H = 5) + [24-35] ADAPTEST_RRN_RNG1_MATCH_TH = 0x32 (50; Assuming H = 5) + [36-47] ADAPTEST_CRN_RNG0_MATCH_TH = 0x32 (50; Assuming H = 5) + [48-59] ADAPTEST_CRN_RNG1_MATCH_TH = 0x32 (50; Assuming H = 5) + */ + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E1, + ~(PPC_BITMASK(0, 1) | PPC_BITMASK(7, 63)), + PPC_PLACE(1, 0, 2) | PPC_PLACE(2, 7, 2) | PPC_PLACE(1, 9, 3) + | PPC_PLACE(0x32, 12, 12) | PPC_PLACE(0x32, 24, 12) + | PPC_PLACE(0x32, 36, 12) | PPC_PLACE(0x32, 48, 12)); + + /* PU_NX_RNG_ST1 + [0-6] ADAPTEST_SOFT_FAIL_TH = 2 + [7-22] ADAPTEST_1BIT_MATCH_TH_MIN = 100 + [23-38] ADAPTEST_1BIT_MATCH_TH_MAX = 415 + */ + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E2, ~PPC_BITMASK(0, 38), + PPC_PLACE(2, 0, 7) | PPC_PLACE(100, 7, 16) + | PPC_PLACE(415, 23, 16)); + + /* PU_NX_RNG_ST3 + [0] SAMPTEST_RRN_ENABLE = 1 + [1-3] SAMPTEST_WINDOW_SIZE = 7 (64k -1 size) + [4-19] SAMPTEST_MATCH_TH_MIN = 0x6D60 (28,000) + [20-35] SAMPTEST_MATCH_TH_MAX = 0x988A (39,050) + */ + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E8, ~PPC_BITMASK(0, 35), + PPC_BIT(0) | PPC_PLACE(7, 1, 3) | PPC_PLACE(0x6D60, 4, 16) + | PPC_PLACE(0x988A, 20, 16)); + + /* PU_NX_RNG_RDELAY + [6] LFSR_RESEED_EN = 1 + [7-11] READ_RTY_RATIO = 0x1D (1/16) + */ + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E5, ~PPC_BITMASK(6, 11), + PPC_BIT(6) | PPC_PLACE(0x1D, 7, 5)); + + /* PU_NX_RNG_CFG + [30-37] ST2_RESET_PERIOD = 0x1B + [39] MASK_TOGGLE_ENABLE = 0 + [40] SAMPTEST_ENABLE = 1 + [41] REPTEST_ENABLE = 1 + [42] ADAPTEST_1BIT_ENABLE = 1 + [43] ADAPTEST_ENABLE = 1 + [46-61] PACE_RATE = 0x07D0 (2000) + [63] ENABLE = 1 + */ + scom_and_or_for_chiplet(chip, N0_CHIPLET_ID, 0x020110E0, + ~(PPC_BITMASK(30, 37) | PPC_BITMASK(39, 43) + | PPC_BITMASK(46, 61) | PPC_BIT(63)), + PPC_PLACE(0x1B, 30, 8) | PPC_BIT(40) | PPC_BIT(41) + | PPC_BIT(42) | PPC_BIT(43) | PPC_PLACE(0x07D0, 46, 16) + | PPC_BIT(63)); +} + +void istep_10_13(uint8_t chips) +{ + uint8_t chip; + + report_istep(10, 13); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + host_rng_bist(chip); + } +} diff --git a/src/soc/ibm/power9/istep_10_6.c b/src/soc/ibm/power9/istep_10_6.c new file mode 100644 index 00000000000..d67e33f6e5b --- /dev/null +++ b/src/soc/ibm/power9/istep_10_6.c @@ -0,0 +1,421 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include + +static void mcs_scom(uint8_t chip, chiplet_id_t chiplet) +{ + uint64_t data; + + { + data = read_scom_for_chiplet(chip, chiplet, 0x5010810); + + PPC_INSERT(data, 25, 32, 7); + PPC_INSERT(data, 0x7, 46, 4); + PPC_INSERT(data, 0xF, 55, 6); + /* MC01_PBI01_SCOMFIR_MCPERF1_ENABLE_PF_DROP_CMDLIST_ON */ + data |= PPC_BIT(61); + PPC_INSERT(data, 0x0, 62, 1); + /* MC01_PBI01_SCOMFIR_MCPERF1_ENABLE_PREFETCH_PROMOTE_ON */ + data |= PPC_BIT(63); + + write_scom_for_chiplet(chip, chiplet, 0x5010810, data); + } + + { + data = read_scom_for_chiplet(chip, chiplet, 0x5010811); + + /* 
MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_CENTAUR_SYNC_ON */ + data |= PPC_BIT(20); + /* MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_64_128B_READ_ON */ + data |= PPC_BIT(9); + /* MC01_PBI01_SCOMFIR_MCMODE0_ENABLE_DROP_FP_DYN64_ACTIVE_ON */ + data |= PPC_BIT(8); + /* MC01_PBI01_SCOMFIR_MCMODE0_CENTAURP_ENABLE_ECRESP_OFF */ + data &= ~PPC_BIT(7); + /* MC01_PBI01_SCOMFIR_MCMODE0_DISABLE_MC_SYNC_ON */ + data |= PPC_BIT(27); + /* MC01_PBI01_SCOMFIR_MCMODE0_DISABLE_MC_PAIR_SYNC_ON */ + data |= PPC_BIT(28); + /* MC01_PBI01_SCOMFIR_MCMODE0_FORCE_COMMANDLIST_VALID_ON */ + data |= PPC_BIT(17); + + write_scom_for_chiplet(chip, chiplet, 0x5010811, data); + } + { + data = read_scom_for_chiplet(chip, chiplet, 0x5010812); + + /* MC01_PBI01_SCOMFIR_MCMODE1_DISABLE_FP_M_BIT_ON */ + data |= PPC_BIT(10); + PPC_INSERT(data, 0x40, 33, 19); + + write_scom_for_chiplet(chip, chiplet, 0x5010812, data); + } + { + data = read_scom_for_chiplet(chip, chiplet, 0x5010813); + PPC_INSERT(data, 0x8, 24, 16); + write_scom_for_chiplet(chip, chiplet, 0x5010813, data); + } + { + data = read_scom_for_chiplet(chip, chiplet, 0x501081B); + + /* MC01_PBI01_SCOMFIR_MCTO_SELECT_PB_HANG_PULSE_ON */ + data |= PPC_BIT(0); + /* MC01_PBI01_SCOMFIR_MCTO_SELECT_LOCAL_HANG_PULSE_OFF */ + data &= ~PPC_BIT(1); + /* MC01_PBI01_SCOMFIR_MCTO_ENABLE_NONMIRROR_HANG_ON */ + data |= PPC_BIT(32); + /* MC01_PBI01_SCOMFIR_MCTO_ENABLE_APO_HANG_ON */ + data |= PPC_BIT(34); + PPC_INSERT(data, 0x1, 2, 2); + PPC_INSERT(data, 0x1, 24, 8); + PPC_INSERT(data, 0x7, 5, 3); + + write_scom_for_chiplet(chip, chiplet, 0x501081B, data); + } +} + +static void fbc_ioo_tl_scom(uint8_t chip) +{ + uint64_t data; + + /* PB_IOO_SCOM_A0_MODE_BLOCKED */ + scom_or(chip, 0x501380A, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* PB_IOO_SCOM_A1_MODE_BLOCKED */ + scom_or(chip, 0x501380B, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* PB_IOO_SCOM_A2_MODE_BLOCKED */ + scom_or(chip, 0x501380C, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* PB_IOO_SCOM_A3_MODE_BLOCKED */ + scom_or(chip, 0x501380D, PPC_BIT(20) | PPC_BIT(25) | PPC_BIT(52) | PPC_BIT(57)); + + /* 0x5013810, 0x5013811, 0x5013812 and 0x5013813 are not modified */ + + data = read_scom(chip, 0x5013823); + + data &= ~PPC_BIT(0); // PB_IOO_SCOM_PB_CFG_IOO01_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(1); // PB_IOO_SCOM_PB_CFG_IOO23_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(2); // PB_IOO_SCOM_PB_CFG_IOO45_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(3); // PB_IOO_SCOM_PB_CFG_IOO67_IS_LOGICAL_PAIR_OFF + data &= ~PPC_BIT(8); // PB_IOO_SCOM_LINKS01_TOD_ENABLE_OFF + data &= ~PPC_BIT(9); // PB_IOO_SCOM_LINKS23_TOD_ENABLE_OFF + data &= ~PPC_BIT(10); // PB_IOO_SCOM_LINKS45_TOD_ENABLE_OFF + data &= ~PPC_BIT(11); // PB_IOO_SCOM_LINKS67_TOD_ENABLE_OFF + + write_scom(chip, 0x5013823, data); + + /* 0x5013824 is not modified */ +} + +static void nx_scom(uint8_t chip, uint8_t dd) +{ + uint64_t data; + + { + data = read_scom(chip, 0x2011041); + + data |= PPC_BIT(63); // NX_DMA_CH0_EFT_ENABLE_ON + data |= PPC_BIT(62); // NX_DMA_CH1_EFT_ENABLE_ON + data |= PPC_BIT(58); // NX_DMA_CH2_SYM_ENABLE_ON + data |= PPC_BIT(57); // NX_DMA_CH3_SYM_ENABLE_ON + data |= PPC_BIT(61); // NX_DMA_CH4_GZIP_ENABLE_ON + + write_scom(chip, 0x2011041, data); + } + { + data = read_scom(chip, 0x2011042); + + PPC_INSERT(data, 0xF, 8, 4); // NX_DMA_GZIPCOMP_MAX_INRD_MAX_15_INRD + PPC_INSERT(data, 0xF, 12, 4); // NX_DMA_GZIPDECOMP_MAX_INRD_MAX_15_INRD + PPC_INSERT(data, 0x3, 25, 4); // NX_DMA_SYM_MAX_INRD_MAX_3_INRD + PPC_INSERT(data, 0xF, 33, 4); // 
NX_DMA_EFTCOMP_MAX_INRD_MAX_15_INRD = 0xf; + PPC_INSERT(data, 0xF, 37, 4); // NX_DMA_EFTDECOMP_MAX_INRD_MAX_15_INRD + + data |= PPC_BIT(23); // NX_DMA_EFT_COMP_PREFETCH_ENABLE_ON + data |= PPC_BIT(24); // NX_DMA_EFT_DECOMP_PREFETCH_ENABLE_ON + data |= PPC_BIT(16); // NX_DMA_GZIP_COMP_PREFETCH_ENABLE_ON + data |= PPC_BIT(17); // NX_DMA_GZIP_DECOMP_PREFETCH_ENABLE_ON + data &= ~PPC_BIT(56); // NX_DMA_EFT_SPBC_WRITE_ENABLE_OFF + + write_scom(chip, 0x2011042, data); + } + { + data = read_scom(chip, 0x201105C); + + PPC_INSERT(data, 0x9, 1, 4); // NX_DMA_CH0_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 6, 4); // NX_DMA_CH1_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 11, 4); // NX_DMA_CH2_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 16, 4); // NX_DMA_CH3_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x9, 21, 4); // NX_DMA_CH4_WATCHDOG_REF_DIV_DIVIDE_BY_512 + PPC_INSERT(data, 0x8, 26, 4); // NX_DMA_DMA_HANG_TIMER_REF_DIV_DIVIDE_BY_1024 + + data |= PPC_BIT(0); // NX_DMA_CH0_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(5); // NX_DMA_CH1_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(10); // NX_DMA_CH2_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(15); // NX_DMA_CH3_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(20); // NX_DMA_CH4_WATCHDOG_TIMER_ENBL_ON + data |= PPC_BIT(25); // NX_DMA_DMA_HANG_TIMER_ENBL_ON + + write_scom(chip, 0x201105C, data); + } + { + data = read_scom(chip, 0x2011087); + + data &= ~0x93EFDFFF3FF00000; + data |= 0x48102000C0000000; + + if (dd == 0x20) + data &= ~0x2400000000000000; + else + data |= 0x2400000000000000; + + write_scom(chip, 0x2011087, data); + } + { + data = read_scom(chip, 0x2011095); + + data |= PPC_BIT(24); // NX_PBI_CQ_WRAP_NXCQ_SCOM_SKIP_G_ON + data |= PPC_BIT(1); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_WR_DISABLE_GROUP_ON + data |= PPC_BIT(5); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_RD_DISABLE_GROUP_ON + data |= PPC_BIT(9); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_WR_DISABLE_GROUP_ON + data |= PPC_BIT(13); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_RD_DISABLE_GROUP_ON + data |= PPC_BIT(2); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_WR_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(6); // NX_PBI_CQ_WRAP_NXCQ_SCOM_DMA_RD_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(10); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_WR_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(14); // NX_PBI_CQ_WRAP_NXCQ_SCOM_UMAC_RD_DISABLE_VG_NOT_SYS_ON + data |= PPC_BIT(22); // NX_PBI_CQ_WRAP_NXCQ_SCOM_RD_GO_M_QOS_ON + data &= ~PPC_BIT(23); // NX_PBI_CQ_WRAP_NXCQ_SCOM_ADDR_BAR_MODE_OFF + + PPC_INSERT(data, 0x0, 56, 4); // TGT1_ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + PPC_INSERT(data, 0x0, 60, 3); // TGT1_ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + PPC_INSERT(data, 0x1, 25, 2); + PPC_INSERT(data, 0xFC, 40, 8); + PPC_INSERT(data, 0xFC, 48, 8); + + write_scom(chip, 0x2011095, data); + } + { + data = read_scom(chip, 0x20110D6); + + PPC_INSERT(data, 0x2, 9, 3); + data |= PPC_BIT(6); // NX_PBI_DISABLE_PROMOTE_ON + + write_scom(chip, 0x20110D6, data); + } + { + data = read_scom(chip, 0x2011107); + + data &= ~0xF0839FFFC2FFC000; + data |= 0x0A7400003D000000; + + if (dd == 0x20) + data &= ~0x0508600000000000; + else + data |= 0x0508600000000000; + + write_scom(chip, 0x2011107, data); + } + + scom_and_or(chip, 0x2011083, ~0xEEF8FF9CFD000000, 0x1107006302F00000); + scom_and(chip, 0x2011086, ~0xFFFFFFFFFFF00000); + scom_and_or(chip, 0x20110A8, ~0x0FFFF00000000000, 0x0888800000000000); + scom_and_or(chip, 0x20110C3, ~0x0000001F00000000, 0x0000000080000000); + scom_and_or(chip, 0x20110C4, ~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); + scom_and_or(chip, 0x20110C5, 
~PPC_BITMASK(27, 35), PPC_PLACE(0x8, 27, 9)); + scom_or(chip, 0x20110D5, PPC_BIT(1)); // NX_PBI_PBI_UMAC_CRB_READS_ENBL_ON + scom_and_or(chip, 0x2011103, ~0xCF7DEF81BF003000, 0x3082107E40FFC000); + scom_and(chip, 0x2011106, ~0xFFFFFFFFFFFFC000); +} + +static void cxa_scom(uint8_t chip, uint8_t dd) +{ + uint64_t data; + + data = read_scom(chip, 0x2010803); + data &= ~PPC_BITMASK(0, 52); + data |= (dd == 0x20 ? 0x801B1F98C8717000 : 0x801B1F98D8717000); + write_scom(chip, 0x2010803, data); + + data = read_scom(chip, 0x2010818); + data &= ~PPC_BIT(1); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_ADR_BAR_MODE_OFF + data |= PPC_BIT(6); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_SKIP_G_ON + data &= ~PPC_BITMASK(21, 24); // ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + data &= ~PPC_BITMASK(25, 27); // ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + data |= PPC_BIT(4); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_DISABLE_G_ON + data |= PPC_BIT(3); // CAPP0_CXA_TOP_CXA_APC0_APCCTL_DISABLE_VG_NOT_SYS_ON + write_scom(chip, 0x2010818, data); + + scom_and(chip, 0x2010806, ~PPC_BITMASK(0, 52)); + scom_or(chip, 0x2010807, PPC_BIT(2) | PPC_BIT(8) | PPC_BIT(34) | PPC_BIT(44)); + scom_and(chip, 0x2010819, ~PPC_BITMASK(4, 7)); + scom_and_or(chip, 0x201081B, + ~PPC_BITMASK(45, 51), PPC_PLACE(0x7, 45, 3) | PPC_PLACE(0x2, 48, 4)); + scom_and_or(chip, 0x201081C, ~PPC_BITMASK(18, 21), PPC_PLACE(0x1, 18, 4)); +} + +static void int_scom(uint8_t chip, uint8_t dd) +{ + /* + * [0] = 0 + * [1] = 1 + * [5-8] ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + * [9-11] ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + */ + scom_and_or(chip, 0x501300A, ~(PPC_BITMASK(0, 1) | PPC_BITMASK(5, 11)), PPC_BIT(1)); + + scom_or(chip, 0x5013021, + PPC_BIT(46) | // INT_CQ_PBO_CTL_DISABLE_VG_NOT_SYS_ON + PPC_BIT(47) | // INT_CQ_PBO_CTL_DISABLE_G_ON + PPC_BIT(49)); + + if (dd <= 0x20) + write_scom(chip, 0x5013033, 0x2000005C040281C3); + else + write_scom(chip, 0x5013033, 0x0000005C040081C3); + + write_scom(chip, 0x5013036, 0); + write_scom(chip, 0x5013037, 0x9554021F80110E0C); + + scom_and_or(chip, 0x5013130, + ~(PPC_BITMASK(2, 7) | PPC_BITMASK(10, 15)), + PPC_PLACE(0x18, 2, 6) | PPC_PLACE(0x18, 10, 6)); + + write_scom(chip, 0x5013140, 0x050043EF00100020); + write_scom(chip, 0x5013141, 0xFADFBB8CFFAFFFD7); + write_scom(chip, 0x5013178, 0x0002000610000000); + + scom_and_or(chip, 0x501320E, ~PPC_BITMASK(0, 47), PPC_PLACE(0x626222024216, 0, 48)); + scom_and_or(chip, 0x5013214, ~PPC_BITMASK(16, 31), PPC_PLACE(0x5BBF, 16, 16)); + scom_and_or(chip, 0x501322B, ~PPC_BITMASK(58, 63), PPC_PLACE(0x18, 58, 6)); + + if (dd == 0x20) { + scom_and_or(chip, 0x5013272, + ~PPC_BITMASK(0, 43), PPC_PLACE(0x0002C018006, 0, 44)); + scom_and_or(chip, 0x5013273, + ~PPC_BITMASK(0, 43), PPC_PLACE(0xFFFCFFEFFFA, 0, 44)); + } +} + +static void vas_scom(uint8_t chip, uint8_t dd) +{ + uint64_t data; + + scom_and_or(chip, 0x3011803, ~PPC_BITMASK(0, 53), 0x00210102540D7C00); + scom_and(chip, 0x3011806, ~PPC_BITMASK(0, 53)); + + data = read_scom(chip, 0x3011807); + data &= ~PPC_BITMASK(0, 53); + data |= (dd == 0x20 ? 
0x00DD020180000000 : 0x00DF020180000000); + write_scom(chip, 0x3011807, data); + + /* + * [0-3] ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID + * [4-6] ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID + */ + scom_and(chip, 0x301184D, ~PPC_BITMASK(0, 6)); + + data = read_scom(chip, 0x301184E); + data &= ~PPC_BIT(13); // SOUTH_VA_EG_SCF_ADDR_BAR_MODE_OFF + data |= PPC_BIT(14); // SOUTH_VA_EG_SCF_SKIP_G_ON + data |= PPC_BIT(1); // SOUTH_VA_EG_SCF_DISABLE_G_WR_ON + data |= PPC_BIT(5); // SOUTH_VA_EG_SCF_DISABLE_G_RD_ON + data |= PPC_BIT(2); // SOUTH_VA_EG_SCF_DISABLE_VG_WR_ON + data |= PPC_BIT(6); // SOUTH_VA_EG_SCF_DISABLE_VG_RD_ON + PPC_INSERT(data, 0xFC, 20, 8); + PPC_INSERT(data, 0xFC, 28, 8); + write_scom(chip, 0x301184E, data); + + if (dd == 0x20) + scom_or(chip, 0x301184F, PPC_BIT(0)); +} + +static void chiplet_scominit(uint8_t chip, uint8_t dd) +{ + enum { + PU_PB_CENT_SM0_PB_CENT_FIR_REG = 0x05011C00, + PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 = 13, + + PU_PB_IOE_FIR_MASK_REG_OR = 0x05013405, + PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR = 0x05011C33, + + FBC_IOE_TL_FIR_MASK_X0_NF = 0x00C00C0C00000880, + FBC_IOE_TL_FIR_MASK_X2_NF = 0x000300C0C0000220, + FBC_EXT_FIR_MASK_X0_NF = 0x8000000000000000, + FBC_EXT_FIR_MASK_X1_NF = 0x4000000000000000, + FBC_EXT_FIR_MASK_X2_NF = 0x2000000000000000, + + PU_NMMU_MM_EPSILON_COUNTER_VALUE = 0x5012C1D, + }; + + const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip); + + int mcs_i; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) + mcs_scom(chip, mcs_to_nest[mcs_ids[mcs_i]]); + + /* + * Read spare FBC FIR bit -- if set, SBE has configured XBUS FIR resources for all + * present units, and code here will be run to mask resources associated with + * non-functional units. + */ + if (read_scom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & + PPC_BIT(PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13)) { + /* Masking XBUS FIR resources for unused links */ + + /* XBUS0 FBC TL */ + write_scom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X0_NF); + /* XBUS0 EXTFIR */ + write_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X0_NF); + + /* XBUS2 FBC TL */ + write_scom(chip, PU_PB_IOE_FIR_MASK_REG_OR, FBC_IOE_TL_FIR_MASK_X2_NF); + /* XBUS2 EXTFIR */ + write_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_OR, FBC_EXT_FIR_MASK_X2_NF); + } + + fbc_ioo_tl_scom(chip); + nx_scom(chip, dd); + cxa_scom(chip, dd); // CAPP + int_scom(chip, dd); + vas_scom(chip, dd); + + /* Setup NMMU epsilon write cycles */ + scom_and_or(chip, PU_NMMU_MM_EPSILON_COUNTER_VALUE, + ~(PPC_BITMASK(0, 11) | PPC_BITMASK(16, 27)), + PPC_PLACE(pb_cfg->eps_w[0], 0, 12) | PPC_PLACE(pb_cfg->eps_w[1], 16, 12)); +} + +static void psi_scom(uint8_t chip) +{ + scom_or(chip, 0x4011803, PPC_BITMASK(0, 6)); + scom_and(chip, 0x4011806, ~PPC_BITMASK(0, 6)); + scom_and(chip, 0x4011807, ~PPC_BITMASK(0, 6)); + + scom_and_or(chip, 0x5012903, ~PPC_BITMASK(0, 28), PPC_PLACE(0x7E040DF, 0, 29)); + scom_and_or(chip, 0x5012906, ~PPC_BITMASK(0, 28), PPC_PLACE(0x0, 0, 29)); + scom_and_or(chip, 0x5012907, ~PPC_BITMASK(0, 28), PPC_PLACE(0x18050020, 0, 29)); + + scom_and(chip, 0x501290F, ~(PPC_BITMASK(16, 27) | PPC_BITMASK(48, 52))); +} + +void istep_10_6(uint8_t chips) +{ + uint8_t dd = get_dd(); // XXX: this should probably be chip-specific + + report_istep(10, 6); + + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + chiplet_scominit(chip, dd); + psi_scom(chip); + } + } +} diff --git a/src/soc/ibm/power9/istep_13_10.c b/src/soc/ibm/power9/istep_13_10.c new file mode 100644 index 00000000000..af9d6e0d0fa --- 
/dev/null +++ b/src/soc/ibm/power9/istep_13_10.c @@ -0,0 +1,539 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.10 mss_draminit: Dram initialize + * + * a) p9_mss_draminit.C (mcbist) -- Nimbus + * b) p9c_mss_draminit.C (mba) -- Cumulus + * - RCD parity errors are checked before logging other errors - HWP will + * exit with RC + * - De-assert dram reset + * - De-assert bit (Scom) that forces mem clock low - dram clocks start + * - Raise CKE + * - Load RCD Control Words + * - Load MRS - for each dimm pair/ports/rank + * - ODT Values + * - MR0-MR6 + * c) Check for attentions (even if HWP has error) + * - FW + * - Call PRD + * - If it finds an error, commit HWP RC as informational + * - Else commit HWP RC as normal + * - Trigger reconfig loop if anything was deconfigured + */ + +static void draminit_cke_helper(uint8_t chip, chiplet_id_t id, int mca_i) +{ + /* + * Hostboot stops CCS before sending new programs. I'm not sure it is wise + * to do that unless there are infinite loops. Don't do it and see what happens. + MC01.MCBIST.MBA_SCOMFIR.CCS_CNTLQ // 0x070123A5 + [all] 0 + [1] CCS_CNTLQ_CCS_STOP = 1 + timeout(50*10ns): + if MC01.MCBIST.MBA_SCOMFIR.CCS_STATQ[0] (CCS_STATQ_CCS_IP) != 1: break // 0x070123A6 + delay(10ns) + */ + + ccs_add_instruction(chip, id, 0, 0xF, 0xF, 400); + ccs_execute(chip, id, mca_i); +} + +static void rcd_load(uint8_t chip, mca_data_t *mca, int d) +{ + uint8_t val; + rdimm_data_t *dimm = &mca->dimm[d]; + uint8_t *spd = dimm->spd; + unsigned int spd_bus = SPD_I2C_BUS + chip * I2C_BUSES_PER_CPU; + + /* Raw card specifications are JEDEC documents MODULE4.20.28.x, where x is A-E */ + + /* + F0RC00 = 0x0 // Depends on reference raw card used, sometimes 0x2 (ref. A, B, C and custom?) + // Seems that 'custom' is used for > C, which means 0x2 is always set. + F0RC01 = 0x0 // Depends on reference raw card used, sometimes 0xC (ref. C?). + // JESD82-31: "The system must read the module SPD to determine + // which clock outputs are used by the module". R/C C and D use + // only Y0-Y1, other R/C use all 4 signals. + */ + /* + * F0RC01 is effectively based on dimm->mranks, but maybe future reference R/C + * will use different clocks than Y0-Y1, which technically is possible... + * + * (spd[131] & 0x1F) is 0x02 for C and 0x03 for D, this line tests for both + */ + val = ((spd[131] & 0x1E) == 0x02) ? 0xC2 : 0x02; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC00_01, val); + + /* + F0RC02 = + [0] = 1 if(!(16Gb density && x4 width)) // disable A17? // Why not use SPD[5]? + // Hostboot waits for tSTAB; however, it is not necessary as long as bit 3 is not changed. + F0RC03 = + [0-1] SPD[137][4-5] // Address/Command drive strength + [2-3] SPD[137][6-7] // CS drive strength + // There is also a workaround for NVDIMM hybrids, not needed for plain RDIMM + */ + val = spd[137] & 0xF0; // F0RC03 + if (dimm->density != DENSITY_16Gb || dimm->width != WIDTH_x4) + val |= 1; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC02_03, val); + + /* + F0RC04 = + // BUG?
Hostboot reverses bitfields order for RC04, 05 + [0-1] SPD[137][2-3] // ODT drive strength + [2-3] SPD[137][0-1] // CKE drive strength + // There is also a workaround for NVDIMM hybrids, not needed for plain RDIMM + F0RC05 = + [0-1] SPD[138][2-3] // Clocks drive strength, A side (1,3) + [2-3] SPD[138][0-1] // Clocks drive strength, B side (0,2) + // There is also a workaround for NVDIMM hybrids, not needed for plain RDIMM + */ + /* First read both nibbles as they are in SPD, then swap pairs of bit fields */ + val = (spd[137] & 0x0F) | ((spd[138] & 0x0F) << 4); + val = ((val & 0x33) << 2) | ((val & 0xCC) >> 2); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC04_05, val); + + /* + F0RC06 = 0xf // This is a command register, either don't touch it or use NOP (F) + F0RC07 = 0x0 // This is a command register, either don't touch it or use NOP (0) + */ + + /* + F0RC08 = + [0-1] = + 1 if master ranks == 4 (SPD[12]) // C0 and C1 enabled + 3 if not 3DS (check SPD[6] and SPD[10]) // all disabled + 2 if slave ranks <= 2 // C0 enabled + 1 if slave ranks <= 4 // C0 and C1 enabled + 0 otherwise (3DS with 5-8 slave ranks) // C0, C1 and C2 enabled + [3] = 1 if(!(16Gb density && x4 width)) // disable A17? // Why not use SPD[5]? + F0RC09 = + [2] = + // TODO: add test for it, write 1 for now + 0 if this DIMM's ODTs are used for writes or reads that target the other DIMM on the same port + 1 otherwise + [3] = 1 // Register CKE Power Down. CKE must be high at the moment of writing to this register and stay high. + // TODO: For how long? Indefinitely, tMRD, tInDIS, tFixedOutput or anything else? + */ + /* Assume no 4R */ + val = (dimm->mranks == dimm->log_ranks) ? 3 : + (2 - (dimm->log_ranks / dimm->mranks) / 4); + if (dimm->density != DENSITY_16Gb || dimm->width != WIDTH_x4) + val |= 8; + val |= 0xC0; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC08_09, val); + + /* + F0RC0A = + [0-2] = // There are other valid values not used by Hostboot + 1 if 1866 MT/s + 2 if 2133 MT/s + 3 if 2400 MT/s + 4 if 2666 MT/s + F0RC0B = 0xe // External VrefCA connected to QVrefCA and BVrefCA + */ + val = mem_data[chip].speed == 1866 ? 1 : + mem_data[chip].speed == 2133 ? 2 : + mem_data[chip].speed == 2400 ? 3 : 4; + val |= 0xE0; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0A_0B, val); + + /* + F0RC0C = 0 // Normal operating mode + F0RC0D = + [0-1] = // CS mode + 3 if master ranks == 4 (SPD[12]) // encoded QuadCS + 0 otherwise // direct DualCS + [2] = 1 // RDIMM + [3] = SPD[136] // Address mirroring for MRS commands + */ + /* Assume RDIMM and that there are no 4R configurations, add when needed */ + val = 0x40; + if (spd[136]) + val |= 0x80; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0C_0D, val); + + /* + F0RC0E = 0xd // Parity enable, ALERT_n assertion and re-enable + F0RC0F = 0 // Normal mode + */ + val = 0x0D; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC0E_0F, val); + + /* + F0RC1x = 0 // Normal mode, VDD/2 + F0RC2x = 0 // Normal mode, all I2C accesses enabled + */ + + /* + F0RC3x = + 0x1f if 1866 MT/s + 0x2c if 2133 MT/s + 0x39 if 2400 MT/s + 0x47 if 2666 MT/s + */ + val = mem_data[chip].speed == 1866 ? 0x1F : + mem_data[chip].speed == 2133 ? 0x2C : + mem_data[chip].speed == 2400 ? 
0x39 : 0x47; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC3x, val); + + /* + F0RC4x = 0 // Should not be touched at all, it is used to access different function spaces + F0RC5x = 0 // Should not be touched at all, it is used to access different function spaces + F0RC6x = 0 // Should not be touched at all, it is used to access different function spaces + F0RC7x = 0 // Value comes from VPD, 0 is default, it doesn't seem to be changed anywhere in the code... + F0RC8x = 0 // Default QxODT timing for reads and for writes + F0RC9x = 0 // QxODT not asserted during writes, all ranks + F0RCAx = 0 // QxODT not asserted during reads, all ranks + */ + + /* + F0RCBx = + [0-2] = // Note that only the first line is different than F0RC08 (C0 vs. C0 & C1) + 6 if master ranks == 4 (SPD[12]) // C0 enabled + 7 if not 3DS (check SPD[6] and SPD[10]) // all disabled + 6 if slave ranks <= 2 // C0 enabled + 4 if slave ranks <= 4 // C0 and C1 enabled + 0 otherwise (3DS with 5-8 slave ranks) // C0, C1 and C2 enabled + */ + /* Assume no 4R */ + val = (dimm->mranks == dimm->log_ranks) ? 7 : + (dimm->log_ranks / dimm->mranks) == 2 ? 6 : + (dimm->log_ranks / dimm->mranks) == 4 ? 4 : 0; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RCBx, val); + + /* + * After all RCWs are set, DRAM gets reset "to ensure it is reset properly". + * Comment: "Note: the minimum for a FORC06 soft reset is 32 cycles, but we + * empirically tested it at 8k cycles". Shouldn't we rather wait (again!) + * for periods defined in JESD79-4C (200us low and 500us high)? + * + * Do we even need it in the first place? + */ + /* + F0RC06 = 0x2 // Set QRST_n to active (low) + delay(8000 memclocks) + F0RC06 = 0x3 // Set QRST_n to inactive (high) + delay(8000 memclocks) + */ + val = 0x2; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); + delay_nck(chip, 8000); + val = 0x3; + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC06_07, val); + delay_nck(chip, 8000); + + /* + * Dumped values from currently installed DIMM, from Petitboot: + * 0xc7 0x18 0x42 0x00 0x00 0x00 0x00 0x00 VID[2], DID[2], RID[1], 3x reserved + * 0x02 0x01 0x00 0x03 0xcb 0xe4 0x40 0x0d F0RC00-0F (4b each) + * 0x00 0x00 0x47 0x00 0x00 0x00 0x00 0x00 F0RC1x-8x (8b each) + * 0x00 0x00 0x07 F0RC9x-Bx (8b each), then all zeroes (Error Log Registers) + * + * Below is a copy of above values, this also tests RCD/I2C API. Command + * register is changed to NOP (was "Clear DRAM Reset" in dump). + */ + /* + rcd_write_32b(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC00_01, 0x0201000f); + rcd_write_32b(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC08_09, 0xcbe4400d); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RC3x, 0x47); + rcd_write_reg(spd_bus, mca->dimm[d].rcd_i2c_addr, F0RCBx, 0x07); + */ +} + +/* + * Programming the Mode Registers consists of entering special mode using MRS + * (Mode Register Set) command and sending MR# values, one at a time, in a + * specific order (3,6,5,4,2,1,0). Those values are sent using address lines, + * including bank and bank group lines, which select which MR to write to. + * One of the implications is that these values cannot be read back. PHY + * controller holds the mirrors of last written values in its registers, but the + * mapping of bits is not clear. This mirror is RW, so there is a possibility + * that the values are not the same as the real ones (but this would be a bad + * idea as these bits are used by a controller). 
It gets further complicated + * when PDA mode was used at any point, as there is just one mirror register per + * rank pair. + * + * We have to write a whole register even when changing just one bit, this means + * that we have to remember what was written, or be able to (re)generate valid + * data. For this platform we have CCS which can be programmed to push all MRs + * in one sequence of instructions, including all required timeouts. There are + * two main timeout parameters: tMRD (minimal amount of time between two MRS + * commands) and tMOD (time between MRS and non-MRS and non-DES command). For + * all Speed Bins tMRD = 8nCK, tMOD = max(24nCK, 15ns) = 24nCK. Exceptions to + * those are: + * - gear down mode + * - PDA mode + * - settings to command & address lines: C/A parity latency, CS to C/A latency + * (only tMRC doesn't apply) + * - VrefDQ training + * - DLL Enable, DLL Reset (only tMOD doesn't apply) + * - maximum power saving mode (only tMOD doesn't apply) + * + * MRS are written per rank usually, although most of them must have the same + * values across the DIMM or even port. There are some settings that apply to + * individual DRAMs instead of whole rank (e.g. Vref in MR6). Normally settings + * written to MR# are passed to each DRAM, if individual DRAM has to have its + * settings changed independently of others we must use Per DRAM Addressability + * (PDA) mode. PDA is possible only after write leveling was performed. + * + * CCS is per MCBIST, so we need at most 4 (ports) * 2 (DIMMs per port) * + * 2 (master ranks per DIMM) * 2 (A- and B-side) * + * (7 (# of MRS) + 1 (final DES)) = 256 instructions. CCS holds space for 32 + * instructions, so we have to divide it and send a set of instructions per DIMM + * or even smaller chunks. + * + * TODO: is 4 ranks on RDIMM possible/used? PHY supports two ranks per DIMM (see + * 2.1 in any of the volumes of the registers specification), but Hostboot has + * configurations even for RDIMMs with 4 master ranks (see xlate_map vector in + * src/import/chips/p9/procedures/hwp/memory/lib/mc/xlate.C). Maybe those are + * counted in different places, i.e. before and after RCD, and thanks to Encoded + * QuadCS 4R DIMMs are visible to the PHY as 2R devices? + */ +static void mrs_load(uint8_t chip, int mcs_i, int mca_i, int d) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int mirrored = mca->dimm[d].spd[136] & 1; + mrs_cmd_t mrs; + int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); + enum rank_selection ranks; + + if (d == 0) { + if (mca->dimm[d].mranks == 2) + ranks = DIMM0_ALL_RANKS; + else + ranks = DIMM0_RANK0; + } + else { + if (mca->dimm[d].mranks == 2) + ranks = DIMM1_ALL_RANKS; + else + ranks = DIMM1_RANK0; + } + + /* + * If any of the following are changed, make sure to change istep 13.11 too, + * some of the pre-/post-workarounds are also writing to these registers. 
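+ * + * The ccs_add_mrs() calls below issue the registers in the order given above + * (3,6,5,4,2,1,0), each spaced by tMRD except the final MR0, which uses tMOD + * because the next command for this rank will not be another MRS.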
+ */ + + mrs = ddr4_get_mr3(DDR4_MR3_MPR_SERIAL, + DDR4_MR3_CRC_DM_5, + DDR4_MR3_FINE_GRAN_REF_NORMAL, + DDR4_MR3_TEMP_SENSOR_DISABLE, + DDR4_MR3_PDA_DISABLE, + DDR4_MR3_GEARDOWN_1_2_RATE, + DDR4_MR3_MPR_NORMAL, + 0); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr6(mca->nccd_l, + DDR4_MR6_VREFDQ_TRAINING_DISABLE, + DDR4_MR6_VREFDQ_TRAINING_RANGE_1, /* Don't care when disabled */ + 0); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr5(DDR4_MR5_RD_DBI_DISABLE, + DDR4_MR5_WR_DBI_DISABLE, + DDR4_MR5_DATA_MASK_DISABLE, + vpd_to_rtt_park(ATTR_MSS_VPD_MT_DRAM_RTT_PARK[vpd_idx]), + DDR4_MR5_ODT_PD_ACTIVADED, + DDR4_MR5_CA_PARITY_LAT_DISABLE); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr4(DDR4_MR4_HPPR_DISABLE, + DDR4_MR4_WR_PREAMBLE_1, /* ATTR_MSS_VPD_MT_PREAMBLE - always 0 */ + DDR4_MR4_RD_PREAMBLE_1, /* ATTR_MSS_VPD_MT_PREAMBLE - always 0 */ + DDR4_MR4_RD_PREAMBLE_TRAINING_DISABLE, + DDR4_MR4_SELF_REFRESH_ABORT_DISABLE, + DDR4_MR4_CS_TO_CMD_LAT_DISABLE, + DDR4_MR4_SPPR_DISABLE, + DDR4_MR4_INTERNAL_VREF_MON_DISABLE, + DDR4_MR4_TEMP_CONTROLLED_REFR_DISABLE, + DDR4_MR4_MAX_PD_MODE_DISABLE); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); + + /* + * Regarding RTT_WR: OFF seems to be the safest option, but it is not always + * the case in VPD. + * See "Write leveling - pre-workaround" (and post-workaround) in 13.11, + * maybe write 0 here and don't do pre-? + */ + mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, /* ATTR_MSS_MRW_DRAM_WRITE_CRC, default 0 */ + vpd_to_rtt_wr(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), + /* ATTR_MSS_MRW_REFRESH_RATE_REQUEST, default DOUBLE. + * Do we need to half tREFI as well? */ + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, + mem_data[chip].cwl); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, /* ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS, always 34 Ohms */ + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMRD); + + mrs = ddr4_get_mr0(mca->nwr, + DDR4_MR0_DLL_RESET_YES, + DDR4_MR0_MODE_NORMAL, + mca->cl, + DDR4_MR0_BURST_TYPE_SEQUENTIAL, + DDR4_MR0_BURST_LENGTH_FIXED_8); + ccs_add_mrs(chip, id, mrs, ranks, mirrored, tMOD); + + ccs_execute(chip, id, mca_i); +} + +static void mss_draminit(uint8_t chip) +{ + unsigned int spd_bus = SPD_I2C_BUS + chip * I2C_BUSES_PER_CPU; + int mcs_i, mca_i, dimm; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + /* MC01.MCBIST.MBA_SCOMFIR.CCS_MODEQ + // "It's unclear if we want to run with this true or false. Right now (10/15) this + // has to be false. Shelton was unclear if this should be on or off in general BRS" + [0] CCS_MODEQ_CCS_STOP_ON_ERR = 0 + [1] CCS_MODEQ_CCS_UE_DISABLE = 0 + [24] CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD = 1 + [26] CCS_MODEQ_COPY_CKE_TO_SPARE_CKE = 1 // Docs: "Does not apply for POWER9. No spare chips to copy to." + // The following are set in 13.11, but we can do it here, one less RMW + // "Hm. Centaur sets this up for the longest duration possible. Can we do better?" + // This is timeout so we should only hit it in the case of error. What is the unit of this field? Memclocks? 
+ [8-23] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT = 0xffff + [30-31] CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT = 3 + */ + scom_and_or_for_chiplet(chip, mcs_ids[mcs_i], CCS_MODEQ, + ~(PPC_BIT(CCS_MODEQ_CCS_STOP_ON_ERR) | + PPC_BIT(CCS_MODEQ_CCS_UE_DISABLE)), + PPC_BIT(CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD) | + PPC_BIT(CCS_MODEQ_COPY_CKE_TO_SPARE_CKE) | + PPC_PLACE(0xFFFF, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN) | + PPC_PLACE(0x3, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT, + CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN)); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* MC01.PORT0.SRQ.MBA_FARB5Q // 0x07010918 + // RESET_N should stay low for at least 200us (JEDEC fig 7) for cold boot. Who and when sets it low? + // "Up, down P down, up N. Somewhat magic numbers - came from Centaur and proven to be the + // same on Nimbus. Why these are what they are might be lost to time ..." + [0-1] MBA_FARB5Q_CFG_DDR_DPHY_NCLK = 0x1 // 0b01 // 2nd RMW + [2-3] MBA_FARB5Q_CFG_DDR_DPHY_PCLK = 0x2 // 0b10 // 2nd RMW + [4] MBA_FARB5Q_CFG_DDR_RESETN = 1 // 3rd RMW (optional (?), only if changes) + [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 1 // 1st RMW (optional, only if changes) + [6] MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE = 0 // 1st RMW (optional, only if changes) + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE), + PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL)); + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~PPC_BITMASK(0, 3), + PPC_PLACE(0x1, + MBA_FARB5Q_CFG_DDR_DPHY_NCLK, + MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN) | + PPC_PLACE(0x2, + MBA_FARB5Q_CFG_DDR_DPHY_PCLK, + MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN)); + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0, + PPC_BIT(MBA_FARB5Q_CFG_DDR_RESETN)); + + udelay(500); /* part of 3rd RMW, but delay is unconditional */ + } + + /* + * JEDEC, fig 7,8: delays above and below end at the same point, they + * are not consecutive. RDIMM spec says that clocks must be stable for + * 16nCK before RESET_n = 1. This is not explicitly ensured. + * + * Below seems unnecessary, we are starting clocks at the same time as + * deasserting reset (are we?) + */ + /* + * max(10ns, 5tCK), but for all DDR4 Speed Bins 10ns is bigger. + * coreboot API doesn't have enough precision anyway. + */ + udelay(1); + + /* + * draminit_cke_helper() is called only for the first functional MCA + * because CCS_ADDR_MUX_SEL is set. + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (mem_data[chip].mcs[mcs_i].mca[mca_i].functional) + break; + } + draminit_cke_helper(chip, mcs_ids[mcs_i], mca_i); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* + * "Per conversation with Shelton and Steve, turn off addr_mux_sel + * after the CKE CCS but before the RCD/MRS CCSs" + * + * Needs to be disabled for all MCAs before next instructions, hence + * separate loop. 
MC01.PORT0.SRQ.MBA_FARB5Q + [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 0 + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL), 0); + } + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + rcd_load(chip, mca, dimm); + // bcw_load(); /* LRDIMM only */ + mrs_load(chip, mcs_i, mca_i, dimm); + dump_rcd(spd_bus, mca->dimm[dimm].rcd_i2c_addr); + } + } + } +} + +void istep_13_10(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 10); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_draminit(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_11.c b/src/soc/ibm/power9/istep_13_11.c new file mode 100644 index 00000000000..b6e134f86d8 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_11.c @@ -0,0 +1,1371 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.11 mss_draminit_training: Dram training + * + * a) p9_mss_draminit_training.C (mcbist) -- Nimbus + * b) p9c_mss_draminit_training.C (mba) -- Cumulus + * - Prior to running, this procedure will apply known bad DQ bits to prevent + * them from participating in training. This information is extracted from + * the bad DQ attribute and applied to hardware + * - Marks the calibration fail array + * - External ZQ Calibration + * - Execute initial dram calibration (7 steps - handled by HW) + * - This procedure will update the bad DQ attribute for each dimm based on + * its findings + */ + +static void setup_and_execute_zqcal(uint8_t chip, int mcs_i, int mca_i, int d) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int mirrored = mca->dimm[d].spd[136] & 1; /* Maybe add this to mca_data_t? */ + mrs_cmd_t cmd = ddr4_get_zqcal_cmd(DDR4_ZQCAL_LONG); + enum rank_selection ranks; + + if (d == 0) { + if (mca->dimm[d].mranks == 2) + ranks = DIMM0_ALL_RANKS; + else + ranks = DIMM0_RANK0; + } else { + if (mca->dimm[d].mranks == 2) + ranks = DIMM1_ALL_RANKS; + else + ranks = DIMM1_RANK0; + } + + /* + * JEDEC: "All banks must be precharged and tRP met before ZQCL or ZQCS + * commands are issued by the controller" - not sure if this is ensured. + * A refresh during the calibration probably would impact the results. Also, + * "No other activities should be performed on the DRAM channel by the + * controller for the duration of tZQinit, tZQoper, or tZQCS" - this means + * we have to insert a delay after every ZQCL, not only after the last one. + * As a possible improvement, perhaps we could reorder this step a bit and + * send ZQCL on all ports "simultaneously" (without delays) and add a delay + * just between different DIMMs/ranks, but those delays cannot be done by + * CCS and we don't have a timer with enough precision to make it worth the + * effort.
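+ * + * For reference, JEDEC (JESD79-4) sets tZQinit = 1024 nCK, tZQoper = 512 nCK + * and tZQCS = 128 nCK for DDR4, so each ZQCL queued below adds roughly + * 1024 * 0.83 ns ~= 0.85 us of CCS delay at 2400 MT/s.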
+ */ + ccs_add_mrs(chip, id, cmd, ranks, mirrored, tZQinit); + ccs_execute(chip, id, mca_i); +} + +static void clear_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + for (dp = 0; dp < 5; dp++) { + /* Whole lot of zeroing + IOM0.DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_{0-4}, + IOM0.DDRPHY_DP16_WR_ERROR0_P0_{0-4}, + IOM0.DDRPHY_DP16_RD_STATUS0_P0_{0-4}, + IOM0.DDRPHY_DP16_RD_LVL_STATUS2_P0_{0-4}, + IOM0.DDRPHY_DP16_RD_LVL_STATUS0_P0_{0-4}, + IOM0.DDRPHY_DP16_WR_VREF_ERROR0_P0_{0-4}, + IOM0.DDRPHY_DP16_WR_VREF_ERROR1_P0_{0-4}, + [all] 0 + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0, 0, 0); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0, 0, 0); + } + + /* IOM0.DDRPHY_APB_CONFIG0_P0 = + [49] RESET_ERR_RPT = 1, then 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_APB_CONFIG0_P0, ~0, PPC_BIT(RESET_ERR_RPT)); + mca_and_or(chip, id, mca_i, DDRPHY_APB_CONFIG0_P0, ~PPC_BIT(RESET_ERR_RPT), 0); + + /* IOM0.DDRPHY_APB_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_RC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_SEQ_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_WC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); + + /* IOM0.IOM_PHY0_DDRPHY_FIR_REG = + [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 = 0 + */ + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, + ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), 0); +} + +static void dump_cal_errors(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* + * Values are printed before names for two reasons: + * - it is easier to align, + * - BMC buffers host's serial output both in 'obmc-console-client' and in + * Serial over LAN and may not print few last characters. 
+ */ + for (dp = 0; dp < 5; dp++) { + printk(RAM_SPEW, "DP %d\n", dp); + printk(RAM_SPEW, "\t%#16.16llx - RD_VREF_CAL_ERROR\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - DQ_BIT_DISABLE_RP0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - DQS_BIT_DISABLE_RP0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_ERROR0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_ERROR0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_STATUS0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_STATUS0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS2\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS2_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - RD_LVL_STATUS0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_LVL_STATUS0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR0\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0)); + printk(RAM_SPEW, "\t%#16.16llx - WR_VREF_ERROR1\n", + dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0)); + } + + printk(RAM_SPEW, "%#16.16llx - APB_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_APB_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - RC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_RC_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - SEQ_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_SEQ_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - WC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_WC_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - PC_ERROR_STATUS0\n", + mca_read(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0)); + + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_ERROR\n", + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0)); + + /* 0x8000 on success for first rank, 0x4000 for second */ + printk(RAM_SPEW, "%#16.16llx - PC_INIT_CAL_STATUS\n", + mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_STATUS_P0)); + + printk(RAM_SPEW, "%#16.16llx - IOM_PHY0_DDRPHY_FIR_REG\n", + mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG)); + + printk(RAM_SPEW, "%#16.16llx - MBACALFIRQ\n", + mca_read(chip, id, mca_i, MBACALFIR)); +} + +/* Based on ATTR_MSS_MRW_RESET_DELAY_BEFORE_CAL, by default do it. */ +static void dp16_reset_delay_values(uint8_t chip, int mcs_i, int mca_i, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* + * It iterates over enabled rank pairs. See 13.8 for where these "pairs" + * (which may have up to 4 elements) were set. 
+ */ + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_{0-4} = 0 */ + if (ranks_present & DIMM0_RANK0) + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_0, + 0, 0); + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_{0-4} = 0 */ + if (ranks_present & DIMM0_RANK1) + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_0, + 0, 0); + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_{0-4} = 0 */ + if (ranks_present & DIMM1_RANK0) + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_0, + 0, 0); + /* IOM0.DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_{0-4} = 0 */ + if (ranks_present & DIMM1_RANK1) + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_0, + 0, 0); + } +} + +static void dqs_align_turn_on_refresh(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 + // > May need to add freq/tRFI attr dependency later but for now use this value + // > Provided by Ryan King + [60-63] TRFC_CYCLES = 9 // tRFC = 2^9 = 512 memcycles + */ + /* See note in seq_reset() in 13.8. This may not be necessary. */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, ~PPC_BITMASK(60, 63), + PPC_PLACE(9, TRFC_CYCLES, TRFC_CYCLES_LEN)); + + /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 + // > Hard coded settings provided by Ryan King for this workaround + [48-51] REFRESH_COUNT = 0xf + // TODO: see "Read clock align - pre-workaround" below. Why not 1 until + // calibration finishes? Does it pull in refresh commands? + [52-53] REFRESH_CONTROL = 3 // refresh commands may interrupt calibration routines + [54] REFRESH_ALL_RANKS = 1 + [55] CMD_SNOOP_DIS = 0 + [57-63] REFRESH_INTERVAL = 0x13 // Worst case: 6.08us for 1866 (max tCK). Must be not more than 7.8us + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, + ~(PPC_BITMASK(48, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(0xF, REFRESH_COUNT, REFRESH_COUNT_LEN) | + PPC_PLACE(0x3, REFRESH_CONTROL, REFRESH_CONTROL_LEN) | + PPC_BIT(REFRESH_ALL_RANKS) | + PPC_PLACE(0x13, REFRESH_INTERVAL, REFRESH_INTERVAL_LEN)); +} + +static void wr_level_pre(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int d = rp / 2; + int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); + int mirrored = mca->dimm[d].spd[136] & 1; + mrs_cmd_t mrs; + enum rank_selection rank = 1 << rp; + int i; + + /* + * JEDEC specification requires disabling RTT_WR during WR_LEVEL, and + * enabling equivalent terminations. + */ + if (ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx] != 0) { + /* MR2 = // redo the rest of the bits + [A11-A9] 0 + */ + mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, + vpd_to_rtt_wr(0), + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, + mem_data[chip].cwl); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); + + /* MR1 = // redo the rest of the bits + // Write properly encoded RTT_WR value as RTT_NOM + [A8-A10] 240/ATTR_MSS_VPD_MT_DRAM_RTT_WR + */ + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Next command for this rank is REF, done by PHY hardware, so use tMOD. + * + * There are possible MRS commands to be send to other ranks, maybe we + * can subtract those. 
On the other hand, with microsecond precision for + * delays in ccs_execute(), this probably doesn't matter anyway. + */ + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMOD); + + /* + * This block is done after MRS commands in Hostboot, but we do not call + * ccs_execute() until the end of this function anyway. It doesn't seem + * to make a difference. + */ + switch (rp) { + case 0: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + [48] = 1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, + ~0, PPC_BIT(48)); + break; + case 1: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + [57] = 1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, + ~0, PPC_BIT(57)); + break; + case 2: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = + [50] = 1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, + ~0, PPC_BIT(50)); + break; + case 3: + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = + [59] = 1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, + ~0, PPC_BIT(59)); + break; + } + + // mss::workarounds::seq::odt_config(); // Not needed on DD2 + + } + + /* Different workaround, executed even if RTT_WR == 0 */ + /* workarounds::wr_lvl::configure_non_calibrating_ranks() + for each rank on MCA except current primary rank: + MR1 = // redo the rest of the bits + [A7] = 1 // Write Leveling Enable + [A12] = 1 // Outputs disabled (DQ, DQS) + */ + for (i = 0; i < 4; i++) { + rank = 1 << i; + if (i == rp || !(ranks_present & rank)) + continue; + + /* + * VPD index stays the same (DIMM mixing rules), but I'm not sure about + * mirroring. Better safe than sorry, assume mirrored and non-mirrored + * DIMMs can be mixed. + */ + mirrored = mca->dimm[i/2].spd[136] & 1; + + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_DISABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_ENABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Delays apply to commands sent to the same rank, but we are changing + * ranks. Can we get away with 0 delay? Is it worth it? Remember that + * the same delay is currently used between sides of RCD. + */ + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); + } + + /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ + //ccs_execute(id, mca_i); +} + +static uint64_t wr_level_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "Note: the following equation is taken from the PHY workbook - leaving + * the naked numbers in for parity to the workbook + * + * This step runs for approximately (80 + TWLO_TWLOE) x NUM_VALID_SAMPLES x + * (384/(BIG_STEP + 1) + (2 x (BIG_STEP + 1))/(SMALL_STEP + 1)) + 20 memory + * clock cycles per rank." + * + * TWLO_TWLOE for every defined speed bin is 9.5 + 2 = 11.5 ns; this needs + * to be converted to clock cycles, as it is the only non-constant component of + * the equation.
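+ * + * Rough numbers for 2400 MT/s (tCK ~0.83 ns): TWLO_TWLOE = 11.5 ns ~= 14 nCK, + * giving (80 + 14) * 5 * (384/8 + 16) + 20 = 30100 nCK, i.e. about 25 us per + * rank pair.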
+ */ + const int big_step = 7; + const int small_step = 0; + const int num_valid_samples = 5; + const int twlo_twloe = ps_to_nck(chip, 11500); + + return (80 + twlo_twloe) * num_valid_samples * (384 / (big_step + 1) + + (2 * (big_step + 1)) / (small_step + 1)) + 20; +} + +/* Undo the pre-workaround, basically */ +static void wr_level_post(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int d = rp / 2; + int vpd_idx = (mca->dimm[d].mranks - 1) * 2 + (!!mca->dimm[d ^ 1].present); + int mirrored = mca->dimm[d].spd[136] & 1; + mrs_cmd_t mrs; + enum rank_selection rank = 1 << rp; + int i; + + /* + * JEDEC specification requires disabling RTT_WR during WR_LEVEL, and + * enabling equivalent terminations. + */ + if (ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx] != 0) { + #define F(x) ((((x) >> 4) & 0xc) | (((x) >> 2) & 0x3)) + /* Originally done in seq_reset() in 13.8 */ + /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG1_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_RD_VALUES0 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][0]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][2]) + [56-59] ODT_RD_VALUES1 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][1]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][3]) + */ + /* 2 DIMMs -> odd vpd_idx */ + uint64_t val = 0; + if (vpd_idx % 2) + val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) + | PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES1, ODT_RD_VALUES1_LEN); + + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); + + + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_WR_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][0]) + [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_RD_VALUES0, ODT_RD_VALUES0_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_RD_VALUES1, ODT_RD_VALUES1_LEN)); + #undef F + + /* MR2 = // redo the rest of the bits + [A11-A9] ATTR_MSS_VPD_MT_DRAM_RTT_WR + */ + mrs = ddr4_get_mr2(DDR4_MR2_WR_CRC_DISABLE, + vpd_to_rtt_wr(ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx]), + DDR4_MR2_ASR_MANUAL_EXTENDED_RANGE, + mem_data[chip].cwl); + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); + + /* MR1 = // redo the rest of the bits + // Write properly encoded RTT_NOM value + [A8-A10] 240/ATTR_MSS_VPD_MT_DRAM_RTT_NOM + */ + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Next command for this rank should be REF before Initial Pattern Write, + * done by PHY hardware, so use tMOD. 
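+ * + * A note on the 240/x notation above: the VPD attributes hold termination + * values in ohms and DDR4 uses RZQ = 240 Ohm, so e.g. 60 Ohm is RZQ/4; the + * actual MR bit encodings are produced by vpd_to_rtt_nom()/vpd_to_rtt_wr().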
+ */ + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMOD); + + // mss::workarounds::seq::odt_config(); // Not needed on DD2 + } + + /* Different workaround, executed even if RTT_WR == 0 */ + /* workarounds::wr_lvl::configure_non_calibrating_ranks() + for each rank on MCA except current primary rank: + MR1 = // redo the rest of the bits + [A7] = 1 // Write Leveling Enable + [A12] = 1 // Outputs disabled (DQ, DQS) + */ + for (i = 0; i < 4; i++) { + rank = 1 << i; + if (i == rp || !(ranks_present & rank)) + continue; + + /* + * VPD index stays the same (DIMM mixing rules), but I'm not sure about + * mirroring. Better safe than sorry, assume mirrored and non-mirrored + * DIMMs can be mixed. + */ + mirrored = mca->dimm[i/2].spd[136] & 1; + + mrs = ddr4_get_mr1(DDR4_MR1_QOFF_ENABLE, + mca->dimm[d].width == WIDTH_x8 ? DDR4_MR1_TQDS_ENABLE : + DDR4_MR1_TQDS_DISABLE, + vpd_to_rtt_nom(ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx]), + DDR4_MR1_WRLVL_DISABLE, + DDR4_MR1_ODIMP_RZQ_7, + DDR4_MR1_AL_DISABLE, + DDR4_MR1_DLL_ENABLE); + /* + * Delays apply to commands sent to the same rank, but we are changing + * ranks. Can we get away with 0 delay? Is it worth it? Remember that + * the same delay is currently used between sides of RCD. + */ + ccs_add_mrs(chip, id, mrs, rank, mirrored, tMRD); + } + + /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ + //ccs_execute(id, mca_i); +} + +static uint64_t initial_pat_wr_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "Not sure how long this should take, so we're gonna use 1 to make sure we + * get at least one polling loop" + * + * Hostboot polls every 10 us, but in coreboot this value results in minimal + * delay of 2 us (one microsecond for delay_nck() and another for wait_us() + * in ccs_execute()). Tests show that it is not enough. + * + * What has to be done to write pattern to MPR in general: + * - write to MR3 to enable MPR access (tMOD) + * - write to MPRs (tWR_MPR for back-to-back writes, there are 4 MPRs; + * tWR_MPR is tMOD + AL + PL, but AL and PL is 0 here) + * - write to MR3 to disable MPR access (tMOD or tMRD, depending on what is + * the next command). + * + * This gives 6 * tMOD, but because there is RCD with sides A and B this is + * 12 * tMOD = 288 nCK. However, we have to add to calculations refresh + * commands, as set in dqs_align_turn_on_refresh() - 15 commands, each takes + * 512 nCK. This is kind of consistent for 2666 MT/s DIMM with 5 us I've + * seen in tests. + * + * There is no limit about how many refresh commands can be issued (as long + * as tRFC isn't violated), but only 8 of them are "pulling in" further + * refreshes, meaning that DRAM will survive 9*tREFI without a refresh + * (8 pulled in and 1 regular interval) - this is useful for longer + * calibration steps. Another 9*tREFI can be postponed - REF commands are + * sent after a longer pause, but this (probably) isn't relevant here. + * + * There may be more refreshes sent in the middle of the most of steps due + * to REFRESH_CONTROL setting. + * + * These additional cycles should be added to all calibration steps. I don't + * think they are included in Hostboot, then again I don't know what exactly + * is added in "equations taken from the PHY workbook". This may be the + * reason why Hostboot multiplies every timeout by 4 AND assumes worst case + * wherever possible AND polls so rarely. + * + * From the lack of better ideas, return 10 us. 
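+ * + * Sanity check of the estimate: 12 * tMOD plus 15 refresh commands is + * 288 + 15 * 512 = 7968 nCK, about 6 us at 2666 MT/s (tCK ~0.75 ns), so + * 10 us leaves some headroom.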
+ */ + return ns_to_nck(chip, 10 * 1000); +} + +static uint64_t dqs_align_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "This step runs for approximately 6 x 600 x 4 DRAM clocks per rank pair." + * + * In tests this is a bit less than that, but not enough to impact total + * times because we start busy polling earlier. + */ + return 6 * 600 * 4; +} + +static void rdclk_align_pre(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + + /* + * TODO: we just set it before starting calibration steps. As we don't have + * any precious data in RAM yet, maybe we can use 0 there and just change it + * to 3 in the post-workaround? + */ + + /* Turn off refresh, we don't want it to interfere here + IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 + [52-53] REFRESH_CONTROL = 0 // refresh commands are only sent at start of initial calibration + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); +} + +static uint64_t rdclk_align_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "This step runs for approximately 24 x ((1024/COARSE_CAL_STEP_SIZE + + * 4 x COARSE_CAL_STEP_SIZE) x 4 + 32) DRAM clocks per rank pair" + * + * COARSE_CAL_STEP_SIZE = 4 + * + * In tests this finishes in about a third of this time (7 us instead of + * calculated 20.16 us). + */ + const int coarse_cal_step_size = 4; + return 24 * ((1024/coarse_cal_step_size + 4*coarse_cal_step_size) * 4 + 32); +} + +static void rdclk_align_post(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + uint64_t val; + const uint64_t mul = 0x0000010000000000; + + /* + * "In DD2.*, We adjust the red waterfall to account for low VDN settings. + * We move the waterfall forward by one" + IOM0.DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR{0-3}_P0_{0-3} + [48-49] DQSCLK_SELECT0 = (++DQSCLK_SELECT0 % 4) + [52-53] DQSCLK_SELECT1 = (++DQSCLK_SELECT1 % 4) + [56-57] DQSCLK_SELECT2 = (++DQSCLK_SELECT2 % 4) + [60-61] DQSCLK_SELECT3 = (++DQSCLK_SELECT3 % 4) + IOM0.DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR{0-3}_P0_4 + [48-49] DQSCLK_SELECT0 = (++DQSCLK_SELECT0 % 4) + [52-53] DQSCLK_SELECT1 = (++DQSCLK_SELECT1 % 4) + // Can't change non-existing quads + */ + for (dp = 0; dp < 4; dp++) { + val = dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul); + val += PPC_BIT(49) | PPC_BIT(53) | PPC_BIT(57) | PPC_BIT(61); + val &= PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53) | PPC_BITMASK(56, 57) | + PPC_BITMASK(60, 61); + /* TODO: this can be done with just one read */ + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53) | + PPC_BITMASK(56, 57) | PPC_BITMASK(60, 61)), + val); + } + + val = dp_mca_read(chip, id, 4, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul); + val += PPC_BIT(49) | PPC_BIT(53); + val &= PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 + rp * mul, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(52, 53)), + val); + + /* Turn on refresh */ + dqs_align_turn_on_refresh(chip, mcs_i, mca_i); +} + +static void read_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* Turn off refresh + IOM0.DDRPHY_PC_INIT_CAL_CONFIG1_P0 + [52-53] REFRESH_CONTROL = 0 // refresh commands are only sent at start of 
initial calibration + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_CONFIG1_P0, ~PPC_BITMASK(52, 53), 0); + + for (dp = 0; dp < 5; dp++) { + /* + IOM0.DDRPHY_DP16_CONFIG0_P0_{0-4} + [62] 1 // part of ATESTSEL_0_4 field + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~0, PPC_BIT(62)); + + /* + * This was a part of main calibration in Hostboot, not pre-workaround, + * but this is easier this way. + IOM0.DDRPHY_DP16_RD_VREF_CAL_EN_P0_{0-4} + [all] 0 + [48-63] VREF_CAL_EN = 0xffff // We already did this in reset_rd_vref() in 13.8 + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, 0, + PPC_PLACE(0xFFFF, 48, 16)); + } + + /* This also was part of main + IOM0.DDRPHY_RC_RDVREF_CONFIG1_P0 + [60] CALIBRATION_ENABLE = 1 + [61] SKIP_RDCENTERING = 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, + ~PPC_BIT(SKIP_RDCENTERING), + PPC_BIT(CALIBRATION_ENABLE)); +} + +static uint64_t read_ctr_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "This step runs for approximately 6 x (512/COARSE_CAL_STEP_SIZE + 4 x + * (COARSE_CAL_STEP_SIZE + 4 x CONSEQ_PASS)) x 24 DRAM clocks per rank pair." + * + * COARSE_CAL_STEP_SIZE = 4 + * CONSEQ_PASS = 8 + * + * In tests this step takes more than that (38/30us), probably because of + * REF commands that are pulled in before the calibration. It is still much + * less than timeout (107us). + */ + const int coarse_cal_step_size = 4; + const int conseq_pass = 8; + return 6 * (512/coarse_cal_step_size + 4 * (coarse_cal_step_size + 4 * conseq_pass)) + * 24; +} + +static void read_ctr_post(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* Does not apply to DD2 */ + // workarounds::dp16::rd_dq::fix_delay_values(); + + /* Turn on refresh */ + dqs_align_turn_on_refresh(chip, mcs_i, mca_i); + + for (dp = 0; dp < 5; dp++) { + /* + IOM0.DDRPHY_DP16_CONFIG0_P0_{0-4} + [62] 0 // part of ATESTSEL_0_4 field + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CONFIG0_P0_0, ~PPC_BIT(62), 0); + } +} + +/* Assume 18 DRAMs per DIMM ((8 data + 1 ECC) * 2), even for x8 */ +static uint16_t write_delays[18]; + +static void write_ctr_pre(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int mirrored = mca->dimm[rp/2].spd[136] & 1; + mrs_cmd_t mrs; + enum rank_selection rank = 1 << rp; + int vpd_idx = (mca->dimm[rp/2].mranks - 1) * 2 + (!!mca->dimm[(rp/2) ^ 1].present); + int dram; + + /* + * Write VREF Latching + * + * This may be considered a separate step, but with current dispatch logic + * we cannot add a step that isn't accelerated by PHY hardware so do this as + * a part of pre-workaround of next step. + * + * "JEDEC has a 3 step latching process for WR VREF + * 1) enter into VREFDQ training mode, with the desired range value is XXXXXX + * 2) set the VREFDQ value while in training mode - this actually latches the value + * 3) exit VREFDQ training mode and go into normal operation mode" + * + * Each step is followed by a 150ns (tVREFDQE or tVREFDQX) stream of DES + * commands before next one. 
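+ * + * At 2400 MT/s the 150 ns below translates to ns_to_nck(chip, 150) ~= 180 nCK + * between each of the three MRS commands.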
+ */ + uint64_t tVREFDQ_E_X = ns_to_nck(chip, 150); + + /* Fill MRS command once, then flip VREFDQ training mode bit as needed */ + mrs = ddr4_get_mr6(mca->nccd_l, + DDR4_MR6_VREFDQ_TRAINING_ENABLE, + (ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] & 0x40) >> 6, + ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] & 0x3F); + + /* Step 1 - enter VREFDQ training mode */ + ccs_add_mrs(chip, id, mrs, rank, mirrored, tVREFDQ_E_X); + + /* Step 2 - latch VREFDQ value, command exactly the same as step 1 */ + ccs_add_mrs(chip, id, mrs, rank, mirrored, tVREFDQ_E_X); + + /* Step 3 - exit VREFDQ training mode */ + mrs ^= 1 << 7; // A7 - VREFDQ Training Enable + ccs_add_mrs(chip, id, mrs, rank, mirrored, tVREFDQ_E_X); + + /* TODO: maybe drop it, next ccs_phy_hw_step() would call it anyway. */ + //ccs_execute(id, mca_i); + + /* End of VREF Latching, beginning of Write Centering pre-workaround */ + + /* + * DRAM is one IC on the DIMM module, there are 9 DRAMs for x8 and 18 for + * x4 devices (DQ bits/width) per rank. Before centering the delays are the + * same for each DQ of a given DRAM, meaning it is enough to save just one + * value per DRAM. For simplicity, save every 4th DQ even on x8 devices. + */ + for (dram = 0; dram < ARRAY_SIZE(write_delays); dram++) { + int dp = (dram * 4) / 16; + int val_idx = (dram * 4) % 16; + const uint64_t rp_mul = 0x0000010000000000; + const uint64_t val_mul = 0x0000000100000000; + /* IOM0.DDRPHY_DP16_WR_DELAY_VALUE__RP_REG_P0_ */ + uint64_t val = dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_WR_DELAY_VALUE_0_RP0_REG_P0_0 + + rp * rp_mul + val_idx * val_mul); + write_delays[dram] = (uint16_t) val; + } +} + +static uint64_t write_ctr_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "1000 + (NUM_VALID_SAMPLES * (FW_WR_RD + FW_RD_WR + 16) * + * (1024/(SMALL_STEP +1) + 128/(BIG_STEP +1)) + 2 * (BIG_STEP+1)/(SMALL_STEP+1)) * 24 + * DRAM clocks per rank pair." + * + * Yes, write leveling values are used for write centering, this is not an + * error (or is it? CONFIG0 says BIG_STEP = 1) + * WR_LVL_BIG_STEP = 7 + * WR_LVL_SMALL_STEP = 0 + * WR_LVL_NUM_VALID_SAMPLES = 5 + * + * "Per PHY spec, defaults to 0. Would need an attribute to drive differently" + * FW_WR_RD = 0 + * + * "From the PHY spec. Also confirmed with S. Wyatt as this is different + * than the calculation used in Centaur. This field must be set to the + * larger of the two values in number of memory clock cycles. + * FW_RD_WR = max(tWTR + 11, AL + tRTP + 3) + * Note from J. Bialas: The difference between tWTR_S and tWTR_L is that _S + * is write to read time to different bank groups, while _L is to the same. + * The algorithm should be switching bank groups so tWTR_S can be used" + * + * tRTP = 7.5ns (this comes from DDR4 spec) + * AL = 0 + * + * For tWTR_S = 2.5ns this should give ~2.9-4.5ms, + 2 * 3 * 150ns from MRS + * commands in pre-workaround (insignificantly small compared to total time). + * In tests this is ~7.5ms, with 10.5ms timeout, mostly because the equation + * below probably doesn't account for REF commands. This leaves rather small + * margin for error. 
+ */ + const int big_step = 7; + const int small_step = 0; + const int num_valid_samples = 5; + int fw_rd_wr = MAX(mca->nwtr_s + 11, ps_to_nck(chip, 7500) + 3); + return 1000 + (num_valid_samples * (fw_rd_wr + 16) * + (1024/(small_step + 1) + 128/(big_step + 1)) + + 2 * (big_step + 1)/(small_step + 1)) * 24; +} + +static void write_ctr_post(uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + uint64_t bad_bits = 0; + + /* + * TODO: this just tests if workaround is needed, real workaround is not + * yet implemented. + */ + for (dp = 0; dp < 5; dp++) { + bad_bits |= dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0); + } + + if (!bad_bits) + return; + + /* + * Full workaround consists of: + * - enabling PDA mode (per DRAM addressing) on MC + * - reverting initial WR Vref values in MC + * - reverting WR delays saved in pre-workaround + * - clearing bad DQ bits (because this calibration step will be re-run) + * - entering PDA mode on DRAMs + * - reverting initial VREFDQ values in bad DRAM(s) + * - exiting PDA mode on DRAMs (this point has its own workaround) + * - exiting PDA mode on MC + * - finding a median of RD Vref DAC values and disabling all DQ bits except + * one known to be good (close to median) + * - rerunning main calibration, exit on success + * - if it still fails, re-enable all DQ bits (bad and good), set 1D only + * write centering and rerun again + */ + die("Write Centering post-workaround required, but not yet implemented\n"); +} + +static uint64_t coarse_wr_rd_time(uint8_t chip, mca_data_t *mca) +{ + /* + * "40 cycles for WR, 32 for RD" + * + * With number of cycles set to just the above this step times out, add time + * for 15 REF commands as set in dqs_align_turn_on_refresh(). 
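+ *
+ * That is 40 + 32 + 15 * 512 = 7752 DRAM clocks total, or about 5.8 us
+ * at 2666 MT/s (tCK = 0.75 ns) - almost all of it the REF allowance.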
+ */ + return 40 + 32 + 15 * 512; +} + +typedef void (phy_workaround_t) (uint8_t chip, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present); + +struct phy_step { + const char *name; + enum cal_config cfg; + phy_workaround_t *pre; + uint64_t (*time)(uint8_t chip, mca_data_t *mca); + phy_workaround_t *post; +}; + +static struct phy_step steps[] = { + { + "Write Leveling", + CAL_WR_LEVEL, + wr_level_pre, + wr_level_time, + wr_level_post, + }, + { + "Initial Pattern Write", + CAL_INITIAL_PAT_WR, + NULL, + initial_pat_wr_time, + NULL, + }, + { + "DQS alignment", + CAL_DQS_ALIGN, + NULL, + dqs_align_time, + NULL, + }, + { + "Read Clock Alignment", + CAL_RDCLK_ALIGN, + rdclk_align_pre, + rdclk_align_time, + rdclk_align_post, + }, + { + "Read Centering", + CAL_READ_CTR, + read_ctr_pre, + read_ctr_time, + read_ctr_post, + }, + { + "Write Centering", + CAL_WRITE_CTR, + write_ctr_pre, + write_ctr_time, + write_ctr_post, + }, + { + "Coarse write/read", + CAL_INITIAL_COARSE_WR | CAL_COARSE_RD, + NULL, + coarse_wr_rd_time, + NULL, + }, + +/* + // Following are performed in istep 13.12 + CAL_CUSTOM_RD + CAL_CUSTOM_WR +*/ +}; + +static void dispatch_step(uint8_t chip, struct phy_step *step, int mcs_i, int mca_i, int rp, + enum rank_selection ranks_present) +{ + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + printk(BIOS_DEBUG, "%s starting\n", step->name); + + if (step->pre) + step->pre(chip, mcs_i, mca_i, rp, ranks_present); + + ccs_phy_hw_step(chip, mcs_ids[mcs_i], mca_i, rp, step->cfg, step->time(chip, mca)); + + if (step->post) + step->post(chip, mcs_i, mca_i, rp, ranks_present); + + dump_cal_errors(chip, mcs_i, mca_i); + + if (mca_read(chip, mcs_ids[mcs_i], mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0) != 0) + die("%s failed, aborting\n", step->name); + + printk(BIOS_DEBUG, "%s done\n", step->name); +} + +/* Can we modify dump_cal_errors() for this? */ +static int process_initial_cal_errors(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + uint64_t err = 0; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_n */ + err |= dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0); + + /* Both ERROR_MASK registers were set to 0xFFFF in 13.8 */ + /* IOM0.DDRPHY_DP16_WR_VREF_ERROR0_P0_n & + * ~IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_n */ + err |= (dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR0_P0_0) & + ~dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0)); + + /* IOM0.DDRPHY_DP16_WR_VREF_ERROR1_P0_n & + * ~IOM0.DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_n */ + err |= (dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_WR_VREF_ERROR1_P0_0) & + ~dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0)); + } + + /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 */ + err |= mca_read(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0); + + if (err) + return 1; + + /* + * err == 0 at this point can be either a true success or an error of the + * calibration engine itself. Check for latter. 
+ */ + /* IOM0.IOM_PHY0_DDRPHY_FIR_REG */ + if (read_scom_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG) & + PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2)) { + /* + * "Clear the PHY FIR ERROR 2 bit so we don't keep failing training and + * training advance on this port" + */ + scom_and_or_for_chiplet(chip, id, IOM_PHY0_DDRPHY_FIR_REG, + ~PPC_BIT(IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2), + 0); + + return 1; + } + + return 0; +} + +static int can_recover(uint8_t chip, int mcs_i, int mca_i, int rp) +{ + /* + * We can recover from 1 nibble + 1 bit (or less) bad lines. Anything more + * and DIMM is beyond repair. A bad nibble is a nibble with any number of + * bad bits. If a DQS is bad (either true or complementary signal, or both), + * a whole nibble (for x4 DRAMs) or byte (x8) is considered bad. + * + * Check both DQS and DQ registers in one loop, iterating over DP16s - that + * way it is easier to sum bad bits/nibbles. + * + * See reset_clock_enable() in 13.8 or an array in process_bad_bits() in + * phy/dp16.C for mapping of DQS bits in x8 and mask bits from this register + * accordingly. + */ + int bad_nibbles = 0; + int bad_bits = 0; + int dp; + chiplet_id_t id = mcs_ids[mcs_i]; + uint8_t width = mem_data[chip].mcs[mcs_i].mca[mca_i].dimm[rp/2].width; + + for (dp = 0; dp < 5; dp++) { + uint64_t reg; + uint64_t nibbles_mask = 0xFFFF; + /* + IOM0.DDRPHY_DP16_DQS_BIT_DISABLE_RP_P0_{0-4}: + // This calculates how many (DQS_t | DQS_c) failed - if _t and _c failed + // for the same DQS, we count it as one. + bad_dqs = bit_count((reg & 0x5500) | ((reg & 0xaa00) >> 1)) + if x8 && bad_dqs > 0: DIMM is FUBAR, return error + total_bad_nibbles += bad_dqs + // If we are already past max possible number, we might as well return now + if total_bad_nibbles > 1: DIMM is FUBAR, return error + */ + const uint64_t rp_mul = 0x0000010000000000; + reg = dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0 + rp * rp_mul); + + /* One bad DQS on x8 is already bad 2 nibbles, can't recover from that. */ + if (reg != 0 && width == WIDTH_x8) + return 0; + + if (reg & (PPC_BIT(48) | PPC_BIT(49))) // quad 0 + nibbles_mask &= 0x0FFF; + if (reg & (PPC_BIT(50) | PPC_BIT(51))) // quad 1 + nibbles_mask &= 0xF0FF; + if (reg & (PPC_BIT(52) | PPC_BIT(53))) // quad 2 + nibbles_mask &= 0xFF0F; + if (reg & (PPC_BIT(54) | PPC_BIT(55))) // quad 3 + nibbles_mask &= 0xFFF0; + + bad_nibbles += __builtin_popcount((reg & 0x5500) | ((reg & 0xAA00) >> 1)); + + /* + IOM0.DDRPHY_DP16_DQ_BIT_DISABLE_RP_P0_{0-4}: + nibble = {[48-51], [52-55], [56-59], [60-63]} + for each nibble: + if bit_count(nibble) > 1: total_bad_nibbles += 1 + if bit_count(nibble) == 1: total_bad_bits += 1 + // We can't have two bad bits, one of them must be treated as bad nibble + if total_bad_bits > 1: total_bad_nibbles += 1, total_bad_bits -= 1 + if total_bad_nibbles > 1: DIMM is FUBAR, return error? + */ + reg = dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0 + rp * rp_mul); + + /* Exclude nibbles corresponding to a bad DQS, it won't get worse. */ + reg &= nibbles_mask; + + /* Add bits in nibbles */ + reg = ((reg & 0x1111) >> 0) + ((reg & 0x2222) >> 1) + + ((reg & 0x4444) >> 2) + ((reg & 0x8888) >> 3); + + /* + * We only care if there is 0, 1 or more bad bits. Collapse bits [0-2] + * of each nibble into [2], leave [3] unmodified (PPC bit numbering). + */ + reg = ((reg & 0x1111) >> 0) | ((reg & 0x2222) >> 0) | + ((reg & 0x4444) >> 1) | ((reg & 0x8888) >> 2); + + /* Clear bit [3] if [2] is also set. 
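+ * Worked example: per-nibble bad-bit counts of 4, 3, 2 and 1 give
+ * reg = 0x4321 after the summing step; the collapse step turns it into
+ * 0x2321, and this step into 0x2221 - three bad nibbles (0x2) plus one
+ * single bad bit (0x1) for the popcounts below (such a port would of
+ * course be rejected as beyond repair).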
*/ + reg = (reg & 0x2222) | ((reg & 0x1111) & ~((reg & 0x2222) >> 1)); + + /* Now [2] is bad nibble, [3] is exactly one bad bit */ + bad_bits += __builtin_popcount(reg & 0x1111); + if (bad_bits > 1) { + bad_nibbles += bad_bits - 1; + bad_bits = 1; + } + bad_nibbles += __builtin_popcount(reg & 0x2222); + + /* No need to test for bad single bits, condition above handles it */ + if (bad_nibbles > 1) + return 0; + } + + /* + * Now, if total_bad_nibbles is less than 2 we know that total_bad_bits is + * also less than 2, and DIMM is good enough for recovery. + */ + printk(BIOS_WARNING, "MCS%d MCA%d DIMM%d has %d bad nibble(s) and %d bad " + "bit(s), but can be recovered\n", mcs_i, mca_i, rp/2, bad_nibbles, + bad_bits); + return 1; +} + +static void fir_unmask(uint8_t chip, int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + + /* + * "All mcbist attentions are already special attentions" + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT0 + [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 1 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK + [1] MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT = 0 //recoverable_error (0,1,0) + */ + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~PPC_BIT(MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT), + 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) + continue; + + /* + MC01.PORT0.SRQ.MBACALFIR_ACTION0 + [2] MBACALFIR_MASK_REFRESH_OVERRUN = 0 + [5] MBACALFIR_MASK_DDR_CAL_TIMEOUT_ERR = 0 + [7] MBACALFIR_MASK_DDR_CAL_RESET_TIMEOUT = 0 + [9] MBACALFIR_MASK_WRQ_RRQ_HANG_ERR = 0 + [11] MBACALFIR_MASK_ASYNC_IF_ERROR = 0 + [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 + [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 0 + MC01.PORT0.SRQ.MBACALFIR_ACTION1 + [2] MBACALFIR_MASK_REFRESH_OVERRUN = 1 + [5] MBACALFIR_MASK_DDR_CAL_TIMEOUT_ERR = 1 + [7] MBACALFIR_MASK_DDR_CAL_RESET_TIMEOUT = 1 + [9] MBACALFIR_MASK_WRQ_RRQ_HANG_ERR = 1 + [11] MBACALFIR_MASK_ASYNC_IF_ERROR = 0 + [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 + [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 1 + MC01.PORT0.SRQ.MBACALFIR_MASK + [2] MBACALFIR_MASK_REFRESH_OVERRUN = 0 // recoverable_error (0,1,0) + [5] MBACALFIR_MASK_DDR_CAL_TIMEOUT_ERR = 0 // recoverable_error (0,1,0) + [7] MBACALFIR_MASK_DDR_CAL_RESET_TIMEOUT = 0 // recoverable_error (0,1,0) + [9] MBACALFIR_MASK_WRQ_RRQ_HANG_ERR = 0 // recoverable_error (0,1,0) + [11] MBACALFIR_MASK_ASYNC_IF_ERROR = 0 // checkstop (0,0,0) + [12] MBACALFIR_MASK_CMD_PARITY_ERROR = 0 // checkstop (0,0,0) + [14] MBACALFIR_MASK_RCD_CAL_PARITY_ERROR = 0 // recoverable_error (0,1,0) + */ + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION0, + ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_ASYNC_IF_ERROR) | + PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), + 0); + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION1, + ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_ASYNC_IF_ERROR) | + PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | + 
PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), + PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)); + mca_and_or(chip, id, mca_i, MBACALFIR_MASK, + ~(PPC_BIT(MBACALFIR_REFRESH_OVERRUN) | + PPC_BIT(MBACALFIR_DDR_CAL_TIMEOUT_ERR) | + PPC_BIT(MBACALFIR_DDR_CAL_RESET_TIMEOUT) | + PPC_BIT(MBACALFIR_WRQ_RRQ_HANG_ERR) | + PPC_BIT(MBACALFIR_ASYNC_IF_ERROR) | + PPC_BIT(MBACALFIR_CMD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_RCD_CAL_PARITY_ERROR)), + 0); + } +} + +static void mss_draminit_training(uint8_t chip) +{ + int mcs_i, mca_i, dimm, rp; + enum rank_selection ranks_present; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + ranks_present = NO_RANKS; + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + if (mca->dimm[dimm].mranks == 2) + ranks_present |= DIMM0_ALL_RANKS << (2 * dimm); + else + ranks_present |= DIMM0_RANK0 << (2 * dimm); + + setup_and_execute_zqcal(chip, mcs_i, mca_i, dimm); + } + + /* IOM0.DDRPHY_PC_INIT_CAL_CONFIG0_P0 = 0 */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, + DDRPHY_PC_INIT_CAL_CONFIG0_P0, + 0, 0); + + /* + * > Disable port fails as it doesn't appear the MC handles initial + * > cal timeouts correctly (cal_length.) BRS, see conversation with + * > Brad Michael + MC01.PORT0.SRQ.MBA_FARB0Q = + [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 1 + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB0Q, ~0, + PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE)); + + /* + * > The following registers must be configured to the correct + * > operating environment: + * > These are reset in phy_scominit + * > Section 5.2.5.10 SEQ ODT Write Configuration {0-3} on page 422 + * > Section 5.2.6.1 WC Configuration 0 Register on page 434 + * > Section 5.2.6.2 WC Configuration 1 Register on page 436 + * > Section 5.2.6.3 WC Configuration 2 Register on page 438 + * + * It would be nice to have the documentation mentioned above or at + * least know what it is about... + */ + + clear_initial_cal_errors(chip, mcs_i, mca_i); + dp16_reset_delay_values(chip, mcs_i, mca_i, ranks_present); + dqs_align_turn_on_refresh(chip, mcs_i, mca_i); + + /* + * List of calibration steps for RDIMM, in execution order: + * - ZQ calibration - calibrates DRAM output driver and on-die termination + * values (already done) + * - Write leveling - compensates for skew caused by a fly-by topology + * - Initial pattern write - not exactly a calibration, but prepares patterns + * for next steps + * - DQS align + * - RDCLK align + * - Read centering + * - Write Vref latching - not exactly a calibration, but required for next + * steps; there is no help from PHY for that but it is simple to do + * manually + * - Write centering + * - Coarse write/read + * - Custom read and/or write centering - performed in istep 13.12 + * Some of these steps have pre- or post-workarounds, or both. + * + * All of those steps (except ZQ calibration) are executed for each rank pair + * before going to the next pair. Some of them require that there is no other + * activity on the controller so parallelization may not be possible. 
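+ *
+ * For example (assuming DIMM0_RANK0 is bit 0 and DIMM0_ALL_RANKS is
+ * 0b11 in the rank_selection encoding), two 2R DIMMs under one MCA give
+ * ranks_present = 0b1111, so the loop below runs the full step list four
+ * times (RP0-RP3); a single 1R DIMM in slot 0 gives 0b0001 and one pass.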
+ *
+ * Quick reminder from set_rank_pairs() in 13.8 (RDIMM only):
+ * - RP0 primary - DIMM 0 rank 0
+ * - RP1 primary - DIMM 0 rank 1
+ * - RP2 primary - DIMM 1 rank 0
+ * - RP3 primary - DIMM 1 rank 1
+ */
+ for (rp = 0; rp < 4; rp++) {
+ if (!(ranks_present & (1 << rp)))
+ continue;
+
+ dump_cal_errors(chip, mcs_i, mca_i);
+
+ for (int i = 0; i < ARRAY_SIZE(steps); i++)
+ dispatch_step(chip, &steps[i], mcs_i, mca_i, rp,
+ ranks_present);
+
+ if (process_initial_cal_errors(chip, mcs_i, mca_i) &&
+ !can_recover(chip, mcs_i, mca_i, rp)) {
+ die("Calibration failed for MCS%d MCA%d DIMM%d\n", mcs_i, mca_i, rp/2);
+ }
+ }
+
+ /* Does not apply to DD2.* */
+ //workarounds::dp16::modify_calibration_results();
+ }
+
+ /*
+ * Hostboot just logs the errors reported earlier (i.e. more than
+ * 1 nibble + 1 bit of bad DQ lines) "and lets PRD deconfigure based off
+ * of ATTR_BAD_DQ_BITMAP".
+ * TODO: what is PRD? How does it "deconfigure" and what? Quick glance
+ * at the code: it may have something to do with undocumented 0x0501082X
+ * SCOM registers, there are usr/diag/prdf/*//*.rule files with
+ * yacc/flex files to compile them. It also may be using 'attn'
+ * instruction.
+ */
+
+ fir_unmask(chip, mcs_i);
+ }
+}
+
+void istep_13_11(uint8_t chips)
+{
+ uint8_t chip;
+
+ report_istep(13, 11);
+
+ for (chip = 0; chip < MAX_CHIPS; chip++) {
+ if (chips & (1 << chip))
+ mss_draminit_training(chip);
+ }
+}
diff --git a/src/soc/ibm/power9/istep_13_13.c b/src/soc/ibm/power9/istep_13_13.c new file mode 100644 index 00000000000..80e041338bb --- /dev/null +++ b/src/soc/ibm/power9/istep_13_13.c @@ -0,0 +1,728 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include 
+#include 
+#include 
+
+#include "istep_13_scom.h"
+
+/*
+ * 13.13 mss_draminit_mc: Hand off control to MC
+ *
+ * a) p9_mss_draminit_mc.C (mcbist) - Nimbus
+ * b) p9c_mss_draminit_mc.C (membuf) - Cumulus
+ * - P9 Cumulus -- Set IML complete bit in centaur
+ * - Start main refresh engine
+ * - Refresh, periodic calibration, power controls
+ * - Turn on ECC checking on memory accesses
+ * - Note at this point memory FIRs can be monitored by PRD
+ */
+
+/*
+ * Set up the MC port <-> DIMM address translation registers.
+ *
+ * These are not documented in specs, everything described here comes from the
+ * code (and comments). Depending on how you count them, there are 2 or 3 base
+ * configurations, and the rest is a modification of one of the bases or its
+ * derived forms. Each level usually adds one row bit, but sometimes it removes
+ * it or modifies rank bits. In most cases when it happens, the rest of the
+ * bits must be shifted.
+ *
+ * There are two pairs of identical settings for each master/slave rank
+ * configuration: 4Gb x4 is always the same as 8Gb x8, and 8Gb x4 is the same
+ * as 16Gb x8.
+ *
+ * Base configurations are:
+ * - 1 rank, non-3DS 4Gb x4 and second DIMM is also 1R (not necessarily 4Gb x4)
+ * - special case when the other DIMM is not a 1R device (because of allowed
+ * DIMM mixing this can only mean that the other slot is not populated)
+ * - 2 rank, non-3DS 4Gb x4
+ *
+ * The special case uses different column, bank and bank group addressing, the
+ * other two cases use identical mapping. This is due to the fact that for a
+ * single 1R DIMM there is no port address bit with index 7, which is used as
+ * C9 in other cases. Hostboot divides those cases as listed above, but it
+ * might make more sense to separate the special case and use uniform logic
+ * for the rest. 
+ * However, for two 1R DIMMs port address 29 is always assigned to the D-bit
+ * (more about it later), because bit map fields for rows use only 3 bits for
+ * encoding, meaning that only port bits 0-7 can be mapped to row bits 15-17.
+ *
+ * According to the code, port addresses 0-7 and 22-32 can be configured in the
+ * register - 19 possibilities total, encoded, so each bit field in the
+ * register is 5 bits long, except for row bitmaps, which are only 3 bits long
+ * (addresses 0-7 only). Column, bank and bank group addressing is always the
+ * same (DDR4 always has 10 column, 2 bank, 2 bank group bits); the difference
+ * is in row bit mapping (we may or may not have bits 15, 16 and 17, those are
+ * indexed from 0 so we can have 15-18 row bits in total) and rank mapping (up
+ * to 2 bits for master ranks, up to 3 bits for slave ranks in 3DS devices).
+ *
+ * TODO: what about 16Gb bank groups? Those should use just 1 bit, but the code
+ * doesn't change it.
+ *
+ * Apart from the bits already mentioned there is also the D-bit (D is short
+ * for DIMM). It is used to tell the controller which DIMM has to be accessed.
+ * To avoid holes in the memory map, the larger DIMM must be mapped to lower
+ * addresses. For example, when we have 4GB and 8GB DIMMs:
+ *
+ * 0 4 8 12 16 ... memory space, in GB
+ * | DIMM X |DIMM Y | hole | ... <- this is good
+ * |DIMM Y | hole | DIMM X | ... <- this is bad
+ *
+ * Whether DIMM X is in the DIMM0 or DIMM1 slot doesn't matter. The example is
+ * simplified - the addresses do not translate directly to CPU address space.
+ * There are multiple MCAs in the system; they are grouped together later in
+ * 14.5, based on the mappings calculated in 7.4.
+ *
+ * There are two pieces to configure for the D-bit:
+ * - D_BIT_MAP - which address bit is used to decide which DIMM is used (this
+ * corresponds to 8GB in the example above); this is common to both DIMMs,
+ * - SLOTn_D_VALUE - what value the D-bit should have to access DIMMn. Each
+ * DIMM has its own SLOTn_D_VALUE; when one DIMM has this bit set, the other
+ * one must have it cleared. In the (good) example above DIMM Y should have
+ * this bit set. When both DIMMs have the same size, only one D_VALUE must be
+ * set, but it doesn't matter which one.
+ *
+ * TODO: what if only one DIMM is present? Do we have to set these to something
+ * sane (0 and 0 should work) or is it enough that the VALID bit is clear for
+ * the other DIMM?
+ *
+ * If bits are assigned in a proper order, we can use a constant table with
+ * mappings and assign values from that table to registers describing address
+ * bits in a sparse manner, depending on the number of rank and row bits used
+ * by a given DIMM. The order is based on the cost of changing individual bits
+ * on the DIMM side (considering data locality):
+ * 1. Bank group, bank and column bits are sent with every read/write command.
+ * It takes RL = AL + CL + PL after the read command until the first DQ bits
+ * appear on the bus. In practice we usually don't care about write delays:
+ * when data is sent to the controller, the CPU can already execute further
+ * code, it doesn't have to wait until it is actually written to DRAM. This
+ * is a cheap change.
+ * TODO: this is for DRAM, any additional delay caused by RCD, PHY or MC?
+ * 2. Ranks (both master and slave) are selected by CS (and Chip ID for slave
+ * ranks) bits, which are also sent with each command. 
Depending on MR4
+ * settings, we may need to wait for an additional tCAL (CS to Command Address
+ * Latency); the DRAM needs some time to "wake up" before it can parse
+ * commands. If tCAL is not used (default in Hostboot), the cost is the same
+ * as for BG, BA and column bits. It doesn't matter whether master or slave
+ * ranks are assigned first, but Hostboot starts with slave ranks - it has
+ * 5 bits per bit map, so it can encode higher numbers.
+ * 3. Row bits - these are expensive. A row must be activated before its
+ * columns are accessed. Each bank can have one activated row at a time. If
+ * there was an open row (different than the one we want to access), it must
+ * be precharged (it takes tRP before the next activation command can be
+ * issued), and then the new row can be activated (after which we have to
+ * wait for tRCD before sending the read/write command). A row cannot be
+ * opened indefinitely: there are both minimal and maximal periods between
+ * ACT and PRE commands (tRAS), and minimums for read to precharge (tRTP),
+ * ACT to ACT for different banks (tRRD, with differentiation between the
+ * same and different bank groups) and the Four Activate Window (tFAW). When
+ * row changes don't happen too often, we usually have to wait for tRCD and
+ * sometimes also tRP, on top of the previous delays.
+ * 4. D bit. Two DIMMs on a channel share all of its lines except CLK, CS, ODT
+ * and CKE bits. Because we don't have to change CS for a given DIMM, the
+ * cost is the same as in point 1 (assuming hardware holds CS between
+ * commands). However, this bit has to be assigned last (i.e. it has to be
+ * the most significant port address bit) to not introduce holes in the
+ * memory space for two differently sized DIMMs.
+ * TODO: can we safely map it closer to the LSB (at least before row bits)
+ * when we have two DIMMs with the same size?
+ *
+ * TODO: what about bad DQ bits? Do they impact this in any way? Probably not,
+ * unless a whole DIMM is disabled.
+ *
+ * Below are register layouts reconstructed from
+ * import/chips/p9/common/include/p9n2_mc_scom_addresses_fld.H:
+ * 0x05010820 // P9N2_MCS_PORT02_MCP0XLT0, also PORT13 on +0x10 SCOM addresses
+ * [0] SLOT0_VALID // set if DIMM present
+ * [1] SLOT0_D_VALUE // set if both DIMMs present and size of DIMM1 > DIMM0
+ * [2] 12GB_ENABLE // unused (maybe for 12Gb/24Gb DRAM?)
+ * [5] SLOT0_M0_VALID
+ * [6] SLOT0_M1_VALID
+ * [9] SLOT0_S0_VALID
+ * [10] SLOT0_S1_VALID
+ * [11] SLOT0_S2_VALID
+ * [12] SLOT0_B2_VALID // Hmmm...
+ * [13] SLOT0_ROW15_VALID
+ * [14] SLOT0_ROW16_VALID
+ * [15] SLOT0_ROW17_VALID
+ * [16] SLOT1_VALID // set if DIMM present
+ * [17] SLOT1_D_VALUE // set if both DIMMs present and size of DIMM1 <= DIMM0
+ * [21] SLOT1_M0_VALID
+ * [22] SLOT1_M1_VALID
+ * [25] SLOT1_S0_VALID
+ * [26] SLOT1_S1_VALID
+ * [27] SLOT1_S2_VALID
+ * [28] SLOT1_B2_VALID // Hmmm...
+ * [29] SLOT1_ROW15_VALID
+ * [30] SLOT1_ROW16_VALID
+ * [31] SLOT1_ROW17_VALID
+ * [35-39] D_BIT_MAP
+ * [41-43] M0_BIT_MAP // 3b for M0 but 5b for M1
+ * [47-51] M1_BIT_MAP
+ * [53-55] R17_BIT_MAP
+ * [57-59] R16_BIT_MAP
+ * [61-63] R15_BIT_MAP
+ *
+ * 0x05010821 // P9N2_MCS_PORT02_MCP0XLT1
+ * [3-7] S0_BIT_MAP
+ * [11-15] S1_BIT_MAP
+ * [19-23] S2_BIT_MAP
+ * [35-39] COL4_BIT_MAP
+ * [43-47] COL5_BIT_MAP
+ * [51-55] COL6_BIT_MAP
+ * [59-63] COL7_BIT_MAP
+ *
+ * 0x05010822 // P9N2_MCS_PORT02_MCP0XLT2
+ * [3-7] COL8_BIT_MAP
+ * [11-15] COL9_BIT_MAP
+ * [19-23] BANK0_BIT_MAP
+ * [27-31] BANK1_BIT_MAP
+ * [35-39] BANK2_BIT_MAP // Hmmm... 
+ * [43-47] BANK_GROUP0_BIT_MAP + * [51-55] BANK_GROUP1_BIT_MAP + * + * All *_BIT_MAP fields above are encoded. Note that some of them are 3b long, + * those can map only PA 0 through 7. + */ + +static uint64_t dimms_rank_config(mca_data_t *mca, uint64_t xlt0, int update_d_bit) +{ + uint64_t val = 0; + int me; + int max_row_bits = 0; + + for (me = 0; me < DIMMS_PER_MCA; me++) { + if (mca->dimm[me].present) { + int other = me ^ 1; + int height = mca->dimm[me].log_ranks / mca->dimm[me].mranks; + /* + * Note: this depends on width/density having values as encoded in + * SPD and istep_13.h. Please do not change them. + */ + int row_bits = 12 + mca->dimm[me].density - mca->dimm[me].width; + if (row_bits > max_row_bits) + max_row_bits = row_bits; + + val |= PPC_BIT(0 + 16*me); + + /* When mixing rules are followed, bigger density = bigger size */ + if (mca->dimm[other].present && + mca->dimm[other].density > mca->dimm[me].density) + val |= PPC_BIT(1 + 16*me); + + /* M1 is used first, then M0 */ + if (mca->dimm[me].mranks > 1) + val |= PPC_BIT(6 + 16*me); + + if (mca->dimm[me].mranks > 2) + val |= PPC_BIT(5 + 16*me); + + /* Same with S2, S1, S0 */ + if (height > 1) + val |= PPC_BIT(11 + 16*me); + + if (height > 2) + val |= PPC_BIT(10 + 16*me); + + if (height > 4) + val |= PPC_BIT(9 + 16*me); + + /* Row bits */ + if (row_bits > 15) + val |= PPC_BIT(13 + 16*me); + + if (row_bits > 16) + val |= PPC_BIT(14 + 16*me); + + if (row_bits > 17) + val |= PPC_BIT(15 + 16*me); + } + } + + /* When both DIMMs are present and have the same sizes, D_VALUE was not set. */ + if (mca->dimm[0].density == mca->dimm[1].density) + val |= PPC_BIT(1); + + val |= xlt0; + + if (update_d_bit) { + /* + * In order for this to work: + * - old D-bit must have the value it would have for 18 row bits + * - changes happen only in PA0-PA7 range + * - D-bit is always numerically the lowest assigned PA index + * + * These assumptions are always true except for non-3DS 1R DIMMs, but + * those do not set update_d_bit. + */ + uint64_t dbit = xlt0 & PPC_BITMASK(35, 39); + dbit += ((uint64_t)(18 - max_row_bits)) << 39; + val = (val & ~PPC_BITMASK(35, 39)) | dbit; + } + + return val; +} + +enum pa_encoding { + NA = 0, + PA0 = 0, + PA1, + PA2, + PA3, + PA4, + PA5, + PA6, + PA7, + PA22 = 8, // Defined but not used by Hostboot + PA23, + PA24, + PA25, + PA26, + PA27, + PA28, + PA29, + PA30, + PA31, + PA32 // 0b10010 +}; + +/* + * M - master aka package ranks + * H - height (1 for non-3DS devices) + */ +enum mc_rank_config { + /* SDP, DDP, QDP DIMMs */ + M1H1_ONE_DIMM, + M1H1_TWO_DIMMS, + M2H1, + M4H1, + /* TODO: add 3DS DIMMs when needed */ +}; + +#define MCP0XLT0(D, M0, M1, R17, R16, R15) \ +(PPC_PLACE((D), 35, 5) | PPC_PLACE((M0), 41, 3) | PPC_PLACE((M1), 47, 5) | \ + PPC_PLACE((R17), 53, 3) | PPC_PLACE((R16), 57, 3) | PPC_PLACE((R15), 61, 3)) + +#define MCP0XLT1(S0, S1, S2, COL4, COL5, COL6, COL7) \ +(PPC_PLACE((S0), 3, 5) | PPC_PLACE((S1), 11, 5) | PPC_PLACE((S2), 19, 5) | \ + PPC_PLACE((COL4), 35, 5) | PPC_PLACE((COL5), 43, 5) | PPC_PLACE((COL6), 51, 5) | \ + PPC_PLACE((COL7), 59, 5)) + +#define MCP0XLT2(COL8, COL9, BA0, BA1, BG0, BG1) \ +(PPC_PLACE((COL8), 3, 5) | PPC_PLACE((COL9), 11, 5) | PPC_PLACE((BA0), 19, 5) | \ + PPC_PLACE((BA1), 27, 5) | PPC_PLACE((BG0), 43, 5) | PPC_PLACE((BG1), 51, 5)) + +/* + * xlt_tables[rank_configuration][reg_index] + * + * rank_configuration: see enum above + * reg_index: MCP0XLT0, MCP0XLT1, MCP0XLT2 + * + * Width and density do not matter directly, only through number of row bits. 
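+ *
+ * As an encoding example (see the PA enum above), the 2R entry below,
+ * MCP0XLT0(PA3, NA, PA29, PA4, PA5, PA6), places D_BIT_MAP = 3 (PA3,
+ * patched later when there are fewer than 18 row bits), M1_BIT_MAP =
+ * 0b01111 (PA29) and R17/R16/R15 = PA4/PA5/PA6 into the 0x05010820
+ * fields listed earlier.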
+ * Different widths cannot be mixed on the same port, but densities can, and + * consequently row bits can, too. Assume that all bitmaps can be configured, + * as long as 'valid' bits are set properly. + * + * For anything else than 1R non-3DS devices D-bit is patched by code. Initial + * value in tables below is PA that would be assigned for DRAM with 18 row bits. + * When two DIMMs with different densities are installed in one port, use number + * of row bits of a bigger DIMM. + */ +static const uint64_t xlt_tables[][3] = { + /* 1R, one DIMM under port */ + { + MCP0XLT0(NA, NA, NA, PA5, PA6, PA7), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA29, PA30, PA31, PA32), + }, + /* 1R, both DIMMs under port */ + { + MCP0XLT0(PA29, NA, NA, PA4, PA5, PA6), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA7, PA30, PA31, PA32), + }, + /* 2R */ + { + MCP0XLT0(PA3, NA, PA29, PA4, PA5, PA6), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA7, PA30, PA31, PA32), + }, + /* 4R */ + { + MCP0XLT0(PA2, PA6, PA29, PA3, PA4, PA5), + MCP0XLT1(NA, NA, NA, PA28, PA27, PA26, PA25), + MCP0XLT2(PA24, PA23, PA7, PA30, PA31, PA32), + }, + /* TODO: 3DS */ +}; + +static void setup_xlate_map(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + const int mca_mul = 0x10; + /* + * Mixing rules: + * - rank configurations are the same for both DIMMs + * - fields for unpopulated DIMMs are initialized to all 0 + * + * With those two assumptions values can be logically ORed to produce a + * common value without conditionals. + * + * Note: mixing rules do not specify explicitly if two 3DS of different + * heights can be mixed. In that case log_ranks/mranks could have bad value, + * but it would still be different than 1. + */ + int n_dimms = (mca->dimm[0].present && mca->dimm[1].present) ? 2 : 1; + int mranks = mca->dimm[0].mranks | mca->dimm[1].mranks; + int log_ranks = mca->dimm[0].log_ranks | mca->dimm[1].log_ranks; + int is_3DS = (log_ranks / mranks) != 1; + int update_d = log_ranks != 1; // Logically the same as '(mranks != 1) | is_3DS' + chiplet_id_t nest = mcs_to_nest[id]; + enum mc_rank_config cfg = M1H1_ONE_DIMM; + + if (is_3DS) { + die("3DS DIMMs not yet supported\n"); + } else { + switch (mranks) { + case 1: + /* One DIMM is default */ + if (n_dimms == 2) + cfg = M1H1_TWO_DIMMS; + break; + case 2: + cfg = M2H1; + break; + case 4: + cfg = M4H1; + break; + default: + /* Should be impossible to reach */ + die("Bad number of package ranks: %d\n", mranks); + break; + } + } + + /* MCS_PORT02_MCP0XLT0 (?) */ + write_scom_for_chiplet(chip, nest, 0x05010820 + mca_i * mca_mul, + dimms_rank_config(mca, xlt_tables[cfg][0], update_d)); + + /* MCS_PORT02_MCP0XLT1 (?) */ + write_scom_for_chiplet(chip, nest, 0x05010821 + mca_i * mca_mul, + xlt_tables[cfg][1]); + + /* MCS_PORT02_MCP0XLT2 (?) */ + write_scom_for_chiplet(chip, nest, 0x05010822 + mca_i * mca_mul, + xlt_tables[cfg][2]); +} + +static void enable_pm(uint8_t chip, int mcs_i, int mca_i) +{ + const int ATTR_MSS_MRW_POWER_CONTROL_REQUESTED = 0; + /* + * Enable Power management based off of mrw_power_control_requested + * "Before enabling power controls, run the parity disable workaround" + * This is a loop over MCAs inside a loop over MCAs. Is this necessary? 
+ * for each functional MCA + * // > The workaround is needed iff + * // > 1) greater than or equal to DD2 + * // > 2) self time refresh is enabled + * // > 3) the DIMM's are not TSV // TSV = 3DS + * // > 4) a 4R DIMM is present + * // TODO: skip for now, we do not have any 4R, non-3DS sticks to test it + * str_non_tsv_parity() + */ + + /* MC01.PORT0.SRQ.PC.MBARPC0Q + if ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == // default 0 == off + ENUM_ATTR_MSS_MRW_IDLE_POWER_CONTROL_REQUESTED_POWER_DOWN || // 1 + ENUM_ATTR_MSS_MRW_IDLE_POWER_CONTROL_REQUESTED_PD_AND_STR_CLK || // 2 + ENUM_ATTR_MSS_MRW_IDLE_POWER_CONTROL_REQUESTED_PD_AND_STR_CLK_STOP: // 3 + [2] MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE = 1 + */ + if (ATTR_MSS_MRW_POWER_CONTROL_REQUESTED) + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBARPC0Q, ~0, + PPC_BIT(MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE)); +} + +static void apply_mark_store(uint8_t chip, int mcs_i, int mca_i) +{ + /* + * FIXME: where do the values written to MVPD come from? They are all 0s in + * SCOM dump, which makes this function no-op. + */ + const uint64_t ATTR_MSS_MVPD_FWMS[8] = {0}; + int i; + + for (i = 0; i < ARRAY_SIZE(ATTR_MSS_MVPD_FWMS); i++) { + if (ATTR_MSS_MVPD_FWMS[i] == 0) + continue; + + /* MC01.PORT0.ECC64.SCOM.FWMS{0-7} + [all] 0 + [0-22] from ATTR_MSS_MVPD_FWMS + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, FWMS0 + i, + 0, ATTR_MSS_MVPD_FWMS[i]); + } +} + +static void fir_unmask(uint8_t chip, int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + + /* + * "All mcbist attentions are already special attentions" + * + * These include broadcast_out_of_sync() workaround. + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT0 + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 + [10] MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE = 1 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 1 + [10] MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK + [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0 // recoverable_error (0,1,0) + [10] MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE = 0 // attention (1,0,0) + */ + /* + * TODO: check if this works with bootblock in SEEPROM too. We don't have + * interrupt handlers set up in that case. 
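+ *
+ * For reference, the (ACTION0, ACTION1, MASK) triple per FIR bit encodes
+ * the severity used in the annotations below: (0,0,0) checkstop,
+ * (0,1,0) recoverable error, (1,0,0) attention - which is why every
+ * unmasked bit is touched in all three registers.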
+ */ + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC) | + PPC_BIT(MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE)), + 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) + continue; + + /* From broadcast_out_of_sync() workaround: + MC01.PORT0.ECC64.SCOM.RECR + [26] MBSECCQ_ENABLE_UE_NOISE_WINDOW = 0 + */ + mca_and_or(chip, id, mca_i, RECR, ~PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW), 0); + + /* + MC01.PORT0.ECC64.SCOM.ACTION0 + [33] FIR_MAINTENANCE_AUE = 0 + [36] FIR_MAINTENANCE_IAUE = 0 + [41] FIR_SCOM_PARITY_CLASS_STATUS = 0 + [42] FIR_SCOM_PARITY_CLASS_RECOVERABLE = 0 + [43] FIR_SCOM_PARITY_CLASS_UNRECOVERABLE = 0 + [44] FIR_ECC_CORRECTOR_INTERNAL_PARITY_ERROR = 0 + [45] FIR_WRITE_RMW_CE = 0 + [46] FIR_WRITE_RMW_UE = 0 + [48] FIR_WDF_OVERRUN_ERROR_0 = 0 + [49] FIR_WDF_OVERRUN_ERROR_1 = 0 + [50] FIR_WDF_SCOM_SEQUENCE_ERROR = 0 + [51] FIR_WDF_STATE_MACHINE_ERROR = 0 + [52] FIR_WDF_MISC_REGISTER_PARITY_ERROR = 0 + [53] FIR_WRT_SCOM_SEQUENCE_ERROR = 0 + [54] FIR_WRT_MISC_REGISTER_PARITY_ERROR = 0 + [55] FIR_ECC_GENERATOR_INTERNAL_PARITY_ERROR = 0 + [56] FIR_READ_BUFFER_OVERFLOW_ERROR = 0 + [57] FIR_WDF_ASYNC_INTERFACE_ERROR = 0 + [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 + [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 + MC01.PORT0.ECC64.SCOM.ACTION1 + [33] FIR_MAINTENANCE_AUE = 1 + [36] FIR_MAINTENANCE_IAUE = 1 + [41] FIR_SCOM_PARITY_CLASS_STATUS = 1 + [42] FIR_SCOM_PARITY_CLASS_RECOVERABLE = 1 + [43] FIR_SCOM_PARITY_CLASS_UNRECOVERABLE = 0 + [44] FIR_ECC_CORRECTOR_INTERNAL_PARITY_ERROR = 0 + [45] FIR_WRITE_RMW_CE = 1 + [46] FIR_WRITE_RMW_UE = 0 + [48] FIR_WDF_OVERRUN_ERROR_0 = 0 + [49] FIR_WDF_OVERRUN_ERROR_1 = 0 + [50] FIR_WDF_SCOM_SEQUENCE_ERROR = 0 + [51] FIR_WDF_STATE_MACHINE_ERROR = 0 + [52] FIR_WDF_MISC_REGISTER_PARITY_ERROR = 0 + [53] FIR_WRT_SCOM_SEQUENCE_ERROR = 0 + [54] FIR_WRT_MISC_REGISTER_PARITY_ERROR = 0 + [55] FIR_ECC_GENERATOR_INTERNAL_PARITY_ERROR = 0 + [56] FIR_READ_BUFFER_OVERFLOW_ERROR = 0 + [57] FIR_WDF_ASYNC_INTERFACE_ERROR = 0 + [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 + [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 + MC01.PORT0.ECC64.SCOM.MASK + [33] FIR_MAINTENANCE_AUE = 0 // recoverable_error (0,1,0) + [36] FIR_MAINTENANCE_IAUE = 0 // recoverable_error (0,1,0) + [41] FIR_SCOM_PARITY_CLASS_STATUS = 0 // recoverable_error (0,1,0) + [42] FIR_SCOM_PARITY_CLASS_RECOVERABLE = 0 // recoverable_error (0,1,0) + [43] FIR_SCOM_PARITY_CLASS_UNRECOVERABLE = 0 // checkstop (0,0,0) + [44] FIR_ECC_CORRECTOR_INTERNAL_PARITY_ERROR = 0 // checkstop (0,0,0) + [45] FIR_WRITE_RMW_CE = 0 // recoverable_error (0,1,0) + [46] FIR_WRITE_RMW_UE = 0 // checkstop (0,0,0) + [48] FIR_WDF_OVERRUN_ERROR_0 = 0 // checkstop (0,0,0) + [49] FIR_WDF_OVERRUN_ERROR_1 = 0 // checkstop (0,0,0) + [50] FIR_WDF_SCOM_SEQUENCE_ERROR = 0 // checkstop (0,0,0) + [51] FIR_WDF_STATE_MACHINE_ERROR = 0 // checkstop (0,0,0) + [52] FIR_WDF_MISC_REGISTER_PARITY_ERROR = 0 // checkstop (0,0,0) + [53] FIR_WRT_SCOM_SEQUENCE_ERROR = 0 // checkstop (0,0,0) + [54] FIR_WRT_MISC_REGISTER_PARITY_ERROR = 0 // checkstop 
(0,0,0) + [55] FIR_ECC_GENERATOR_INTERNAL_PARITY_ERROR = 0 // checkstop (0,0,0) + [56] FIR_READ_BUFFER_OVERFLOW_ERROR = 0 // checkstop (0,0,0) + [57] FIR_WDF_ASYNC_INTERFACE_ERROR = 0 // checkstop (0,0,0) + [58] FIR_READ_ASYNC_INTERFACE_PARITY_ERROR = 0 // checkstop (0,0,0) + [59] FIR_READ_ASYNC_INTERFACE_SEQUENCE_ERROR = 0 // checkstop (0,0,0) + */ + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION0, + ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), + 0); + + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION1, + ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), + PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BIT(ECC_FIR_SCOM_PARITY_CLASS_STATUS) | + PPC_BIT(ECC_FIR_SCOM_PARITY_CLASS_RECOVERABLE) | + PPC_BIT(ECC_FIR_WRITE_RMW_CE)); + + mca_and_or(chip, id, mca_i, ECC_FIR_MASK, + ~(PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BIT(ECC_FIR_MAINTENANCE_IAUE) | + PPC_BITMASK(41, 46) | PPC_BITMASK(48, 59)), + 0); + } +} + +static void mss_draminit_mc(uint8_t chip) +{ + int mcs_i, mca_i; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + /* No need to initialize a non-functional MCS */ + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + setup_xlate_map(chip, mcs_i, mca_i); + + /* Set up read pointer delay */ + /* MC01.PORT0.ECC64.SCOM.RECR + [6-8] MBSECCQ_READ_POINTER_DELAY = 1 // code sets this to "ON", but this field is numerical value + // Not sure where this attr comes from or what is its default value. Assume !0 = 1 -> TCE correction enabled + [27] MBSECCQ_ENABLE_TCE_CORRECTION = !ATTR_MNFG_FLAGS.MNFG_REPAIRS_DISABLED_ATTR + */ + mca_and_or(chip, id, mca_i, RECR, + ~(PPC_BITMASK(6, 8) | PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)), + PPC_PLACE(1, MBSECCQ_READ_POINTER_DELAY, MBSECCQ_READ_POINTER_DELAY_LEN) | + PPC_BIT(MBSECCQ_ENABLE_TCE_CORRECTION)); + + enable_pm(chip, mcs_i, mca_i); + + /* + * This was already done after draminit_cke_helper, search for "Per + * conversation with Shelton and Steve..." in 13.10, "however that + * might be a work-around so we set it low here kind of like + * belt-and-suspenders. BRS" + * + * MC01.PORT0.SRQ.MBA_FARB5Q + * [5] MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL = 0 + */ + mca_and_or(chip, id, mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL), 0); + + /* MC01.PORT0.SRQ.MBA_FARB0Q + [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 0 + */ + mca_and_or(chip, id, mca_i, MBA_FARB0Q, + ~PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE), 0); + + /* + * "MC work around for OE bug (seen in periodics + PHY) + * Turn on output-enable always on. Shelton tells me they'll fix + * for DD2" + * + * This is also surrounded by '#ifndef REMOVE_FOR_DD2', but this + * name is nowhere else to be found. If this still have to be used, + * we may as well merge it with the previous write. 
+ * + * MC01.PORT0.SRQ.MBA_FARB0Q + * [55] MBA_FARB0Q_CFG_OE_ALWAYS_ON = 1 + */ + mca_and_or(chip, id, mca_i, MBA_FARB0Q, ~0, + PPC_BIT(MBA_FARB0Q_CFG_OE_ALWAYS_ON)); + + /* MC01.PORT0.SRQ.PC.MBAREF0Q + [0] MBAREF0Q_CFG_REFRESH_ENABLE = 1 + */ + mca_and_or(chip, id, mca_i, MBAREF0Q, + ~0, PPC_BIT(MBAREF0Q_CFG_REFRESH_ENABLE)); + + /* Enable periodic calibration */ + /* + * A large chunk of function enable_periodic_cal() in Hostboot is + * disabled, protected by #ifdef TODO_166433_PERIODICS, which also + * isn't mentioned anywhere else. This is what is left: + MC01.PORT0.SRQ.MBA_CAL3Q + [all] 0 + [0-1] MBA_CAL3Q_CFG_INTERNAL_ZQ_TB = 0x3 + [2-9] MBA_CAL3Q_CFG_INTERNAL_ZQ_LENGTH = 0xff + [10-11] MBA_CAL3Q_CFG_EXTERNAL_ZQ_TB = 0x3 + [12-19] MBA_CAL3Q_CFG_EXTERNAL_ZQ_LENGTH = 0xff + [20-21] MBA_CAL3Q_CFG_RDCLK_SYSCLK_TB = 0x3 + [22-29] MBA_CAL3Q_CFG_RDCLK_SYSCLK_LENGTH = 0xff + [30-31] MBA_CAL3Q_CFG_DQS_ALIGNMENT_TB = 0x3 + [32-39] MBA_CAL3Q_CFG_DQS_ALIGNMENT_LENGTH = 0xff + [40-41] MBA_CAL3Q_CFG_MPR_READEYE_TB = 0x3 + [42-49] MBA_CAL3Q_CFG_MPR_READEYE_LENGTH = 0xff + [50-51] MBA_CAL3Q_CFG_ALL_PERIODIC_TB = 0x3 + [52-59] MBA_CAL3Q_CFG_ALL_PERIODIC_LENGTH = 0xff + // Or simpler: 0xfffffffffffffff0 + */ + mca_and_or(chip, id, mca_i, MBA_CAL3Q, 0, PPC_BITMASK(0, 59)); + + /* Enable read ECC + MC01.PORT0.ECC64.SCOM.RECR // 0x07010A0A + [0] MBSECCQ_DISABLE_MEMORY_ECC_CHECK_CORRECT = 0 + [1] MBSECCQ_DISABLE_MEMORY_ECC_CORRECT = 0 + [29] MBSECCQ_USE_ADDRESS_HASH = 1 + // Docs don't describe the encoding, code suggests this inverts data, toggles checks + [30-31] MBSECCQ_DATA_INVERSION = 3 + */ + mca_and_or(chip, id, mca_i, RECR, + ~(PPC_BITMASK(0, 1) | PPC_BITMASK(29, 31)), + PPC_BIT(MBSECCQ_USE_ADDRESS_HASH) | + PPC_PLACE(3, MBSECCQ_DATA_INVERSION, MBSECCQ_DATA_INVERSION_LEN)); + + apply_mark_store(chip, mcs_i, mca_i); + } + + fir_unmask(chip, mcs_i); + } +} + +void istep_13_13(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 13); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_draminit_mc(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_2.c b/src/soc/ibm/power9/istep_13_2.c new file mode 100644 index 00000000000..ccccf13f95f --- /dev/null +++ b/src/soc/ibm/power9/istep_13_2.c @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.2 mem_pll_reset: Reset PLL for MCAs in async + * + * a) p9_mem_pll_reset.C (proc chip) + * - This step is a no-op on cumulus as the centaur is already has its PLLs + * setup in step 11 + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - If in async mode then this HWP will put the PLL into bypass, reset mode + * - Disable listen_to_sync for MEM chiplet, whenever MEM is not in sync to + * NEST + */ + +static void mem_pll_reset(uint8_t chip) +{ + int i; + long time_elapsed = 0; + + for (i = 0; i < MCS_PER_PROC; i++) { + // Assert endpoint reset + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [1] PCB_EP_RESET = 1 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + + // Mask PLL unlock error in PCB slave + /* + TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG + [12] (part of) ERROR_MASK = 1 + */ + scom_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, PPC_BIT(12)); + + // Move MC PLL into reset state (3 separate writes, no delays between them) + /* + 
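+ Note: the (WOR)/(WAND) suffixes denote write-OR/write-AND alias
+ addresses of the register: writing V acts as reg |= V or reg &= V,
+ hence the "[all] 0" and "[all] 1" base values in these blocks.
+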
TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [5] PLL_BYPASS = 1 + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [4] PLL_RESET = 1 + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WOR) + [all] 0 + [3] PLL_TEST_EN = 1 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + + // Assert MEM PLDY and DCC bypass + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL1 (WOR) + [all] 0 + [1] CLK_DCC_BYPASS_EN = 1 + [2] CLK_PDLY_BYPASS_EN = 1 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WOR, + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN) | + PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + + // Drop endpoint reset + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [1] PCB_EP_RESET = 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PCB_EP_RESET)); + + // Disable listen to sync pulse to MC chiplet, when MEM is not in sync to nest + /* + TP.TCMC01.MCSLOW.SYNC_CONFIG + [4] LISTEN_TO_SYNC_PULSE_DIS = 1 + */ + scom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_SYNC_CONFIG, + PPC_BIT(MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS)); + + // Initialize OPCG_ALIGN register + /* + TP.TCMC01.MCSLOW.OPCG_ALIGN + [all] 0 + [0-3] INOP_ALIGN = 5 // 8:1 + [12-19] INOP_WAIT = 0 + [47-51] SCAN_RATIO = 0 // 1:1 + [52-63] OPCG_WAIT_CYCLES = 0x20 + */ + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, + PPC_PLACE(5, MCSLOW_OPCG_ALIGN_INOP_ALIGN, + MCSLOW_OPCG_ALIGN_INOP_ALIGN_LEN) | + PPC_PLACE(0x20, MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES, + MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES_LEN)); + + // scan0 flush PLL boundary ring + /* + TP.TCMC01.MCSLOW.CLK_REGION + [all] 0 + [14] CLOCK_REGION_UNIT10 = 1 + [48] SEL_THOLD_SL = 1 + [49] SEL_THOLD_NSL = 1 + [50] SEL_THOLD_ARY = 1 + TP.TCMC01.MCSLOW.SCAN_REGION_TYPE + [all] 0 + [14] SCAN_REGION_UNIT10 = 1 + [56] SCAN_TYPE_BNDY = 1 + TP.TCMC01.MCSLOW.OPCG_REG0 + [0] RUNN_MODE = 0 + // Separate write, but don't have to read again + TP.TCMC01.MCSLOW.OPCG_REG0 + [2] RUN_SCAN0 = 1 + */ + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, + PPC_BIT(MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) | + PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY)); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10) | + PPC_BIT(MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY)); + scom_and_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, + ~PPC_BIT(MCSLOW_OPCG_RUNN_MODE)); + scom_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_REG0, + PPC_BIT(MCSLOW_OPCG_RUN_SCAN0)); + } + + /* Separate loop so we won't have to wait for timeout twice */ + for (i = 0; i < MCS_PER_PROC; i++) { + /* FIXME: previous one didn't skip nonfunctional, should this one? 
*/ + //~ if (!mem_data.mcs[i].functional) + //~ continue; + + /* + timeout(200 * 16us): + TP.TCMC01.MCSLOW.CPLT_STAT0 + if (([8] CC_CTRL_OPCG_DONE_DC) == 1) break + delay(16us) + */ + time_elapsed = wait_us(200 * 16 - time_elapsed, + read_scom_for_chiplet(chip, mcs_ids[i], + MCSLOW_CPLT_STAT0) & + PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC)); + + if (!time_elapsed) + die("Timed out while waiting for PLL boundary ring flush\n"); + + // Cleanup + /* + TP.TCMC01.MCSLOW.CLK_REGION + [all] 0 + TP.TCMC01.MCSLOW.SCAN_REGION_TYPE + [all] 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_CLK_REGION, 0); + write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_SCAN_REGION_TYPE, 0); + } +} + +void istep_13_2(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 2); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_pll_reset(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_3.c b/src/soc/ibm/power9/istep_13_3.c new file mode 100644 index 00000000000..686023e438a --- /dev/null +++ b/src/soc/ibm/power9/istep_13_3.c @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "istep_13_scom.h" + +#define RING_ID_1866 0x6B +#define RING_ID_2133 0x6C +#define RING_ID_2400 0x6D +#define RING_ID_2666 0x6E + +/* + * 13.3 mem_pll_initf: PLL Initfile for MBAs + * + * a) p9_mem_pll_initf.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - MCA PLL setup + * - Note that Hostboot doesn't support twiddling bits, Looks up which + * "bucket" (ring) to use from attributes set during mss_freq + * - Then request the SBE to scan ringId with setPulse + * - SBE needs to support 5 RS4 images + * - Data is stored as a ring image in the SBE that is frequency specific + * - 5 different frequencies (1866, 2133, 2400, 2667, EXP) + */ + +static void mem_pll_initf(uint8_t chip) +{ + uint64_t ring_id; + int mcs_i; + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + switch (mem_data[chip].speed) { + case 2666: + ring_id = RING_ID_2666; + break; + case 2400: + ring_id = RING_ID_2400; + break; + case 2133: + ring_id = RING_ID_2133; + break; + case 1866: + ring_id = RING_ID_1866; + break; + default: + die("Unsupported memory speed (%d MT/s)\n", mem_data[chip].speed); + } + + /* + * This is the only place where Hostboot does `putRing()` on Nimbus, but + * because Hostboot tries to be as generic as possible, there are many tests + * and safeties in place. We do not have to worry about another threads or + * out of order command/response pair. Just fill a buffer, send it and make + * sure the receiver (SBE) gets it. If you still want to know the details, + * start digging here: https://github.com/open-power/hostboot/blob/master/src/usr/scan/scandd.C#L169 + * + * TODO: this is the only place where `putRing()` is called, but it isn't + * the only place where PSU commands are used (see 16.1-16.2). Consider + * making a function from this. 
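+ *
+ * One possible shape for such a helper (a sketch only - the name and the
+ * omitted sequence-ID handling are hypothetical, the accesses mirror the
+ * code below):
+ *
+ *   static void psu_command(uint8_t chip, uint64_t cmd, uint64_t data)
+ *   {
+ *       if (read_scom(chip, PSU_SBE_DOORBELL_REG) & PPC_BIT(0))
+ *           die("MBOX to SBE busy, this should not happen\n");
+ *
+ *       write_scom(chip, PSU_HOST_SBE_MBOX0_REG, cmd);
+ *       write_scom(chip, PSU_HOST_SBE_MBOX1_REG, data);
+ *       write_scom(chip, PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0));
+ *
+ *       if (!wait_ms(90 * MSECS_PER_SEC,
+ *                    read_scom(chip, PSU_HOST_DOORBELL_REG) & PPC_BIT(0)))
+ *           die("Timed out while waiting for SBE response\n");
+ *
+ *       write_scom(chip, PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0));
+ *   }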
+ */ + // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG + if (read_scom(chip, PSU_SBE_DOORBELL_REG) & PPC_BIT(0)) + die("MBOX to SBE busy, this should not happen\n"); + + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + long time; + + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + /* https://github.com/open-power/hostboot/blob/master/src/include/usr/sbeio/sbe_psudd.H#L418 */ + // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG + /* REQUIRE_RESPONSE, PSU_PUT_RING_FROM_IMAGE_CMD, CMD_CONTROL_PUTRING */ + /* + * TODO: there is also a sequence ID (bits 32-47) which should be unique. It + * has a value of 9 at this point in Hostboot logs, meaning there were + * probably earlier messages to SBE. In that case, we may also need a static + * variable for it, which probably implies wrapping this into a function and + * moving it to separate file. + */ + write_scom(chip, PSU_HOST_SBE_MBOX0_REG, 0x000001000000D301); + + // TP.TPCHIP.PIB.PSU.PSU_HOST_SBE_MBOX0_REG + /* TARGET_TYPE_PERV, chiplet ID = 0x07, ring ID, RING_MODE_SET_PULSE_NSL */ + write_scom(chip, PSU_HOST_SBE_MBOX1_REG, 0x0002000000000004 | + PPC_PLACE(ring_id, 32, 16) | + PPC_PLACE(mcs_ids[mcs_i], 24, 8)); + + // Ring the host->SBE doorbell + // TP.TPCHIP.PIB.PSU.PSU_SBE_DOORBELL_REG_OR + write_scom(chip, PSU_SBE_DOORBELL_REG_WOR, PPC_BIT(0)); + + // Wait for response + /* + * Hostboot registers an interrupt handler in a thread that is demonized. We + * do not want nor need to implement a whole OS just for this purpose, we + * can just busy-wait here, there isn't anything better to do anyway. + * + * The original timeout is 90 seconds, but that seems like eternity. After + * thorough testing we probably should trim it. + */ + // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG + time = wait_ms(90 * MSECS_PER_SEC, + read_scom(chip, PSU_HOST_DOORBELL_REG) & PPC_BIT(0)); + + if (!time) + die("Timed out while waiting for SBE response\n"); + + /* This may depend on the requested frequency, but for current setup in our + * lab this is ~3ms both for coreboot and Hostboot. 
*/ + printk(RAM_DEBUG, "putRing took %ld ms\n", time); + + // Clear SBE->host doorbell + // TP.TPCHIP.PIB.PSU.PSU_HOST_DOORBELL_REG_AND + write_scom(chip, PSU_HOST_DOORBELL_REG_WAND, ~PPC_BIT(0)); + } +} + +void istep_13_3(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 3); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_pll_initf(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_4.c b/src/soc/ibm/power9/istep_13_4.c new file mode 100644 index 00000000000..e920e3c62ff --- /dev/null +++ b/src/soc/ibm/power9/istep_13_4.c @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.4 mem_pll_setup: Setup PLL for MBAs + * + * a) p9_mem_pll_setup.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - MCA PLL setup + * - Moved PLL out of bypass (just DDR) + * - Performs PLL checking + */ + +static void mem_pll_setup(uint8_t chip) +{ + int i; + + for (i = 0; i < MCS_PER_PROC; i++) { + // Drop PLDY bypass of Progdelay logic + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL1 (WAND) + [all] 1 + [2] CLK_PDLY_BYPASS_EN = 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN)); + + // Drop DCC bypass of DCC logic + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL1 (WAND) + [all] 1 + [1] CLK_DCC_BYPASS_EN = 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL1_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN)); + + // ATTR_NEST_MEM_X_O_PCI_BYPASS is set to 0 in talos.xml. + // > if (ATTR_NEST_MEM_X_O_PCI_BYPASS == 0) + + // Drop PLL test enable + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [3] PLL_TEST_EN = 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_TEST_EN)); + + // Drop PLL reset + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [4] PLL_RESET = 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_RESET)); + + /* + * TODO: This is how Hosboot does it, maybe it would be better to use + * wait_ms and a separate loop to have only one timeout. On the other + * hand, it is possible that MCS will stop responding to SCOM accesses + * after PLL reset so we wouldn't be able to read the status. 
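+ *
+ * A polling variant, if reading the lock status during acquisition turns
+ * out to be safe (a sketch reusing the wait_us() pattern from 13.2; the
+ * 5 ms budget matches the fixed delay below):
+ *
+ *   if (!wait_us(5 * 1000, read_scom_for_chiplet(chip, mcs_ids[i],
+ *                                                PCBSLMC01_PLL_LOCK_REG)
+ *                          & PPC_BIT(0)))
+ *       die("MCS%d PLL not locked\n", i);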
+ */ + mdelay(5); + + // Check PLL lock + /* + TP.TPCHIP.NET.PCBSLMC01.PLL_LOCK_REG + assert([0] (reserved) == 1) + */ + if (!(read_scom_for_chiplet(chip, mcs_ids[i], + PCBSLMC01_PLL_LOCK_REG) & PPC_BIT(0))) + die("MCS%d PLL not locked\n", i); + + // Drop PLL Bypass + /* + TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND) + [all] 1 + [5] PLL_BYPASS = 0 + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_NET_CTRL0_WAND, + ~PPC_BIT(PCBSLMC01_NET_CTRL0_PLL_BYPASS)); + + // Set scan ratio to 4:1 + /* + TP.TCMC01.MCSLOW.OPCG_ALIGN + [47-51] SCAN_RATIO = 3 // 4:1 + */ + scom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_OPCG_ALIGN, + ~PPC_BITMASK(47,51), + PPC_PLACE(3, MCSLOW_OPCG_ALIGN_SCAN_RATIO, + MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN)); + + // > end if + + // Reset PCB Slave error register + /* + TP.TPCHIP.NET.PCBSLMC01.ERROR_REG + [all] 1 // Write 1 to clear + */ + write_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_ERROR_REG, ~0); + + // Unmask PLL unlock error in PCB slave + /* + TP.TPCHIP.NET.PCBSLMC01.SLAVE_CONFIG_REG + [12] (part of) ERROR_MASK = 0 + */ + scom_and_for_chiplet(chip, mcs_ids[i], PCBSLMC01_SLAVE_CONFIG_REG, + ~PPC_BIT(12)); + } +} + +void istep_13_4(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 4); + + /* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */ + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mem_pll_setup(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_6.c b/src/soc/ibm/power9/istep_13_6.c new file mode 100644 index 00000000000..55eaef3bcd7 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_6.c @@ -0,0 +1,335 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.6 mem_startclocks: Start clocks on MBA/MCAs + * + * a) p9_mem_startclocks.C (proc chip) + * - This step is a no-op on cumulus + * - This step is a no-op if memory is running in synchronous mode since the + * MCAs are using the nest PLL, HWP detect and exits + * - Drop fences and tholds on MBA/MCAs to start the functional clocks + */ + +static inline void p9_mem_startclocks_cplt_ctrl_action_function(uint8_t chip, chiplet_id_t id, + uint64_t pg) +{ + // Drop partial good fences + /* + TP.TCMC01.MCSLOW.CPLT_CTRL1 (WO_CLEAR) + [all] 0 + [3] TC_VITL_REGION_FENCE = ~ATTR_PG[3] + [4-14] TC_REGION{1-3}_FENCE, UNUSED_{8-14}B = ~ATTR_PG[4-14] + */ + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL1_WCLEAR, ~pg & PPC_BITMASK(3, 14)); + + // Reset abistclk_muxsel and syncclk_muxsel + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_CLEAR) + [all] 0 + [0] CTRL_CC_ABSTCLK_MUXSEL_DC = 1 + [1] TC_UNIT_SYNCCLK_MUXSEL_DC = 1 + */ + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC) | + PPC_BIT(MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC)); + +} + +static inline void p9_sbe_common_align_chiplets(uint8_t chip, chiplet_id_t id) +{ + // Exit flush + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_OR) + [all] 0 + [2] CTRL_CC_FLUSHMODE_INH_DC = 1 + */ + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC)); + + // Enable alignement + /* + TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_OR) + [all] 0 + [3] CTRL_CC_FORCE_ALIGN_DC = 1 + */ + write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR, + PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC)); + + // Clear chiplet is aligned + /* + TP.TCMC01.MCSLOW.SYNC_CONFIG + [7] CLEAR_CHIPLET_IS_ALIGNED = 1 + */ + scom_or_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG, 
+
+	// Unset Clear chiplet is aligned
+	/*
+	TP.TCMC01.MCSLOW.SYNC_CONFIG
+	[7]	CLEAR_CHIPLET_IS_ALIGNED =	0
+	*/
+	scom_and_for_chiplet(chip, id, MCSLOW_SYNC_CONFIG,
+			     ~PPC_BIT(MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED));
+
+	udelay(100);
+
+	// Poll aligned bit
+	/*
+	timeout(10*100us):
+	TP.TCMC01.MCSLOW.CPLT_STAT0
+	if (([9] CC_CTRL_CHIPLET_IS_ALIGNED_DC) == 1) break
+	delay(100us)
+	*/
+	if (!wait_us(10 * 100, read_scom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) &
+		     PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_CHIPLET_IS_ALIGNED_DC)))
+		die("Timeout while waiting for chiplet alignment\n");
+
+	// Disable alignment
+	/*
+	TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_CLEAR)
+	[all]	0
+	[3]	CTRL_CC_FORCE_ALIGN_DC =	1
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WCLEAR,
+			       PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC));
+}
+
+static void p9_sbe_common_clock_start_stop(uint8_t chip, chiplet_id_t id, uint64_t pg)
+{
+	// Chiplet exit flush
+	/*
+	TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_OR)
+	[all]	0
+	[2]	CTRL_CC_FLUSHMODE_INH_DC =	1
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_CPLT_CTRL0_WOR,
+			       PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC));
+
+	// Clear Scan region type register
+	/*
+	TP.TCMC01.MCSLOW.SCAN_REGION_TYPE
+	[all]	0
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_SCAN_REGION_TYPE, 0);
+
+	// Setup all Clock Domains and Clock Types
+	/*
+	TP.TCMC01.MCSLOW.CLK_REGION
+	[0-1]	CLOCK_CMD =	1	// start
+	[2]	SLAVE_MODE =	0
+	[3]	MASTER_MODE =	0
+	[4-14]	CLOCK_REGION_* = (((~ATTR_PG[4-14]) >> 1) & 0x07FE) << 1 =
+	                         ~ATTR_PG[4-14] & 0x0FFC =
+	                         ~ATTR_PG[4-13]	// Hostboot tends to complicate
+	[48]	SEL_THOLD_SL =	1
+	[49]	SEL_THOLD_NSL =	1
+	[50]	SEL_THOLD_ARY =	1
+	*/
+	scom_and_or_for_chiplet(chip, id, MCSLOW_CLK_REGION,
+				~(PPC_BITMASK(0, 14) | PPC_BITMASK(48, 50)),
+				PPC_PLACE(1, MCSLOW_CLK_REGION_CLOCK_CMD,
+					  MCSLOW_CLK_REGION_CLOCK_CMD_LEN) |
+				PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_SL) |
+				PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_NSL) |
+				PPC_BIT(MCSLOW_CLK_REGION_SEL_THOLD_ARY) |
+				(~pg & PPC_BITMASK(4, 13)));
+
+	// Poll OPCG done bit to check for completion
+	/*
+	timeout(10*100us):
+	TP.TCMC01.MCSLOW.CPLT_STAT0
+	if (([8] CC_CTRL_OPCG_DONE_DC) == 1) break
+	delay(100us)
+	*/
+	if (!wait_us(10 * 100, read_scom_for_chiplet(chip, id, MCSLOW_CPLT_STAT0) &
+		     PPC_BIT(MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC)))
+		die("Timeout while waiting for OPCG done bit\n");
+
+	/*
+	 * Here Hostboot calculates the expected clock status, based on the
+	 * previous values and the requested command. It is done by generic
+	 * functions, but because we know exactly which clocks were to be
+	 * started, we can test just for those.
+	 */
+	/*
+	TP.TCMC01.MCSLOW.CLOCK_STAT_SL
+	TP.TCMC01.MCSLOW.CLOCK_STAT_NSL
+	TP.TCMC01.MCSLOW.CLOCK_STAT_ARY
+	assert(([4-14] & ATTR_PG[4-14]) == ATTR_PG[4-14])
+	*/
+	uint64_t mask = PPC_BITMASK(4, 13);
+	uint64_t expected = pg & mask;
+	if ((read_scom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_SL) & mask) != expected ||
+	    (read_scom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_NSL) & mask) != expected ||
+	    (read_scom_for_chiplet(chip, id, MCSLOW_CLOCK_STAT_ARY) & mask) != expected)
+		die("Unexpected clock status\n");
+}
+
+static inline void p9_mem_startclocks_fence_setup_function(uint8_t chip, chiplet_id_t id)
+{
+	/*
+	 * Hostboot does it based on pg_vector. It seems to check for Nest IDs
+	 * to which MCs are connected, but I'm not sure if this is the case.
+	 * I also don't know if it is possible to have a functional MCBIST for
+	 * which we don't want to drop the fence (functional MCBIST with
+	 * nonfunctional NEST?)
+	 *
+	 * Most likely this will need to be fixed for populated second MCS.
+	 */
+
+	/*
+	 * if ((MC.ATTR_CHIP_UNIT_POS == 0x07 && pg_vector[5]) ||
+	 *     (MC.ATTR_CHIP_UNIT_POS == 0x08 && pg_vector[3]))
+	 *{
+	 */
+
+	// Drop chiplet fence
+	/*
+	TP.TPCHIP.NET.PCBSLMC01.NET_CTRL0 (WAND)
+	[all]	1
+	[18]	FENCE_EN =	0
+	*/
+	write_scom_for_chiplet(chip, id, PCBSLMC01_NET_CTRL0_WAND,
+			       ~PPC_BIT(PCBSLMC01_NET_CTRL0_FENCE_EN));
+
+	/* }*/
+}
+
+static void p9_sbe_common_configure_chiplet_FIR(uint8_t chip, chiplet_id_t id)
+{
+	// reset pervasive FIR
+	/*
+	TP.TCMC01.MCSLOW.LOCAL_FIR
+	[all]	0
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR, 0);
+
+	// configure pervasive FIR action/mask
+	/*
+	TP.TCMC01.MCSLOW.LOCAL_FIR_ACTION0
+	[all]	0
+	TP.TCMC01.MCSLOW.LOCAL_FIR_ACTION1
+	[all]	0
+	[0-3]	0xF
+	TP.TCMC01.MCSLOW.LOCAL_FIR_MASK
+	[all]	0
+	[4-41]	0x3FFFFFFFFF (every bit set)
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION0, 0);
+	write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_ACTION1, PPC_BITMASK(0, 3));
+	write_scom_for_chiplet(chip, id, MCSLOW_LOCAL_FIR_MASK, PPC_BITMASK(4, 41));
+
+	// reset XFIR
+	/*
+	TP.TCMC01.MCSLOW.XFIR
+	[all]	0
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_XFIR, 0);
+
+	// configure XFIR mask
+	/*
+	TP.TCMC01.MCSLOW.FIR_MASK
+	[all]	0
+	*/
+	write_scom_for_chiplet(chip, id, MCSLOW_FIR_MASK, 0);
+}
+
+static void mem_startclocks(uint8_t chip)
+{
+	int i;
+	uint16_t pg[MCS_PER_PROC];
+
+	mvpd_get_mcs_pg(chip, pg);
+
+	for (i = 0; i < MCS_PER_PROC; i++) {
+		const uint64_t mcs_pg = PPC_PLACE(pg[i], 0, 16);
+
+		/* According to logs, Hostboot does it also for the second MCS */
+		//~ if (!mem_data.mcs[i].functional)
+			//~ continue;
+
+		// Call p9_mem_startclocks_cplt_ctrl_action_function for Mc chiplets
+		p9_mem_startclocks_cplt_ctrl_action_function(chip, mcs_ids[i], mcs_pg);
+
+		// Call module align chiplets for Mc chiplets
+		p9_sbe_common_align_chiplets(chip, mcs_ids[i]);
+
+		// Call module clock start stop for MC01, MC23
+		p9_sbe_common_clock_start_stop(chip, mcs_ids[i], mcs_pg);
+
+		// Call p9_mem_startclocks_fence_setup_function for Mc chiplets
+		p9_mem_startclocks_fence_setup_function(chip, mcs_ids[i]);
+
+		// Clear flush_inhibit to go into flush mode
+		/*
+		TP.TCMC01.MCSLOW.CPLT_CTRL0 (WO_CLEAR)
+		[all]	0
+		[2]	CTRL_CC_FLUSHMODE_INH_DC =	1
+		*/
+		write_scom_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CTRL0_WCLEAR,
+				       PPC_BIT(MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC));
+
+		// Call p9_sbe_common_configure_chiplet_FIR for MC chiplets
+		p9_sbe_common_configure_chiplet_FIR(chip, mcs_ids[i]);
+
+		// Reset FBC chiplet configuration
+		/*
+		TP.TCMC01.MCSLOW.CPLT_CONF0
+		[48-51]	TC_UNIT_GROUP_ID_DC =	ATTR_PROC_FABRIC_GROUP_ID	// Where do these come from?
+		[52-54]	TC_UNIT_CHIP_ID_DC =	ATTR_PROC_FABRIC_CHIP_ID
+		[56-60]	TC_UNIT_SYS_ID_DC =	ATTR_PROC_FABRIC_SYSTEM_ID	// 0 in talos.xml
+		*/
+		/*
+		 * Take 0 for all values - assuming ATTR_PROC_FABRIC_GROUP_ID is
+		 * ATTR_FABRIC_GROUP_ID of parent PROC (same for CHIP_ID). Only
+		 * SYSTEM_ID is present in talos.xml with full name.
+		 */
+		scom_and_or_for_chiplet(chip, mcs_ids[i], MCSLOW_CPLT_CONF0,
+					~(PPC_BITMASK(48, 54) | PPC_BITMASK(56, 60)),
+					PPC_PLACE(chip, 48, 4));
+
+		// Add to Multicast Group
+		/* Avoid setting if register is already set, i.e.
		   [3-5] != 7 */
+		/*
+		TP.TPCHIP.NET.PCBSLMC01.MULTICAST_GROUP_1
+		[3-5]	MULTICAST1_GROUP: if 7 then set to 0
+		[16-23]	(not described): if [3-5] == 7 then set to 0x1C	// No clue why Hostboot modifies these bits
+		TP.TPCHIP.NET.PCBSLMC01.MULTICAST_GROUP_2
+		[3-5]	MULTICAST1_GROUP: if 7 then set to 2
+		[16-23]	(not described): if [3-5] == 7 then set to 0x1C
+		*/
+		if ((read_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1) &
+		     PPC_BITMASK(3, 5)) == PPC_BITMASK(3, 5))
+			scom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_1,
+						~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)),
+						PPC_BITMASK(19, 21));
+
+		if ((read_scom_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2) &
+		     PPC_BITMASK(3, 5)) == PPC_BITMASK(3, 5))
+			scom_and_or_for_chiplet(chip, mcs_ids[i], PCBSLMC01_MULTICAST_GROUP_2,
+						~(PPC_BITMASK(3, 5) | PPC_BITMASK(16, 23)),
+						PPC_BIT(4) | PPC_BITMASK(19, 21));
+	}
+
+}
+
+void istep_13_6(uint8_t chips)
+{
+	uint8_t chip;
+
+	report_istep(13, 6);
+
+	/* Assuming MC doesn't run in sync mode with Fabric, otherwise this is no-op */
+
+	for (chip = 0; chip < MAX_CHIPS; chip++) {
+		if (chips & (1 << chip))
+			mem_startclocks(chip);
+	}
+}
diff --git a/src/soc/ibm/power9/istep_13_8.c b/src/soc/ibm/power9/istep_13_8.c
new file mode 100644
index 00000000000..3d23c6d1b42
--- /dev/null
+++ b/src/soc/ibm/power9/istep_13_8.c
@@ -0,0 +1,2409 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "istep_13_scom.h"
+
+/*
+ * 13.8 mss_scominit: Perform scom inits to MC and PHY
+ *
+ * - HW units included are MCBIST, MCA/PHY (Nimbus) or membuf, L4, MBAs (Cumulus)
+ * - Does not use initfiles, coded into HWP
+ * - Uses attributes from previous step
+ * - Pushes memory extent configuration into the MBA/MCAs
+ *   - Addresses are pulled from attributes, set previously by mss_eff_config
+ *   - MBA/MCAs always start at address 0, address map controlled by
+ *     proc_setup_bars below
+ */
+
+/*
+ * This function was generated from initfiles. Some of the registers used here
+ * are not documented, except for an occasional name of a constant written to
+ * them. They also access registers at addresses for chiplet ID = 5 (Nest
+ * west), even though the specified target is MCA. It is not clear if the MCA
+ * offset has to be added to the SCOM address for those registers or not. Even
+ * logs from a debug version of Hostboot don't list the addresses explicitly,
+ * but by comparing them with values read with 'pdbg' it seems that they use a
+ * stride of 0x10.
+ *
+ * Undocumented registers are marked with (?) in the comments.
+ */
+static void p9n_mca_scom(uint8_t chip, int mcs_i, int mca_i)
+{
+	const struct powerbus_cfg *pb_cfg = powerbus_cfg(chip);
+
+	chiplet_id_t id = mcs_ids[mcs_i];
+	mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i];
+	const int mca_mul = 0x10;
+	/*
+	 * Mixing rules:
+	 * - rank configurations are the same for both DIMMs
+	 * - fields for unpopulated DIMMs are initialized to all 0
+	 *
+	 * With those two assumptions values can be logically ORed to produce a
+	 * common value without conditionals.
+	 */
+	int n_dimms = (mca->dimm[0].present && mca->dimm[1].present) ? 2 : 1;
+	int mranks = mca->dimm[0].mranks | mca->dimm[1].mranks;
+	int log_ranks = mca->dimm[0].log_ranks | mca->dimm[1].log_ranks;
+	bool is_8H = (log_ranks / mranks) == 8;
+	chiplet_id_t nest = mcs_to_nest[id];
+	int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) :
+			(mca->dimm[1].mranks == 2 ? 2 : 0);
+	if (mca->dimm[0].present && mca->dimm[1].present)
+		vpd_idx++;
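+	/*
+	 * The resulting index selects one of four MT VPD data sets: 0 = one 1R
+	 * DIMM, 1 = two 1R DIMMs, 2 = one 2R DIMM, 3 = two 2R DIMMs. This
+	 * relies on the mixing rules above - both DIMMs always have the same
+	 * number of master ranks.
+	 */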
+
+	/* P9N2_MCS_PORT02_MCPERF0 (?)
+	    [22-27] = 0x20	// AMO_LIMIT
+	*/
+	scom_and_or_for_chiplet(chip, nest, 0x05010823 + mca_i * mca_mul,
+				~PPC_BITMASK(22, 27), PPC_PLACE(0x20, 22, 6));
+
+	/* P9N2_MCS_PORT02_MCPERF2 (?)
+	    [0-2]   = 1	// PF_DROP_VALUE0
+	    [3-5]   = 3	// PF_DROP_VALUE1
+	    [6-8]   = 5	// PF_DROP_VALUE2
+	    [9-11]  = 7	// PF_DROP_VALUE3
+	    [13-15] =	// REFRESH_BLOCK_CONFIG
+	      if has only one DIMM in MCA:
+	        0b000 : if master ranks = 1
+	        0b001 : if master ranks = 2
+	        0b100 : if master ranks = 4
+	      // Per allowable DIMM mixing rules, we cannot mix different numbers of ranks on any single port
+	      if has both DIMMs in MCA:
+	        0b010 : if master ranks = 1
+	        0b011 : if master ranks = 2
+	        0b100 : if master ranks = 4	// 4 mranks is the same for one and two DIMMs in MCA
+	    [16]    =	// ENABLE_REFRESH_BLOCK_SQ
+	    [17]    =	// ENABLE_REFRESH_BLOCK_NSQ, always the same value as [16]
+	      1 : if (1 < (DIMM0 + DIMM1 logical ranks) <= 8 && not (one DIMM, 4 mranks, 2H 3DS))
+	      0 : otherwise
+	    [18]    = 0		// ENABLE_REFRESH_BLOCK_DISP
+	    [28-31] = 0b0100	// SQ_LFSR_CNTL
+	    [50-54] = 0b11100	// NUM_RMW_BUF
+	    [61]    = ATTR_ENABLE_MEM_EARLY_DATA_SCOM	// EN_ALT_ECR_ERR, 0?
+	*/
+	uint64_t ref_blk_cfg = mranks == 4 ? 0x4 :
+			       mranks == 2 ? (n_dimms == 1 ? 0x1 : 0x3) :
+			       n_dimms == 1 ? 0x0 : 0x2;
+	uint64_t en_ref_blk = (log_ranks <= 1 || log_ranks > 8) ? 0 :
+			      (n_dimms == 1 && mranks == 4 && log_ranks == 8) ? 0 : 3;
+
+	scom_and_or_for_chiplet(chip, nest, 0x05010824 + mca_i * mca_mul,
+				/* and */
+				~(PPC_BITMASK(0, 11) | PPC_BITMASK(13, 18) | PPC_BITMASK(28, 31)
+				  | PPC_BITMASK(50, 54) | PPC_BIT(61)),
+				/* or */
+				PPC_PLACE(1, 0, 3) | PPC_PLACE(3, 3, 3) | PPC_PLACE(5, 6, 3)
+				| PPC_PLACE(7, 9, 3) /* PF_DROP_VALUEs */
+				| PPC_PLACE(ref_blk_cfg, 13, 3) | PPC_PLACE(en_ref_blk, 16, 2)
+				| PPC_PLACE(0x4, 28, 4) | PPC_PLACE(0x1C, 50, 5));
+
+	/* P9N2_MCS_PORT02_MCAMOC (?)
+	    [1]     = 0		// FORCE_PF_DROP0
+	    [4-28]  = 0x19fffff	// WRTO_AMO_COLLISION_RULES
+	    [29-31] = 1		// AMO_SIZE_SELECT, 128B_RW_64B_DATA
+	*/
+	scom_and_or_for_chiplet(chip, nest, 0x05010825 + mca_i * mca_mul,
+				~(PPC_BIT(1) | PPC_BITMASK(4, 31)),
+				PPC_PLACE(0x19FFFFF, 4, 25) | PPC_PLACE(1, 29, 3));
+
+	/* P9N2_MCS_PORT02_MCEPSQ (?)
+	    [0-7]   = 1	// JITTER_EPSILON
+	    // ATTR_PROC_EPS_READ_CYCLES_T* are calculated in 8.6
+	    // Rounded up?
+	    [8-15]  = (ATTR_PROC_EPS_READ_CYCLES_T0 + 6) / 4	// LOCAL_NODE_EPSILON
+	    [16-23] = (ATTR_PROC_EPS_READ_CYCLES_T1 + 6) / 4	// NEAR_NODAL_EPSILON
+	    [24-31] = (ATTR_PROC_EPS_READ_CYCLES_T1 + 6) / 4	// GROUP_EPSILON
+	    [32-39] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4	// REMOTE_NODAL_EPSILON
+	    [40-47] = (ATTR_PROC_EPS_READ_CYCLES_T2 + 6) / 4	// VECTOR_GROUP_EPSILON
+	*/
+	#define F(X) (((X) + 6) / 4)
+	scom_and_or_for_chiplet(chip, nest, 0x05010826 + mca_i * mca_mul, ~PPC_BITMASK(0, 47),
+				PPC_PLACE(1, 0, 8)
+				| PPC_PLACE(F(pb_cfg->eps_r[0]), 8, 8)
+				| PPC_PLACE(F(pb_cfg->eps_r[1]), 16, 8)
+				| PPC_PLACE(F(pb_cfg->eps_r[1]), 24, 8)
+				| PPC_PLACE(F(pb_cfg->eps_r[2]), 32, 8)
+				| PPC_PLACE(F(pb_cfg->eps_r[2]), 40, 8));
+	#undef F
+//~ static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 };	// T0, T1
+//~ static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 };	// T2
+
+	/* P9N2_MCS_PORT02_MCBUSYQ (?)
+ [0] = 1 // ENABLE_BUSY_COUNTERS + [1-3] = 1 // BUSY_COUNTER_WINDOW_SELECT, 1024 cycles + [4-13] = 38 // BUSY_COUNTER_THRESHOLD0 + [14-23] = 51 // BUSY_COUNTER_THRESHOLD1 + [24-33] = 64 // BUSY_COUNTER_THRESHOLD2 + */ + scom_and_or_for_chiplet(chip, nest, 0x05010827 + mca_i * mca_mul, ~PPC_BITMASK(0, 33), + PPC_BIT(0) | PPC_PLACE(1, 1, 3) | PPC_PLACE(38, 4, 10) + | PPC_PLACE(51, 14, 10) | PPC_PLACE(64, 24, 10)); + + /* P9N2_MCS_PORT02_MCPERF3 (?) + [31] = 1 // ENABLE_CL0 + [41] = 1 // ENABLE_AMO_MSI_RMW_ONLY + [43] = !ATTR_ENABLE_MEM_EARLY_DATA_SCOM // ENABLE_CP_M_MDI0_LOCAL_ONLY, !0 = 1? + [44] = 1 // DISABLE_WRTO_IG + [45] = 1 // AMO_LIMIT_SEL + */ + scom_or_for_chiplet(chip, nest, 0x0501082B + mca_i * mca_mul, + PPC_BIT(31) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(44) + | PPC_BIT(45)); + + /* MC01.PORT0.SRQ.MBA_DSM0Q = + // These are set per port so all latencies should be calculated from both DIMMs (if present) + [0-5] MBA_DSM0Q_CFG_RODT_START_DLY = ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + [6-11] MBA_DSM0Q_CFG_RODT_END_DLY = ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 5 + [12-17] MBA_DSM0Q_CFG_WODT_START_DLY = 0 + [18-23] MBA_DSM0Q_CFG_WODT_END_DLY = 5 + [24-29] MBA_DSM0Q_CFG_WRDONE_DLY = 24 + [30-35] MBA_DSM0Q_CFG_WRDATA_DLY = ATTR_EFF_DRAM_CWL + ATTR_MSS_EFF_DPHY_WLO - 8 + // Assume RDIMM, non-NVDIMM only + [36-41] MBA_DSM0Q_CFG_RDTAG_DLY = + MSS_FREQ_EQ_1866: ATTR_EFF_DRAM_CL + 7 + MSS_FREQ_EQ_2133: ATTR_EFF_DRAM_CL + 7 + MSS_FREQ_EQ_2400: ATTR_EFF_DRAM_CL + 8 + MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CL + 9 + */ + /* ATTR_MSS_EFF_DPHY_WLO = 1 from VPD, 3 from dump? */ + uint64_t rdtag_dly = mem_data[chip].speed == 2666 ? 9 : + mem_data[chip].speed == 2400 ? 8 : 7; + mca_and_or(chip, id, mca_i, MBA_DSM0Q, ~PPC_BITMASK(0, 41), + PPC_PLACE(mca->cl - mem_data[chip].cwl, MBA_DSM0Q_CFG_RODT_START_DLY, + MBA_DSM0Q_CFG_RODT_START_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data[chip].cwl + 5, MBA_DSM0Q_CFG_RODT_END_DLY, + MBA_DSM0Q_CFG_RODT_END_DLY_LEN) | + PPC_PLACE(5, MBA_DSM0Q_CFG_WODT_END_DLY, + MBA_DSM0Q_CFG_WODT_END_DLY_LEN) | + PPC_PLACE(24, MBA_DSM0Q_CFG_WRDONE_DLY, + MBA_DSM0Q_CFG_WRDONE_DLY_LEN) | + PPC_PLACE(mem_data[chip].cwl + /* 1 */ 3 - 8, MBA_DSM0Q_CFG_WRDATA_DLY, + MBA_DSM0Q_CFG_WRDATA_DLY_LEN) | + PPC_PLACE(mca->cl + rdtag_dly, MBA_DSM0Q_CFG_RDTAG_DLY, + MBA_DSM0Q_CFG_RDTAG_DLY_LEN)); + + /* MC01.PORT0.SRQ.MBA_TMR0Q = + [0-3] MBA_TMR0Q_RRDM_DLY = + MSS_FREQ_EQ_1866: 8 + MSS_FREQ_EQ_2133: 9 + MSS_FREQ_EQ_2400: 10 + MSS_FREQ_EQ_2666: 11 + [4-7] MBA_TMR0Q_RRSMSR_DLY = 4 + [8-11] MBA_TMR0Q_RRSMDR_DLY = 4 + [12-15] MBA_TMR0Q_RROP_DLY = ATTR_EFF_DRAM_TCCD_L + [16-19] MBA_TMR0Q_WWDM_DLY = + MSS_FREQ_EQ_1866: 8 + MSS_FREQ_EQ_2133: 9 + MSS_FREQ_EQ_2400: 10 + MSS_FREQ_EQ_2666: 11 + [20-23] MBA_TMR0Q_WWSMSR_DLY = 4 + [24-27] MBA_TMR0Q_WWSMDR_DLY = 4 + [28-31] MBA_TMR0Q_WWOP_DLY = ATTR_EFF_DRAM_TCCD_L + [32-36] MBA_TMR0Q_RWDM_DLY = // same as below + [37-41] MBA_TMR0Q_RWSMSR_DLY = // same as below + [42-46] MBA_TMR0Q_RWSMDR_DLY = + MSS_FREQ_EQ_1866: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 8 + MSS_FREQ_EQ_2133: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 9 + MSS_FREQ_EQ_2400: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 10 + MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CL - ATTR_EFF_DRAM_CWL + 11 + [47-50] MBA_TMR0Q_WRDM_DLY = + MSS_FREQ_EQ_1866: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 8 + MSS_FREQ_EQ_2133: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 9 + MSS_FREQ_EQ_2400: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 10 + MSS_FREQ_EQ_2666: ATTR_EFF_DRAM_CWL - ATTR_EFF_DRAM_CL + 11 + [51-56] MBA_TMR0Q_WRSMSR_DLY = // same as below + [57-62] 
MBA_TMR0Q_WRSMDR_DLY = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWTR_S + 4 + */ + uint64_t var_dly = mem_data[chip].speed == 2666 ? 11 : + mem_data[chip].speed == 2400 ? 10 : + mem_data[chip].speed == 2133 ? 9 : 8; + mca_and_or(chip, id, mca_i, MBA_TMR0Q, PPC_BIT(63), + PPC_PLACE(var_dly, MBA_TMR0Q_RRDM_DLY, MBA_TMR0Q_RRDM_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_RRSMSR_DLY, MBA_TMR0Q_RRSMSR_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_RRSMDR_DLY, MBA_TMR0Q_RRSMDR_DLY_LEN) | + PPC_PLACE(mca->nccd_l, MBA_TMR0Q_RROP_DLY, MBA_TMR0Q_RROP_DLY_LEN) | + PPC_PLACE(var_dly, MBA_TMR0Q_WWDM_DLY, MBA_TMR0Q_WWDM_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_WWSMSR_DLY, MBA_TMR0Q_WWSMSR_DLY_LEN) | + PPC_PLACE(4, MBA_TMR0Q_WWSMDR_DLY, MBA_TMR0Q_WWSMDR_DLY_LEN) | + PPC_PLACE(mca->nccd_l, MBA_TMR0Q_WWOP_DLY, MBA_TMR0Q_WWOP_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data[chip].cwl + var_dly, MBA_TMR0Q_RWDM_DLY, + MBA_TMR0Q_RWDM_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data[chip].cwl + var_dly, MBA_TMR0Q_RWSMSR_DLY, + MBA_TMR0Q_RWSMSR_DLY_LEN) | + PPC_PLACE(mca->cl - mem_data[chip].cwl + var_dly, MBA_TMR0Q_RWSMDR_DLY, + MBA_TMR0Q_RWSMDR_DLY_LEN) | + PPC_PLACE(mem_data[chip].cwl - mca->cl + var_dly, MBA_TMR0Q_WRDM_DLY, + MBA_TMR0Q_WRDM_DLY_LEN) | + PPC_PLACE(mem_data[chip].cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMSR_DLY, + MBA_TMR0Q_WRSMSR_DLY_LEN) | + PPC_PLACE(mem_data[chip].cwl + mca->nwtr_s + 4, MBA_TMR0Q_WRSMDR_DLY, + MBA_TMR0Q_WRSMDR_DLY_LEN)); + + /* MC01.PORT0.SRQ.MBA_TMR1Q = + [0-3] MBA_TMR1Q_RRSBG_DLY = ATTR_EFF_DRAM_TCCD_L + [4-9] MBA_TMR1Q_WRSBG_DLY = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWTR_L + 4 + [10-15] MBA_TMR1Q_CFG_TFAW = ATTR_EFF_DRAM_TFAW + [16-20] MBA_TMR1Q_CFG_TRCD = ATTR_EFF_DRAM_TRCD + [21-25] MBA_TMR1Q_CFG_TRP = ATTR_EFF_DRAM_TRP + [26-31] MBA_TMR1Q_CFG_TRAS = ATTR_EFF_DRAM_TRAS + [41-47] MBA_TMR1Q_CFG_WR2PRE = ATTR_EFF_DRAM_CWL + ATTR_EFF_DRAM_TWR + 4 + [48-51] MBA_TMR1Q_CFG_RD2PRE = ATTR_EFF_DRAM_TRTP + [52-55] MBA_TMR1Q_TRRD = ATTR_EFF_DRAM_TRRD_S + [56-59] MBA_TMR1Q_TRRD_SBG = ATTR_EFF_DRAM_TRRD_L + [60-63] MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY = // var_dly from above + MSS_FREQ_EQ_1866: 8 + MSS_FREQ_EQ_2133: 9 + MSS_FREQ_EQ_2400: 10 + MSS_FREQ_EQ_2666: 11 + */ + mca_and_or(chip, id, mca_i, MBA_TMR1Q, 0, + PPC_PLACE(mca->nccd_l, MBA_TMR1Q_RRSBG_DLY, MBA_TMR1Q_RRSBG_DLY_LEN) | + PPC_PLACE(mem_data[chip].cwl + mca->nwtr_l + 4, MBA_TMR1Q_WRSBG_DLY, + MBA_TMR1Q_WRSBG_DLY_LEN) | + PPC_PLACE(mca->nfaw, MBA_TMR1Q_CFG_TFAW, MBA_TMR1Q_CFG_TFAW_LEN) | + PPC_PLACE(mca->nrcd, MBA_TMR1Q_CFG_TRCD, MBA_TMR1Q_CFG_TRCD_LEN) | + PPC_PLACE(mca->nrp, MBA_TMR1Q_CFG_TRP, MBA_TMR1Q_CFG_TRP_LEN) | + PPC_PLACE(mca->nras, MBA_TMR1Q_CFG_TRAS, MBA_TMR1Q_CFG_TRAS_LEN) | + PPC_PLACE(mem_data[chip].cwl + mca->nwr + 4, MBA_TMR1Q_CFG_WR2PRE, + MBA_TMR1Q_CFG_WR2PRE_LEN) | + PPC_PLACE(mem_data[chip].nrtp, + MBA_TMR1Q_CFG_RD2PRE, + MBA_TMR1Q_CFG_RD2PRE_LEN) | + PPC_PLACE(mca->nrrd_s, MBA_TMR1Q_TRRD, MBA_TMR1Q_TRRD_LEN) | + PPC_PLACE(mca->nrrd_l, MBA_TMR1Q_TRRD_SBG, MBA_TMR1Q_TRRD_SBG_LEN) | + PPC_PLACE(var_dly, MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY, + MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY_LEN)); + + /* MC01.PORT0.SRQ.MBA_WRQ0Q = + [5] MBA_WRQ0Q_CFG_WRQ_FIFO_MODE = 0 // ATTR_MSS_REORDER_QUEUE_SETTING, 0 = reorder + [6] MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE = 1 + [55-58] MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING = 8 + */ + mca_and_or(chip, id, mca_i, MBA_WRQ0Q, + ~(PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE) | + PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | + PPC_BITMASK(55, 58)), + PPC_BIT(MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE) | + PPC_PLACE(8, MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING, + 
			     MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING_LEN));
+
+	/* MC01.PORT0.SRQ.MBA_RRQ0Q =
+	    [6]     MBA_RRQ0Q_CFG_RRQ_FIFO_MODE = 0	// ATTR_MSS_REORDER_QUEUE_SETTING
+	    [57-60] MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING = 8
+	*/
+	mca_and_or(chip, id, mca_i, MBA_RRQ0Q,
+		   ~(PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE) | PPC_BITMASK(57, 60)),
+		   PPC_PLACE(8, MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING,
+			     MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING_LEN));
+
+	/* MC01.PORT0.SRQ.MBA_FARB0Q =
+	    if (l_TGT3_ATTR_MSS_MRW_DRAM_2N_MODE == 0x02 || (l_TGT3_ATTR_MSS_MRW_DRAM_2N_MODE == 0x00 && l_TGT2_ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET == 0x02))
+	      [17] MBA_FARB0Q_CFG_2N_ADDR = 1	// Default is auto for mode, 1N from VPD, so [17] = 0
+	    [38] MBA_FARB0Q_CFG_PARITY_AFTER_CMD = 1
+	    [61-63] MBA_FARB0Q_CFG_OPT_RD_SIZE = 3
+	*/
+	mca_and_or(chip, id, mca_i, MBA_FARB0Q,
+		   ~(PPC_BIT(MBA_FARB0Q_CFG_2N_ADDR) |
+		     PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) |
+		     PPC_BITMASK(61, 63)),
+		   PPC_BIT(MBA_FARB0Q_CFG_PARITY_AFTER_CMD) |
+		   PPC_PLACE(3, MBA_FARB0Q_CFG_OPT_RD_SIZE, MBA_FARB0Q_CFG_OPT_RD_SIZE_LEN));
+
+	/* MC01.PORT0.SRQ.MBA_FARB1Q =
+	    [0-2]   MBA_FARB1Q_CFG_SLOT0_S0_CID = 0
+	    [3-5]   MBA_FARB1Q_CFG_SLOT0_S1_CID = 4
+	    [6-8]   MBA_FARB1Q_CFG_SLOT0_S2_CID = 2
+	    [9-11]  MBA_FARB1Q_CFG_SLOT0_S3_CID = 6
+	    if (DIMM0 is 8H 3DS)
+	      [12-14] MBA_FARB1Q_CFG_SLOT0_S4_CID = 1
+	      [15-17] MBA_FARB1Q_CFG_SLOT0_S5_CID = 5
+	      [18-20] MBA_FARB1Q_CFG_SLOT0_S6_CID = 3
+	      [21-23] MBA_FARB1Q_CFG_SLOT0_S7_CID = 7
+	    else
+	      [12-14] MBA_FARB1Q_CFG_SLOT0_S4_CID = 0
+	      [15-17] MBA_FARB1Q_CFG_SLOT0_S5_CID = 4
+	      [18-20] MBA_FARB1Q_CFG_SLOT0_S6_CID = 2
+	      [21-23] MBA_FARB1Q_CFG_SLOT0_S7_CID = 6
+	    if (DIMM0 has 4 master ranks)
+	      [12-14] MBA_FARB1Q_CFG_SLOT0_S4_CID = 4	// TODO: test if all slots with 4R DIMMs work with that
+	    [24-26] MBA_FARB1Q_CFG_SLOT1_S0_CID = 0
+	    [27-29] MBA_FARB1Q_CFG_SLOT1_S1_CID = 4
+	    [30-32] MBA_FARB1Q_CFG_SLOT1_S2_CID = 2
+	    [33-35] MBA_FARB1Q_CFG_SLOT1_S3_CID = 6
+	    if (DIMM1 is 8H 3DS)
+	      [36-38] MBA_FARB1Q_CFG_SLOT1_S4_CID = 1
+	      [39-41] MBA_FARB1Q_CFG_SLOT1_S5_CID = 5
+	      [42-44] MBA_FARB1Q_CFG_SLOT1_S6_CID = 3
+	      [45-47] MBA_FARB1Q_CFG_SLOT1_S7_CID = 7
+	    else
+	      [36-38] MBA_FARB1Q_CFG_SLOT1_S4_CID = 0
+	      [39-41] MBA_FARB1Q_CFG_SLOT1_S5_CID = 4
+	      [42-44] MBA_FARB1Q_CFG_SLOT1_S6_CID = 2
+	      [45-47] MBA_FARB1Q_CFG_SLOT1_S7_CID = 6
+	    if (DIMM1 has 4 master ranks)
+	      [36-38] MBA_FARB1Q_CFG_SLOT1_S4_CID = 4	// TODO: test if all slots with 4R DIMMs work with that
+	*/
+	/* Due to allowable DIMM mixing rules, ranks of both DIMMs are the same */
+	uint64_t cids_even = (0 << 9) | (4 << 6) | (2 << 3) | (6 << 0);
+	uint64_t cids_odd  = (1 << 9) | (5 << 6) | (3 << 3) | (7 << 0);
+	uint64_t cids_4_7 = is_8H ?
cids_odd : cids_even; + /* Not sure if this is even supported, there is no MT VPD data for this case */ + if (mranks == 4) + cids_4_7 = (cids_4_7 & ~(7ull << 9)) | (4 << 9); + + mca_and_or(chip, id, mca_i, MBA_FARB1Q, ~PPC_BITMASK(0, 47), + PPC_PLACE(cids_even, MBA_FARB1Q_CFG_SLOT0_S0_CID, 12) | + PPC_PLACE(cids_4_7, MBA_FARB1Q_CFG_SLOT0_S4_CID, 12) | + PPC_PLACE(cids_even, MBA_FARB1Q_CFG_SLOT1_S0_CID, 12) | + PPC_PLACE(cids_4_7, MBA_FARB1Q_CFG_SLOT1_S4_CID, 12)); + + /* MC01.PORT0.SRQ.MBA_FARB2Q = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of uint8_t X, big endian numbering + [0-3] MBA_FARB2Q_CFG_RANK0_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][0]) + [4-7] MBA_FARB2Q_CFG_RANK1_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][1]) + [8-11] MBA_FARB2Q_CFG_RANK2_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][2]) // always 0 + [12-15] MBA_FARB2Q_CFG_RANK3_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][0][3]) // always 0 + [16-19] MBA_FARB2Q_CFG_RANK4_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][0]) + [20-23] MBA_FARB2Q_CFG_RANK5_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][1]) + [24-27] MBA_FARB2Q_CFG_RANK6_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][2]) // always 0 + [28-31] MBA_FARB2Q_CFG_RANK7_RD_ODT = F(ATTR_MSS_VPD_MT_ODT_RD[l_def_PORT_INDEX][1][3]) // always 0 + [32-35] MBA_FARB2Q_CFG_RANK0_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][0]) + [36-39] MBA_FARB2Q_CFG_RANK1_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][1]) + [40-43] MBA_FARB2Q_CFG_RANK2_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][2]) // always 0 + [44-47] MBA_FARB2Q_CFG_RANK3_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][0][3]) // always 0 + [48-51] MBA_FARB2Q_CFG_RANK4_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][0]) + [52-55] MBA_FARB2Q_CFG_RANK5_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][1]) + [56-59] MBA_FARB2Q_CFG_RANK6_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][2]) // always 0 + [60-63] MBA_FARB2Q_CFG_RANK7_WR_ODT = F(ATTR_MSS_VPD_MT_ODT_WR[l_def_PORT_INDEX][1][3]) // always 0 + */ + #define F(X) ((((X) >> 4) & 0xc) | (((X) >> 2) & 0x3)) + mca_and_or(chip, id, mca_i, MBA_FARB2Q, 0, + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), + MBA_FARB2Q_CFG_RANK0_RD_ODT, MBA_FARB2Q_CFG_RANK0_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), + MBA_FARB2Q_CFG_RANK1_RD_ODT, MBA_FARB2Q_CFG_RANK1_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), + MBA_FARB2Q_CFG_RANK4_RD_ODT, MBA_FARB2Q_CFG_RANK4_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), + MBA_FARB2Q_CFG_RANK5_RD_ODT, MBA_FARB2Q_CFG_RANK5_RD_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), + MBA_FARB2Q_CFG_RANK0_WR_ODT, MBA_FARB2Q_CFG_RANK0_WR_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), + MBA_FARB2Q_CFG_RANK1_WR_ODT, MBA_FARB2Q_CFG_RANK1_WR_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), + MBA_FARB2Q_CFG_RANK4_WR_ODT, MBA_FARB2Q_CFG_RANK4_WR_ODT_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), + MBA_FARB2Q_CFG_RANK5_WR_ODT, MBA_FARB2Q_CFG_RANK5_WR_ODT_LEN) ); + #undef F + + /* MC01.PORT0.SRQ.PC.MBAREF0Q = + [5-7] MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD = 3 + [8-18] MBAREF0Q_CFG_REFRESH_INTERVAL = ATTR_EFF_DRAM_TREFI / (8 * (DIMM0 + DIMM1 logical ranks)) + [30-39] MBAREF0Q_CFG_TRFC = ATTR_EFF_DRAM_TRFC + [40-49] MBAREF0Q_CFG_REFR_TSV_STACK = ATTR_EFF_DRAM_TRFC_DLR + [50-60] MBAREF0Q_CFG_REFR_CHECK_INTERVAL 
= ((ATTR_EFF_DRAM_TREFI / 8) * 6) / 5 + */ + /* + * Hostboot writes slightly lower REFR_CHECK_INTERVAL, 1544 vs 1560, because + * it uses 99% of tREFI in 7.4 in eff_dimm::dram_trefi(). If this causes any + * issues we can do the same, but for now let's try to avoid floating point + * arithmetic. + */ + mca_and_or(chip, id, mca_i, MBAREF0Q, ~(PPC_BITMASK(5, 18) | PPC_BITMASK(30, 60)), + PPC_PLACE(3, + MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD, + MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD_LEN) | + PPC_PLACE(mem_data[chip].nrefi / (8 * 2 * log_ranks), + MBAREF0Q_CFG_REFRESH_INTERVAL, + MBAREF0Q_CFG_REFRESH_INTERVAL_LEN) | + PPC_PLACE(mca->nrfc, + MBAREF0Q_CFG_TRFC, + MBAREF0Q_CFG_TRFC_LEN) | + PPC_PLACE(mca->nrfc_dlr, + MBAREF0Q_CFG_REFR_TSV_STACK, + MBAREF0Q_CFG_REFR_TSV_STACK_LEN) | + PPC_PLACE(((mem_data[chip].nrefi / 8) * 6) / 5, + MBAREF0Q_CFG_REFR_CHECK_INTERVAL, + MBAREF0Q_CFG_REFR_CHECK_INTERVAL_LEN)); + + /* MC01.PORT0.SRQ.PC.MBARPC0Q = + [6-10] MBARPC0Q_CFG_PUP_AVAIL = + MSS_FREQ_EQ_1866: 6 + MSS_FREQ_EQ_2133: 7 + MSS_FREQ_EQ_2400: 8 + MSS_FREQ_EQ_2666: 9 + [11-15] MBARPC0Q_CFG_PDN_PUP = + MSS_FREQ_EQ_1866: 5 + MSS_FREQ_EQ_2133: 6 + MSS_FREQ_EQ_2400: 6 + MSS_FREQ_EQ_2666: 7 + [16-20] MBARPC0Q_CFG_PUP_PDN = + MSS_FREQ_EQ_1866: 5 + MSS_FREQ_EQ_2133: 6 + MSS_FREQ_EQ_2400: 6 + MSS_FREQ_EQ_2666: 7 + [21] MBARPC0Q_RESERVED_21 = // MCP_PORT0_SRQ_PC_MBARPC0Q_CFG_QUAD_RANK_ENC + (l_def_MASTER_RANKS_DIMM0 == 4): 1 + (l_def_MASTER_RANKS_DIMM0 != 4): 0 + */ + /* Perhaps these can be done by ns_to_nck(), but Hostboot used a forest of ifs */ + uint64_t pup_avail = mem_data[chip].speed == 1866 ? 6 : + mem_data[chip].speed == 2133 ? 7 : + mem_data[chip].speed == 2400 ? 8 : 9; + uint64_t p_up_dn = mem_data[chip].speed == 1866 ? 5 : + mem_data[chip].speed == 2666 ? 7 : 6; + mca_and_or(chip, id, mca_i, MBARPC0Q, ~PPC_BITMASK(6, 21), + PPC_PLACE(pup_avail, MBARPC0Q_CFG_PUP_AVAIL, MBARPC0Q_CFG_PUP_AVAIL_LEN) | + PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PDN_PUP, MBARPC0Q_CFG_PDN_PUP_LEN) | + PPC_PLACE(p_up_dn, MBARPC0Q_CFG_PUP_PDN, MBARPC0Q_CFG_PUP_PDN_LEN) | + (mranks == 4 ? PPC_BIT(MBARPC0Q_RESERVED_21) : 0)); + + /* MC01.PORT0.SRQ.PC.MBASTR0Q = + [12-16] MBASTR0Q_CFG_TCKESR = 5 + [17-21] MBASTR0Q_CFG_TCKSRE = + MSS_FREQ_EQ_1866: 10 + MSS_FREQ_EQ_2133: 11 + MSS_FREQ_EQ_2400: 12 + MSS_FREQ_EQ_2666: 14 + [22-26] MBASTR0Q_CFG_TCKSRX = + MSS_FREQ_EQ_1866: 10 + MSS_FREQ_EQ_2133: 11 + MSS_FREQ_EQ_2400: 12 + MSS_FREQ_EQ_2666: 14 + [27-37] MBASTR0Q_CFG_TXSDLL = + MSS_FREQ_EQ_1866: 597 + MSS_FREQ_EQ_2133: 768 + MSS_FREQ_EQ_2400: 768 + MSS_FREQ_EQ_2666: 939 + [46-56] MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL = ATTR_EFF_DRAM_TREFI / (8 * (DIMM0 + DIMM1 logical ranks)) + */ + uint64_t tcksr_ex = mem_data[chip].speed == 1866 ? 10 : + mem_data[chip].speed == 2133 ? 11 : + mem_data[chip].speed == 2400 ? 12 : 14; + uint64_t txsdll = mem_data[chip].speed == 1866 ? 597 : + mem_data[chip].speed == 2666 ? 
939 : 768; + mca_and_or(chip, id, mca_i, MBASTR0Q, ~(PPC_BITMASK(12, 37) | PPC_BITMASK(46, 56)), + PPC_PLACE(5, MBASTR0Q_CFG_TCKESR, MBASTR0Q_CFG_TCKESR_LEN) | + PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRE, MBASTR0Q_CFG_TCKSRE_LEN) | + PPC_PLACE(tcksr_ex, MBASTR0Q_CFG_TCKSRX, MBASTR0Q_CFG_TCKSRX_LEN) | + PPC_PLACE(txsdll, MBASTR0Q_CFG_TXSDLL, MBASTR0Q_CFG_TXSDLL_LEN) | + PPC_PLACE(mem_data[chip].nrefi / + (8 * (mca->dimm[0].log_ranks + mca->dimm[1].log_ranks)), + MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL, + MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL_LEN)); + + /* MC01.PORT0.ECC64.SCOM.RECR = + [16-18] MBSECCQ_VAL_TO_DATA_DELAY = + l_TGT4_ATTR_MC_SYNC_MODE == 1: 5 + l_def_mn_freq_ratio < 915: 3 + l_def_mn_freq_ratio < 1150: 4 + l_def_mn_freq_ratio < 1300: 5 + l_def_mn_freq_ratio >= 1300: 6 + [19] MBSECCQ_DELAY_VALID_1X = 0 + [20-21] MBSECCQ_NEST_VAL_TO_DATA_DELAY = + l_TGT4_ATTR_MC_SYNC_MODE == 1: 1 + l_def_mn_freq_ratio < 1040: 1 + l_def_mn_freq_ratio < 1150: 0 + l_def_mn_freq_ratio < 1215: 1 + l_def_mn_freq_ratio < 1300: 0 + l_def_mn_freq_ratio < 1400: 1 + l_def_mn_freq_ratio >= 1400: 0 + [22] MBSECCQ_DELAY_NONBYPASS = + l_TGT4_ATTR_MC_SYNC_MODE == 1: 0 + l_def_mn_freq_ratio < 1215: 0 + l_def_mn_freq_ratio >= 1215: 1 + [40] MBSECCQ_RESERVED_36_43 = // MCP_PORT0_ECC64_ECC_SCOM_MBSECCQ_BYPASS_TENURE_3 + l_TGT4_ATTR_MC_SYNC_MODE == 1: 0 + l_TGT4_ATTR_MC_SYNC_MODE == 0: 1 + */ + /* Assume asynchronous mode */ + /* + * From Hostboot: + * l_def_mn_freq_ratio = 1000 * ATTR_MSS_FREQ / ATTR_FREQ_PB_MHZ; + * ATTR_MSS_FREQ is in MT/s (sigh). + */ + uint32_t pb_freq = pb_cfg->fabric_freq; + uint64_t mn_freq_ratio = 1000 * mem_data[chip].speed / pb_freq; + uint64_t val_to_data = mn_freq_ratio < 915 ? 3 : + mn_freq_ratio < 1150 ? 4 : + mn_freq_ratio < 1300 ? 5 : 6; + uint64_t nest_val_to_data = mn_freq_ratio < 1040 ? 1 : + mn_freq_ratio < 1150 ? 0 : + mn_freq_ratio < 1215 ? 1 : + mn_freq_ratio < 1300 ? 0 : + mn_freq_ratio < 1400 ? 1 : 0; + mca_and_or(chip, id, mca_i, RECR, ~(PPC_BITMASK(16, 22) | PPC_BIT(MBSECCQ_RESERVED_40)), + PPC_PLACE(val_to_data, MBSECCQ_VAL_TO_DATA_DELAY, + MBSECCQ_VAL_TO_DATA_DELAY_LEN) | + PPC_PLACE(nest_val_to_data, MBSECCQ_NEST_VAL_TO_DATA_DELAY, + MBSECCQ_NEST_VAL_TO_DATA_DELAY_LEN) | + (mn_freq_ratio < 1215 ? 0 : PPC_BIT(MBSECCQ_DELAY_NONBYPASS)) | + PPC_BIT(MBSECCQ_RESERVED_40)); + + /* MC01.PORT0.ECC64.SCOM.DBGR = + [9] DBGR_ECC_WAT_ACTION_SELECT = 0 + [10-11] DBGR_ECC_WAT_SOURCE = 0 + */ + mca_and_or(chip, id, mca_i, DBGR, ~PPC_BITMASK(9, 11), 0); + + /* MC01.PORT0.WRITE.WRTCFG = + [9] = 1 // MCP_PORT0_WRITE_NEW_WRITE_64B_MODE this is marked as RO const 0 for bits 8-63 in docs! 
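 Hostboot's initfile writes it regardless, so presumably the docs are stale; we follow suit.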
+ */ + mca_and_or(chip, id, mca_i, WRTCFG, ~0ull, PPC_BIT(9)); +} + +static void thermal_throttle_scominit(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* Set power control register */ + /* MC01.PORT0.SRQ.PC.MBARPC0Q = + [3-5] MBARPC0Q_CFG_MIN_MAX_DOMAINS = 0 + [22] MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_ENABLE = + if ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_OFF: 0 // default + else: 1 + [23-32] MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME = 959 + */ + mca_and_or(chip, id, mca_i, MBARPC0Q, ~(PPC_BITMASK(3, 5) | PPC_BITMASK(22, 32)), + PPC_PLACE(959, MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME, + MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME_LEN )); + + /* Set STR register */ + /* MC01.PORT0.SRQ.PC.MBASTR0Q = + [0] MBASTR0Q_CFG_STR_ENABLE = + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR: 1 + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_CLK_STOP: 1 + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == POWER_DOWN: 0 + ATTR_MSS_MRW_POWER_CONTROL_REQUESTED == PD_AND_STR_OFF: 0 // default + [2-11] MBASTR0Q_CFG_ENTER_STR_TIME = 1023 + */ + mca_and_or(chip, id, mca_i, MBASTR0Q, ~(PPC_BIT(0) | PPC_BITMASK(2, 11)), + PPC_PLACE(1023, MBASTR0Q_CFG_ENTER_STR_TIME, + MBASTR0Q_CFG_ENTER_STR_TIME_LEN)); + + /* Set N/M throttling control register */ + /* MC01.PORT0.SRQ.MBA_FARB3Q = + [0-14] MBA_FARB3Q_CFG_NM_N_PER_SLOT = ATTR_MSS_RUNTIME_MEM_THROTTLED_N_COMMANDS_PER_SLOT[mss::index(MCA)] + [15-30] MBA_FARB3Q_CFG_NM_N_PER_PORT = ATTR_MSS_RUNTIME_MEM_THROTTLED_N_COMMANDS_PER_PORT[mss::index(MCA)] + [31-44] MBA_FARB3Q_CFG_NM_M = ATTR_MSS_MRW_MEM_M_DRAM_CLOCKS // default 0x200 + [45-47] MBA_FARB3Q_CFG_NM_RAS_WEIGHT = 0 + [48-50] MBA_FARB3Q_CFG_NM_CAS_WEIGHT = 1 + // Set to disable permanently due to hardware design bug (HW403028) that won't be changed + [53] MBA_FARB3Q_CFG_NM_CHANGE_AFTER_SYNC = 0 + */ + /* + * Values of m_dram_clocks and nm_throttled_n_per_port come from talos.xml + * nm_n_per_slot and nm_n_per_port are derived from values in talos.xml + * + * All of them may be different for other platforms + */ + uint64_t nm_n_per_slot = 0x80; + uint64_t nm_n_per_port = 0x80; + uint64_t m_dram_clocks = 0x200; + mca_and_or(chip, id, mca_i, MBA_FARB3Q, ~(PPC_BITMASK(0, 50) | PPC_BIT(53)), + PPC_PLACE(nm_n_per_slot, MBA_FARB3Q_CFG_NM_N_PER_SLOT, + MBA_FARB3Q_CFG_NM_N_PER_SLOT_LEN) | + PPC_PLACE(nm_n_per_port, MBA_FARB3Q_CFG_NM_N_PER_PORT, + MBA_FARB3Q_CFG_NM_N_PER_PORT_LEN) | + PPC_PLACE(m_dram_clocks, MBA_FARB3Q_CFG_NM_M, + MBA_FARB3Q_CFG_NM_M_LEN) | + PPC_PLACE(1, MBA_FARB3Q_CFG_NM_CAS_WEIGHT, + MBA_FARB3Q_CFG_NM_CAS_WEIGHT_LEN)); + + /* Set safemode throttles */ + /* MC01.PORT0.SRQ.MBA_FARB4Q = + [27-41] MBA_FARB4Q_EMERGENCY_N = ATTR_MSS_MRW_SAFEMODE_MEM_THROTTLED_N_COMMANDS_PER_PORT[mss::index(MCA)] + [42-55] MBA_FARB4Q_EMERGENCY_M = ATTR_MSS_MRW_MEM_M_DRAM_CLOCKS + */ + uint64_t nm_throttled_n_per_port = 0x20; + mca_and_or(chip, id, mca_i, MBA_FARB4Q, ~PPC_BITMASK(27, 55), + PPC_PLACE(nm_throttled_n_per_port, MBA_FARB4Q_EMERGENCY_N, + MBA_FARB4Q_EMERGENCY_N_LEN) | + PPC_PLACE(m_dram_clocks, MBA_FARB4Q_EMERGENCY_M, + MBA_FARB4Q_EMERGENCY_M_LEN)); +} + +/* + * Values set in this function are mostly for magic MCA, other (functional) MCAs + * are set later. If all of these registers are later written with proper values + * for functional MCAs, maybe this can be called just for magic, non-functional + * ones to save time, but for now do it in a way the Hostboot does it. 
+ */ +static void p9n_ddrphy_scom(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + /* + * Hostboot sets this to proper value in phy_scominit(), but I don't see + * why. Speed is the same for whole MCBIST anyway. + */ + uint64_t strength = mem_data[chip].speed == 1866 ? 1 : + mem_data[chip].speed == 2133 ? 2 : + mem_data[chip].speed == 2400 ? 4 : 8; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_DLL_VREG_CONTROL0_P0_{0,1,2,3,4} = + [48-50] RXREG_VREG_COMPCON_DC = 3 + [52-59] = 0x74: + [53-55] RXREG_VREG_DRVCON_DC = 0x7 + [56-58] RXREG_VREG_REF_SEL_DC = 0x2 + [62-63] = 0: + [62] DLL_DRVREN_MODE = POWER8 mode (thermometer style, enabling all drivers up to the one that is used) + [63] DLL_CAL_CKTS_ACTIVE = After VREG calibration, some analog circuits are powered down + */ + /* Same as default value after reset? */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0, + ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), + PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | + PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | + PPC_PLACE(2, RXREG_VREG_REF_SEL_DC, RXREG_VREG_REF_SEL_DC_LEN)); + + /* IOM0.DDRPHY_DP16_DLL_VREG_CONTROL1_P0_{0,1,2,3,4} = + [48-50] RXREG_VREG_COMPCON_DC = 3 + [52-59] = 0x74: + [53-55] RXREG_VREG_DRVCON_DC = 0x7 + [56-58] RXREG_VREG_REF_SEL_DC = 0x2 + [62-63] = 0: + [62] DLL_DRVREN_MODE = POWER8 mode (thermometer style, enabling all drivers up to the one that is used) + [63] DLL_CAL_CKTS_ACTIVE = After VREG calibration, some analog circuits are powered down + */ + /* Same as default value after reset? */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0, + ~(PPC_BITMASK(48, 50) | PPC_BITMASK(52, 59) | PPC_BITMASK(62, 63)), + PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | + PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | + PPC_PLACE(2, RXREG_VREG_REF_SEL_DC, RXREG_VREG_REF_SEL_DC_LEN)); + + /* IOM0.DDRPHY_DP16_WRCLK_PR_P0_{0,1,2,3,4} = + // For zero delay simulations, or simulations where the delay of the SysClk tree and the WrClk tree are equal, + // set this field to 60h + [49-55] TSYS_WRCLK = 0x60 + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, + ~PPC_BITMASK(49, 55), + PPC_PLACE(0x60, TSYS_WRCLK, TSYS_WRCLK_LEN)); + + /* IOM0.DDRPHY_DP16_IO_TX_CONFIG0_P0_{0,1,2,3,4} = + [48-51] STRENGTH = 0x4 // 2400 MT/s + [52] DD2_RESET_READ_FIX_DISABLE = 0 // Enable the DD2 function to remove the register reset on read feature + // on status registers + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_CONFIG0_P0_0, + ~PPC_BITMASK(48, 52), + PPC_PLACE(strength, DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH, + DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH_LEN)); + + /* IOM0.DDRPHY_DP16_DLL_CONFIG1_P0_{0,1,2,3,4} = + [48-63] = 0x0006: + [48-51] HS_DLLMUX_SEL_0_0_3 = 0 + [53-56] HS_DLLMUX_SEL_1_0_3 = 0 + [61] S0INSDLYTAP = 1 // For proper functional operation, this bit must be 0b + [62] S1INSDLYTAP = 1 // For proper functional operation, this bit must be 0b + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DLL_CONFIG1_P0_0, + ~(PPC_BITMASK(48, 63)), + PPC_BIT(S0INSDLYTAP) | PPC_BIT(S1INSDLYTAP)); + + /* IOM0.DDRPHY_DP16_IO_TX_FET_SLICE_P0_{0,1,2,3,4} = + [48-63] = 0x7f7f: + [49-55] EN_SLICE_N_WR = 0x7f + [57-63] EN_SLICE_P_WR = 0x7f + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, + ~PPC_BITMASK(48, 63), + PPC_PLACE(0x7F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN) | + PPC_PLACE(0x7F, 
EN_SLICE_P_WR, EN_SLICE_P_WR_LEN)); + } + + for (dp = 0; dp < 4; dp++) { + /* IOM0.DDRPHY_ADR_BIT_ENABLE_P0_ADR{0,1,2,3} = + [48-63] = 0xffff + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_ADR_BIT_ENABLE_P0_ADR0, + ~PPC_BITMASK(48, 63), PPC_PLACE(0xFFFF, 48, 16)); + } + + /* IOM0.DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1 = + [48-63] = 0x5000: + [49] DI_ADR2_ADR3: 1 = Lanes 2 and 3 are a differential clock pair + [51] DI_ADR6_ADR7: 1 = Lanes 6 and 7 are a differential clock pair + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1, + ~PPC_BITMASK(48, 63), PPC_PLACE(0x5000, 48, 16)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = + [48-63] = 0x4040: + [49-55] ADR_DELAY2 = 0x40 + [57-63] ADR_DELAY3 = 0x40 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, + ~PPC_BITMASK(48, 63), PPC_PLACE(0x4040, 48, 16)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = + [48-63] = 0x4040: + [49-55] ADR_DELAY6 = 0x40 + [57-63] ADR_DELAY7 = 0x40 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, + ~PPC_BITMASK(48, 63), PPC_PLACE(0x4040, 48, 16)); + + for (dp = 0; dp < 2; dp ++) { + /* IOM0.DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S{0,1} = + [48-63] = 0x0008: + [48-51] HS_DLLMUX_SEL_0_3 = 0 + [59-62] STRENGTH = 4 // 2400 MT/s + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0, + ~PPC_BITMASK(48, 63), + PPC_PLACE(strength, DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH, + DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH_LEN)); + + /* IOM0.DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S{0,1} = + [48-63] = 0x6000 + // For zero delay simulations, or simulations where the delay of the + // SysClk tree and the WrClk tree are equal, set this field to 60h + [49-55] TSYS_WRCLK = 0x60 + */ + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + ~PPC_BITMASK(48, 63), + PPC_PLACE(0x60, TSYS_WRCLK, TSYS_WRCLK_LEN)); + + /* IOM0.DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S{0,1} = + [48-50] RXREG_VREG_COMPCON_DC = 3 + [52-59] = 0x74: + [53-55] RXREG_VREG_DRVCON_DC = 0x7 + [56-58] RXREG_VREG_REF_SEL_DC = 0x2 + [63] DLL_CAL_CKTS_ACTIVE = 0 // After VREG calibration, some analog circuits are powered down + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0, + ~PPC_BITMASK(48, 63), + PPC_PLACE(3, RXREG_VREG_COMPCON_DC, RXREG_VREG_COMPCON_DC_LEN) | + PPC_PLACE(7, RXREG_VREG_DRVCON_DC, RXREG_VREG_DRVCON_DC_LEN) | + PPC_PLACE(2, RXREG_VREG_REF_SEL_DC, RXREG_VREG_REF_SEL_DC_LEN)); + } + + /* IOM0.DDRPHY_PC_CONFIG0_P0 = + [48-63] = 0x0202: + [48-51] PDA_ENABLE_OVERRIDE = 0 + [52] 2TCK_PREAMBLE_ENABLE = 0 + [53] PBA_ENABLE = 0 + [54] DDR4_CMD_SIG_REDUCTION = 1 + [55] SYSCLK_2X_MEMINTCLKO = 0 + [56] RANK_OVERRIDE = 0 + [57-59] RANK_OVERRIDE_VALUE = 0 + [60] LOW_LATENCY = 0 + [61] DDR4_IPW_LOOP_DIS = 0 + [62] DDR4_VLEVEL_BANK_GROUP = 1 + [63] VPROTH_PSEL_MODE = 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_CONFIG0_P0, ~PPC_BITMASK(48, 63), + PPC_BIT(DDR4_CMD_SIG_REDUCTION) | + PPC_BIT(DDR4_VLEVEL_BANK_GROUP)); +} + +static void p9n_mcbist_scom(uint8_t chip, int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0AQ = + [0-47] WATCFG0AQ_CFG_WAT_EVENT_SEL = 0x400000000000 + */ + scom_and_or_for_chiplet(chip, id, WATCFG0AQ, ~PPC_BITMASK(0, 47), + PPC_PLACE(0x400000000000, WATCFG0AQ_CFG_WAT_EVENT_SEL, + WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0BQ = + [0-43] WATCFG0BQ_CFG_WAT_MSKA = 0x3fbfff + [44-60] WATCFG0BQ_CFG_WAT_CNTL = 0x10000 + */ + scom_and_or_for_chiplet(chip, id, WATCFG0BQ, 
~PPC_BITMASK(0, 60), + PPC_PLACE(0x3fbfff, WATCFG0BQ_CFG_WAT_MSKA, + WATCFG0BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10000, WATCFG0BQ_CFG_WAT_CNTL, + WATCFG0BQ_CFG_WAT_CNTL_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG0DQ = + [0-43] WATCFG0DQ_CFG_WAT_PATA = 0x80200004000 + */ + scom_and_or_for_chiplet(chip, id, WATCFG0DQ, ~PPC_BITMASK(0, 43), + PPC_PLACE(0x80200004000, WATCFG0DQ_CFG_WAT_PATA, + WATCFG0DQ_CFG_WAT_PATA_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3AQ = + [0-47] WATCFG3AQ_CFG_WAT_EVENT_SEL = 0x800000000000 + */ + scom_and_or_for_chiplet(chip, id, WATCFG3AQ, ~PPC_BITMASK(0, 47), + PPC_PLACE(0x800000000000, WATCFG3AQ_CFG_WAT_EVENT_SEL, + WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.WATCFG3BQ = + [0-43] WATCFG3BQ_CFG_WAT_MSKA = 0xfffffffffff + [44-60] WATCFG3BQ_CFG_WAT_CNTL = 0x10400 + */ + scom_and_or_for_chiplet(chip, id, WATCFG3BQ, ~PPC_BITMASK(0, 60), + PPC_PLACE(0xfffffffffff, WATCFG3BQ_CFG_WAT_MSKA, + WATCFG3BQ_CFG_WAT_MSKA_LEN) | + PPC_PLACE(0x10400, WATCFG3BQ_CFG_WAT_CNTL, + WATCFG3BQ_CFG_WAT_CNTL_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ = + [36] MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE = 0 + */ + scom_and_for_chiplet(chip, id, MCBCFGQ, ~PPC_BIT(MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG0Q = + [0] DBGCFG0Q_CFG_DBG_ENABLE = 1 + [23-33] DBGCFG0Q_CFG_DBG_PICK_MCBIST01 = 0x780 + */ + scom_and_or_for_chiplet(chip, id, DBGCFG0Q, ~PPC_BITMASK(23, 33), + PPC_BIT(DBGCFG0Q_CFG_DBG_ENABLE) | + PPC_PLACE(0x780, DBGCFG0Q_CFG_DBG_PICK_MCBIST01, + DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG1Q = + [0] DBGCFG1Q_CFG_WAT_ENABLE = 1 + */ + scom_or_for_chiplet(chip, id, DBGCFG1Q, PPC_BIT(DBGCFG1Q_CFG_WAT_ENABLE)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG2Q = + [0-19] DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL = 0x10000 + [20-39] DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL = 0x08000 + */ + scom_and_or_for_chiplet(chip, id, DBGCFG2Q, ~PPC_BITMASK(0, 39), + PPC_PLACE(0x10000, DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN) | + PPC_PLACE(0x08000, DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL, + DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN)); + + /* MC01.MCBIST.MBA_SCOMFIR.DBGCFG3Q = + [20-22] DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL = 0x4 + [23-25] DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL = 0x4 + [37-40] DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE = 0x4 + */ + scom_and_or_for_chiplet(chip, id, DBGCFG3Q, + ~(PPC_BITMASK(20, 25) | PPC_BITMASK(37, 40)), + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL, + DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN) | + PPC_PLACE(0x4, DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE, + DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN)); +} + +static void set_rank_pairs(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + /* + * Assumptions: + * - non-LR DIMMs (platform wiki), + * - no ATTR_EFF_RANK_GROUP_OVERRIDE, + * - mixing rules followed - the same rank configuration for both DIMMs. + * + * Because rank pairs are defined for each MCA, we can only have up to two + * 2R DIMMs. For such configurations, RP0 primary is rank 0 on DIMM 0, + * RP1 primary - rank 1 DIMM 0, RP2 primary - rank 0 DIMM 1, + * RP3 primary - rank 1 DIMM 1. There are no secondary (this is true for + * RDIMM only), tertiary or quaternary rank pairs. + */ + + static const uint16_t F[] = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff}; + + /* TODO: can we mix mirrored and non-mirrored 2R DIMMs in one port? 
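 Presumably not - the assumptions below treat SPD[136] as common to both DIMMs.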
*/ + + /* IOM0.DDRPHY_PC_RANK_PAIR0_P0 = + // rank_countX is the number of master ranks on DIMM X. + [48-63] = 0x1537 & F[rank_count0]: // F = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff} + [48-50] RANK_PAIR0_PRI = 0 + [51] RANK_PAIR0_PRI_V = 1: if (rank_count0 >= 1) + [52-54] RANK_PAIR0_SEC = 2 + [55] RANK_PAIR0_SEC_V = 1: if (rank_count0 >= 3) + [56-58] RANK_PAIR1_PRI = 1 + [59] RANK_PAIR1_PRI_V = 1: if (rank_count0 >= 2) + [60-62] RANK_PAIR1_SEC = 3 + [63] RANK_PAIR1_SEC_V = 1: if (rank_count0 == 4) + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR0_P0, ~PPC_BITMASK(48, 63), + PPC_PLACE(0x1537 & F[mca->dimm[0].mranks], 48, 16)); + + /* IOM0.DDRPHY_PC_RANK_PAIR1_P0 = + [48-63] = 0x1537 & F[rank_count1]: // F = {0, 0xf000, 0xf0f0, 0xfff0, 0xffff} + [48-50] RANK_PAIR2_PRI = 0 + [51] RANK_PAIR2_PRI_V = 1: if (rank_count1 >= 1) + [52-54] RANK_PAIR2_SEC = 2 + [55] RANK_PAIR2_SEC_V = 1: if (rank_count1 >= 3) + [56-58] RANK_PAIR3_PRI = 1 + [59] RANK_PAIR3_PRI_V = 1: if (rank_count1 >= 2) + [60-62] RANK_PAIR3_SEC = 3 + [63] RANK_PAIR3_SEC_V = 1: if (rank_count1 == 4) + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR1_P0, ~PPC_BITMASK(48, 63), + PPC_PLACE(0x1537 & F[mca->dimm[1].mranks], 48, 16)); + + /* IOM0.DDRPHY_PC_RANK_PAIR2_P0 = + [48-63] = 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR2_P0, ~PPC_BITMASK(48, 63), 0); + + /* IOM0.DDRPHY_PC_RANK_PAIR3_P0 = + [48-63] = 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_RANK_PAIR3_P0, ~PPC_BITMASK(48, 63), 0); + + /* IOM0.DDRPHY_PC_CSID_CFG_P0 = + [0-63] 0xf000: + [48] CS0_INIT_CAL_VALUE = 1 + [49] CS1_INIT_CAL_VALUE = 1 + [50] CS2_INIT_CAL_VALUE = 1 + [51] CS3_INIT_CAL_VALUE = 1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_CSID_CFG_P0, ~PPC_BITMASK(48, 63), + PPC_PLACE(0xF000, 48, 16)); + + /* IOM0.DDRPHY_PC_MIRROR_CONFIG_P0 = + [all] = 0 + // A rank is mirrored if all are true: + // - the rank is valid (RANK_PAIRn_XXX_V == 1) + // - the rank is odd (RANK_PAIRn_XXX % 2 == 1) + // - the mirror mode attribute is set for the rank's DIMM (SPD[136]) + // - We are not in quad encoded mode (so master ranks <= 2) + [48] ADDR_MIRROR_RP0_PRI + [49] ADDR_MIRROR_RP0_SEC + [50] ADDR_MIRROR_RP1_PRI + [51] ADDR_MIRROR_RP1_SEC + [52] ADDR_MIRROR_RP2_PRI + [53] ADDR_MIRROR_RP2_SEC + [54] ADDR_MIRROR_RP3_PRI + [55] ADDR_MIRROR_RP3_SEC + [58] ADDR_MIRROR_A3_A4 = 1 + [59] ADDR_MIRROR_A5_A6 = 1 + [60] ADDR_MIRROR_A7_A8 = 1 + [61] ADDR_MIRROR_A11_A13 = 1 + [62] ADDR_MIRROR_BA0_BA1 = 1 + [63] ADDR_MIRROR_BG0_BG1 = 1 + */ + /* + * Assumptions: + * - primary and secondary have the same evenness, + * - RP1 and RP3 have odd ranks, + * - both DIMMs have SPD[136] set or both have it unset, no mixing allowed, + * - when rank is not valid, it doesn't matter if it is mirrored, + * - no quad encoded mode - no data for it in MT VPD anyway. + * + * With all of the above, ADDR_MIRROR_RP{1,3}_{PRI,SEC} = SPD[136]. + */ + uint64_t mirr = mca->dimm[0].present ? mca->dimm[0].spd[136] : + mca->dimm[1].spd[136]; + mca_and_or(chip, id, mca_i, DDRPHY_PC_MIRROR_CONFIG_P0, ~PPC_BITMASK(48, 63), + PPC_PLACE(mirr, ADDR_MIRROR_RP1_PRI, 1) | + PPC_PLACE(mirr, ADDR_MIRROR_RP1_SEC, 1) | + PPC_PLACE(mirr, ADDR_MIRROR_RP3_PRI, 1) | + PPC_PLACE(mirr, ADDR_MIRROR_RP3_SEC, 1) | + PPC_BITMASK(58, 63)); + + /* IOM0.DDRPHY_PC_RANK_GROUP_EXT_P0 = // 0x8000C0350701103F + [all] = 0 + // Same rules as above + [48] ADDR_MIRROR_RP0_TER + ... + [55] ADDR_MIRROR_RP3_QUA + */ + /* These are not valid anyway, so don't bother setting anything. 
+ */
+}
+
+static void reset_data_bit_enable(uint8_t chip, int mcs_i, int mca_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	int dp;
+
+	for (dp = 0; dp < 4; dp++) {
+		/* IOM0.DDRPHY_DP16_DQ_BIT_ENABLE0_P0_{0,1,2,3} =
+		    [all]   = 0
+		    [48-63] DATA_BIT_ENABLE_0_15 = 0xffff
+		*/
+		dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFFFF);
+	}
+
+	/* IOM0.DDRPHY_DP16_DQ_BIT_ENABLE0_P0_4 =
+	    [all]   = 0
+	    [48-63] DATA_BIT_ENABLE_0_15 = 0xff00
+	*/
+	dp_mca_and_or(chip, id, 4, mca_i, DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0, 0, 0xFF00);
+
+	/* IOM0.DDRPHY_DP16_DFT_PDA_CONTROL_P0_{0,1,2,3,4} =
+	    // This reg is named MCA_DDRPHY_DP16_DATA_BIT_ENABLE1_P0_n in the code.
+	    // Probably the address changed for DD2 but the documentation did not.
+	    [all]   = 0
+	*/
+	for (dp = 0; dp < 5; dp++) {
+		dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_DFT_PDA_CONTROL_P0_0, 0, 0);
+	}
+}
+
+/* 5 DP16, 8 MCA */
+/*
+ * These tables specify which clock/strobe pins (16-23) of DP16 are used to
+ * capture outgoing/incoming data on which data pins (0-15). Those will
+ * eventually arrive at the DIMM as DQS and DQ, respectively. The mapping must
+ * be the same for write and read, but for some reason HW has two separate sets
+ * of registers.
+ */
+/*
+ * TODO: after we know how MCAs are numbered we can drop half of x8 table.
+ * I'm 90% sure it is 0,1,4,5, but for now I'll leave the rest in comments.
+ */
+static const uint16_t x4_clk[5] = {0x8640, 0x8640, 0x8640, 0x8640, 0x8400};
+static const uint16_t x8_clk[8][5] = {
+	{0x0CC0, 0xC0C0, 0x0CC0, 0x0F00, 0x0C00},	/* Port 0 */
+	{0xC0C0, 0x0F00, 0x0CC0, 0xC300, 0x0C00},	/* Port 1 */
+//	{0xC300, 0xC0C0, 0xC0C0, 0x0F00, 0x0C00},	/* Port 2 */
+//	{0x0F00, 0x0F00, 0xC300, 0xC300, 0xC000},	/* Port 3 */
+	{0x0CC0, 0xC0C0, 0x0F00, 0x0F00, 0xC000},	/* Port 4 */
+	{0xC300, 0x0CC0, 0x0CC0, 0xC300, 0xC000},	/* Port 5 */
+//	{0x0CC0, 0x0CC0, 0x0CC0, 0xC0C0, 0x0C00},	/* Port 6 */
+//	{0x0CC0, 0xC0C0, 0x0F00, 0xC300, 0xC000},	/* Port 7 */
+};
+
+static void reset_clock_enable(uint8_t chip, int mcs_i, int mca_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i];
+	/* Assume the same rank configuration for both DIMMs */
+	int dp;
+	int width = mca->dimm[0].present ? mca->dimm[0].width :
+		    mca->dimm[1].width;
+	int mranks[2] = {mca->dimm[0].mranks, mca->dimm[1].mranks};
+	/* Index for x8_clk depends on how MCAs are numbered... */
+	const uint16_t *clk = width == WIDTH_x4 ?
x4_clk : + x8_clk[mcs_i * MCA_PER_MCS + mca_i]; + + /* IOM0.DDRPHY_DP16_WRCLK_EN_RP0_P0_{0,1,2,3,4} + [all] = 0 + [48-63] QUADn_CLKxx + */ + /* IOM0.DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_{0,1,2,3,4} + [all] = 0 + [48-63] QUADn_CLKxx + */ + for (dp = 0; dp < 5; dp++) { + /* Note that these correspond to valid rank pairs */ + if (mranks[0] > 0) { + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP0_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_0, 0, clk[dp]); + } + + if (mranks[0] > 1) { + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP1_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR1_P0_0, 0, clk[dp]); + } + + if (mranks[1] > 0) { + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP2_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR2_P0_0, 0, clk[dp]); + } + + if (mranks[1] > 1) { + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_WRCLK_EN_RP3_P0_0, 0, clk[dp]); + dp_mca_and_or(chip, id, dp, mca_i, + DDRPHY_DP16_READ_CLOCK_RANK_PAIR3_P0_0, 0, clk[dp]); + } + } +} + +static void reset_rd_vref(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + int dp; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + /* RD_VREF_DVDD * (100000 - ATTR_MSS_VPD_MT_VREF_MC_RD) / RD_VREF_DAC_STEP + vref_bf = 12 * (100000 - ATTR_MSS_VPD_MT_VREF_MC_RD) / 6500 + IOM0.DDRPHY_DP16_RD_VREF_DAC_{0-7}_P0_{0-3}, + IOM0.DDRPHY_DP16_RD_VREF_DAC_{0-3}_P0_4 = // only half of last DP16 is used + [49-55] BIT0_VREF_DAC = vref_bf + [57-63] BIT1_VREF_DAC = vref_bf + */ + const uint64_t vref_bf = 12 * (100000 - ATTR_MSS_VPD_MT_VREF_MC_RD[vpd_idx]) / 6500; + for (dp = 0; dp < 5; dp++) { + + /* SCOM addresses are not regular for DAC, so no inner loop. 
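 Only half of the last DP16 is used, hence the 'if (dp == 4) break' after DAC_3 below.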
*/ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_0_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_1_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_2_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_3_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + if (dp == 4) break; + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_4_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_5_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_6_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_DAC_7_P0_0, + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(vref_bf, BIT0_VREF_DAC, BIT0_VREF_DAC_LEN) | + PPC_PLACE(vref_bf, BIT1_VREF_DAC, BIT1_VREF_DAC_LEN)); + } + + /* IOM0.DDRPHY_DP16_RD_VREF_CAL_EN_P0_{0-4} + [48-63] VREF_CAL_EN = 0xffff // enable = 0xffff, disable = 0x0000 + */ + for (dp = 0; dp < 5; dp++) { + /* Is it safe to set this before VREF_DAC? If yes, may use one loop for both */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_RD_VREF_CAL_EN_P0_0, + 0, PPC_BITMASK(48, 63)); + } +} + +static void pc_reset(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + /* These are from VPD */ + /* + uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data[chip].speed == 1866 ? 1 : 2; + uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data[chip].speed == 1866 ? 4 : + mem_data[chip].speed == 2133 ? 5 : + mem_data[chip].speed == 2400 ? 6 : 7; + */ + + /* IOM0.DDRPHY_PC_CONFIG0_P0 has been reset in p9n_ddrphy_scom() */ + + /* IOM0.DDRPHY_PC_CONFIG1_P0 = + [48-51] WRITE_LATENCY_OFFSET = ATTR_MSS_EFF_DPHY_WLO + [52-55] READ_LATENCY_OFFSET = ATTR_MSS_EFF_DPHY_RLO + +1: if 2N mode (ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET, ATTR_MSS_MRW_DRAM_2N_MODE) // Gear-down mode in JEDEC + // Assume no LRDIMM + [59-61] MEMORY_TYPE = 0x5 // 0x7 for LRDIMM + [62] DDR4_LATENCY_SW = 1 + */ + /* + * FIXME: I have no idea where Hostboot gets these values from, they should + * be the same as in VPD, yet WLO is 3 and RLO is 5 when written to SCOM... + * + * These are from VPD: + * uint64_t ATTR_MSS_EFF_DPHY_WLO = mem_data[chip].speed == 1866 ? 1 : 2; + * uint64_t ATTR_MSS_EFF_DPHY_RLO = mem_data[chip].speed == 1866 ? 4 : + * mem_data[chip].speed == 2133 ? 5 : + * mem_data[chip].speed == 2400 ? 
6 : 7; + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_CONFIG1_P0, + ~(PPC_BITMASK(48, 55) | PPC_BITMASK(59, 62)), + PPC_PLACE(/* ATTR_MSS_EFF_DPHY_WLO */ 3, WRITE_LATENCY_OFFSET, + WRITE_LATENCY_OFFSET_LEN) | + PPC_PLACE(/* ATTR_MSS_EFF_DPHY_RLO */ 5, READ_LATENCY_OFFSET, + READ_LATENCY_OFFSET_LEN) | + PPC_PLACE(0x5, MEMORY_TYPE, MEMORY_TYPE_LEN) | + PPC_BIT(DDR4_LATENCY_SW)); + + /* IOM0.DDRPHY_PC_ERROR_STATUS0_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_ERROR_STATUS0_P0, 0, 0); + + /* IOM0.DDRPHY_PC_INIT_CAL_ERROR_P0 = + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_PC_INIT_CAL_ERROR_P0, 0, 0); +} + +static void wc_reset(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + /* IOM0.DDRPHY_WC_CONFIG0_P0 = + [all] 0 + // BUG? Mismatch between comment (-,-), code (+,+) and docs (-,+) for operations inside 'max' + [48-55] TWLO_TWLOE = 12 + max((twldqsen - tmod), (twlo + twlow)) + + longest DQS delay in clocks (rounded up) + longest DQ delay in clocks (rounded up) + [56] WL_ONE_DQS_PULSE = 1 + [57-62] FW_WR_RD = 0x20 // "# dd0 = 17 clocks, now 32 from SWyatt" + [63] CUSTOM_INIT_WRITE = 1 // set to a 1 to get proper values for RD VREF + */ + /* + * tMOD = max(24 nCK, 15 ns) = 24 nCK for all supported speed bins + * tWLDQSEN >= 25 nCK + * tWLDQSEN > tMOD + ODTLon + tADC + * 0.3 tCK <= tADC <= 0.7 tCK, round to 1 + * ODTLon = WL - 2 = CWL + AL + PL - 2; AL = 0, PL = 0 + * tWLDQSEN = max(25, tMOD + CWL - 2 + 1) = CWL + 23 + * tWLO = 0 - 9.5 ns, Hostboot uses ATTR_MSS_EFF_DPHY_WLO + * tWLOE = 0 - 2 ns, Hostboot uses 2 ns + * Longest DQ and DQS delays both equal 1 nCK. + */ + /* + * FIXME: again, tWLO = 3 in Hostboot. Why? + * This is still much smaller than tWLDQSEN so leave it, for now. + */ + uint64_t tWLO = mem_data[chip].speed == 1866 ? 1 : 2; + uint64_t tWLOE = ns_to_nck(chip, 2); + uint64_t tWLDQSEN = MAX(25, tMOD + (mem_data[chip].cwl - 2) + 1); + /* + * Use the version from the code; it may be longer than necessary, but it + * works. Note that MAX() always expands to CWL + 23 + 24 = 47 + CWL, which + * means that we can just write 'tWLO_tWLOE = 61 + CWL'. Leaving the full + * version below, as it will be easier to fix. + */ + /* + * FIXME: relative to Hostboot, we are 2 nCK short for tWLDQSEN (37 vs 39). + * It doesn't have '- 2' in its calculations (timing.H). However, this is + * the JEDEC way of doing it, so it _should_ work. + */ + uint64_t tWLO_tWLOE = 12 + MAX((tWLDQSEN + tMOD), (tWLO + tWLOE)) + 1 + 1; + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG0_P0, 0, + PPC_PLACE(tWLO_tWLOE, TWLO_TWLOE, TWLO_TWLOE_LEN) | + PPC_BIT(WL_ONE_DQS_PULSE) | + PPC_PLACE(0x20, FW_WR_RD, FW_WR_RD_LEN) | + PPC_BIT(CUSTOM_INIT_WRITE)); + + /* IOM0.DDRPHY_WC_CONFIG1_P0 = + [all] 0 + [48-51] BIG_STEP = 7 + [52-54] SMALL_STEP = 0 + [55-60] WR_PRE_DLY = 0x2a (42) + */ + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG1_P0, 0, + PPC_PLACE(7, BIG_STEP, BIG_STEP_LEN) | + PPC_PLACE(0x2A, WR_PRE_DLY, WR_PRE_DLY_LEN)); + + /* IOM0.DDRPHY_WC_CONFIG2_P0 = + [all] 0 + [48-51] NUM_VALID_SAMPLES = 5 + [52-57] FW_RD_WR = max(tWTR_S + 11, AL + tRTP + 3) + [58-61] IPW_WR_WR = 5 // results in 24 clock cycles + */ + /* There is no Additive Latency.
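 With AL = 0, FW_RD_WR reduces to max(nWTR_S + 11, nRTP + 3); with hypothetical values nWTR_S = 3 and nRTP = 9 this would give max(14, 12) = 14 clocks. 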
*/ + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG2_P0, 0, + PPC_PLACE(5, NUM_VALID_SAMPLES, NUM_VALID_SAMPLES_LEN) | + PPC_PLACE(MAX(mca->nwtr_s + 11, mem_data[chip].nrtp + 3), + FW_RD_WR, + FW_RD_WR_LEN) | + PPC_PLACE(5, IPW_WR_WR, IPW_WR_WR_LEN)); + + /* IOM0.DDRPHY_WC_CONFIG3_P0 = + [all] 0 + [55-60] MRS_CMD_DQ_OFF = 0x3f + */ + mca_and_or(chip, id, mca_i, DDRPHY_WC_CONFIG3_P0, 0, + PPC_PLACE(0x3F, MRS_CMD_DQ_OFF, MRS_CMD_DQ_OFF_LEN)); + + /* IOM0.DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 + [48] WL_ENABLE_RTT_SWAP = 0 + [49] WR_CTR_ENABLE_RTT_SWAP = 0 + [50-59] WR_CTR_VREF_COUNTER_RESET_VAL = 150ns in clock cycles // JESD79-4C Table 67 + */ + mca_and_or(chip, id, mca_i, DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0, ~PPC_BITMASK(48, 59), + PPC_PLACE(ns_to_nck(chip, 150), WR_CTR_VREF_COUNTER_RESET_VAL, + WR_CTR_VREF_COUNTER_RESET_VAL_LEN)); +} + +static void rc_reset(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + /* IOM0.DDRPHY_RC_CONFIG0_P0 + [all] 0 + [48-51] GLOBAL_PHY_OFFSET = 0x5 // ATTR_MSS_VPD_MR_DPHY_GPO + [62] PERFORM_RDCLK_ALIGN = 1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG0_P0, 0, + PPC_PLACE(0x5, GLOBAL_PHY_OFFSET, GLOBAL_PHY_OFFSET_LEN) | + PPC_BIT(PERFORM_RDCLK_ALIGN)); + + /* IOM0.DDRPHY_RC_CONFIG1_P0 + [all] 0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG1_P0, 0, 0); + + /* IOM0.DDRPHY_RC_CONFIG2_P0 + [all] 0 + [48-52] CONSEC_PASS = 8 + [57-58] 3 // not documented, BURST_WINDOW? + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG2_P0, 0, + PPC_PLACE(8, CONSEC_PASS, CONSEC_PASS_LEN) | + PPC_PLACE(3, 57, 2)); + + /* IOM0.DDRPHY_RC_CONFIG3_P0 + [all] 0 + [51-54] COARSE_CAL_STEP_SIZE = 4 // 5/128 + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_CONFIG3_P0, 0, + PPC_PLACE(4, COARSE_CAL_STEP_SIZE, COARSE_CAL_STEP_SIZE_LEN)); + + /* IOM0.DDRPHY_RC_RDVREF_CONFIG0_P0 = + [all] 0 + [48-63] WAIT_TIME = + 0xffff // as slow as possible, or use calculation from vref_guess_time(), or: + MSS_FREQ_EQ_1866: 0x0804 + MSS_FREQ_EQ_2133: 0x092a + MSS_FREQ_EQ_2400: 0x0a50 + MSS_FREQ_EQ_2666: 0x0b74 // use this value for all freqs maybe? + */ + uint64_t wait_time = mem_data[chip].speed == 1866 ? 0x0804 : + mem_data[chip].speed == 2133 ? 0x092A : + mem_data[chip].speed == 2400 ? 0x0A50 : 0x0B74; + mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG0_P0, + 0, PPC_PLACE(wait_time, 48, 16)); + + /* IOM0.DDRPHY_RC_RDVREF_CONFIG1_P0 = + [all] 0 + [48-55] CMD_PRECEDE_TIME = (AL + CL + 15) + [56-59] MPR_LOCATION = 4 // "From R. King." + */ + mca_and_or(chip, id, mca_i, DDRPHY_RC_RDVREF_CONFIG1_P0, 0, + PPC_PLACE(mca->cl + 15, CMD_PRECEDE_TIME, CMD_PRECEDE_TIME_LEN) | + PPC_PLACE(4, MPR_LOCATION, MPR_LOCATION_LEN)); +} + +static inline int log2_up(uint32_t x) +{ + int lz; + asm("cntlzd %0, %1" : "=r"(lz) : "r"((x << 1) - 1)); + return 63 - lz; +} + +static void seq_reset(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 
2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + /* IOM0.DDRPHY_SEQ_CONFIG0_P0 = + [all] 0 + [49] TWO_CYCLE_ADDR_EN = + 2N mode: 1 + else: 0 + [54] DELAYED_PAR = 1 + [62] PAR_A17_MASK = + 16Gb x4 configuration: 0 + else: 1 + */ + uint64_t par_a17_mask = PPC_BIT(PAR_A17_MASK); + if ((mca->dimm[0].width == WIDTH_x4 && mca->dimm[0].density == DENSITY_16Gb) || + (mca->dimm[1].width == WIDTH_x4 && mca->dimm[1].density == DENSITY_16Gb)) + par_a17_mask = 0; + + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_CONFIG0_P0, 0, + PPC_BIT(DELAYED_PAR) | par_a17_mask); + + /* All log2 values in timing registers are rounded up. */ + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 = + [all] 0 + [48-51] TMOD_CYCLES = 5 // log2(max(tMRD, tMOD)) = log2(24), JEDEC tables 169 and 170 and section 13.5 + [52-55] TRCD_CYCLES = log2(tRCD) + [56-59] TRP_CYCLES = log2(tRP) + [60-63] TRFC_CYCLES = log2(tRFC) + */ + /* + * FIXME or FIXHOSTBOOT: due to a bug in Hostboot, TRFC_CYCLES is always 0. + * A loop searches for a minimum across all MCAs, but the minimum that the + * values are compared against is initially set to 0. This is a clear + * violation of tRFC timing. It is fixed later in dqs_align_turn_on_refresh() + * in 13.11, but that would not have been necessary had it been written + * properly here. + * + * https://github.com/open-power/hostboot/blob/master/src/import/chips/p9/procedures/hwp/memory/lib/phy/seq.C#L142 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM0_P0, 0, + PPC_PLACE(5, TMOD_CYCLES, TMOD_CYCLES_LEN) | + PPC_PLACE(log2_up(mca->nrcd), TRCD_CYCLES, TRCD_CYCLES_LEN) | + PPC_PLACE(log2_up(mca->nrp), TRP_CYCLES, TRP_CYCLES_LEN) | + PPC_PLACE(log2_up(mca->nrfc), TRFC_CYCLES, TRFC_CYCLES_LEN)); + + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 = + [all] 0 + [48-51] TZQINIT_CYCLES = 10 // log2(1024), JEDEC tables 169 and 170 + [52-55] TZQCS_CYCLES = 7 // log2(128), JEDEC tables 169 and 170 + [56-59] TWLDQSEN_CYCLES = 6 // log2(37) rounded up, JEDEC tables 169 and 170 + [60-63] TWRMRD_CYCLES = 6 // log2(40) rounded up, JEDEC tables 169 and 170 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM1_P0, 0, + PPC_PLACE(10, TZQINIT_CYCLES, TZQINIT_CYCLES_LEN) | + PPC_PLACE(7, TZQCS_CYCLES, TZQCS_CYCLES_LEN) | + PPC_PLACE(6, TWLDQSEN_CYCLES, TWLDQSEN_CYCLES_LEN) | + PPC_PLACE(6, TWRMRD_CYCLES, TWRMRD_CYCLES_LEN)); + + /* IOM0.DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 = + [all] 0 + [48-51] TODTLON_OFF_CYCLES = log2(CWL + AL + PL - 2) + [52-63] reserved = 0x777 // "Reset value of SEQ_TIMING2 is lucky 7's" + */ + /* AL and PL are disabled (0) */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_MEM_TIMING_PARAM2_P0, 0, + PPC_PLACE(log2_up(mem_data[chip].cwl - 2), TODTLON_OFF_CYCLES, + TODTLON_OFF_CYCLES_LEN) | + PPC_PLACE(0x777, 52, 12)); + + /* IOM0.DDRPHY_SEQ_RD_WR_DATA0_P0 = + [all] 0 + [48-63] RD_RW_DATA_REG0 = 0xaa00 + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_RD_WR_DATA0_P0, 0, + PPC_PLACE(0xAA00, RD_RW_DATA_REG0, RD_RW_DATA_REG0_LEN)); + + /* IOM0.DDRPHY_SEQ_RD_WR_DATA1_P0 = + [all] 0 + [48-63] RD_RW_DATA_REG1 = 0x00aa + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_RD_WR_DATA1_P0, 0, + PPC_PLACE(0x00AA, RD_RW_DATA_REG1, RD_RW_DATA_REG1_LEN)); + + /* + * For all registers below, assume RDIMM (max 2 ranks). + * + * Remember that the VPD data layout is different, so the code will be + * slightly different from the comments.
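 + * + * The F() macro defined right below picks bits 0, 1, 4 and 5 (big-endian + * numbering) out of the 8-bit VPD ODT byte and packs them into a 4-bit + * register field, e.g. F(0x80) = ((0x80 >> 4) & 0xC) | ((0x80 >> 2) & 0x3) + * = 0x8, i.e. only the first termination line asserted. (0x80 is just an + * illustrative input, real values come from the VPD attributes.)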
+ */ +#define F(x) (((x >> 4) & 0xC) | ((x >> 2) & 0x3)) + + /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG0_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_RD_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][0]) + [56-59] ODT_RD_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][1]) + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG0_P0, 0, + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0]), ODT_RD_VALUES0, + ODT_RD_VALUES0_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][1]), ODT_RD_VALUES1, + ODT_RD_VALUES1_LEN)); + + /* IOM0.DDRPHY_SEQ_ODT_RD_CONFIG1_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_RD_VALUES2 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][0]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][2]) + [56-59] ODT_RD_VALUES3 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][1][1]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_RD[index(MCA)][0][3]) + */ + /* 2 DIMMs -> odd vpd_idx */ + uint64_t val = 0; + if (vpd_idx % 2) + val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0]), ODT_RD_VALUES2, + ODT_RD_VALUES2_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][1]), ODT_RD_VALUES3, + ODT_RD_VALUES3_LEN); + + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_RD_CONFIG1_P0, 0, val); + + + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG0_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_WR_VALUES0 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][0]) + [56-59] ODT_WR_VALUES1 = F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][1]) + */ + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG0_P0, 0, + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0]), ODT_WR_VALUES0, + ODT_WR_VALUES0_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][1]), ODT_WR_VALUES1, + ODT_WR_VALUES1_LEN)); + + /* IOM0.DDRPHY_SEQ_ODT_WR_CONFIG1_P0 = + F(X) = (((X >> 4) & 0xc) | ((X >> 2) & 0x3)) // Bits 0,1,4,5 of X, see also MC01.PORT0.SRQ.MBA_FARB2Q + [all] 0 + [48-51] ODT_WR_VALUES2 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][1][0]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][2]) + [56-59] ODT_WR_VALUES3 = + count_dimm(MCA) == 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][1][1]) + count_dimm(MCA) != 2: F(ATTR_MSS_VPD_MT_ODT_WR[index(MCA)][0][3]) + */ + val = 0; + if (vpd_idx % 2) + val = PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0]), ODT_WR_VALUES2, + ODT_WR_VALUES2_LEN) | + PPC_PLACE(F(ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][1]), ODT_WR_VALUES3, + ODT_WR_VALUES3_LEN); + + mca_and_or(chip, id, mca_i, DDRPHY_SEQ_ODT_WR_CONFIG1_P0, 0, val); +#undef F +} + +static void reset_ac_boost_cntl(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* IOM0.DDRPHY_DP16_ACBOOST_CTL_BYTE{0,1}_P0_{0,1,2,3,4} = + // For all of the AC Boost attributes, they're laid out in the uint32_t as such: + // Bit 0-2 = DP16 Block 0 (DQ Bits 0-7) BYTE0_P0_0 + // Bit 3-5 = DP16 Block 0 (DQ Bits 8-15) BYTE1_P0_0 + // Bit 6-8 = DP16 Block 1 (DQ Bits 0-7) BYTE0_P0_1 + // Bit 9-11 = DP16 Block 1 (DQ Bits 8-15) BYTE1_P0_1 + // Bit 12-14 = DP16 Block 2 (DQ Bits 0-7) BYTE0_P0_2 + // Bit 15-17 = DP16 Block 2 (DQ Bits 8-15) BYTE1_P0_2 + // Bit 18-20 = DP16 Block 3 (DQ Bits 0-7) BYTE0_P0_3 + // Bit 21-23 = DP16 Block 3 (DQ Bits 8-15) BYTE1_P0_3 + // Bit 24-26 = DP16 Block 4 (DQ Bits 0-7) BYTE0_P0_4 + // Bit 
27-29 = DP16 Block 4 (DQ Bits 8-15) BYTE1_P0_4 + [all] 0? // function does read prev values from SCOM but then overwrites all non-const-0 fields. Why bother? + [48-50] S{0,1}ACENSLICENDRV_DC = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN + [51-53] S{0,1}ACENSLICEPDRV_DC = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP + [54-56] S{0,1}ACENSLICEPTERM_DC = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP + */ + /* + * Both ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_* have a value of 0x24924924 + * for all rank configurations (two copies for two MCA indices to be exact), + * meaning that all 3b fields are 0b001. ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP + * equals 0. The last DP16 doesn't require special handling, all DQ bits are + * configured. + * + * Write these fields explicitly instead of shifting and masking for better + * readability. + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0, + ~PPC_BITMASK(48, 56), + PPC_PLACE(1, S0ACENSLICENDRV_DC, S0ACENSLICENDRV_DC_LEN) | + PPC_PLACE(1, S0ACENSLICEPDRV_DC, S0ACENSLICEPDRV_DC_LEN)); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0, + ~PPC_BITMASK(48, 56), + PPC_PLACE(1, S1ACENSLICENDRV_DC, S1ACENSLICENDRV_DC_LEN) | + PPC_PLACE(1, S1ACENSLICEPDRV_DC, S1ACENSLICEPDRV_DC_LEN)); + } +} + +static void reset_ctle_cntl(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + /* IOM0.DDRPHY_DP16_CTLE_CTL_BYTE{0,1}_P0_{0,1,2,3,4} = + // For the capacitance CTLE attributes, they're laid out in the uint64_t as follows. The resistance + // attributes are the same, but 3 bits long. Notice that DP Block X Nibble 0 is DQ0:3, + // Nibble 1 is DQ4:7, Nibble 2 is DQ8:11 and 3 is DQ12:15. + // Bit 0-1 = DP16 Block 0 Nibble 0 Bit 16-17 = DP16 Block 2 Nibble 0 Bit 32-33 = DP16 Block 4 Nibble 0 + // Bit 2-3 = DP16 Block 0 Nibble 1 Bit 18-19 = DP16 Block 2 Nibble 1 Bit 34-35 = DP16 Block 4 Nibble 1 + // Bit 4-5 = DP16 Block 0 Nibble 2 Bit 20-21 = DP16 Block 2 Nibble 2 Bit 36-37 = DP16 Block 4 Nibble 2 + // Bit 6-7 = DP16 Block 0 Nibble 3 Bit 22-23 = DP16 Block 2 Nibble 3 Bit 38-39 = DP16 Block 4 Nibble 3 + // Bit 8-9 = DP16 Block 1 Nibble 0 Bit 24-25 = DP16 Block 3 Nibble 0 + // Bit 10-11 = DP16 Block 1 Nibble 1 Bit 26-27 = DP16 Block 3 Nibble 1 + // Bit 12-13 = DP16 Block 1 Nibble 2 Bit 28-29 = DP16 Block 3 Nibble 2 + // Bit 14-15 = DP16 Block 1 Nibble 3 Bit 30-31 = DP16 Block 3 Nibble 3 + [48-49] NIB_{0,2}_DQSEL_CAP = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP + [53-55] NIB_{0,2}_DQSEL_RES = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES + [56-57] NIB_{1,3}_DQSEL_CAP = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP + [61-63] NIB_{1,3}_DQSEL_RES = appropriate bits from ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES + */ + /* + * For all rank configurations and both MCAs, ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP + * is 0x5555555555000000 (so every 2b field is 0b01) and *_RES equals + * 0xb6db6db6db6db6d0 (every 3b field is 0b101 = 5).
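 As a quick check: + * 0x55... is 0b01 repeated and 0xb6d... is 0b101 repeated, so each 2-bit + * CAP field reads 1 and each 3-bit RES field reads 5 wherever its nibble's + * field happens to sit in the word.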
+ */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), + PPC_PLACE(1, NIB_0_DQSEL_CAP, NIB_0_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_0_DQSEL_RES, NIB_0_DQSEL_RES_LEN) | + PPC_PLACE(1, NIB_1_DQSEL_CAP, NIB_1_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_1_DQSEL_RES, NIB_1_DQSEL_RES_LEN)); + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0, + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(53, 57) | PPC_BITMASK(61, 63)), + PPC_PLACE(1, NIB_2_DQSEL_CAP, NIB_2_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_2_DQSEL_RES, NIB_2_DQSEL_RES_LEN) | + PPC_PLACE(1, NIB_3_DQSEL_CAP, NIB_3_DQSEL_CAP_LEN) | + PPC_PLACE(5, NIB_3_DQSEL_RES, NIB_3_DQSEL_RES_LEN)); + } +} + +static void reset_delay(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + /* See comments in ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0 for layout */ + int speed_idx = mem_data[chip].speed == 1866 ? 0 : + mem_data[chip].speed == 2133 ? 8 : + mem_data[chip].speed == 2400 ? 16 : 24; + int dimm_idx = (mca->dimm[0].present && mca->dimm[1].present) ? 4 : 0; + /* TODO: second CPU not supported */ + int vpd_idx = speed_idx + dimm_idx + mcs_i; + + /* + * From documentation: + * "If the reset value is not sufficient for the given system, these + * registers must be set via the programming interface." + * + * Unsure if this is the case. Hostboot sets it, so let's do it too. + */ + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0 + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1 + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1 + [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00 + [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0 + */ + 
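/* + * Every ADR_DELAY register in this block has the same layout: a 7-bit phase + * rotator value for the even lane in bits 49-55 (ADR_DELAY_EVEN) and one for + * the odd lane in bits 57-63 (ADR_DELAY_ODD); only the VPD attribute feeding + * each lane changes from register to register. + */ + 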
mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR0 = + [all] 0 + [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0 + [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13 + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR1, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR1, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR1, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN + [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR1, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2 + [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR1, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR1 = + [all] 0 + [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02 + [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR1, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY0 = 
ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0 + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR2, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08 + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR2, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR2, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04 + [57-63] ADR_DELAY7 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR2, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY4_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY8 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09 + [57-63] ADR_DELAY9 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY4_P0_ADR2, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY5_P0_ADR2 = + [all] 0 + [49-55] ADR_DELAY10 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1 + [57-63] ADR_DELAY11 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY5_P0_ADR2, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + + /* IOM0.DDRPHY_ADR_DELAY0_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY0 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN + [57-63] ADR_DELAY1 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY0_P0_ADR3, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY1_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY2 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0 + [57-63] ADR_DELAY3 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY1_P0_ADR3, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx][mca_i], + ADR_DELAY_ODD, 
ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY2_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY4 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1 + [57-63] ADR_DELAY5 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY2_P0_ADR3, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN) | + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx][mca_i], + ADR_DELAY_ODD, ADR_DELAY_ODD_LEN)); + + /* IOM0.DDRPHY_ADR_DELAY3_P0_ADR3 = + [all] 0 + [49-55] ADR_DELAY6 = ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_DELAY3_P0_ADR3, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx][mca_i], + ADR_DELAY_EVEN, ADR_DELAY_EVEN_LEN)); + +} + +static void reset_tsys_adr(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int i = mem_data[chip].speed == 1866 ? 0 : + mem_data[chip].speed == 2133 ? 1 : + mem_data[chip].speed == 2400 ? 2 : 3; + + /* IOM0.DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S{0,1} = + [all] 0 + [49-55] TSYS_WRCLK = ATTR_MSS_VPD_MR_TSYS_ADR + // From regs spec: + // Set to '19'h for 2666 MT/s. + // Set to '17'h for 2400 MT/s. + // Set to '14'h for 2133 MT/s. + // Set to '12'h for 1866 MT/s. + */ + /* Has the same stride as DP16. */ + dp_mca_and_or(chip, id, 0, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); + dp_mca_and_or(chip, id, 1, mca_i, DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0, + 0, PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_ADR[i], TSYS_WRCLK, TSYS_WRCLK_LEN)); +} + +static void reset_tsys_data(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int i = mem_data[chip].speed == 1866 ? 0 : + mem_data[chip].speed == 2133 ? 1 : + mem_data[chip].speed == 2400 ? 2 : 3; + int dp; + + /* IOM0.DDRPHY_DP16_WRCLK_PR_P0_{0,1,2,3,4} = + [all] 0 + [49-55] TSYS_WRCLK = ATTR_MSS_VPD_MR_TSYS_DATA + // From regs spec: + // Set to '12'h for 2666 MT/s. + // Set to '10'h for 2400 MT/s. + // Set to '0F'h for 2133 MT/s. + // Set to '0D'h for 1866 MT/s. + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_WRCLK_PR_P0_0, 0, + PPC_PLACE(ATTR_MSS_VPD_MR_TSYS_DATA[i], TSYS_WRCLK, + TSYS_WRCLK_LEN)); + } +} + +static void reset_io_impedances(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int dp; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_IO_TX_FET_SLICE_P0_{0,1,2,3,4} = + [all] 0 + // 0 - Hi-Z, otherwise impedance = 240/ Ohms + [49-55] EN_SLICE_N_WR = ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS[{0,1,2,3,4}] + [57-63] EN_SLICE_P_WR = ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS[{0,1,2,3,4}] + */ + /* + * For all rank configurations and MCAs, ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS + * is 34 Ohms. 240/34 = 7 bits set. According to documentation this is the + * default value, but set it just to be safe. + */ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_FET_SLICE_P0_0, 0, + PPC_PLACE(0x7F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN) | + PPC_PLACE(0x7F, EN_SLICE_P_WR, EN_SLICE_P_WR_LEN)); + + /* IOM0.DDRPHY_DP16_IO_TX_PFET_TERM_P0_{0,1,2,3,4} = + [all] 0 + // 0 - Hi-Z, otherwise impedance = 240/ Ohms + [49-55] EN_SLICE_N_WR = ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS[{0,1,2,3,4}] + */ + /* 60 Ohms for all configurations, 240/60 = 4 bits set. 
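The pattern generalizes: for an impedance of R Ohms the field value is (1 << (240 / R)) - 1, i.e. 240/R enabled slices, which is also where the 0x7F for 34 Ohms above comes from. 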
*/ + dp_mca_and_or(chip, id, dp, mca_i, DDRPHY_DP16_IO_TX_PFET_TERM_P0_0, 0, + PPC_PLACE(0x0F, EN_SLICE_N_WR, EN_SLICE_N_WR_LEN)); + } + + /* IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // yes, ADR1 + // These are RMW one at a time. I don't see why not all at once, or at least in pairs (P and N of the same clocks) + if (ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK_OHM30): + [54,52,62,60] SLICE_SELn = 1 // CLK00 P, CLK00 N, CLK01 P, CLK01 N + else + [54,52,62,60] = 0 + */ + /* 30 Ohms for all configurations. */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL3) | + PPC_BIT(SLICE_SEL6) | PPC_BIT(SLICE_SEL7)); + + /* + * The following are reordered to minimize the number of register reads/writes + ------------------------------------------------------------------------ + val = (ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR_OHM30) ? 1 : 0 + // val = 30 for all VPD configurations + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 = + [50,56,58,62] = val // ADDR14/WEN, BA1, ADDR10, BA0 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0 = + [48,54] = val // ADDR0, ADDR15/CAS + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // same as CLK, however it uses different VPD + [48,56] = val // ADDR13, ADDR17/RAS + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 = + [52] = val // ADDR2 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 = + [50,52,54,56,58,60,62] = val // ADDR16/RAS, ADDR8, ADDR5, ADDR3, ADDR1, ADDR4, ADDR7 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2 = + [48,50,54] = val // ADDR9, ADDR6, ADDR12 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 = + [48,50,52,58] = val // ACT_N, ADDR11, BG0, BG1 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL5) | + PPC_BIT(SLICE_SEL7)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL3)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL4)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL2)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL3) | + PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL5) | PPC_BIT(SLICE_SEL6) | + PPC_BIT(SLICE_SEL7)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL3)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2) | + PPC_BIT(SLICE_SEL5)); + + /* + * The following are reordered to minimize the number of register reads/writes + ------------------------------------------------------------------------ + val = (ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL_OHM30) ? 
1 : 0 + // val = 30 for all VPD sets + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 = // same as CMD/ADDR, however it uses different VPD + [52,60] = val // ODT3, ODT1 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0 = // same as CMD/ADDR, however it uses different VPD + [50,52] = val // ODT2, ODT0 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 = // same as CMD/ADDR, however it uses different VPD + [54] = val // PARITY + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2 = // same as CMD/ADDR, however it uses different VPD + [52] = val // CKE1 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 = // same as CMD/ADDR, however it uses different VPD + [54,56,60,62] = val // CKE0, CKE3, CKE2, RESET_N + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL2) | PPC_BIT(SLICE_SEL6)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL2)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL3)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL2)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3, ~0, + PPC_BIT(SLICE_SEL3) | PPC_BIT(SLICE_SEL4) | PPC_BIT(SLICE_SEL6) | + PPC_BIT(SLICE_SEL7)); + + /* + * The following are reordered to minimize the number of register reads/writes + ------------------------------------------------------------------------ + val = (ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID == ENUM_ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID_OHM30) ? 1 : 0 + // val = 30 for all VPD sets + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 = // same as CMD/ADDR and CNTL, however it uses different VPD + [48,54] = val // CS0, CID0 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 = // same as CLK and CMD/ADDR, however it uses different VPD + [50,58] = val // CS1, CID1 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 = // same as CMD/ADDR and CNTL, however it uses different VPD + [48,50] = val // CS3, CID2 + IOM0.DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 = // same as CMD/ADDR, however it uses different VPD + [48] = val // CS2 + */ + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL3)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL1) | PPC_BIT(SLICE_SEL5)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1, ~0, + PPC_BIT(SLICE_SEL0) | PPC_BIT(SLICE_SEL1)); + mca_and_or(chip, id, mca_i, DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2, ~0, + PPC_BIT(SLICE_SEL0)); + + /* + * IO impedance regs summary: lanes 9-15 have different possible settings (results in 15/30 vs 40/30 Ohm) + * MAP0_ADR0: all set MAP1_ADR0: lanes 12-15 not set + * MAP0_ADR1: all set MAP1_ADR1: lanes 12-15 not set + * MAP0_ADR2: all set MAP1_ADR2: lanes 12-15 not set + * MAP0_ADR3: all set MAP1_ADR3: not used + * This mapping is consistent with ADR_DELAYx_P0_ADRy settings. + */ +} + +static void reset_wr_vref_registers(uint8_t chip, int mcs_i, int mca_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int dp; + int vpd_idx = mca->dimm[0].present ? (mca->dimm[0].mranks == 2 ? 2 : 0) : + (mca->dimm[1].mranks == 2 ? 
2 : 0); + if (mca->dimm[0].present && mca->dimm[1].present) + vpd_idx++; + + for (dp = 0; dp < 5; dp++) { + /* IOM0.DDRPHY_DP16_WR_VREF_CONFIG0_P0_{0,1,2,3,4} = + [all] 0 + [48] WR_CTR_1D_MODE_SWITCH = 0 // 1 for functional) + continue; + + /* Some registers cannot be initialized without data from SPD */ + if (mca->functional) { + /* Assume DIMM mixing rules are followed - same rank config on both DIMMs*/ + p9n_mca_scom(chip, mcs_i, mca_i); + thermal_throttle_scominit(chip, mcs_i, mca_i); + } + + /* The rest can and should be initialized also on magic port */ + p9n_ddrphy_scom(chip, mcs_i, mca_i); + } + p9n_mcbist_scom(chip, mcs_i); + } + + /* This double loop is a part of phy_scominit() in Hostboot, but this is simpler. */ + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + /* No magic for phy_scominit(). */ + if (mca->functional) + phy_scominit(chip, mcs_i, mca_i); + + if (mca_i == 0 || mca->functional) + fir_unmask(chip, mcs_i, mca_i); + } + } +} + +void istep_13_8(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 8); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_scominit(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_9.c b/src/soc/ibm/power9/istep_13_9.c new file mode 100644 index 00000000000..0c4f4859612 --- /dev/null +++ b/src/soc/ibm/power9/istep_13_9.c @@ -0,0 +1,814 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 13.9 mss_ddr_phy_reset: Soft reset of DDR PHY macros + * + * - Lock DDR DLLs + * - Already configured DDR DLL in scaninit + * - Sends Soft DDR Phy reset + * - Kick off internal ZQ Cal + * - Perform any config that wasn't scanned in (TBD) + * - Nothing known here + */ + +static int test_dll_calib_done(uint8_t chip, int mcs_i, int mca_i, bool *do_workaround) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + uint64_t status = mca_read(chip, id, mca_i, DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0); + /* + if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 + [48] DP_DLL_CAL_GOOD == 1 + [49] DP_DLL_CAL_ERROR == 0 + [50] DP_DLL_CAL_ERROR_FINE == 0 + [51] ADR_DLL_CAL_GOOD == 1 + [52] ADR_DLL_CAL_ERROR == 0 + [53] ADR_DLL_CAL_ERROR_FINE == 0) break // success + if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 + [49] DP_DLL_CAL_ERROR == 1 | + [50] DP_DLL_CAL_ERROR_FINE == 1 | + [52] ADR_DLL_CAL_ERROR == 1 | + [53] ADR_DLL_CAL_ERROR_FINE == 1) break and do the workaround + */ + if ((status & PPC_BITMASK(48, 53)) == + (PPC_BIT(DP_DLL_CAL_GOOD) | PPC_BIT(ADR_DLL_CAL_GOOD))) { + /* DLL calibration finished without errors */ + return 1; + } + + if (status & (PPC_BIT(DP_DLL_CAL_ERROR) | PPC_BIT(DP_DLL_CAL_ERROR_FINE) | + PPC_BIT(ADR_DLL_CAL_ERROR) | PPC_BIT(ADR_DLL_CAL_ERROR_FINE))) { + /* DLL calibration finished, but with errors */ + *do_workaround = true; + return 1; + } + + /* Not done yet */ + return 0; +} + +static int test_bb_lock(uint8_t chip, int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + uint64_t res = PPC_BIT(BB_LOCK0) | PPC_BIT(BB_LOCK1); + int mca_i, dp; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) + continue; + + /* + IOM0.DDRPHY_ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S{0,1} + [56] BB_LOCK & + IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_{0,1,2,3} + [48] BB_LOCK0 & + [56] BB_LOCK1 & + IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_4 + [48] BB_LOCK0 // last DP16 uses only first 
half + if all bits listed above are set: success + */ + + /* ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S{0,1}, BB_LOCK0 doesn't matter */ + res &= dp_mca_read(chip, id, 0, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | + PPC_BIT(BB_LOCK0); + res &= dp_mca_read(chip, id, 1, mca_i, ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0) | + PPC_BIT(BB_LOCK0); + + /* IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_{0,1,2,3} */ + for (dp = 0; dp < 4; dp++) { + res &= dp_mca_read(chip, id, dp, mca_i, + DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0); + } + + /* IOM0.DDRPHY_DP16_SYSCLK_PR_VALUE_P0_4, BB_LOCK1 doesn't matter */ + res &= dp_mca_read(chip, id, dp, mca_i, DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0) | + PPC_BIT(BB_LOCK1); + + /* Do we want early return here? */ + } + + return res == (PPC_BIT(BB_LOCK0) | PPC_BIT(BB_LOCK1)); +} + +static void fix_bad_voltage_settings(int mcs_i) +{ + die("fix_bad_voltage_settings() required for MCS%d, but not implemented yet\n", mcs_i); + + /* TODO: implement if needed */ +/* + for each functional MCA + // Each MCA has 10 DLLs: ADR DLL0, DP0-4 DLL0, DP0-3 DLL1. Each of those can fail. For each DLL there are 5 registers + // used in this workaround, those are (see src/import/chips/p9/procedures/hwp/memory/lib/workarounds/dll_workaround.C): + // - l_CNTRL: DP16 or ADR CNTRL register + // - l_COARSE_SAME: VREG_COARSE register for same DLL as CNTRL reg + // - l_COARSE_NEIGH: VREG_COARSE register for DLL neighbor for this workaround + // - l_DAC_LOWER: DLL DAC Lower register + // - l_DAC_UPPER: DLL DAC Upper register + // Warning: the last two have their descriptions swapped in dll_workaround.H + // It seems that the code expects that the DLL neighbor is always good; what if it isn't? + // + // General flow, stripped of C++ bloat and repeated loops: + for each DLL // list in workarounds/dll_workaround.C + 1. check if this DLL failed, if not - skip to the next one + (l_CNTRL[62 | 63] | l_COARSE_SAME[56-62] == 1) -> failed + 2. set reset bit, set skip VREG bit, clear the error bits + l_CNTRL[48] = 1 + l_CNTRL[50-51] = 2 // REGS_RXDLL_CAL_SKIP, 2 - skip VREG calib., do coarse delay calib. only + l_CNTRL[62-63] = 0 + 3. clear DLL FIR (see "Do FIRry things" at the end of 13.8) // this was actually done for non-failed DLLs too, why? + IOM0.IOM_PHY0_DDRPHY_FIR_REG = // 0x07011000 // maybe use SCOM1 (AND) 0x07011001 + [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 = 0 // calibration errors + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 // DLL errors + 4. write the VREG DAC value found in neighbor (good) to the failing DLL VREG DAC + l_COARSE_SAME[56-62] = l_COARSE_NEIGH[56-62] + 5. reset the upper and lower fine calibration bits back to defaults + l_DAC_LOWER[56-63] = 0x8000 // Hard coded default values per Steve Wyatt for this workaround + l_DAC_UPPER[56-63] = 0xFFE0 + 6. run DLL Calibration again on failed DLLs + l_CNTRL[48] = 0 + // Wait for calibration to finish + delay(37,382 memclocks) // again, we could do better than this + + // Check if calibration succeeded (same tests as in 1 above, for all DLLs) + for each DLL + if (l_CNTRL[62 | 63] | l_COARSE_SAME[56-62] == 1): failed, assert and die? +*/ +} + +static void check_during_phy_reset(uint8_t chip, int mcs_i) +{ + /* + * Mostly FFDC, which to my current knowledge is just the error logging. If + * it does anything else, this whole function needs rechecking. + */ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + uint64_t val; + + /* If any of these bits is set, report error. Clear them unconditionally.
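 Clearing is done through mca_and_or() with an AND mask that zeroes only the bits of interest and an OR value of 0, so the remaining FIR bits keep their values. 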
*/ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (mca_i != 0 && !mem_data[chip].mcs[mcs_i].mca[mca_i].functional) + continue; + + /* MC01.PORT0.SRQ.MBACALFIR + [0] MBACALFIR_MBA_RECOVERABLE_ERROR + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR + [10] MBACALFIR_SM_1HOT_ERR + */ + val = mca_read(chip, id, mca_i, MBACALFIR); + if (val & (PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR))) { + /* No idea how severe that error is... */ + printk(BIOS_ERR, "Error detected in PORT%d.SRQ.MBACALFIR: %#llx\n", + mca_i, val); + } + + mca_and_or(chip, id, mca_i, MBACALFIR, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + 0); + + /* IOM0.IOM_PHY0_DDRPHY_FIR_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 + [56] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 + */ + val = mca_read(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG); + if (val & PPC_BITMASK(54, 61)) { + /* No idea how severe that error is... */ + printk(BIOS_ERR, "Error detected in IOM_PHY%d_DDRPHY_FIR_REG: %#llx\n", + mca_i , val); + } + + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_REG, ~(PPC_BITMASK(54, 61)), 0); + } +} + +static void fir_unmask(uint8_t chip, int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + + /* + * MASK must be always written last, otherwise we may get unintended + * actions. No need for explicit barrier here, SCOM functions do eieio. + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT0 + [2] MCBISTFIRQ_INTERNAL_FSM_ERROR = 0 + [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 0 + [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1 + [2] MCBISTFIRQ_INTERNAL_FSM_ERROR = 0 + [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 1 + [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 + MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRMASK + [2] MCBISTFIRQ_INTERNAL_FSM_ERROR = 0 // checkstop (0,0,0) + [13] MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE = 0 // recoverable_error (0,1,0) + [14] MCBISTFIRQ_SCOM_FATAL_REG_PE = 0 // checkstop (0,0,0) + */ + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT0, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); + scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE)); + scom_and_or_for_chiplet(chip, id, MCBISTFIRMASK, + ~(PPC_BIT(MCBISTFIRQ_INTERNAL_FSM_ERROR) | + PPC_BIT(MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE) | + PPC_BIT(MCBISTFIRQ_SCOM_FATAL_REG_PE)), + 0); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional) + continue; + + /* + MC01.PORT0.SRQ.MBACALFIR_ACTION0 + [0] MBACALFIR_MBA_RECOVERABLE_ERROR = 0 + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR = 0 + [4] MBACALFIR_RCD_PARITY_ERROR = 0 + [10] MBACALFIR_SM_1HOT_ERR = 0 + MC01.PORT0.SRQ.MBACALFIR_ACTION1 + [0] MBACALFIR_MBA_RECOVERABLE_ERROR = 1 + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR = 0 + [4] MBACALFIR_RCD_PARITY_ERROR = 1 + [10] MBACALFIR_SM_1HOT_ERR = 0 + MC01.PORT0.SRQ.MBACALFIR_MASK + [0] MBACALFIR_MBA_RECOVERABLE_ERROR = 0 // 
recoverable_error (0,1,0) + [1] MBACALFIR_MBA_NONRECOVERABLE_ERROR = 0 // checkstop (0,0,0) + [4] MBACALFIR_RCD_PARITY_ERROR = 0 // recoverable_error (0,1,0) + [10] MBACALFIR_SM_1HOT_ERR = 0 // checkstop (0,0,0) + */ + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION0, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + 0); + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION1, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR)); + mca_and_or(chip, id, mca_i, MBACALFIR_MASK, + ~(PPC_BIT(MBACALFIR_MBA_RECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_MBA_NONRECOVERABLE_ERROR) | + PPC_BIT(MBACALFIR_RCD_PARITY_ERROR) | + PPC_BIT(MBACALFIR_SM_1HOT_ERR)), + 0); + + /* + IOM0.IOM_PHY0_DDRPHY_FIR_ACTION0_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 = 0 + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 = 0 + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 = 0 // no ERROR_2! + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 = 0 + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 0 + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 0 + IOM0.IOM_PHY0_DDRPHY_FIR_ACTION1_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 = 1 + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 = 1 + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 = 1 + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 1 + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 = 1 + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 1 + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 1 + IOM0.IOM_PHY0_DDRPHY_FIR_MASK_REG + [54] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 = 0 // recoverable_error (0,1,0) + [55] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 = 0 // recoverable_error (0,1,0) + [57] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 = 0 // recoverable_error (0,1,0) + [58] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 = 0 // recoverable_error (0,1,0) + [59] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 = 0 // recoverable_error (0,1,0) + [60] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 = 0 // recoverable_error (0,1,0) + [61] IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 = 0 // recoverable_error (0,1,0) + */ + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION0_REG, + ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), + 0); + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_ACTION1_REG, + ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), + PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)); + mca_and_or(chip, id, mca_i, IOM_PHY0_DDRPHY_FIR_MASK_REG, + ~(PPC_BITMASK(54, 55) | PPC_BITMASK(57, 61)), + 0); + } +} + +/* + * Can't protect with do..while, this macro is supposed to exit 'for' loop in + * which it is invoked. As a side effect, it is used without semicolon. + * + * "I want to break free" - Freddie Mercury + */ +#define TEST_VREF(dp, scom) \ +if ((dp_mca_read(chip, mcs_ids[mcs_i], dp, mca_i, scom) & PPC_BITMASK(56, 62)) == \ + PPC_PLACE(1, 56, 7)) { \ + need_dll_workaround = true; \ + break; \ +} + +static void mss_ddr_phy_reset(uint8_t chip) +{ + int mcs_i, mca_i, dp; + long time; + bool need_dll_workaround; + + /* + * Most of this istep consists of: + * 1. asserting reset bit or starting calibration + * 2. delay + * 3. deasserting reset bit or checking the result of calibration + * + * These are done for each (functional and/or magic) MCA. 
Because the delay + * is required between points 1 and 3 for a given MCA, those delays are done + * outside of 'for each MCA' loops. They are still inside 'for each MCS' + * loop, unclear if we can break it into pieces too. + */ + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (mca_i != 0 && !mca->functional) + continue; + + /* MC01.PORT0.SRQ.MBA_FARB5Q = + [8] MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N = 0 + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, + ~PPC_BIT(MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N), 0); + + /* Drive all control signals to their inactive/idle state, or + * inactive value + IOM0.DDRPHY_DP16_SYSCLK_PR0_P0_{0,1,2,3,4} = + IOM0.DDRPHY_DP16_SYSCLK_PR1_P0_{0,1,2,3,4} = + [all] 0 + [48] reserved = 1 // MCA_DDRPHY_DP16_SYSCLK_PR0_P0_0_01_ENABLE + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR0_P0_0, + 0, PPC_BIT(48)); + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_SYSCLK_PR1_P0_0, + 0, PPC_BIT(48)); + } + + /* Assert reset to PHY for 32 memory clocks + MC01.PORT0.SRQ.MBA_CAL0Q = + [57] MBA_CAL0Q_RESET_RECOVER = 1 + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_CAL0Q, ~0, + PPC_BIT(MBA_CAL0Q_RESET_RECOVER)); + } + + delay_nck(chip, 32); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (mca_i != 0 && !mca->functional) + continue; + + /* Deassert reset_n + MC01.PORT0.SRQ.MBA_CAL0Q = + [57] MBA_CAL0Q_RESET_RECOVER = 0 + */ + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_CAL0Q, + ~PPC_BIT(MBA_CAL0Q_RESET_RECOVER), 0); + + /* Flush output drivers + IOM0.DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S{0,1} = + [all] 0 + [48] FLUSH = 1 + [50] INIT_IO = 1 + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, + PPC_BIT(FLUSH) | PPC_BIT(INIT_IO)); + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, + PPC_BIT(FLUSH) | PPC_BIT(INIT_IO)); + + /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0,1,2,3,4} = + [all] 0 + [51] FLUSH = 1 + [54] INIT_IO = 1 + [55] ADVANCE_PING_PONG = 1 + [58] DELAY_PING_PONG_HALF = 1 + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_DP16_CONFIG0_P0_0, 0, + PPC_BIT(DP16_CONFIG0_FLUSH) | + PPC_BIT(DP16_CONFIG0_INIT_IO) | + PPC_BIT(DP16_CONFIG0_ADVANCE_PING_PONG) | + PPC_BIT(DP16_CONFIG0_DELAY_PING_PONG_HALF)); + } + } + + delay_nck(chip, 32); + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (mca_i != 0 && !mca->functional) + continue; + + /* IOM0.DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S{0,1} = + [all] 0 + [48] FLUSH = 0 + [50] INIT_IO = 0 + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, 0); + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, + DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0, 0, 0); + + /* IOM0.DDRPHY_DP16_CONFIG0_P0_{0,1,2,3,4} = + [all] 0 + [51] FLUSH = 0 + [54] INIT_IO = 0 + [55] ADVANCE_PING_PONG = 1 + [58] DELAY_PING_PONG_HALF = 1 + */ + for (dp = 0; dp < 5; dp++) { + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_DP16_CONFIG0_P0_0, 0, + PPC_BIT(DP16_CONFIG0_ADVANCE_PING_PONG) | + 
PPC_BIT(DP16_CONFIG0_DELAY_PING_PONG_HALF)); + } + } + + /* ZCTL Enable */ + /* + * In Hostboot this is 'for each magic MCA'. We know there is only one + * magic, and it has always the same index. + IOM0.DDRPHY_PC_RESETS_P0 = + // Yet another documentation error: all bits in this register are marked as read-only + [51] ENABLE_ZCAL = 1 + */ + mca_and_or(chip, mcs_ids[mcs_i], 0, DDRPHY_PC_RESETS_P0, + ~0, PPC_BIT(ENABLE_ZCAL)); + + /* Maybe it would be better to add another 1us later instead of this. */ + delay_nck(chip, 1024); + + /* for each magic MCA */ + /* 50*10ns, but we don't have such precision. */ + time = wait_us(1, mca_read(chip, mcs_ids[mcs_i], 0, + DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0) & PPC_BIT(ZCAL_DONE)); + + if (!time) + die("ZQ calibration timeout\n"); + + /* DLL calibration */ + /* + * Here was an early return if no functional MCAs were found. Wouldn't + * that make whole MCBIST non-functional? + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* IOM0.DDRPHY_ADR_DLL_CNTL_P0_ADR32S{0,1} = + [48] INIT_RXDLL_CAL_RESET = 0 + */ + /* Has the same stride as DP16 */ + dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i, + DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i, + DDRPHY_ADR_DLL_CNTL_P0_ADR32S0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + + for (dp = 0; dp < 4; dp++) { + /* IOM0.DDRPHY_DP16_DLL_CNTL{0,1}_P0_{0,1,2,3} = + [48] INIT_RXDLL_CAL_RESET = 0 + */ + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_DLL_CNTL0_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, DDRPHY_DP16_DLL_CNTL1_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + } + /* Last DP16 is different + IOM0.DDRPHY_DP16_DLL_CNTL0_P0_4 + [48] INIT_RXDLL_CAL_RESET = 0 + IOM0.DDRPHY_DP16_DLL_CNTL1_P0_4 + [48] INIT_RXDLL_CAL_RESET = 1 + */ + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_DLL_CNTL0_P0_0, + ~PPC_BIT(INIT_RXDLL_CAL_RESET), 0); + dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i, + DDRPHY_DP16_DLL_CNTL1_P0_0, + ~0, PPC_BIT(INIT_RXDLL_CAL_RESET)); + } + + /* From Hostboot's comments: + * 32,772 dphy_nclk cycles from Reset=0 to VREG Calibration to exhaust all values + * 37,382 dphy_nclk cycles for full calibration to start and fail ("worst case") + * + * Why assume worst case instead of making the next timeout bigger? + */ + delay_nck(chip, 37382); + + /* + * The comment before poll says: + * > To keep things simple, we'll poll for the change in one of the ports. + * > Once that's completed, we'll check the others. If any one has failed, + * > or isn't notifying complete, we'll pop out an error + * + * The issue is that it only tests the first of the functional ports. + * Other ports may or may not have failed. Even if this times out, the + * rest of the function continues normally, without throwing any error... + * + * For now, leave it as it was done in Hostboot. 
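 A stricter variant + * would call test_dll_calib_done() for every functional MCA after the wait + * and die() if any of them still reports errors or is busy; noted here only + * as a possible follow-up, not as what Hostboot does.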
+		 */
+		/* timeout(50*10ns):
+		    if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0
+		        [48] DP_DLL_CAL_GOOD == 1
+		        [49] DP_DLL_CAL_ERROR == 0
+		        [50] DP_DLL_CAL_ERROR_FINE == 0
+		        [51] ADR_DLL_CAL_GOOD == 1
+		        [52] ADR_DLL_CAL_ERROR == 0
+		        [53] ADR_DLL_CAL_ERROR_FINE == 0) break	// success
+		    if (IOM0.DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0
+		        [49] DP_DLL_CAL_ERROR == 1 |
+		        [50] DP_DLL_CAL_ERROR_FINE == 1 |
+		        [52] ADR_DLL_CAL_ERROR == 1 |
+		        [53] ADR_DLL_CAL_ERROR_FINE == 1) break and do the workaround
+		*/
+		need_dll_workaround = false;
+		for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+			if (mem_data[chip].mcs[mcs_i].mca[mca_i].functional)
+				break;
+		}
+		/* 50*10ns, but we don't have such precision. */
+		time = wait_us(1, test_dll_calib_done(chip, mcs_i, mca_i,
+						      &need_dll_workaround));
+		if (!time)
+			die("DLL calibration timeout\n");
+
+		/*
+		 * The workaround is also required if any coarse VREG has value 1 after
+		 * calibration. The test from the poll above is repeated here - this time
+		 * for every MCA - but it doesn't wait for the DLL to finish calibrating
+		 * if that is still in progress. The registers below (also used in the
+		 * workaround) __must not__ be written to while hardware calibration is in
+		 * progress.
+		 */
+		for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+			mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i];
+
+			if (need_dll_workaround)
+				break;
+
+			if (!mca->functional)
+				continue;
+
+			/*
+			 * This assumes that by the time the first functional MCA has completed
+			 * successfully, all MCAs have completed (with or without errors). If
+			 * the first MCA failed, we won't even get here; we would have bailed
+			 * earlier because need_dll_workaround == true in that case.
+			 *
+			 * This is not safe if DLL calibration takes more time for other MCAs,
+			 * but this is the way Hostboot does it.
+			 */
+			test_dll_calib_done(chip, mcs_i, mca_i, &need_dll_workaround);
+
+			/*
+			    if (IOM0.DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0 |
+			        IOM0.DDRPHY_DP16_DLL_VREG_COARSE0_P0_{0,1,2,3,4} |
+			        IOM0.DDRPHY_DP16_DLL_VREG_COARSE1_P0_{0,1,2,3} |
+			        [56-62] REGS_RXDLL_VREG_DAC_COARSE = 1)	// The same offset for ADR and DP16
+			            do the workaround
+			*/
+			TEST_VREF(0, DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0)
+			TEST_VREF(4, DDRPHY_DP16_DLL_VREG_COARSE0_P0_0)
+			for (dp = 0; dp < 4; dp++) {
+				TEST_VREF(dp, DDRPHY_DP16_DLL_VREG_COARSE0_P0_0)
+				TEST_VREF(dp, DDRPHY_DP16_DLL_VREG_COARSE1_P0_0)
+			}
+		}
+
+		if (need_dll_workaround)
+			fix_bad_voltage_settings(mcs_i);
+
+		/* Start bang-bang-lock */
+		for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+			mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i];
+
+			if (!mca->functional)
+				continue;
+
+			/* Take dphy_nclk/SysClk alignment circuits out of reset and put into
+			 * continuous update mode
+			    IOM0.DDRPHY_ADR_SYSCLK_CNTL_PR_P0_ADR32S{0,1} =
+			    IOM0.DDRPHY_DP16_SYSCLK_PR0_P0_{0,1,2,3,4} =
+			    IOM0.DDRPHY_DP16_SYSCLK_PR1_P0_{0,1,2,3} =
+			    [all] 0
+			    [48-63] 0x8024	// From the DDR PHY workbook
+			*/
+			/* Has the same stride as DP16 */
+			dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i,
+				      ADR_SYSCLK_CNTRL_PR_P0_ADR32S0,
+				      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16));
+			dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i,
+				      ADR_SYSCLK_CNTRL_PR_P0_ADR32S0,
+				      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16));
+
+			for (dp = 0; dp < 4; dp++) {
+				dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i,
+					      DDRPHY_DP16_SYSCLK_PR0_P0_0,
+					      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16));
+				dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i,
+					      DDRPHY_DP16_SYSCLK_PR1_P0_0,
+					      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16));
+			}
+			dp_mca_and_or(chip, mcs_ids[mcs_i], 4, mca_i,
+				      DDRPHY_DP16_SYSCLK_PR0_P0_0,
+				      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8024, 48, 16));
+		}
+
+		/*
+		 * Wait at least 5932 dphy_nclk clock cycles to allow the dphy_nclk/SysClk
+		 * alignment circuit to perform initial alignment.
+		 */
+		delay_nck(chip, 5932);
+
+		/* Check for LOCK in {DP16,ADR}_SYSCLK_PR_VALUE */
+		/* 50*10ns, but we don't have such precision. */
+		/*
+		 * FIXME: Hostboot uses the timeout mentioned above for each of
+		 * the registers separately. It also checks them separately,
+		 * meaning that they don't have to be locked at the same time.
+		 * I am not sure if this is why the call below times out or if
+		 * there is another reason. Can these locks be lost or should
+		 * they hold until reset?
+		 *
+		 * Increasing the timeout helps (maybe that's just luck), but
+		 * this probably isn't the proper way to do it.
+		 */
+		time = wait_ms(1000, test_bb_lock(chip, mcs_i));
+		if (!time)
+			die("BB lock timeout\n");
+
+		for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+			mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i];
+
+			if (!mca->functional)
+				continue;
+
+			/* De-assert the SYSCLK_RESET
+			    IOM0.DDRPHY_PC_RESETS_P0 =
+			    [49] SYSCLK_RESET = 0
+			*/
+			mca_and_or(chip, mcs_ids[mcs_i], mca_i, DDRPHY_PC_RESETS_P0,
+				   ~PPC_BIT(SYSCLK_RESET), 0);
+
+			/* Reset the windage registers */
+			/*
+			 * According to the PHY team, resetting the read delay offset must be
+			 * done after SYSCLK_RESET.
+			 *
+			 * ATTR_MSS_VPD_MT_WINDAGE_RD_CTR holds a (signed) offset value in
+			 * picoseconds. It must be converted to phase rotator ticks. There are
+			 * 128 ticks per clock, and the clock period depends on memory
+			 * frequency.
+			 *
+			 * The result is rounded away from zero, so we have to add _or
+			 * subtract_ half a tick.
+			 *
+			 * In some cases we can skip this (40 register writes per port); from
+			 * the documentation:
+			 *
+			 * "This register must not be set to a nonzero value unless detailed
+			 * timing analysis shows that, for a particular configuration, the
+			 * read-centering algorithm places the sampling point off from the eye
+			 * center."
+			 *
+			 * ATTR_MSS_VPD_MT_WINDAGE_RD_CTR is outside of the defined values in
+			 * the VPD for Talos; it is set to 0 by default. Skipping this for now,
+			 * but it may be needed for generalized code.
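+			 *
+			 * A conversion sketch (the variable names are ours, not Hostboot's):
+			 * given the memory clock period tck_ps in picoseconds and the
+			 * attribute value offset_ps, with 128 ticks per clock and
+			 * round-half-away-from-zero:
+			 *
+			 *   int64_t half = (offset_ps < 0) ? -(tck_ps / 2) : (tck_ps / 2);
+			 *   int64_t offset_in_ticks_rounded =
+			 *           (offset_ps * 128 + half) / tck_ps;
+			 *
+			 * E.g. for DDR4-2666 (tck_ps = 750) and offset_ps = 10 this gives
+			 * (1280 + 375) / 750 = 2.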
+			 *
+			    // 0x80000{0,1,2,3}0C0701103F, +0x0400_0000_0000
+			    IOM0.DDRPHY_DP16_READ_DELAY_OFFSET0_RANK_PAIR{0,1,2,3}_P0_{0,1,2,3,4} =
+			    // 0x80000{0,1,2,3}0D0701103F, +0x0400_0000_0000
+			    IOM0.DDRPHY_DP16_READ_DELAY_OFFSET1_RANK_PAIR{0,1,2,3}_P0_{0,1,2,3,4} =
+			    [all] 0
+			    [49-55] OFFSET0 = offset_in_ticks_rounded
+			    [57-63] OFFSET1 = offset_in_ticks_rounded
+			*/
+
+			/*
+			 * Take the dphy_nclk/SysClk alignment circuit out of the Continuous
+			 * Update mode
+			    IOM0.DDRPHY_ADR_SYSCLK_CNTL_PR_P0_ADR32S{0,1} =	// 0x800080320701103F, +0x0400_0000_0000
+			    IOM0.DDRPHY_DP16_SYSCLK_PR0_P0_{0,1,2,3,4} =	// 0x800000070701103F, +0x0400_0000_0000
+			    IOM0.DDRPHY_DP16_SYSCLK_PR1_P0_{0,1,2,3} =		// 0x8000007F0701103F, +0x0400_0000_0000
+			    [all] 0
+			    [48-63] 0x8020	// From the DDR PHY workbook
+			*/
+			/* Has the same stride as DP16 */
+			dp_mca_and_or(chip, mcs_ids[mcs_i], 0, mca_i,
+				      ADR_SYSCLK_CNTRL_PR_P0_ADR32S0,
+				      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16));
+			dp_mca_and_or(chip, mcs_ids[mcs_i], 1, mca_i,
+				      ADR_SYSCLK_CNTRL_PR_P0_ADR32S0,
+				      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16));
+
+			for (dp = 0; dp < 4; dp++) {
+				dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i,
+					      DDRPHY_DP16_SYSCLK_PR0_P0_0,
+					      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16));
+				dp_mca_and_or(chip, mcs_ids[mcs_i], dp, mca_i,
+					      DDRPHY_DP16_SYSCLK_PR1_P0_0,
+					      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16));
+			}
+			dp_mca_and_or(chip, mcs_ids[mcs_i], 4, mca_i,
+				      DDRPHY_DP16_SYSCLK_PR0_P0_0,
+				      ~PPC_BITMASK(48, 63), PPC_PLACE(0x8020, 48, 16));
+		}
+
+		/* Wait at least 32 dphy_nclk clock cycles */
+		delay_nck(chip, 32);
+		/* Done bang-bang-lock */
+
+		/* Per J. Bialas, force_mclk_low can be deasserted */
+		for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+			mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i];
+
+			if (!mca->functional)
+				continue;
+
+			/* MC01.PORT0.SRQ.MBA_FARB5Q =
+			    [8] MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N = 1
+			*/
+			mca_and_or(chip, mcs_ids[mcs_i], mca_i, MBA_FARB5Q, ~0,
+				   PPC_BIT(MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N));
+		}
+
+		/* Workarounds */
+		/*
+		 * Does not apply to DD2, but even then it reads and writes back some
+		 * registers without modification.
+		 */
+		// mss::workarounds::dp16::after_phy_reset();
+
+		/*
+		 * Comments from Hostboot:
+		 *
+		 * "New for Nimbus - perform duty cycle clock distortion calibration
+		 * (DCD cal).
+		 * Per PHY team's characterization, the DCD cal needs to be run after DLL
+		 * calibration."
+		 *
+		 * However, it can be skipped based on ATTR_MSS_RUN_DCD_CALIBRATION,
+		 * and by default it is skipped.
+ */ + // mss::adr32s::duty_cycle_distortion_calibration(); + + /* FIR */ + check_during_phy_reset(chip, mcs_i); + fir_unmask(chip, mcs_i); + } +} + +void istep_13_9(uint8_t chips) +{ + uint8_t chip; + + report_istep(13, 9); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + mss_ddr_phy_reset(chip); + } +} diff --git a/src/soc/ibm/power9/istep_13_scom.h b/src/soc/ibm/power9/istep_13_scom.h new file mode 100644 index 00000000000..cdb7e729c6d --- /dev/null +++ b/src/soc/ibm/power9/istep_13_scom.h @@ -0,0 +1,1005 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef ISTEP_13_SCOM_H +#define ISTEP_13_SCOM_H + +#include + +#define PSU_HOST_SBE_MBOX0_REG 0x000D0050 +#define PSU_HOST_SBE_MBOX1_REG 0x000D0051 + +#define PSU_SBE_DOORBELL_REG 0x000D0060 +#define PSU_SBE_DOORBELL_REG_WAND 0x000D0061 +#define PSU_SBE_DOORBELL_REG_WOR 0x000D0062 +#define PSU_HOST_DOORBELL_REG 0x000D0063 +#define PSU_HOST_DOORBELL_REG_WAND 0x000D0064 +#define PSU_HOST_DOORBELL_REG_WOR 0x000D0065 + +#define MCD0_FIR_MASK_REG 0x03011003 +#define MCD1_FIR_MASK_REG 0x03011403 + +#define NEST_CPLT_CTRL1 0x05000001 + +#define MCSLOW_CPLT_CTRL0 0x07000000 +#define MCSLOW_CPLT_CTRL0_WOR 0x07000010 +#define MCSLOW_CPLT_CTRL0_WCLEAR 0x07000020 +#define MCSLOW_CPLT_CTRL0_CTRL_CC_ABSTCLK_MUXSEL_DC 0 +#define MCSLOW_CPLT_CTRL0_TC_UNIT_SYNCCLK_MUXSEL_DC 1 +#define MCSLOW_CPLT_CTRL0_CTRL_CC_FLUSHMODE_INH_DC 2 +#define MCSLOW_CPLT_CTRL0_CTRL_CC_FORCE_ALIGN_DC 3 + +#define MCSLOW_CPLT_CTRL1 0x07000001 +#define MCSLOW_CPLT_CTRL1_WOR 0x07000011 +#define MCSLOW_CPLT_CTRL1_WCLEAR 0x07000021 + +#define MCSLOW_CPLT_CONF0 0x07000008 + +#define MCSLOW_CPLT_STAT0 0x07000100 +#define MCSLOW_CPLT_STAT0_CC_CTRL_OPCG_DONE_DC 8 +#define MCSLOW_CPLT_STAT0_CC_CTRL_CHIPLET_IS_ALIGNED_DC 9 + +#define MBACALFIR 0x07010900 +#define MBACALFIR_MASK 0x07010903 +#define MBACALFIR_ACTION0 0x07010906 +#define MBACALFIR_ACTION1 0x07010907 +#define MBACALFIR_MBA_RECOVERABLE_ERROR 0 +#define MBACALFIR_MBA_NONRECOVERABLE_ERROR 1 +#define MBACALFIR_REFRESH_OVERRUN 2 +#define MBACALFIR_RCD_PARITY_ERROR 4 +#define MBACALFIR_DDR_CAL_TIMEOUT_ERR 5 +#define MBACALFIR_DDR_CAL_RESET_TIMEOUT 7 +#define MBACALFIR_DDR_MBA_EVENT_N 8 +#define MBACALFIR_WRQ_RRQ_HANG_ERR 9 +#define MBACALFIR_SM_1HOT_ERR 10 +#define MBACALFIR_ASYNC_IF_ERROR 11 +#define MBACALFIR_CMD_PARITY_ERROR 12 +#define MBACALFIR_PORT_FAIL 13 +#define MBACALFIR_RCD_CAL_PARITY_ERROR 14 + +#define MBA_DSM0Q 0x0701090A +#define MBA_DSM0Q_CFG_RODT_START_DLY 0 +#define MBA_DSM0Q_CFG_RODT_START_DLY_LEN 6 +#define MBA_DSM0Q_CFG_RODT_END_DLY 6 +#define MBA_DSM0Q_CFG_RODT_END_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WODT_START_DLY 12 +#define MBA_DSM0Q_CFG_WODT_START_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WODT_END_DLY 18 +#define MBA_DSM0Q_CFG_WODT_END_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WRDONE_DLY 24 +#define MBA_DSM0Q_CFG_WRDONE_DLY_LEN 6 +#define MBA_DSM0Q_CFG_WRDATA_DLY 30 +#define MBA_DSM0Q_CFG_WRDATA_DLY_LEN 6 +#define MBA_DSM0Q_CFG_RDTAG_DLY 36 +#define MBA_DSM0Q_CFG_RDTAG_DLY_LEN 6 + +#define MBA_TMR0Q 0x0701090B +#define MBA_TMR0Q_RRDM_DLY 0 +#define MBA_TMR0Q_RRDM_DLY_LEN 4 +#define MBA_TMR0Q_RRSMSR_DLY 4 +#define MBA_TMR0Q_RRSMSR_DLY_LEN 4 +#define MBA_TMR0Q_RRSMDR_DLY 8 +#define MBA_TMR0Q_RRSMDR_DLY_LEN 4 +#define MBA_TMR0Q_RROP_DLY 12 +#define MBA_TMR0Q_RROP_DLY_LEN 4 +#define MBA_TMR0Q_WWDM_DLY 16 +#define MBA_TMR0Q_WWDM_DLY_LEN 4 +#define MBA_TMR0Q_WWSMSR_DLY 20 +#define MBA_TMR0Q_WWSMSR_DLY_LEN 4 +#define MBA_TMR0Q_WWSMDR_DLY 24 +#define MBA_TMR0Q_WWSMDR_DLY_LEN 4 +#define MBA_TMR0Q_WWOP_DLY 
28 +#define MBA_TMR0Q_WWOP_DLY_LEN 4 +#define MBA_TMR0Q_RWDM_DLY 32 +#define MBA_TMR0Q_RWDM_DLY_LEN 5 +#define MBA_TMR0Q_RWSMSR_DLY 37 +#define MBA_TMR0Q_RWSMSR_DLY_LEN 5 +#define MBA_TMR0Q_RWSMDR_DLY 42 +#define MBA_TMR0Q_RWSMDR_DLY_LEN 5 +#define MBA_TMR0Q_WRDM_DLY 47 +#define MBA_TMR0Q_WRDM_DLY_LEN 4 +#define MBA_TMR0Q_WRSMSR_DLY 51 +#define MBA_TMR0Q_WRSMSR_DLY_LEN 6 +#define MBA_TMR0Q_WRSMDR_DLY 57 +#define MBA_TMR0Q_WRSMDR_DLY_LEN 6 + +#define MBA_TMR1Q 0x0701090C +#define MBA_TMR1Q_RRSBG_DLY 0 +#define MBA_TMR1Q_RRSBG_DLY_LEN 4 +#define MBA_TMR1Q_WRSBG_DLY 4 +#define MBA_TMR1Q_WRSBG_DLY_LEN 6 +#define MBA_TMR1Q_CFG_TFAW 10 +#define MBA_TMR1Q_CFG_TFAW_LEN 6 +#define MBA_TMR1Q_CFG_TRCD 16 +#define MBA_TMR1Q_CFG_TRCD_LEN 5 +#define MBA_TMR1Q_CFG_TRP 21 +#define MBA_TMR1Q_CFG_TRP_LEN 5 +#define MBA_TMR1Q_CFG_TRAS 26 +#define MBA_TMR1Q_CFG_TRAS_LEN 6 +#define MBA_TMR1Q_CFG_WR2PRE 41 +#define MBA_TMR1Q_CFG_WR2PRE_LEN 7 +#define MBA_TMR1Q_CFG_RD2PRE 48 +#define MBA_TMR1Q_CFG_RD2PRE_LEN 4 +#define MBA_TMR1Q_TRRD 52 +#define MBA_TMR1Q_TRRD_LEN 4 +#define MBA_TMR1Q_TRRD_SBG 56 +#define MBA_TMR1Q_TRRD_SBG_LEN 4 +#define MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY 60 +#define MBA_TMR1Q_CFG_ACT_TO_DIFF_RANK_DLY_LEN 4 + +#define MBA_WRQ0Q 0x0701090D +#define MBA_WRQ0Q_CFG_WRQ_FIFO_MODE 5 +#define MBA_WRQ0Q_CFG_WRQ_FIFO_MODE_LEN 1 +#define MBA_WRQ0Q_CFG_DISABLE_WR_PG_MODE 6 +#define MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING 55 +#define MBA_WRQ0Q_CFG_WRQ_ACT_NUM_WRITES_PENDING_LEN 4 + +#define MBA_RRQ0Q 0x0701090E +#define MBA_RRQ0Q_CFG_RRQ_FIFO_MODE 6 +#define MBA_RRQ0Q_CFG_RRQ_FIFO_MODE_LEN 1 +#define MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING 57 +#define MBA_RRQ0Q_CFG_RRQ_ACT_NUM_READS_PENDING_LEN 4 + +#define MBA_CAL0Q 0x0701090F +#define MBA_CAL0Q_RESET_RECOVER 57 + +#define MBA_CAL3Q 0x07010912 + +#define MBA_FARB0Q 0x07010913 +#define MBA_FARB0Q_CFG_2N_ADDR 17 +#define MBA_FARB0Q_CFG_PARITY_AFTER_CMD 38 +#define MBA_FARB0Q_CFG_RCD_PROTECTION_TIME 48 +#define MBA_FARB0Q_CFG_RCD_PROTECTION_TIME_LEN 6 +#define MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY 54 +#define MBA_FARB0Q_CFG_OE_ALWAYS_ON 55 +#define MBA_FARB0Q_CFG_PORT_FAIL_DISABLE 57 +#define MBA_FARB0Q_CFG_OPT_RD_SIZE 61 +#define MBA_FARB0Q_CFG_OPT_RD_SIZE_LEN 3 + +#define MBA_FARB1Q 0x07010914 +#define MBA_FARB1Q_CFG_SLOT0_S0_CID 0 +#define MBA_FARB1Q_CFG_SLOT0_S1_CID 3 +#define MBA_FARB1Q_CFG_SLOT0_S2_CID 6 +#define MBA_FARB1Q_CFG_SLOT0_S3_CID 9 +#define MBA_FARB1Q_CFG_SLOT0_S4_CID 12 +#define MBA_FARB1Q_CFG_SLOT0_S5_CID 15 +#define MBA_FARB1Q_CFG_SLOT0_S6_CID 18 +#define MBA_FARB1Q_CFG_SLOT0_S7_CID 21 +#define MBA_FARB1Q_CFG_SLOT1_S0_CID 24 +#define MBA_FARB1Q_CFG_SLOT1_S1_CID 27 +#define MBA_FARB1Q_CFG_SLOT1_S2_CID 30 +#define MBA_FARB1Q_CFG_SLOT1_S3_CID 33 +#define MBA_FARB1Q_CFG_SLOT1_S4_CID 36 +#define MBA_FARB1Q_CFG_SLOT1_S5_CID 39 +#define MBA_FARB1Q_CFG_SLOT1_S6_CID 42 +#define MBA_FARB1Q_CFG_SLOT1_S7_CID 45 + +#define MBA_FARB2Q 0x07010915 +#define MBA_FARB2Q_CFG_RANK0_RD_ODT 0 +#define MBA_FARB2Q_CFG_RANK0_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK1_RD_ODT 4 +#define MBA_FARB2Q_CFG_RANK1_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK2_RD_ODT 8 +#define MBA_FARB2Q_CFG_RANK2_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK3_RD_ODT 12 +#define MBA_FARB2Q_CFG_RANK3_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK4_RD_ODT 16 +#define MBA_FARB2Q_CFG_RANK4_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK5_RD_ODT 20 +#define MBA_FARB2Q_CFG_RANK5_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK6_RD_ODT 24 +#define MBA_FARB2Q_CFG_RANK6_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK7_RD_ODT 28 
+#define MBA_FARB2Q_CFG_RANK7_RD_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK0_WR_ODT 32 +#define MBA_FARB2Q_CFG_RANK0_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK1_WR_ODT 36 +#define MBA_FARB2Q_CFG_RANK1_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK2_WR_ODT 40 +#define MBA_FARB2Q_CFG_RANK2_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK3_WR_ODT 44 +#define MBA_FARB2Q_CFG_RANK3_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK4_WR_ODT 48 +#define MBA_FARB2Q_CFG_RANK4_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK5_WR_ODT 52 +#define MBA_FARB2Q_CFG_RANK5_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK6_WR_ODT 56 +#define MBA_FARB2Q_CFG_RANK6_WR_ODT_LEN 4 +#define MBA_FARB2Q_CFG_RANK7_WR_ODT 60 +#define MBA_FARB2Q_CFG_RANK7_WR_ODT_LEN 4 + +#define MBA_FARB3Q 0x07010916 +#define MBA_FARB3Q_CFG_NM_N_PER_SLOT 0 +#define MBA_FARB3Q_CFG_NM_N_PER_SLOT_LEN 15 +#define MBA_FARB3Q_CFG_NM_N_PER_PORT 15 +#define MBA_FARB3Q_CFG_NM_N_PER_PORT_LEN 16 +#define MBA_FARB3Q_CFG_NM_M 31 +#define MBA_FARB3Q_CFG_NM_M_LEN 14 +#define MBA_FARB3Q_CFG_NM_RAS_WEIGHT 47 +#define MBA_FARB3Q_CFG_NM_CAS_WEIGHT 48 +#define MBA_FARB3Q_CFG_NM_CAS_WEIGHT_LEN 3 +#define MBA_FARB3Q_CFG_NM_CHANGE_AFTER_SYNC 53 + +#define MBA_FARB4Q 0x07010917 +#define MBA_FARB4Q_EMERGENCY_N 27 +#define MBA_FARB4Q_EMERGENCY_N_LEN 15 +#define MBA_FARB4Q_EMERGENCY_M 42 +#define MBA_FARB4Q_EMERGENCY_M_LEN 14 + +#define MBA_FARB5Q 0x07010918 +#define MBA_FARB5Q_CFG_DDR_DPHY_NCLK 0 +#define MBA_FARB5Q_CFG_DDR_DPHY_NCLK_LEN 2 +#define MBA_FARB5Q_CFG_DDR_DPHY_PCLK 2 +#define MBA_FARB5Q_CFG_DDR_DPHY_PCLK_LEN 2 +#define MBA_FARB5Q_CFG_DDR_RESETN 4 +#define MBA_FARB5Q_CFG_CCS_ADDR_MUX_SEL 5 +#define MBA_FARB5Q_CFG_CCS_INST_RESET_ENABLE 6 +#define MBA_FARB5Q_CFG_FORCE_MCLK_LOW_N 8 + +#define MBAREF0Q 0x07010932 +#define MBAREF0Q_CFG_REFRESH_ENABLE 0 +#define MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD 5 +#define MBAREF0Q_CFG_REFRESH_PRIORITY_THRESHOLD_LEN 3 +#define MBAREF0Q_CFG_REFRESH_INTERVAL 8 +#define MBAREF0Q_CFG_REFRESH_INTERVAL_LEN 11 +#define MBAREF0Q_CFG_TRFC 30 +#define MBAREF0Q_CFG_TRFC_LEN 10 +#define MBAREF0Q_CFG_REFR_TSV_STACK 40 +#define MBAREF0Q_CFG_REFR_TSV_STACK_LEN 10 +#define MBAREF0Q_CFG_REFR_CHECK_INTERVAL 50 +#define MBAREF0Q_CFG_REFR_CHECK_INTERVAL_LEN 11 + +#define MBARPC0Q 0x07010934 +#define MBARPC0Q_CFG_MIN_MAX_DOMAINS_ENABLE 2 +#define MBARPC0Q_CFG_MIN_MAX_DOMAINS 5 +#define MBARPC0Q_CFG_PUP_AVAIL 6 +#define MBARPC0Q_CFG_PUP_AVAIL_LEN 5 +#define MBARPC0Q_CFG_PDN_PUP 11 +#define MBARPC0Q_CFG_PDN_PUP_LEN 5 +#define MBARPC0Q_CFG_PUP_PDN 16 +#define MBARPC0Q_CFG_PUP_PDN_LEN 5 +#define MBARPC0Q_RESERVED_21 21 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_ENABLE 22 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME 23 +#define MBARPC0Q_CFG_MIN_DOMAIN_REDUCTION_TIME_LEN 10 + +#define MBASTR0Q 0x07010935 +#define MBASTR0Q_CFG_STR_ENABLE 0 +#define MBASTR0Q_CFG_ENTER_STR_TIME 2 +#define MBASTR0Q_CFG_ENTER_STR_TIME_LEN 10 +#define MBASTR0Q_CFG_TCKESR 12 +#define MBASTR0Q_CFG_TCKESR_LEN 5 +#define MBASTR0Q_CFG_TCKSRE 17 +#define MBASTR0Q_CFG_TCKSRE_LEN 5 +#define MBASTR0Q_CFG_TCKSRX 22 +#define MBASTR0Q_CFG_TCKSRX_LEN 5 +#define MBASTR0Q_CFG_TXSDLL 27 +#define MBASTR0Q_CFG_TXSDLL_LEN 11 +#define MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL 46 +#define MBASTR0Q_CFG_SAFE_REFRESH_INTERVAL_LEN 11 + +#define ECC_FIR_MASK 0x07010A03 +#define ECC_FIR_ACTION0 0x07010A06 +#define ECC_FIR_ACTION1 0x07010A07 +#define FIR_MAINLINE_AUE 13 +#define FIR_MAINLINE_UE 14 +#define FIR_MAINLINE_RCD 15 +#define FIR_MAINLINE_IAUE 16 +#define FIR_MAINLINE_IUE 17 +#define ECC_FIR_MAINTENANCE_AUE 33 +#define 
ECC_FIR_MAINTENANCE_IAUE 36 +#define MCA_FIR_MAINTENANCE_IUE 37 +#define ECC_FIR_SCOM_PARITY_CLASS_STATUS 41 +#define ECC_FIR_SCOM_PARITY_CLASS_RECOVERABLE 42 +#define ECC_FIR_WRITE_RMW_CE 45 + +#define RECR 0x07010A0A +#define MBSECCQ_DISABLE_MEMORY_ECC_CHECK_CORRECT 0 +#define MBSECCQ_DISABLE_MEMORY_ECC_CORRECT 1 +#define MBSECCQ_READ_POINTER_DELAY 6 +#define MBSECCQ_READ_POINTER_DELAY_LEN 3 +#define MBSECCQ_VAL_TO_DATA_DELAY 16 +#define MBSECCQ_VAL_TO_DATA_DELAY_LEN 3 +#define MBSECCQ_DELAY_VALID_1X 19 +#define MBSECCQ_NEST_VAL_TO_DATA_DELAY 20 +#define MBSECCQ_NEST_VAL_TO_DATA_DELAY_LEN 2 +#define MBSECCQ_DELAY_NONBYPASS 22 +#define MBSECCQ_ENABLE_UE_NOISE_WINDOW 26 +#define MBSECCQ_ENABLE_TCE_CORRECTION 27 +#define MBSECCQ_USE_ADDRESS_HASH 29 +#define MBSECCQ_DATA_INVERSION 30 +#define MBSECCQ_DATA_INVERSION_LEN 2 +#define MBSECCQ_RESERVED_40 40 + +#define DBGR 0x07010A0B +#define DBGR_ECC_WAT_ACTION_SELECT 9 +#define DBGR_ECC_WAT_SOURCE 11 + +#define FWMS0 0x07010A18 + +#define AACR 0x07010A29 +#define AACR_ADDRESS 1 +#define AACR_ADDRESS_LEN 9 +#define AACR_AUTOINC 10 +#define AACR_ECCGEN 11 + +#define AADR 0x07010A2A +#define AAER 0x07010A2B + +#define WRTCFG 0x07010A38 + +#define IOM_PHY0_DDRPHY_FIR_REG 0x07011000 +#define IOM_PHY0_DDRPHY_FIR_MASK_REG 0x07011003 +#define IOM_PHY0_DDRPHY_FIR_ACTION0_REG 0x07011006 +#define IOM_PHY0_DDRPHY_FIR_ACTION1_REG 0x07011007 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_0 54 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_1 55 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_2 56 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_3 57 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_4 58 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_5 59 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_6 60 +#define IOM_PHY0_DDRPHY_FIR_REG_DDR_FIR_ERROR_7 61 + +#define MCBISTFIR 0x07012300 +#define MCBISTFIRMASK 0x07012303 +#define MCBISTFIRACT0 0x07012306 +#define MCBISTFIRACT1 0x07012307 +#define MCBISTFIRQ_COMMAND_ADDRESS_TIMEOUT 1 +#define MCBISTFIRQ_INTERNAL_FSM_ERROR 2 +#define MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC 3 +#define MCBISTFIRQ_MCBIST_PROGRAM_COMPLETE 10 +#define MCBISTFIRQ_SCOM_RECOVERABLE_REG_PE 13 +#define MCBISTFIRQ_SCOM_FATAL_REG_PE 14 + +#define CCS_INST_ARR0_00 0x07012315 +#define CCS_INST_ARR0_00_CCS_DDR_ACTN 20 +#define CCS_INST_ARR0_00_CCS_DDR_CKE 24 +#define CCS_INST_ARR0_00_CCS_DDR_CKE_LEN 4 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_0_1 32 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_0_1_LEN 2 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_2_3 36 +#define CCS_INST_ARR0_00_CCS_DDR_CSN_2_3_LEN 2 +#define CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE 56 +#define CCS_INST_ARR0_00_CCS_DDR_CAL_TYPE_LEN 4 + +#define CCS_INST_ARR1_00 0x07012335 +#define CCS_INST_ARR1_00_IDLES 0 +#define CCS_INST_ARR1_00_IDLES_LEN 16 +#define CCS_INST_ARR1_00_DDR_CAL_RANK 53 +#define CCS_INST_ARR1_00_DDR_CAL_RANK_LEN 4 +#define CCS_INST_ARR1_00_DDR_CALIBRATION_ENABLE 57 +#define CCS_INST_ARR1_00_CCS_END 58 +#define CCS_INST_ARR1_00_GOTO_CMD 59 +#define CCS_INST_ARR1_00_GOTO_CMD_LEN 5 + +#define MBSEC0Q 0x07012355 +#define MBSEC1Q 0x07012356 + +#define MBSTRQ 0x07012357 +#define MBSTRQ_CFG_PAUSE_ON_MPE 34 +#define MBSTRQ_CFG_PAUSE_ON_UE 35 +#define MBSTRQ_CFG_PAUSE_ON_AUE 37 +#define MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE 55 +#define MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE 56 +#define MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE 57 + +#define MCBSTATQ 0x07012366 + +#define WATCFG0AQ 0x07012380 +#define WATCFG0AQ_CFG_WAT_EVENT_SEL 0 +#define WATCFG0AQ_CFG_WAT_EVENT_SEL_LEN 48 + +#define WATCFG0BQ 
0x07012381 +#define WATCFG0BQ_CFG_WAT_MSKA 0 +#define WATCFG0BQ_CFG_WAT_MSKA_LEN 44 +#define WATCFG0BQ_CFG_WAT_CNTL 44 +#define WATCFG0BQ_CFG_WAT_CNTL_LEN 17 + +#define WATCFG0DQ 0x07012383 +#define WATCFG0DQ_CFG_WAT_PATA 0 +#define WATCFG0DQ_CFG_WAT_PATA_LEN 44 + +#define WATCFG3AQ 0x0701238F +#define WATCFG3AQ_CFG_WAT_EVENT_SEL 0 +#define WATCFG3AQ_CFG_WAT_EVENT_SEL_LEN 48 + +#define WATCFG3BQ 0x07012390 +#define WATCFG3BQ_CFG_WAT_MSKA 0 +#define WATCFG3BQ_CFG_WAT_MSKA_LEN 44 +#define WATCFG3BQ_CFG_WAT_CNTL 44 +#define WATCFG3BQ_CFG_WAT_CNTL_LEN 17 + +#define CCS_CNTLQ 0x070123A5 +#define CCS_CNTLQ_CCS_START 0 +#define CCS_CNTLQ_CCS_STOP 1 + +#define CCS_STATQ 0x070123A6 +#define CCS_STATQ_CCS_IP 0 +#define CCS_STATQ_CCS_DONE 1 + +#define CCS_MODEQ 0x070123A7 +#define CCS_MODEQ_CCS_STOP_ON_ERR 0 +#define CCS_MODEQ_CCS_UE_DISABLE 1 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT 8 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_LEN 16 +#define CCS_MODEQ_CFG_CCS_PARITY_AFTER_CMD 24 +#define CCS_MODEQ_COPY_CKE_TO_SPARE_CKE 26 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT 30 +#define CCS_MODEQ_DDR_CAL_TIMEOUT_CNT_MULT_LEN 2 + +#define MCBMR0Q 0x070123A8 + +#define MCBPARMQ 0x070123AF + +#define MCBDRSRQ 0x070123BC +#define MCBDRCRQ 0x070123BD +#define MCBFD0Q 0x070123BE + +#define MCBSA0Q 0x070123CC +#define MCBEA0Q 0x070123CE + +#define MCBAGRAQ 0x070123D6 +#define MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN 10 +#define MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES 12 + +#define MCB_CNTLQ 0x070123DB +#define MCB_CNTLQ_MCB_START 0 + +#define MCB_CNTLSTATQ 0x070123DC +#define MCB_CNTLSTATQ_MCB_IP 0 +#define MCB_CNTLSTATQ_MCB_DONE 1 +#define MCB_CNTLSTATQ_MCB_FAIL 2 + +#define MCBCFGQ 0x070123E0 +#define MCBCFGQ_CFG_LOG_COUNTS_IN_TRACE 36 +#define MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE 57 +#define MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN 2 + +#define DBGCFG0Q 0x070123E8 +#define DBGCFG0Q_CFG_DBG_ENABLE 0 +#define DBGCFG0Q_CFG_DBG_PICK_MCBIST01 23 +#define DBGCFG0Q_CFG_DBG_PICK_MCBIST01_LEN 11 + +#define DBGCFG1Q 0x070123E9 +#define DBGCFG1Q_CFG_WAT_ENABLE 0 + +#define DBGCFG2Q 0x070123EA +#define DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL 0 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT0_SEL_LEN 20 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL 20 +#define DBGCFG2Q_CFG_WAT_LOC_EVENT1_SEL_LEN 20 + +#define DBGCFG3Q 0x070123EB +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL 20 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT0_SEL_LEN 3 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL 23 +#define DBGCFG3Q_CFG_WAT_GLOB_EVENT1_SEL_LEN 3 +#define DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE 37 +#define DBGCFG3Q_CFG_WAT_ACT_SET_SPATTN_PULSE_LEN 4 + +#define MCSLOW_SYNC_CONFIG 0x07030000 +#define MCSLOW_SYNC_CONFIG_LISTEN_TO_SYNC_PULSE_DIS 4 +#define MCSLOW_SYNC_CONFIG_CLEAR_CHIPLET_IS_ALIGNED 7 + +#define MCSLOW_OPCG_ALIGN 0x07030001 +#define MCSLOW_OPCG_ALIGN_INOP_ALIGN 0 +#define MCSLOW_OPCG_ALIGN_INOP_ALIGN_LEN 4 +#define MCSLOW_OPCG_ALIGN_INOP_WAIT 19 +#define MCSLOW_OPCG_ALIGN_SCAN_RATIO 47 +#define MCSLOW_OPCG_ALIGN_SCAN_RATIO_LEN 5 +#define MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES 52 +#define MCSLOW_OPCG_ALIGN_OPCG_WAIT_CYCLES_LEN 12 + +#define MCSLOW_OPCG_REG0 0x07030002 +#define MCSLOW_OPCG_RUNN_MODE 0 +#define MCSLOW_OPCG_RUN_SCAN0 2 + +#define MCSLOW_SCAN_REGION_TYPE 0x07030005 +#define MCSLOW_SCAN_REGION_TYPE_SCAN_REGION_UNIT10 14 +#define MCSLOW_SCAN_REGION_TYPE_SCAN_TYPE_BNDY 56 + +#define MCSLOW_CLK_REGION 0x07030006 +#define MCSLOW_CLK_REGION_CLOCK_CMD 0 +#define MCSLOW_CLK_REGION_CLOCK_CMD_LEN 2 +#define MCSLOW_CLK_REGION_CLOCK_REGION_UNIT10 14 +#define MCSLOW_CLK_REGION_SEL_THOLD_SL 48 
+#define MCSLOW_CLK_REGION_SEL_THOLD_NSL 49 +#define MCSLOW_CLK_REGION_SEL_THOLD_ARY 50 + +#define MCSLOW_CLOCK_STAT_SL 0x07030008 +#define MCSLOW_CLOCK_STAT_NSL 0x07030009 +#define MCSLOW_CLOCK_STAT_ARY 0x0703000A + +#define MCSLOW_XFIR 0x07040000 +#define MCSLOW_FIR_MASK 0x07040002 +#define MCSLOW_LOCAL_FIR 0x0704000A +#define MCSLOW_LOCAL_FIR_MASK 0x0704000D +#define MCSLOW_LOCAL_FIR_ACTION0 0x07040010 +#define MCSLOW_LOCAL_FIR_ACTION1 0x07040011 + +#define PCBSLMC01_MULTICAST_GROUP_1 0x070F0001 +#define PCBSLMC01_MULTICAST_GROUP_2 0x070F0002 + +#define PCBSLMC01_PLL_LOCK_REG 0x070F0019 + +#define PCBSLMC01_SLAVE_CONFIG_REG 0x070F001E + +#define PCBSLMC01_ERROR_REG 0x070F001F + +#define PCBSLMC01_NET_CTRL0 0x070F0040 +#define PCBSLMC01_NET_CTRL0_WAND 0x070F0041 +#define PCBSLMC01_NET_CTRL0_WOR 0x070F0042 +#define PCBSLMC01_NET_CTRL0_PCB_EP_RESET 1 +#define PCBSLMC01_NET_CTRL0_PLL_TEST_EN 3 +#define PCBSLMC01_NET_CTRL0_PLL_RESET 4 +#define PCBSLMC01_NET_CTRL0_PLL_BYPASS 5 +#define PCBSLMC01_NET_CTRL0_FENCE_EN 18 + +#define PCBSLMC01_NET_CTRL1 0x070F0044 +#define PCBSLMC01_NET_CTRL1_WAND 0x070F0045 +#define PCBSLMC01_NET_CTRL1_WOR 0x070F0046 +#define PCBSLMC01_NET_CTRL1_CLK_DCC_BYPASS_EN 1 +#define PCBSLMC01_NET_CTRL1_CLK_PDLY_BYPASS_EN 2 + +#define DDRPHY_DP16_DQ_BIT_ENABLE0_P0_0 0x800000000701103F +#define DDRPHY_DP16_DFT_PDA_CONTROL_P0_0 0x800000010701103F + +#define DDRPHY_DP16_CONFIG0_P0_0 0x800000030701103F +#define DP16_CONFIG0_FLUSH 51 +#define DP16_CONFIG0_INIT_IO 54 +#define DP16_CONFIG0_ADVANCE_PING_PONG 55 +#define DP16_CONFIG0_DELAY_PING_PONG_HALF 58 + +/* Names come from documentation, they are inconsistent there too. */ +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR0_P0_0 0x800000040701103F +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR1_P0_0 0x800001040701103F +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR2_P0_0 0x800002040701103F +#define DDRPHY_DP16_READ_CLOCK_RANK_PAIR3_P0_0 0x800003040701103F +#define DDRPHY_DP16_WRCLK_EN_RP0_P0_0 0x800000050701103F +#define DDRPHY_DP16_WRCLK_EN_RP1_P0_0 0x800001050701103F +#define DDRPHY_DP16_WRCLK_EN_RP2_P0_0 0x800002050701103F +#define DDRPHY_DP16_WRCLK_EN_RP3_P0_0 0x800003050701103F + +#define DDRPHY_DP16_SYSCLK_PR0_P0_0 0x800000070701103F +#define DDRPHY_DP16_SYSCLK_PR1_P0_0 0x8000007F0701103F + +#define DDRPHY_DP16_DQS_RD_PHASE_SELECT_RANK_PAIR0_P0_0 0x800000090701103F + +#define DDRPHY_DP16_DRIFT_LIMITS_P0_0 0x8000000A0701103F +#define DD2_BLUE_EXTEND_RANGE 48 +#define DD2_BLUE_EXTEND_RANGE_LEN 2 + +#define DDRPHY_DP16_RD_LVL_STATUS0_P0_0 0x8000000E0701103F +#define DDRPHY_DP16_RD_LVL_STATUS2_P0_0 0x800000100701103F + +#define DDRPHY_DP16_RD_DIA_CONFIG5_P0_0 0x800000120701103F +#define DYN_MCTERM_CNTL_EN 49 +#define PER_CAL_UPDATE_DISABLE 52 +#define PERCAL_PWR_DIS 59 + +#define DDRPHY_DP16_DQS_GATE_DELAY_RP0_P0_0 0x800000130701103F +#define DDRPHY_DP16_DQS_GATE_DELAY_RP1_P0_0 0x800001130701103F +#define DDRPHY_DP16_DQS_GATE_DELAY_RP2_P0_0 0x800002130701103F +#define DDRPHY_DP16_DQS_GATE_DELAY_RP3_P0_0 0x800003130701103F + +#define DDRPHY_DP16_RD_STATUS0_P0_0 0x800000140701103F + +#define DDRPHY_DP16_RD_VREF_DAC_0_P0_0 0x800000160701103F +#define DDRPHY_DP16_RD_VREF_DAC_1_P0_0 0x8000001F0701103F +#define DDRPHY_DP16_RD_VREF_DAC_2_P0_0 0x800000C00701103F +#define DDRPHY_DP16_RD_VREF_DAC_3_P0_0 0x800000C10701103F +#define DDRPHY_DP16_RD_VREF_DAC_4_P0_0 0x800000C20701103F +#define DDRPHY_DP16_RD_VREF_DAC_5_P0_0 0x800000C30701103F +#define DDRPHY_DP16_RD_VREF_DAC_6_P0_0 0x800000C40701103F +#define DDRPHY_DP16_RD_VREF_DAC_7_P0_0 0x800000C50701103F 
+#define BIT0_VREF_DAC 49 +#define BIT0_VREF_DAC_LEN 7 +#define BIT1_VREF_DAC 57 +#define BIT1_VREF_DAC_LEN 7 + +#define DDRPHY_DP16_WR_ERROR0_P0_0 0x8000001B0701103F + +#define DDRPHY_DP16_CTLE_CTL_BYTE0_P0_0 0x800000200701103F +#define NIB_0_DQSEL_CAP 48 +#define NIB_0_DQSEL_CAP_LEN 2 +#define NIB_0_DQSEL_RES 53 +#define NIB_0_DQSEL_RES_LEN 3 +#define NIB_1_DQSEL_CAP 56 +#define NIB_1_DQSEL_CAP_LEN 2 +#define NIB_1_DQSEL_RES 61 +#define NIB_1_DQSEL_RES_LEN 3 + +#define DDRPHY_DP16_CTLE_CTL_BYTE1_P0_0 0x800000210701103F +#define NIB_2_DQSEL_CAP 48 +#define NIB_2_DQSEL_CAP_LEN 2 +#define NIB_2_DQSEL_RES 53 +#define NIB_2_DQSEL_RES_LEN 3 +#define NIB_3_DQSEL_CAP 56 +#define NIB_3_DQSEL_CAP_LEN 2 +#define NIB_3_DQSEL_RES 61 +#define NIB_3_DQSEL_RES_LEN 3 + +#define DDRPHY_DP16_ACBOOST_CTL_BYTE0_P0_0 0x800000220701103F +#define S0ACENSLICENDRV_DC 48 +#define S0ACENSLICENDRV_DC_LEN 3 +#define S0ACENSLICEPDRV_DC 51 +#define S0ACENSLICEPDRV_DC_LEN 3 +#define S0ACENSLICEPTERM_DC 56 + +#define DDRPHY_DP16_ACBOOST_CTL_BYTE1_P0_0 0x800000230701103F +#define S1ACENSLICENDRV_DC 48 +#define S1ACENSLICENDRV_DC_LEN 3 +#define S1ACENSLICEPDRV_DC 51 +#define S1ACENSLICEPDRV_DC_LEN 3 +#define S1ACENSLICEPTERM_DC 56 + +#define DDRPHY_DP16_DLL_CNTL0_P0_0 0x800000240701103F +#define DDRPHY_DP16_DLL_CNTL1_P0_0 0x800000250701103F +#define INIT_RXDLL_CAL_RESET 48 + +#define DDRPHY_DP16_DLL_VREG_CONTROL0_P0_0 0x8000002A0701103F +#define DDRPHY_DP16_DLL_VREG_CONTROL1_P0_0 0x8000002B0701103F +#define RXREG_VREG_COMPCON_DC 48 +#define RXREG_VREG_COMPCON_DC_LEN 3 +#define RXREG_VREG_DRVCON_DC 53 +#define RXREG_VREG_DRVCON_DC_LEN 3 +#define RXREG_VREG_REF_SEL_DC 56 +#define RXREG_VREG_REF_SEL_DC_LEN 3 + +#define DDRPHY_DP16_DLL_VREG_COARSE0_P0_0 0x8000002C0701103F +#define DDRPHY_DP16_DLL_VREG_COARSE1_P0_0 0x8000002D0701103F + +#define DDRPHY_DP16_WR_VREF_STATUS0_P0_0 0x8000002E0701103F +#define DDRPHY_DP16_WR_VREF_STATUS1_P0_0 0x8000002F0701103F + +#define DDRPHY_DP16_DQSCLK_OFFSET_P0_0 0x800000370701103F +#define DQS_OFFSET 49 +#define DQS_OFFSET_LEN 7 + +#define DDRPHY_DP16_WR_DELAY_VALUE_0_RP0_REG_P0_0 0x800000380701103F + +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR0_P0_0 0x8000005E0701103F +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR1_P0_0 0x8000015E0701103F +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR2_P0_0 0x8000025E0701103F +#define DDRPHY_DP16_WR_VREF_VALUE0_RANK_PAIR3_P0_0 0x8000035E0701103F +#define WR_VREF_RANGE_DRAM0 49 +#define WR_VREF_VALUE_DRAM0 50 +#define WR_VREF_VALUE_DRAM0_LEN 6 +#define WR_VREF_RANGE_DRAM1 57 +#define WR_VREF_VALUE_DRAM1 58 +#define WR_VREF_VALUE_DRAM1_LEN 6 + +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR0_P0_0 0x8000005F0701103F +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR1_P0_0 0x8000015F0701103F +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR2_P0_0 0x8000025F0701103F +#define DDRPHY_DP16_WR_VREF_VALUE1_RANK_PAIR3_P0_0 0x8000035F0701103F +#define WR_VREF_RANGE_DRAM2 49 +#define WR_VREF_VALUE_DRAM2 50 +#define WR_VREF_VALUE_DRAM2_LEN 6 +#define WR_VREF_RANGE_DRAM3 57 +#define WR_VREF_VALUE_DRAM3 58 +#define WR_VREF_VALUE_DRAM3_LEN 6 + +#define DDRPHY_DP16_WR_VREF_CONFIG0_P0_0 0x8000006C0701103F +#define WR_CTR_1D_MODE_SWITCH 48 +#define WR_CTR_RUN_FULL_1D 49 +#define WR_CTR_2D_SMALL_STEP_VAL 52 +#define WR_CTR_2D_BIG_STEP_VAL 53 +#define WR_CTR_2D_BIG_STEP_VAL_LEN 4 +#define WR_CTR_NUM_BITS_TO_SKIP 57 +#define WR_CTR_NUM_BITS_TO_SKIP_LEN 3 +#define WR_CTR_NUM_NO_INC_VREF_COMP 60 +#define WR_CTR_NUM_NO_INC_VREF_COMP_LEN 3 + +#define DDRPHY_DP16_SYSCLK_PR_VALUE_P0_0 
0x800000730701103F +#define BB_LOCK0 48 +#define BB_LOCK1 56 + +#define DDRPHY_DP16_WRCLK_PR_P0_0 0x800000740701103F +#define TSYS_WRCLK 49 +#define TSYS_WRCLK_LEN 7 + +#define DDRPHY_DP16_IO_TX_CONFIG0_P0_0 0x800000750701103F +#define DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH 48 +#define DDRPHY_DP16_IO_TX_CONFIG0_STRENGTH_LEN 4 + +#define DDRPHY_DP16_RD_VREF_CAL_EN_P0_0 0x800000760701103F + +#define DDRPHY_DP16_DLL_CONFIG1_P0_0 0x800000770701103F +#define S0INSDLYTAP 61 +#define S1INSDLYTAP 62 + +#define DDRPHY_DP16_IO_TX_FET_SLICE_P0_0 0x800000780701103F +#define EN_SLICE_N_WR 49 +#define EN_SLICE_N_WR_LEN 7 +#define EN_SLICE_P_WR 57 +#define EN_SLICE_P_WR_LEN 7 + +#define DDRPHY_DP16_RD_VREF_CAL_ERROR_P0_0 0x8000007A0701103F + +#define DDRPHY_DP16_IO_TX_PFET_TERM_P0_0 0x8000007B0701103F + +#define DDRPHY_DP16_DQ_BIT_DISABLE_RP0_P0_0 0x8000007C0701103F +#define DDRPHY_DP16_DQS_BIT_DISABLE_RP0_P0_0 0x8000007D0701103F + +#define DDRPHY_DP16_WR_VREF_ERROR0_P0_0 0x800000AE0701103F +#define DDRPHY_DP16_WR_VREF_ERROR1_P0_0 0x800000AF0701103F + +#define DDRPHY_DP16_WR_VREF_CONFIG1_P0_0 0x800000EC0701103F +#define WR_CTR_VREF_RANGE_SELECT 48 +#define WR_CTR_VREF_RANGE_CROSSOVER 49 +#define WR_CTR_VREF_RANGE_CROSSOVER_LEN 7 +#define WR_CTR_VREF_SINGLE_RANGE_MAX 56 +#define WR_CTR_VREF_SINGLE_RANGE_MAX_LEN 7 + +/* Yes, MASK1 is supposed to be before MASK0. */ +#define DDRPHY_DP16_WR_VREF_ERROR_MASK1_P0_0 0x800000FA0701103F +#define DDRPHY_DP16_WR_VREF_ERROR_MASK0_P0_0 0x800000FB0701103F + +#define DDRPHY_ADR_BIT_ENABLE_P0_ADR0 0x800040000701103F +#define DDRPHY_ADR_DIFFPAIR_ENABLE_P0_ADR1 0x800044010701103F + +#define DDRPHY_ADR_DELAY0_P0_ADR0 0x800040040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR0 0x800040050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR0 0x800040060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR0 0x800040070701103F +#define DDRPHY_ADR_DELAY4_P0_ADR0 0x800040080701103F +#define DDRPHY_ADR_DELAY5_P0_ADR0 0x800040090701103F +#define DDRPHY_ADR_DELAY0_P0_ADR1 0x800044040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR1 0x800044050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR1 0x800044060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR1 0x800044070701103F +#define DDRPHY_ADR_DELAY4_P0_ADR1 0x800044080701103F +#define DDRPHY_ADR_DELAY5_P0_ADR1 0x800044090701103F +#define DDRPHY_ADR_DELAY0_P0_ADR2 0x800048040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR2 0x800048050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR2 0x800048060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR2 0x800048070701103F +#define DDRPHY_ADR_DELAY4_P0_ADR2 0x800048080701103F +#define DDRPHY_ADR_DELAY5_P0_ADR2 0x800048090701103F +#define DDRPHY_ADR_DELAY0_P0_ADR3 0x80004C040701103F +#define DDRPHY_ADR_DELAY1_P0_ADR3 0x80004C050701103F +#define DDRPHY_ADR_DELAY2_P0_ADR3 0x80004C060701103F +#define DDRPHY_ADR_DELAY3_P0_ADR3 0x80004C070701103F +#define ADR_DELAY_EVEN 49 +#define ADR_DELAY_EVEN_LEN 7 +#define ADR_DELAY_ODD 57 +#define ADR_DELAY_ODD_LEN 7 + +#define DDRPHY_ADR_DELAY1_P0_ADR1 0x800044050701103F +#define DDRPHY_ADR_DELAY3_P0_ADR1 0x800044070701103F + +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR0 0x800040200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR0 0x800040210701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR1 0x800044200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR1 0x800044210701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR2 0x800048200701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR2 0x800048210701103F +#define DDRPHY_ADR_IO_FET_SLICE_EN_MAP0_P0_ADR3 0x80004C200701103F +#define 
DDRPHY_ADR_IO_FET_SLICE_EN_MAP1_P0_ADR3 0x80004C210701103F +#define SLICE_SEL0 48 +#define SLICE_SEL1 50 +#define SLICE_SEL2 52 +#define SLICE_SEL3 54 +#define SLICE_SEL4 56 +#define SLICE_SEL5 58 +#define SLICE_SEL6 60 +#define SLICE_SEL7 62 + +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_P0_ADR32S0 0x800080310701103F +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH 59 +#define DDRPHY_ADR_DLL_VREG_CONFIG_1_STRENGTH_LEN 4 + +#define ADR_SYSCLK_CNTRL_PR_P0_ADR32S0 0x800080320701103F + +#define DDRPHY_ADR_MCCLK_WRCLK_PR_STATIC_OFFSET_P0_ADR32S0 0x800080330701103F + +#define ADR_SYSCLK_PR_VALUE_RO_P0_ADR32S0 0x800080340701103F + +#define DDRPHY_ADR_OUTPUT_FORCE_ATEST_CNTL_P0_ADR32S0 0x800080350701103F +#define FLUSH 48 +#define INIT_IO 50 + +#define DDRPHY_ADR_DLL_CNTL_P0_ADR32S0 0x8000803A0701103F +#define INIT_RXDLL_CAL_RESET 48 + +#define DDRPHY_ADR_DLL_VREG_CONTROL_P0_ADR32S0 0x8000803D0701103F + +#define DDRPHY_ADR_DLL_VREG_COARSE_P0_ADR32S0 0x8000803E0701103F + +#define DDRPHY_PC_DLL_ZCAL_CAL_STATUS_P0 0x8000C0000701103F +#define DP_DLL_CAL_GOOD 48 +#define DP_DLL_CAL_ERROR 49 +#define DP_DLL_CAL_ERROR_FINE 50 +#define ADR_DLL_CAL_GOOD 51 +#define ADR_DLL_CAL_ERROR 52 +#define ADR_DLL_CAL_ERROR_FINE 53 +#define ZCAL_DONE 63 + +#define DDRPHY_PC_RANK_PAIR0_P0 0x8000C0020701103F +#define DDRPHY_PC_RANK_PAIR1_P0 0x8000C0030701103F +#define DDRPHY_PC_RANK_PAIR2_P0 0x8000C0300701103F +#define DDRPHY_PC_RANK_PAIR3_P0 0x8000C0310701103F + +#define DDRPHY_PC_CONFIG0_P0 0x8000C00C0701103F +#define DDR4_CMD_SIG_REDUCTION 54 +#define DDR4_VLEVEL_BANK_GROUP 62 + +#define DDRPHY_PC_CONFIG1_P0 0x8000C00D0701103F +#define WRITE_LATENCY_OFFSET 48 +#define WRITE_LATENCY_OFFSET_LEN 4 +#define READ_LATENCY_OFFSET 52 +#define READ_LATENCY_OFFSET_LEN 4 +#define MEMORY_TYPE 59 +#define MEMORY_TYPE_LEN 3 +#define DDR4_LATENCY_SW 62 + +#define DDRPHY_PC_RESETS_P0 0x8000C00E0701103F +#define SYSCLK_RESET 49 +#define ENABLE_ZCAL 51 + +#define DDRPHY_PC_MIRROR_CONFIG_P0 0x8000C0110701103F +#define ADDR_MIRROR_RP1_PRI 50 +#define ADDR_MIRROR_RP1_SEC 51 +#define ADDR_MIRROR_RP3_PRI 54 +#define ADDR_MIRROR_RP3_SEC 55 + +#define DDRPHY_PC_ERROR_STATUS0_P0 0x8000C0120701103F + +#define DDRPHY_PC_INIT_CAL_CONFIG0_P0 0x8000C0160701103F +#define ABORT_ON_CAL_ERROR 58 +#define ENA_RANK_PAIR_MSB 60 + +#define DDRPHY_PC_INIT_CAL_CONFIG1_P0 0x8000C0170701103F +#define REFRESH_COUNT 48 +#define REFRESH_COUNT_LEN 4 +#define REFRESH_CONTROL 52 +#define REFRESH_CONTROL_LEN 2 +#define REFRESH_ALL_RANKS 54 +#define CMD_SNOOP_DIS 55 +#define REFRESH_INTERVAL 57 +#define REFRESH_INTERVAL_LEN 7 + +#define DDRPHY_PC_INIT_CAL_ERROR_P0 0x8000C0180701103F + +#define DDRPHY_PC_INIT_CAL_STATUS_P0 0x8000C0190701103F + +#define DDRPHY_PC_CSID_CFG_P0 0x8000C0330701103F + +#define DDRPHY_SEQ_CONFIG0_P0 0x8000C4020701103F +#define TWO_CYCLE_ADDR_EN 49 +#define DELAYED_PAR 54 +#define PAR_A17_MASK 62 + +#define DDRPHY_SEQ_ODT_WR_CONFIG0_P0 0x8000C40A0701103F +#define DDRPHY_SEQ_ODT_RD_CONFIG1_P0 0x8000C40F0701103F +#define ODT_RD_VALUES0 48 +#define ODT_RD_VALUES0_LEN 4 +#define ODT_RD_VALUES1 56 +#define ODT_RD_VALUES1_LEN 4 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM0_P0 0x8000C4120701103F +#define TMOD_CYCLES 48 +#define TMOD_CYCLES_LEN 4 +#define TRCD_CYCLES 52 +#define TRCD_CYCLES_LEN 4 +#define TRP_CYCLES 56 +#define TRP_CYCLES_LEN 4 +#define TRFC_CYCLES 60 +#define TRFC_CYCLES_LEN 4 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM1_P0 0x8000C4130701103F +#define TZQINIT_CYCLES 48 +#define TZQINIT_CYCLES_LEN 4 +#define TZQCS_CYCLES 52 +#define TZQCS_CYCLES_LEN 4 
+#define TWLDQSEN_CYCLES 56 +#define TWLDQSEN_CYCLES_LEN 4 +#define TWRMRD_CYCLES 60 +#define TWRMRD_CYCLES_LEN 4 + +#define DDRPHY_SEQ_MEM_TIMING_PARAM2_P0 0x8000C4140701103F +#define TODTLON_OFF_CYCLES 48 +#define TODTLON_OFF_CYCLES_LEN 4 + +#define DDRPHY_SEQ_RD_WR_DATA0_P0 0x8000C4000701103F +#define DDRPHY_SEQ_RD_WR_DATA1_P0 0x8000C4010701103F +#define RD_RW_DATA_REG0 48 +#define RD_RW_DATA_REG0_LEN 16 +#define RD_RW_DATA_REG1 48 +#define RD_RW_DATA_REG1_LEN 16 + +#define DDRPHY_SEQ_ERROR_STATUS0_P0 0x8000C4080701103F + +#define DDRPHY_SEQ_ODT_WR_CONFIG0_P0 0x8000C40A0701103F +#define ODT_WR_VALUES0 48 +#define ODT_WR_VALUES0_LEN 4 +#define ODT_WR_VALUES1 56 +#define ODT_WR_VALUES1_LEN 4 + +#define DDRPHY_SEQ_ODT_WR_CONFIG1_P0 0x8000C40B0701103F +#define ODT_WR_VALUES2 48 +#define ODT_WR_VALUES2_LEN 4 +#define ODT_WR_VALUES3 56 +#define ODT_WR_VALUES3_LEN 4 + +#define DDRPHY_SEQ_ODT_RD_CONFIG0_P0 0x8000C40E0701103F +#define ODT_RD_VALUES0 48 +#define ODT_RD_VALUES0_LEN 4 +#define ODT_RD_VALUES1 56 +#define ODT_RD_VALUES1_LEN 4 + +#define DDRPHY_SEQ_ODT_RD_CONFIG1_P0 0x8000C40F0701103F +#define ODT_RD_VALUES2 48 +#define ODT_RD_VALUES2_LEN 4 +#define ODT_RD_VALUES3 56 +#define ODT_RD_VALUES3_LEN 4 + +#define DDRPHY_RC_CONFIG0_P0 0x8000C8000701103F +#define GLOBAL_PHY_OFFSET 48 +#define GLOBAL_PHY_OFFSET_LEN 4 +#define PERFORM_RDCLK_ALIGN 62 + +#define DDRPHY_RC_CONFIG1_P0 0x8000C8010701103F + +#define DDRPHY_RC_CONFIG2_P0 0x8000C8020701103F +#define CONSEC_PASS 48 +#define CONSEC_PASS_LEN 5 + +#define DDRPHY_RC_ERROR_STATUS0_P0 0x8000C8050701103F + +#define DDRPHY_RC_CONFIG3_P0 0x8000C8070701103F +#define COARSE_CAL_STEP_SIZE 51 +#define COARSE_CAL_STEP_SIZE_LEN 4 + +#define DDRPHY_RC_RDVREF_CONFIG0_P0 0x8000C8090701103F + +#define DDRPHY_RC_RDVREF_CONFIG1_P0 0x8000C80A0701103F +#define CMD_PRECEDE_TIME 48 +#define CMD_PRECEDE_TIME_LEN 8 +#define MPR_LOCATION 56 +#define MPR_LOCATION_LEN 4 +#define CALIBRATION_ENABLE 60 +#define SKIP_RDCENTERING 61 + +#define DDRPHY_WC_CONFIG0_P0 0x8000CC000701103F +#define TWLO_TWLOE 48 +#define TWLO_TWLOE_LEN 8 +#define WL_ONE_DQS_PULSE 56 +#define FW_WR_RD 57 +#define FW_WR_RD_LEN 6 +#define CUSTOM_INIT_WRITE 63 + +#define DDRPHY_WC_CONFIG1_P0 0x8000CC010701103F +#define BIG_STEP 48 +#define BIG_STEP_LEN 4 +#define SMALL_STEP 54 +#define WR_PRE_DLY 55 +#define WR_PRE_DLY_LEN 6 + +#define DDRPHY_WC_CONFIG2_P0 0x8000CC020701103F +#define NUM_VALID_SAMPLES 48 +#define NUM_VALID_SAMPLES_LEN 4 +#define FW_RD_WR 52 +#define FW_RD_WR_LEN 6 +#define IPW_WR_WR 58 +#define IPW_WR_WR_LEN 4 + +#define DDRPHY_WC_ERROR_STATUS0_P0 0x8000CC030701103F + +#define DDRPHY_WC_CONFIG3_P0 0x8000CC050701103F +#define MRS_CMD_DQ_OFF 55 +#define MRS_CMD_DQ_OFF_LEN 6 + +#define DDRPHY_WC_RTT_WR_SWAP_ENABLE_P0 0x8000CC060701103F +#define WL_ENABLE_RTT_SWAP 48 +#define WR_CTR_ENABLE_RTT_SWAP 49 +#define WR_CTR_VREF_COUNTER_RESET_VAL 50 +#define WR_CTR_VREF_COUNTER_RESET_VAL_LEN 10 + +#define DDRPHY_APB_CONFIG0_P0 0x8000D0000701103F +#define RESET_ERR_RPT 49 + +#define DDRPHY_APB_ERROR_STATUS0_P0 0x8000D0010701103F + +#endif // ISTEP_13_SCOM_H diff --git a/src/soc/ibm/power9/istep_14_1.c b/src/soc/ibm/power9/istep_14_1.c new file mode 100644 index 00000000000..d6f8c72cf11 --- /dev/null +++ b/src/soc/ibm/power9/istep_14_1.c @@ -0,0 +1,544 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +#include "istep_13_scom.h" +#include "mcbist.h" + +/* + * 14.1 mss_memdiag: Mainstore Pattern Testing + * + * - The following step 
documents the generalities of this step
+ *   - In FW, PRD will control mem diags via interrupts. It doesn't use
+ *     mss_memdiags.C directly but the HWP subroutines
+ *   - In cronus it will execute mss_memdiags.C directly
+ *     b) p9_mss_memdiags.C (mcbist) -- Nimbus
+ *     c) p9_mss_memdiags.C (mba) -- Cumulus
+ * - Prior to running, this procedure will apply known DQ bad bits to prevent
+ *   them from participating in training. This information is extracted from
+ *   the bad DQ attribute and applied to hardware
+ * - Nimbus uses the mcbist engine
+ *   - Still supports superfast read/init/scrub
+ * - Cumulus/Centaur uses the scrub engine
+ * - Modes:
+ *   - Minimal: Write-only with 0's
+ *   - Standard: Write of 0's followed by a Read
+ *   - Medium: Write followed by Read, 4 patterns, last of 0's
+ *   - Max: Write followed by Read, 9 patterns, last of 0's
+ * - Run on the host
+ * - This procedure will update the bad DQ attribute for each dimm based on
+ *   its findings
+ * - At the end, this procedure sets FIR masks correctly for runtime
+ *   analysis
+ * - All subsequent repairs are considered runtime issues
+ */
+
+static void fir_unmask(uint8_t chip, int mcs_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	int mca_i;
+	const int is_dd20 = get_dd() == 0x20;
+	/* Bits in other registers (act0, mask) are already set properly.
+	    MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRACT1
+	    [3] MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC = 0	// checkstop (0,0,0)
+	*/
+	scom_and_or_for_chiplet(chip, id, MCBISTFIRACT1,
+				~PPC_BIT(MCBISTFIRQ_MCBIST_BRODCAST_OUT_OF_SYNC), 0);
+
+	for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+		uint64_t val;
+		if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional)
+			continue;
+
+		/* From broadcast_out_of_sync() workaround:
+		    MC01.PORT0.ECC64.SCOM.RECR
+		    [26] MBSECCQ_ENABLE_UE_NOISE_WINDOW = 1
+		*/
+		mca_and_or(chip, id, mca_i, RECR, ~0, PPC_BIT(MBSECCQ_ENABLE_UE_NOISE_WINDOW));
+
+		/*
+		 * Read out the wr_done and rd_tag delays, find their minimum and set the
+		 * RCD Protect Time to this value.
+		 *
+		 * MC01.PORT0.SRQ.MBA_DSM0Q
+		 *   [24-29] MBA_DSM0Q_CFG_WRDONE_DLY
+		 *   [36-41] MBA_DSM0Q_CFG_RDTAG_DLY
+		 *
+		 * MC01.PORT0.SRQ.MBA_FARB0Q
+		 *   [48-53] MBA_FARB0Q_CFG_RCD_PROTECTION_TIME
+		 */
+		val = mca_read(chip, id, mca_i, MBA_DSM0Q);
+		val = MIN((val & PPC_BITMASK(24, 29)) >> (63 - 29),
+			  (val & PPC_BITMASK(36, 41)) >> (63 - 41));
+		mca_and_or(chip, id, mca_i, MBA_FARB0Q,
+			   ~PPC_BITMASK(48, 53),
+			   PPC_PLACE(val, MBA_FARB0Q_CFG_RCD_PROTECTION_TIME,
+				     MBA_FARB0Q_CFG_RCD_PROTECTION_TIME_LEN));
+
+		/*
+		 * Due to a hardware defect in DD2.0, certain errors are not handled
+		 * properly. As a result, these FIRs are marked as checkstop for DD2 to
+		 * avoid any mishandling.
+		 *
+		 * MCA_FIR_MAINLINE_RCD stays masked on newer platforms. ACT0 and ACT1
+		 * for RCD are not touched by Hostboot, but for simplicity always set
+		 * those to 0 - they are "don't care" if masked, and 0 is their reset
+		 * value. Affected bits are annotated with an asterisk below - whatever
+		 * is mentioned below is changed to checkstop for those bits.
+		 *
+		 * This also affects Cumulus DD1.0, but the rest of the code is for
+		 * Nimbus only so don't bother checking for it.
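+		 *
+		 * Legend for the (ACTION0, ACTION1, MASK) triples annotated below,
+		 * derived from the settings in this function (note the WARNING further
+		 * down: MBACALFIR encodes checkstop differently):
+		 *   (0,0,0) = checkstop
+		 *   (0,1,0) = recoverable_error
+		 *   (X,X,1) = masked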
+ * + * MC01.PORT0.ECC64.SCOM.ACTION0 + * [13] FIR_MAINLINE_AUE = 0 + * [14] FIR_MAINLINE_UE = 0 + * [15] FIR_MAINLINE_RCD = 0 + * [16] FIR_MAINLINE_IAUE = 0 + * [17] FIR_MAINLINE_IUE = 0 + * [37] MCA_FIR_MAINTENANCE_IUE = 0 + * MC01.PORT0.ECC64.SCOM.ACTION1 + * [13] FIR_MAINLINE_AUE = 0 + * [14] FIR_MAINLINE_UE = 1* + * [15] FIR_MAINLINE_RCD = 0 + * [16] FIR_MAINLINE_IAUE = 0 + * [17] FIR_MAINLINE_IUE = 1 + * [33] MCA_FIR_MAINTENANCE_AUE = 0 // Hostboot clears AUE and IAUE without + * [36] MCA_FIR_MAINTENANCE_IAUE = 0 // unmasking, with no explanation why + * [37] MCA_FIR_MAINTENANCE_IUE = 1 + * MC01.PORT0.ECC64.SCOM.MASK + * [13] FIR_MAINLINE_AUE = 0 // checkstop (0,0,0) + * [14] FIR_MAINLINE_UE = 0 // *recoverable_error (0,1,0) + * [15] FIR_MAINLINE_RCD = 1* // *masked (X,X,1) + * [16] FIR_MAINLINE_IAUE = 0 // checkstop (0,0,0) + * [17] FIR_MAINLINE_IUE = 0 // recoverable_error (0,1,0) + * [37] MCA_FIR_MAINTENANCE_IUE = 0 // recoverable_error (0,1,0) + */ + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION0, + ~(PPC_BITMASK(13, 17) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)), + 0); + mca_and_or(chip, id, mca_i, ECC_FIR_ACTION1, + ~(PPC_BITMASK(13, 17) | PPC_BIT(ECC_FIR_MAINTENANCE_AUE) | + PPC_BITMASK(36, 37)), + (is_dd20 ? 0 : PPC_BIT(FIR_MAINLINE_UE)) | + PPC_BIT(FIR_MAINLINE_IUE) | PPC_BIT(MCA_FIR_MAINTENANCE_IUE)); + mca_and_or(chip, id, mca_i, ECC_FIR_MASK, + ~(PPC_BITMASK(13, 17) | + PPC_BIT(MCA_FIR_MAINTENANCE_IUE)), + (is_dd20 ? 0 : PPC_BIT(FIR_MAINLINE_RCD))); + + /* + * WARNING: checkstop is encoded differently (1,0,0). **Do not** try to + * make a function/macro that pretends to be universal. + * + * MC01.PORT0.SRQ.MBACALFIR_ACTION0 + * [13] MBACALFIR_PORT_FAIL = 0* + * MC01.PORT0.SRQ.MBACALFIR_ACTION1 + * [13] MBACALFIR_PORT_FAIL = 1* + * MC01.PORT0.SRQ.MBACALFIR_MASK + * [13] MBACALFIR_PORT_FAIL = 0 // *recoverable_error (0,1,0) + */ + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION0, + ~PPC_BIT(13), + (is_dd20 ? PPC_BIT(MBACALFIR_PORT_FAIL) : 0)); + mca_and_or(chip, id, mca_i, MBACALFIR_ACTION1, + ~PPC_BIT(MBACALFIR_PORT_FAIL), + (is_dd20 ? 0 : PPC_BIT(MBACALFIR_PORT_FAIL))); + mca_and_or(chip, id, mca_i, MBACALFIR_MASK, ~PPC_BIT(MBACALFIR_PORT_FAIL), 0); + + /* + * Enable port fail and RCD recovery + * TODO: check if we can set this together with RCD protection time. + * + * MC01.PORT0.SRQ.MBA_FARB0Q + * [54] MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY = 0 + * [57] MBA_FARB0Q_CFG_PORT_FAIL_DISABLE = 0 + */ + mca_and_or(chip, id, mca_i, MBA_FARB0Q, + ~(PPC_BIT(MBA_FARB0Q_CFG_DISABLE_RCD_RECOVERY) | + PPC_BIT(MBA_FARB0Q_CFG_PORT_FAIL_DISABLE)), 0); + } +} + +static void set_fifo_mode(uint8_t chip, int mcs_i, int fifo) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int mca_i; + /* Make sure fifo is either 0 or 1, nothing else. 
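+	 * (An observation on our part: PPC_PLACE() below fills a 1-bit field, so
+	 * any other value could spill into, or be silently truncated from,
+	 * adjacent bits, depending on how PPC_PLACE() masks its input.)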
	 */
+	fifo = !!fifo;
+
+	/* MC01.PORT0.SRQ.MBA_RRQ0Q
+	 *   [6] MBA_RRQ0Q_CFG_RRQ_FIFO_MODE = fifo
+	 * MC01.PORT0.SRQ.MBA_WRQ0Q
+	 *   [5] MBA_WRQ0Q_CFG_WRQ_FIFO_MODE = fifo
+	 */
+	for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+		if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional)
+			continue;
+
+		mca_and_or(chip, id, mca_i, MBA_RRQ0Q, ~PPC_BIT(MBA_RRQ0Q_CFG_RRQ_FIFO_MODE),
+			   PPC_PLACE(fifo, MBA_RRQ0Q_CFG_RRQ_FIFO_MODE,
+				     MBA_RRQ0Q_CFG_RRQ_FIFO_MODE_LEN));
+		mca_and_or(chip, id, mca_i, MBA_WRQ0Q, ~PPC_BIT(MBA_WRQ0Q_CFG_WRQ_FIFO_MODE),
+			   PPC_PLACE(fifo, MBA_WRQ0Q_CFG_WRQ_FIFO_MODE,
+				     MBA_WRQ0Q_CFG_WRQ_FIFO_MODE_LEN));
+	}
+}
+
+static void load_maint_pattern(uint8_t chip, int mcs_i, const uint64_t pat[16])
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	/*
+	 * Different than in Hostboot:
+	 * - Hostboot writes data for the second 64B line but doesn't use 128B mode,
+	 *   so the first 64B are repeated
+	 * - Hostboot also manually sets the address for the second half even
+	 *   though it would be autoincremented to the proper value
+	 * - Hostboot writes 4 pairs of 64b chunks of data, we write 8 uint64_t's
+	 */
+	int mca_i;
+
+	for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) {
+		int i;
+		if (!mem_data[chip].mcs[mcs_i].mca[mca_i].functional)
+			continue;
+
+		/* MC01.PORT0.ECC64.SCOM.AACR
+		 *   [1-9] AACR_ADDRESS = 0b111110000 = 0x1F0
+		 *   [10] AACR_AUTOINC = 1
+		 *   [11] AACR_ECCGEN = 1
+		 */
+		mca_write(chip, id, mca_i, AACR,
+			  PPC_PLACE(0x1F0, AACR_ADDRESS, AACR_ADDRESS_LEN) |
+			  PPC_BIT(AACR_AUTOINC) |
+			  PPC_BIT(AACR_ECCGEN));
+
+		for (i = 0; i < 16; i++) {
+			/* MC01.PORT0.ECC64.SCOM.AADR - data */
+			mca_write(chip, id, mca_i, AADR, pat[i]);
+			/*
+			 * Although ECC is generated by hardware, we still have to write to
+			 * this register to have the address incremented. Comments say that
+			 * the data also wouldn't be written to the RMW buffer without it.
+			 */
+			/* MC01.PORT0.ECC64.SCOM.AAER - ECC */
+			mca_write(chip, id, mca_i, AAER, 0);
+		}
+	}
+}
+
+static const uint64_t patterns[][16] = {
+	{0},
+	{0x596f75207265616c, 0x6c792073686f756c, 0x646e27742072656c, 0x79206f6e206d656d,
+	 0x6f7279206265696e, 0x67207a65726f6564, 0x206279206669726d, 0x776172652e2e2e00},
+	{0x4e6576657220756e, 0x646572657374696d, 0x6174652074686520, 0x62616e6477696474,
+	 0x68206f6620612073, 0x746174696f6e2077, 0x61676f6e2066756c, 0x6c206f6620746170,
+	 0x657320687572746c, 0x696e6720646f776e, 0x2074686520686967, 0x687761792e202d20,
+	 0x416e647265772053, 0x2e2054616e656e62, 0x61756d0a00000000},
+};
+
+/*
+ * Layout of start/end address registers:
+ *   [0-2]   unused by HW, in Hostboot:
+ *           [0-1] port select
+ *           [2]   dimm select
+ *   [3-4]   mrank (0 to 1)
+ *   [5-7]   srank (0 to 2)
+ *   [8-25]  row (0 to 17)
+ *   [26-32] col (3 to 9)
+ *   [33-35] bank (0 to 2)
+ *   [36-37] bank_group (0 to 1)
+ *
+ * In maintenance mode MCBIST automatically skips unused bits; they can safely
+ * be set to 0 for start and 1 for end addresses.
+ *
+ * Hostboot sets 3 ranges:
+ * - 0 to end of first DIMM (aka first DIMM)
+ * - 0 to end of address space (aka everything)
+ * - first address on first DIMM on last port to end of address space (aka last
+ *   port)
+ *
+ * Assuming that the documentation is correct, when spare bits are not taken
+ * into account, all ranges result in the [start of DIMM, end of DIMM] range.
+ * Maybe they are set only for debug purposes?
+ *
+ * We try to use just one range instead.
+ */
+/*
+ * NOTE: Except for setting address ranges, Hostboot repeats all of this for
+ * every subtest, even though most of the registers don't change in between.
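+ *
+ * For example, following the layout above, the single range used here is
+ * start = 0 (all fields at their minimum) and end = PPC_BITMASK(3, 37) (all
+ * address fields at their maximum), which covers every mrank/srank/row/col/
+ * bank combination - see the MCBSA0Q/MCBEA0Q writes below.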
+ */
+static void init_mcbist(uint8_t chip, int mcs_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	uint64_t val;
+	int i;
+
+	/* Fill address ranges */
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBSA0Q
+	 *   [0-37] MCBSA0Q_CFG_START_ADDR_0
+	 */
+	write_scom_for_chiplet(chip, id, MCBSA0Q, 0);
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBEA0Q
+	 *   [0-37] MCBEA0Q_CFG_END_ADDR_0
+	 */
+	write_scom_for_chiplet(chip, id, MCBEA0Q, PPC_BITMASK(3, 37));
+
+	/* Hostboot stops the MCBIST engine; we die() instead if it is already started */
+	/* TODO: check all bits (MCBIST was ever started) or just "in progress"? */
+	/* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLSTATQ
+	 *   [0] MCB_CNTLSTATQ_MCB_IP
+	 *   [1] MCB_CNTLSTATQ_MCB_DONE
+	 *   [2] MCB_CNTLSTATQ_MCB_FAIL
+	 */
+	if ((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) != 0)
+		die("MCBIST started already (%#16.16llx), this shouldn't happen\n", val);
+
+	/*
+	 * Clear MCBIST errors:
+	 * - MCBIST Error Status Register - MC01.MCBIST.MBA_SCOMFIR.MCBSTATQ
+	 * - MBS Memory Scrub/Read Error Count Register 0 - MC01.MCBIST.MBA_SCOMFIR.MBSEC0Q
+	 * - MBS Memory Scrub/Read Error Count Register 1 - MC01.MCBIST.MBA_SCOMFIR.MBSEC1Q
+	 * - MCBIST Fault Isolation Register - MC01.MCBIST.MBA_SCOMFIR.MCBISTFIRQ
+	 */
+	write_scom_for_chiplet(chip, id, MCBSTATQ, 0);
+	write_scom_for_chiplet(chip, id, MBSEC0Q, 0);
+	write_scom_for_chiplet(chip, id, MBSEC1Q, 0);
+	write_scom_for_chiplet(chip, id, MCBISTFIR, 0);
+
+	/* Enable FIFO mode */
+	set_fifo_mode(chip, mcs_i, 1);
+
+	/*
+	 * Hostboot clears address maps, but they are not used in maintenance
+	 * address mode. Also, it sets MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES
+	 * for scrub commands, but not for patterns. I have no idea what possible
+	 * implications this has, but without 3DS DIMMs I have no way of testing it.
+	 * For now I'll set this bit even for patterns so the MCBAGRAQ register can
+	 * be written only once instead of once per subtest.
+	 */
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBAGRAQ
+	 *   [all] 0
+	 *   [10] MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN = 1
+	 *   [12] MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES = 1
+	 */
+	write_scom_for_chiplet(chip, id, MCBAGRAQ,
+			       PPC_BIT(MCBAGRAQ_CFG_MAINT_ADDR_MODE_EN) |
+			       PPC_BIT(MCBAGRAQ_CFG_MAINT_DETECT_SRANK_BOUNDARIES));
+
+	/*
+	 * Configure MCBIST
+	 *
+	 * Enabling MCBCFGQ_CFG_MCB_LEN64 speeds up operations on x4 devices (~70ms
+	 * per pass on 16GB DIMM), but slows down x8 (~90ms per pass on 8GB DIMM).
+	 * As the difference for x8 is bigger than for x4, keep it disabled.
+	 *
+	 * MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE = 0b10 sets MCBIST to pause on error
+	 * after the current rank finishes. This is set for scrub only, but as we
+	 * don't expect to see any errors, it should be OK to set it for pattern
+	 * writing as well.
+	 *
+	 * MCBCFGQ_CFG_ENABLE_HOST_ATTN is set in Hostboot, but we don't have
+	 * interrupt handlers so keep it disabled.
+	 */
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBCFGQ
+	 *   [all] 0
+	 *   [56] MCBCFGQ_CFG_MCB_LEN64 = see above
+	 *   [57-58] MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE = 0 for patterns, 0b10 for scrub
+	 *   [63] MCBCFGQ_CFG_ENABLE_HOST_ATTN = see above
+	 */
+	write_scom_for_chiplet(chip, id, MCBCFGQ,
+			       PPC_PLACE(0x2, MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE,
+					 MCBCFGQ_CFG_PAUSE_ON_ERROR_MODE_LEN));
+
+	/*
+	 * This sets up memory parameters, mostly gaps between commands. To be as
+	 * fast as possible, gaps of 0 are configured here.
+	 */
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBPARMQ */
+	write_scom_for_chiplet(chip, id, MCBPARMQ, 0);
+
+	/*
+	 * Steps done from this point should be moved out of this function; they
+	 * should be done with different patterns before each subtest.
+	 * Right now only a pattern of all zeroes is used.
+	 */
+
+	/* Data pattern: 8 data registers + 1 ECC register */
+	/* TODO: different patterns can be used */
+	for (i = 0; i < 9; i++) {
+		write_scom_for_chiplet(chip, id, MCBFD0Q + i, patterns[0][i]);
+	}
+
+	/* TODO: random seeds */
+
+	/*
+	 * Maintenance data pattern
+	 *
+	 * The difference between this and the data pattern above is that this one
+	 * is used for ALTER and the one above for WRITE. ALTER can write 128
+	 * different bytes, while WRITE repeats a sequence of 64B twice. ALTER is
+	 * ~3-4 times slower.
+	 */
+	load_maint_pattern(chip, mcs_i, patterns[0]);
+
+	/*
+	 * Load the data rotate config and seeds
+	 *
+	 * Patterns (fixed) used by Hostboot are self-repeating and either all ones,
+	 * all zeroes or alternating bits (0x55/0xAA). Only in the last case can
+	 * rotating data seeds make a difference, and there it is the same as
+	 * inverting.
+	 */
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBDRCRQ */
+	write_scom_for_chiplet(chip, id, MCBDRCRQ, 0);
+	/* MC01.MCBIST.MBA_SCOMFIR.MCBDRSRQ */
+	write_scom_for_chiplet(chip, id, MCBDRSRQ, 0);
+
+	/*
+	 * The following step may be done just once, as long as the same set of
+	 * options works for both pattern writing and scrubbing, which so far seems
+	 * to be the case.
+	 */
+
+	/*
+	 * Load MCBIST threshold register
+	 *
+	 * This one has slightly different settings for patterns than for scrub, but
+	 * some of those that are explicitly set for scrubbing are always implicitly
+	 * enabled for non-scrub. The only meaningful difference is that some
+	 * uncorrectable errors pause MCBIST on scrub, but not on pattern writes.
+	 * Let's set them to pause even for pattern writes here and hope for the
+	 * best.
+	 */
+	/* MC01.MCBIST.MBA_SCOMFIR.MBSTRQ
+	 *   [0-31] these are thresholds for different errors; all of them are set
+	 *          to all 1's, meaning that pausing on threshold is disabled
+	 *   [34] MBSTRQ_CFG_PAUSE_ON_MPE = 1 for scrub, else 0 (Mark Placed Error)
+	 *   [35] MBSTRQ_CFG_PAUSE_ON_UE = 1 for scrub, else 0 (Uncorrectable Error)
+	 *   [37] MBSTRQ_CFG_PAUSE_ON_AUE = 1 for scrub, else 0 (Array UE)
+	 *   [55] MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE  \ 1 for scrub, nonscrub
+	 *   [56] MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE } counts all NCE
+	 *   [57] MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE  /
+	 */
+	write_scom_for_chiplet(chip, id, MBSTRQ, PPC_BITMASK(0, 31) |
+			       PPC_BIT(MBSTRQ_CFG_PAUSE_ON_MPE) |
+			       PPC_BIT(MBSTRQ_CFG_PAUSE_ON_UE) |
+			       PPC_BIT(MBSTRQ_CFG_PAUSE_ON_AUE) |
+			       PPC_BIT(MBSTRQ_CFG_NCE_SOFT_SYMBOL_COUNT_ENABLE) |
+			       PPC_BIT(MBSTRQ_CFG_NCE_INTER_SYMBOL_COUNT_ENABLE) |
+			       PPC_BIT(MBSTRQ_CFG_NCE_HARD_SYMBOL_COUNT_ENABLE));
+}
+
+static void mss_memdiag(uint8_t chips)
+{
+	uint8_t chip;
+	int mcs_i, mca_i;
+
+	for (chip = 0; chip < MAX_CHIPS; chip++) {
+		for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) {
+			if (!mem_data[chip].mcs[mcs_i].functional)
+				continue;
+
+			/*
+			 * FIXME: add testing for chipkill
+			 *
+			 * Testing touches bad DQ registers. This step also configures the MC
+			 * to deal with bad nibbles/DQs - see can_recover() in 13.11. It
+			 * repeats, to some extent, the training done in 13.12, which is TODO.
+			 * Following the assumptions made in previous isteps, skip this for
+			 * now.
+			 */
+			init_mcbist(chip, mcs_i);
+
+			/*
+			 * Add subtests.
+			 *
+			 * At the very minimum, one pattern write is required; otherwise RAM
+			 * will contain random data, which will most likely throw
+			 * unrecoverable errors because the ECC is also random.
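+			 *
+			 * Note: in add_fixed_pattern_write()/add_scrub() below, mca_i*2 + dimm
+			 * flattens (port, DIMM) into a per-MCS DIMM index; the factor of 2
+			 * matches DIMMS_PER_MCA.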
+ * + * Scrubbing may throw errors when address mapping is wrong even when + * maintenance pattern write can succeed for the same configuration. + */ + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + int dimm; + if (!mca->functional) + continue; + + for (dimm = 0; dimm < DIMMS_PER_MCA; dimm++) { + if (!mca->dimm[dimm].present) + continue; + + add_fixed_pattern_write(chip, mcs_i, mca_i*2 + dimm); + /* + * Hostboot uses separate program for scrub due to + * different pausing conditions. Having it in the same + * program seems to be working. + */ + if (!CONFIG(SKIP_INITIAL_ECC_SCRUB)) + add_scrub(chip, mcs_i, mca_i*2 + dimm); + } + } + + /* + * TODO: it writes whole RAM, this will take loooooong time. We can + * easily start second MCBIST while this is running. This would get more + * complicated for more patterns, but it still should be doable without + * interrupts reporting completion. + * + * Also, under right circumstances*, it should be possible to use + * broadcast mode for writing to all DIMMs simultaneously. + * + * *) Proper circumstances are: + * - every port has the same number of DIMMs (or no DIMMs at all) + * - every DIMM has the same: + * - rank configuration + * - number of row and column bits + * - width (and density, but this is implied by previous + * requirements) + * - module family (but we don't support anything but RDIMM anyway) + */ + mcbist_execute(chip, mcs_i); + } + } + + for (chip = 0; chip < MAX_CHIPS; chip++) { + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + /* + * When there is no other activity on the bus, this should take roughly + * (total RAM size under MCS / transfer rate) * number of subtests. + * + * Not measuring time it takes individual MCBISTs to complete as they + * all work in parallel. + */ + long time = wait_us(1000*1000*60, + (udelay(1), mcbist_is_done(chip, mcs_i))); + + /* TODO: dump error/status registers on failure */ + if (!time) { + die("MCBIST%d of chip %d times out (%#16.16llx)\n", mcs_i, chip, + read_scom_for_chiplet(chip, mcs_ids[mcs_i], + MCB_CNTLSTATQ)); + } + + /* Unmask mainline FIRs. */ + fir_unmask(chip, mcs_i); + + /* Turn off FIFO mode to improve performance. 
*/ + set_fifo_mode(chip, mcs_i, 0); + } + } +} + +void istep_14_1(uint8_t chips) +{ + report_istep(14, 1); + mss_memdiag(chips); +} diff --git a/src/soc/ibm/power9/istep_14_2.c b/src/soc/ibm/power9/istep_14_2.c new file mode 100644 index 00000000000..c929a2b461d --- /dev/null +++ b/src/soc/ibm/power9/istep_14_2.c @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include + +#define MCS_MCMODE0 0x5010811 +#define MCS_MCSYNC 0x5010815 +#define MCA_MBA_FARB3Q 0x7010916 + +#define MCS_MCSYNC_SYNC_GO_CH0 16 +#define SUPER_SYNC_BIT 14 +#define MBA_REFRESH_SYNC_BIT 8 +#define MCS_MCMODE0_DISABLE_MC_SYNC 27 +#define MCS_MCMODE0_DISABLE_MC_PAIR_SYNC 28 + +static void thermal_init(uint8_t chip) +{ + for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) { + for (size_t mca_i = 0; mca_i < MCA_PER_MCS; ++mca_i) { + mca_and_or(chip, mcs_ids[mcs_i], mca_i, MCA_MBA_FARB3Q, + ~PPC_BITMASK(0, 45), + PPC_BIT(10) | PPC_BIT(25) | PPC_BIT(37)); + } + scom_and_for_chiplet(chip, mcs_to_nest[mcs_ids[mcs_i]], + MCS_MCMODE0 + 0x80 * mcs_i, + PPC_BIT(21)); + } +} + +static void prog_mc_mode0(uint8_t chip, chiplet_id_t nest_target, size_t index) +{ + uint64_t mask = PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) + | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); + uint64_t data = PPC_BIT(MCS_MCMODE0_DISABLE_MC_SYNC) + | PPC_BIT(MCS_MCMODE0_DISABLE_MC_PAIR_SYNC); + scom_and_or_for_chiplet(chip, nest_target, MCS_MCMODE0 + 0x80 * index, ~mask, + data & mask); +} + +static void throttle_sync(uint8_t chip) +{ + for (size_t mcs_i = 0; mcs_i < MCS_PER_PROC; ++mcs_i) + prog_mc_mode0(chip, mcs_to_nest[mcs_ids[mcs_i]], mcs_i); + scom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, + ~PPC_BIT(MCS_MCSYNC_SYNC_GO_CH0)); + scom_and_or_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(SUPER_SYNC_BIT), + PPC_BITMASK(0, 16)); + scom_and_for_chiplet(chip, N3_CHIPLET_ID, MCS_MCSYNC, ~PPC_BIT(MBA_REFRESH_SYNC_BIT)); +} + +void istep_14_2(uint8_t chips) +{ + uint8_t chip; + + report_istep(14, 2); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + thermal_init(chip); + throttle_sync(chip); + } + } +} diff --git a/src/soc/ibm/power9/istep_14_3.c b/src/soc/ibm/power9/istep_14_3.c new file mode 100644 index 00000000000..be3b1f5a200 --- /dev/null +++ b/src/soc/ibm/power9/istep_14_3.c @@ -0,0 +1,515 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#include "pci.h" +#include "scratch.h" + +static uint64_t pec_addr(uint8_t pec, uint64_t addr) +{ + return addr + pec * 0x400; +} + +static void init_pecs(uint8_t chip, const uint8_t *iovalid_enable) +{ + enum { + P9N2_PEC_ADDREXTMASK_REG = 0x4010C05, + PEC_PBCQHWCFG_REG = 0x4010C00, + PEC_NESTTRC_REG = 0x4010C03, + PEC_PBAIBHWCFG_REG = 0xD010800, + + PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION = 60, + PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN = 30, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT = 40, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT_LEN = 3, + PEC_PBCQHWCFG_REG_PE_DISABLE_OOO_MODE = 0x16, + PEC_PBCQHWCFG_REG_PE_DISABLE_WR_SCOPE_GROUP = 42, + PEC_PBCQHWCFG_REG_PE_CHANNEL_STREAMING_EN = 33, + PEC_PBCQHWCFG_REG_PE_DISABLE_WR_VG = 41, + PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_VG = 43, + PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_SCOPE_GROUP = 44, + PEC_PBCQHWCFG_REG_PE_DISABLE_RD_SCOPE_GROUP = 51, + PEC_PBCQHWCFG_REG_PE_DISABLE_RD_VG = 54, + PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_SCOPE_GROUP = 56, + PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_VG = 59, + }; + + uint64_t scratch_reg6 = 0; + uint8_t pec = 0; + bool node_pump_mode = 
false;
+	uint8_t dd = get_dd();
+
+	scratch_reg6 = read_scom(chip, MBOX_SCRATCH_REG1 + 5);
+
+	/* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */
+	node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE));
+
+	for (pec = 0; pec < MAX_PEC_PER_PROC; ++pec) {
+		uint64_t val = 0;
+
+		printk(BIOS_INFO, "Initializing PEC%d...\n", pec);
+
+		/*
+		 * ATTR_FABRIC_ADDR_EXTENSION_GROUP_ID = 0
+		 * ATTR_FABRIC_ADDR_EXTENSION_CHIP_ID = 0
+		 */
+		scom_and_or_for_chiplet(chip, N2_CHIPLET_ID,
+					pec_addr(pec, P9N2_PEC_ADDREXTMASK_REG),
+					~PPC_BITMASK(0, 6),
+					PPC_PLACE(0, 0, 7));
+
+		/*
+		 * Phase2 init step 1
+		 * NestBase + 0x00
+		 * Set bits 00:03 = 0b0001 Set hang poll scale
+		 * Set bits 04:07 = 0b0001 Set data scale
+		 * Set bits 08:11 = 0b0001 Set hang pe scale
+		 * Set bit 22 = 0b1 Disable out of order store behavior
+		 * Set bit 33 = 0b1 Enable Channel Tag streaming behavior
+		 * Set bits 34:35 = 0b11 Set P9 Style cache-inject behavior
+		 * Set bits 46:48 = 0b011 Set P9 Style cache-inject rate, 1/16 cycles
+		 * Set bit 60 = 0b1 only if PEC is bifurcated or trifurcated.
+		 *
+		 * if HW423589_option1, set Disable Group Scope (r/w) and Use Vg(sys) at Vg
+		 * scope
+		 */
+
+		val = read_scom_for_chiplet(chip, N2_CHIPLET_ID,
+					    pec_addr(pec, PEC_PBCQHWCFG_REG));
+		/* Set hang poll scale */
+		val &= ~PPC_BITMASK(0, 3);
+		val |= PPC_PLACE(1, 0, 4);
+		/* Set data scale */
+		val &= ~PPC_BITMASK(4, 7);
+		val |= PPC_PLACE(1, 4, 4);
+		/* Set hang pe scale */
+		val &= ~PPC_BITMASK(8, 11);
+		val |= PPC_PLACE(1, 8, 4);
+		/* Disable out of order store behavior */
+		val |= PPC_BIT(22);
+		/* Enable Channel Tag streaming behavior */
+		val |= PPC_BIT(33);
+
+		/* Set Disable Group Scope (r/w) and Use Vg(sys) at Vg scope */
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_WR_VG);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_WR_SCOPE_GROUP);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_VG);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_INTWR_SCOPE_GROUP);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_RD_VG);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_RD_SCOPE_GROUP);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_SCOPE_GROUP);
+		val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_VG);
+
+		/* Disable P9 Style cache injects if chip is node */
+		if (!node_pump_mode) {
+			/*
+			 * ATTR_PROC_PCIE_CACHE_INJ_MODE
+			 * Attribute to control the cache inject mode.
+			 *
+			 * DISABLE_CI = 0x0 - Disable cache inject completely.
+			 *                    (Reset value default.)
+			 * P7_STYLE_CI = 0x1 - Use cache inject design from Power7.
+			 * PCITLP_STYLE_CI = 0x2 - Use PCI TLP Hint bits in packet to perform
+			 *                         the cache inject.
+			 * P9_STYLE_CI = 0x3 - Initial attempt at cache inject. Power9
+			 *                     style. (Attribute default.)
+			 *
+			 * Different cache inject modes will affect DMA write performance. The
+			 * attribute default was selected based on various workloads and was
+			 * found to be the most optimal setting for Power9.
+			 *
+			 * fapi2::ATTR_PROC_PCIE_CACHE_INJ_MODE = 3 by default
+			 */
+			val &= ~PPC_BITMASK(34, 35);
+			val |= PPC_PLACE(0x3, 34, 2);
+
+			if (dd == 0x21 || dd == 0x22 || dd == 0x23) {
+				/*
+				 * ATTR_PROC_PCIE_CACHE_INJ_THROTTLE
+				 * Attribute to control the cache inject throttling when cache
+				 * inject is enabled.
+				 *
+				 * DISABLE = 0x0 - Disable cache inject throttling.
+				 *                 (Reset value default.)
+				 * 16_CYCLES = 0x1 - Perform 1 cache inject every 16 clock
+				 *                   cycles.
+				 * 32_CYCLES = 0x3 - Perform 1 cache inject every 32 clock
+				 *                   cycles. (Attribute default.)
+				 * 64_CYCLES = 0x7 - Perform 1 cache inject every 64 clock
+				 *                   cycles.
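+				 *
+				 * (With the PPC_PLACE(0x3, 46, 3) write below, this code
+				 * selects the 32-cycle attribute default.)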
+ * + * Different throttle rates will affect DMA write performance. + * The attribute default settings were optimal settings found + * across various workloads. + */ + val &= ~PPC_BITMASK(46, 48); + val |= PPC_PLACE(0x3, 46, 3); + } + } + + if (pec == 1 || (pec == 2 && iovalid_enable[pec] != 0x4)) + val |= PPC_BIT(PEC_PBCQHWCFG_REG_PE_DISABLE_TCE_ARBITRATION); + + write_scom_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_PBCQHWCFG_REG), + val); + + /* + * Phase2 init step 2 + * NestBase + 0x01 + * N/A Modify Drop Priority Control Register (DrPriCtl) + */ + + /* + * Phase2 init step 3 + * NestBase + 0x03 + * Set bits 00:03 = 0b1001 Enable trace, and select + * inbound operations with addr information + */ + scom_and_or_for_chiplet(chip, N2_CHIPLET_ID, pec_addr(pec, PEC_NESTTRC_REG), + ~PPC_BITMASK(0, 3), + PPC_PLACE(9, 0, 4)); + + /* + * Phase2 init step 4 + * NestBase + 0x05 + * N/A For use of atomics/asb_notify + */ + + /* + * Phase2 init step 5 + * NestBase + 0x06 + * N/A To override scope prediction + */ + + /* + * Phase2 init step 6 + * PCIBase +0x00 + * Set bits 30 = 0b1 Enable Trace + */ + val = 0; + val |= PPC_BIT(PEC_PBAIBHWCFG_REG_PE_PCIE_CLK_TRACE_EN); + val |= PPC_PLACE(7, PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT, + PEC_AIB_HWCFG_OSBM_HOL_BLK_CNT_LEN); + write_scom_for_chiplet(chip, PCI0_CHIPLET_ID + pec, PEC_PBAIBHWCFG_REG, val); + } +} + +/* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ +static void phb_write(uint8_t chip, uint8_t phb, uint64_t addr, uint64_t data) +{ + chiplet_id_t chiplet; + uint8_t sat_id = (addr >> 6) & 0xF; + + if (phb == 0) { + chiplet = PCI0_CHIPLET_ID; + sat_id = (sat_id < 4 ? 1 : 4); + } else { + chiplet = PCI0_CHIPLET_ID + (phb / 3) + 1; + sat_id = (sat_id < 4 ? 1 : 4) + + ((phb % 2) ? 0 : 1) + + (2 * (phb / 5)); + } + + addr &= ~PPC_BITMASK(54, 57); + addr |= PPC_PLACE(sat_id, 54, 4); + + write_scom_for_chiplet(chip, chiplet, addr, data); +} + +/* See src/import/chips/p9/common/scominfo/p9_scominfo.C in Hostboot */ +static void phb_nest_write(uint8_t chip, uint8_t phb, uint64_t addr, uint64_t data) +{ + enum { N2_PCIS0_0_RING_ID = 0x3 }; + + uint8_t ring; + uint8_t sat_id = (addr >> 6) & 0xF; + + if (phb == 0) { + ring = (N2_PCIS0_0_RING_ID & 0xF); + sat_id = (sat_id < 4 ? 1 : 4); + } else { + ring = ((N2_PCIS0_0_RING_ID + (phb / 3) + 1) & 0xF); + sat_id = (sat_id < 4 ? 1 : 4) + + (phb % 2 ? 
0 : 1) + + (2 * (phb / 5)); + } + + addr &= ~PPC_BITMASK(50, 53); + addr |= PPC_PLACE(ring, 50, 4); + + addr &= ~PPC_BITMASK(54, 57); + addr |= PPC_PLACE(sat_id, 54, 4); + + write_scom_for_chiplet(chip, N2_CHIPLET_ID, addr, data); +} + +static void init_phbs(uint8_t chip, uint8_t phb_active_mask, const uint8_t *iovalid_enable) +{ + enum { + PHB_CERR_RPT0_REG = 0x4010C4A, + PHB_CERR_RPT1_REG = 0x4010C4B, + PHB_NFIR_REG = 0x4010C40, + PHB_NFIRWOF_REG = 0x4010C48, + + PHB_NFIRACTION0_REG = 0x4010C46, + PCI_NFIR_ACTION0_REG = 0x5B0F81E000000000, + + PHB_NFIRACTION1_REG = 0x4010C47, + PCI_NFIR_ACTION1_REG = 0x7F0F81E000000000, + + PHB_NFIRMASK_REG = 0x4010C43, + PCI_NFIR_MASK_REG = 0x30001C00000000, + + PHB_PE_DFREEZE_REG = 0x4010C55, + PHB_PBAIB_CERR_RPT_REG = 0xD01084B, + PHB_PFIR_REG = 0xD010840, + PHB_PFIRWOF_REG = 0xD010848, + + PHB_PFIRACTION0_REG = 0xD010846, + PCI_PFIR_ACTION0_REG = 0xB000000000000000, + + PHB_PFIRACTION1_REG = 0xD010847, + PCI_PFIR_ACTION1_REG = 0xB000000000000000, + + PHB_PFIRMASK_REG = 0xD010843, + PCI_PFIR_MASK_REG = 0xE00000000000000, + + P9_PCIE_CONFIG_BAR_SHIFT = 8, + + PHB_MMIOBAR0_REG = 0x4010C4E, + PHB_MMIOBAR0_MASK_REG = 0x4010C4F, + PHB_MMIOBAR1_REG = 0x4010C50, + PHB_MMIOBAR1_MASK_REG = 0x04010C51, + PHB_PHBBAR_REG = 0x4010C52, + PHB_BARE_REG = 0x4010C54, + + PHB_PHBRESET_REG = 0xD01084A, + PHB_ACT0_REG = 0xD01090E, + PHB_ACTION1_REG = 0xD01090F, + PHB_MASK_REG = 0xD01090B, + }; + + /* ATTR_PROC_PCIE_MMIO_BAR0_BASE_ADDR_OFFSET */ + const uint64_t mmio_bar0_offsets[MAX_PHB_PER_PROC] = { 0 }; + /* ATTR_PROC_PCIE_MMIO_BAR1_BASE_ADDR_OFFSET */ + const uint64_t mmio_bar1_offsets[MAX_PHB_PER_PROC] = { 0 }; + /* ATTR_PROC_PCIE_REGISTER_BAR_BASE_ADDR_OFFSET */ + const uint64_t register_bar_offsets[MAX_PHB_PER_PROC] = { 0 }; + /* ATTR_PROC_PCIE_BAR_SIZE */ + const uint64_t bar_sizes[3] = { 0 }; + + /* Base address of chip MMIO range */ + const uint64_t base_addr_mmio = PROC_BASE_ADDR(chip, /*msel=*/0x3); + + uint8_t phb = 0; + for (phb = 0; phb < MAX_PHB_PER_PROC; ++phb) { + /* BAR enable attribute (ATTR_PROC_PCIE_BAR_ENABLE) */ + const uint8_t bar_enables[3] = { 0 }; + + uint64_t val = 0; + uint64_t mmio0_bar = base_addr_mmio; + uint64_t mmio1_bar = base_addr_mmio; + uint64_t register_bar = base_addr_mmio; + + if (!(phb_active_mask & (PHB0_MASK >> phb))) + continue; + + printk(BIOS_INFO, "Initializing PHB%d...\n", phb); + + /* + * Phase2 init step 12_a (yes, out of order) + * NestBase + StackBase + 0xA + * 0xFFFFFFFF_FFFFFFFF + * Clear any spurious cerr_rpt0 bits (cerr_rpt0) + */ + phb_nest_write(chip, phb, PHB_CERR_RPT0_REG, PPC_BITMASK(0, 63)); + + /* + * Phase2 init step 12_b (yes, out of order) + * NestBase + StackBase + 0xB + * 0xFFFFFFFF_FFFFFFFF + * Clear any spurious cerr_rpt1 bits (cerr_rpt1) + */ + phb_nest_write(chip, phb, PHB_CERR_RPT1_REG, PPC_BITMASK(0, 63)); + + /* + * Phase2 init step 7_c + * NestBase + StackBase + 0x0 + * 0x00000000_00000000 + * Clear any spurious FIR + * bits (NFIR)NFIR + */ + phb_nest_write(chip, phb, PHB_NFIR_REG, 0); + + /* + * Phase2 init step 8 + * NestBase + StackBase + 0x8 + * 0x00000000_00000000 + * Clear any spurious WOF bits (NFIRWOF) + */ + phb_nest_write(chip, phb, PHB_NFIRWOF_REG, 0); + + /* + * Phase2 init step 9 + * NestBase + StackBase + 0x6 + * Set the per FIR Bit Action 0 register + */ + phb_nest_write(chip, phb, PHB_NFIRACTION0_REG, PCI_NFIR_ACTION0_REG); + + /* + * Phase2 init step 10 + * NestBase + StackBase + 0x7 + * Set the per FIR Bit Action 1 register + */ + phb_nest_write(chip, phb, PHB_NFIRACTION1_REG, 
PCI_NFIR_ACTION1_REG); + + /* + * Phase2 init step 11 + * NestBase + StackBase + 0x3 + * Set FIR Mask Bits to allow errors (NFIRMask) + */ + phb_nest_write(chip, phb, PHB_NFIRMASK_REG, PCI_NFIR_MASK_REG); + + /* + * Phase2 init step 12 + * NestBase + StackBase + 0x15 + * 0x00000000_00000000 + * Set Data Freeze Type Register for SUE handling (DFREEZE) + */ + phb_nest_write(chip, phb, PHB_PE_DFREEZE_REG, 0); + + /* + * Phase2 init step 13_a + * PCIBase + StackBase + 0xB + * 0x00000000_00000000 + * Clear any spurious pbaib_cerr_rpt bits + */ + phb_write(chip, phb, PHB_PBAIB_CERR_RPT_REG, 0); + + /* + * Phase2 init step 13_b + * PCIBase + StackBase + 0x0 + * 0x00000000_00000000 + * Clear any spurious FIR + * bits (PFIR)PFIR + */ + phb_write(chip, phb, PHB_PFIR_REG, 0); + + /* + * Phase2 init step 14 + * PCIBase + StackBase + 0x8 + * 0x00000000_00000000 + * Clear any spurious WOF bits (PFIRWOF) + */ + phb_write(chip, phb, PHB_PFIRWOF_REG, 0); + + /* + * Phase2 init step 15 + * PCIBase + StackBase + 0x6 + * Set the per FIR Bit Action 0 register + */ + phb_write(chip, phb, PHB_PFIRACTION0_REG, PCI_PFIR_ACTION0_REG); + + /* + * Phase2 init step 16 + * PCIBase + StackBase + 0x7 + * Set the per FIR Bit Action 1 register + */ + phb_write(chip, phb, PHB_PFIRACTION1_REG, PCI_PFIR_ACTION1_REG); + + /* + * Phase2 init step 17 + * PCIBase + StackBase + 0x3 + * Set FIR Mask Bits to allow errors (PFIRMask) + */ + phb_write(chip, phb, PHB_PFIRMASK_REG, PCI_PFIR_MASK_REG); + + /* + * Phase2 init step 18 + * NestBase + StackBase + 0xE + * Set MMIO Base Address Register 0 (MMIOBAR0) + */ + mmio0_bar += mmio_bar0_offsets[phb]; + mmio0_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; + phb_nest_write(chip, phb, PHB_MMIOBAR0_REG, mmio0_bar); + + /* + * Phase2 init step 19 + * NestBase + StackBase + 0xF + * Set MMIO BASE Address Register Mask 0 (MMIOBAR0_MASK) + */ + phb_nest_write(chip, phb, PHB_MMIOBAR0_MASK_REG, bar_sizes[0]); + + /* + * Phase2 init step 20 + * NestBase + StackBase + 0x10 + * Set MMIO Base + * Address Register 1 (MMIOBAR1) + */ + mmio1_bar += mmio_bar1_offsets[phb]; + mmio1_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; + phb_nest_write(chip, phb, PHB_MMIOBAR1_REG, mmio1_bar); + + /* + * Phase2 init step 21 + * NestBase + StackBase + 0x11 + * Set MMIO Base Address Register Mask 1 (MMIOBAR1_MASK) + */ + phb_nest_write(chip, phb, PHB_MMIOBAR1_MASK_REG, bar_sizes[1]); + + /* + * Phase2 init step 22 + * NestBase + StackBase + 0x12 + * Set PHB Register Base address Register (PHBBAR) + */ + register_bar += register_bar_offsets[phb]; + register_bar <<= P9_PCIE_CONFIG_BAR_SHIFT; + phb_nest_write(chip, phb, PHB_PHBBAR_REG, register_bar); + + /* + * Phase2 init step 23 + * NestBase + StackBase + 0x14 + * Set Base address Enable Register (BARE) + */ + + val = 0; + + if (bar_enables[0]) + val |= PPC_BIT(0); // PHB_BARE_REG_PE_MMIO_BAR0_EN, bit 0 for BAR0 + if (bar_enables[1]) + val |= PPC_BIT(1); // PHB_BARE_REG_PE_MMIO_BAR1_EN, bit 1 for BAR1 + if (bar_enables[2]) + val |= PPC_BIT(2); // PHB_BARE_REG_PE_PHB_BAR_EN, bit 2 for PHB + + phb_nest_write(chip, phb, PHB_BARE_REG, val); + + /* + * Phase2 init step 24 + * PCIBase + StackBase +0x0A + * 0x00000000_00000000 + * Remove ETU/AIB bus from reset (PHBReset) + */ + phb_write(chip, phb, PHB_PHBRESET_REG, 0); + /* Configure ETU FIR (all masked) */ + phb_write(chip, phb, PHB_ACT0_REG, 0); + phb_write(chip, phb, PHB_ACTION1_REG, 0); + phb_write(chip, phb, PHB_MASK_REG, PPC_BITMASK(0, 63)); + } +} + +void istep_14_3(uint8_t chips, const struct pci_info *pci_info) +{ + report_istep(14, 3); + 
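+	/*
+	 * As elsewhere in these isteps, 'chips' is a bitmask of functional
+	 * processors (bit n set means chip n is present): 0x01 is a single-CPU
+	 * configuration, 0x03 a dual-CPU one.
+	 */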
+ for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (!(chips & (1 << chip))) + continue; + + init_pecs(chip, pci_info[chip].iovalid_enable); + init_phbs(chip, pci_info[chip].phb_active_mask, pci_info[chip].iovalid_enable); + } +} diff --git a/src/soc/ibm/power9/istep_14_5.c b/src/soc/ibm/power9/istep_14_5.c new file mode 100644 index 00000000000..805e3fa5d83 --- /dev/null +++ b/src/soc/ibm/power9/istep_14_5.c @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "istep_13_scom.h" + +/* + * 14.5 proc_setup_bars: Setup Memory BARs + * + * a) p9_mss_setup_bars.C (proc chip) -- Nimbus + * b) p9c_mss_setup_bars.C (proc chip) -- Cumulus + * - Same HWP interface for both Nimbus and Cumulus, input target is + * TARGET_TYPE_PROC_CHIP; HWP is to figure out if target is a Nimbus (MCS) + * or Cumulus (MI) internally. + * - Prior to setting the memory bars on each processor chip, this procedure + * needs to set the centaur security protection bit + * - TCM_CHIP_PROTECTION_EN_DC is SCOM Addr 0x03030000 + * - TCN_CHIP_PROTECTION_EN_DC is SCOM Addr 0x02030000 + * - Both must be set to protect Nest and Mem domains + * - Based on system memory map + * - Each MCS has its mirroring and non mirrored BARs + * - Set the correct checkerboard configs. Note that chip flushes to + * checkerboard + * - need to disable memory bar on slave otherwise base flush values will + * ack all memory accesses + * c) p9_setup_bars.C + * - Sets up Powerbus/MCD, L3 BARs on running core + * - Other cores are setup via winkle images + * - Setup dSMP and PCIe Bars + * - Setup PCIe outbound BARS (doing stores/loads from host core) + * - Addresses that PCIE responds to on powerbus (PCI init 1-7) + * - Informing PCIe of the memory map (inbound) + * - PCI Init 8-15 + * - Set up Powerbus Epsilon settings + * - Code is still running out of L3 cache + * - Use this procedure to setup runtime epsilon values + * - Must be done before memory is viable + */ + +/* + * Reset memory controller configuration written by SBE. + * Close the MCS acker before enabling the real memory bars. + * + * Some undocumented registers, again. The registers use a stride I haven't seen + * before (0x80), not sure if those are MCSs (including those not present on P9), + * magic MCAs or something totally different. Hostboot writes to all possible + * registers, regardless of how many ports/slots are populated. + * + * All register and field names come from code and comments only, except for the + * first one. + */ +static void revert_mc_hb_dcbz_config(uint8_t chip) +{ + int mcs_i, i; + uint64_t val; + const uint64_t mul = 0x80; + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + + /* + * Bit for MCS2/3 is documented, but for MCS0/1 it is "unused". Use what + * Hostboot uses - bit 10 for MCS0/1 and bit 9 for MCS2/3. + */ + /* TP.TCNx.Nx.CPLT_CTRL1, x = {1,3} */ + val = read_scom_for_chiplet(chip, nest, NEST_CPLT_CTRL1); + if ((mcs_i == 0 && val & PPC_BIT(10)) || + (mcs_i == 1 && val & PPC_BIT(9))) + continue; + + for (i = 0; i < 2; i++) { + /* MCFGP -- mark BAR invalid & reset grouping configuration fields + MCS_n_MCFGP // undocumented, 0x0501080A, 0x0501088A, 0x0301080A, 0x0301088A for MCS{0-3} + [0] VALID = 0 + [1-4] MC_CHANNELS_PER_GROUP = 0 + [5-7] CHANNEL_0_GROUP_MEMBER_IDENTIFICATION = 0 // CHANNEL_1_GROUP_MEMBER_IDENTIFICATION not cleared? 
+ [13-23] GROUP_SIZE = 0 + */ + scom_and_or_for_chiplet(chip, nest, 0x0501080A + i * mul, + ~(PPC_BITMASK(0, 7) | PPC_BITMASK(13, 23)), + 0); + + /* MCMODE1 -- enable speculation, cmd bypass, fp command bypass + MCS_n_MCMODE1 // undocumented, 0x05010812, 0x05010892, 0x03010812, 0x03010892 + [32] DISABLE_ALL_SPEC_OPS = 0 + [33-51] DISABLE_SPEC_OP = 0x40 // bit 45 (called DCBF_BIT in code) set because of HW414958 + [54-60] DISABLE_COMMAND_BYPASS = 0 + [61] DISABLE_FP_COMMAND_BYPASS = 0 + */ + scom_and_or_for_chiplet(chip, nest, 0x05010812 + i * mul, + ~(PPC_BITMASK(32, 51) | PPC_BITMASK(54, 61)), + PPC_PLACE(0x40, 33, 19)); + + /* MCS_MCPERF1 -- enable fast path + MCS_n_MCPERF1 // undocumented, 0x05010810, 0x05010890, 0x03010810, 0x03010890 + [0] DISABLE_FASTPATH = 0 + */ + scom_and_or_for_chiplet(chip, nest, 0x05010810 + i * mul, + ~PPC_BIT(0), + 0); + + /* Re-mask MCFIR. We want to ensure all MCSs are masked until the + * BARs are opened later during IPL. + MCS_n_MCFIRMASK_OR // undocumented, 0x05010805, 0x05010885, 0x03010805, 0x03010885 + [all] 1 + */ + write_scom_for_chiplet(chip, nest, 0x05010805 + i * mul, ~0); + } + } +} + +/* + * TODO: right now, every port is a separate group. This is easier to code, but + * will impact performance due to no interleaving. + * + * Even though documentation (POWER9 Processor User's Manual) says that only the + * total amount of memory behind an MCU has to be the same, Hostboot doesn't + * group 1Rx4 with 2Rx8 (both have 16GB), at least if they are on the different + * sides of CPU. Case when they are on the same side was not tested yet. + * + * If that means MCAs from different sides cannot be grouped, groups bigger than + * 2 ports are not possible, at least for Talos. + * + * TODO2: note that this groups _ports_, not _DIMMs_. One implication is that + * total amount of memory doesn't have to be a power of 2 (different densities). + * Group sizes written to the register however are based on log2 of size. This + * means that either there will be a hole or some RAM won't be mapped. We do not + * have a way of testing it right now, all our DIMMs have 8Gb density. + */ +struct mc_group { + /* Multiple MCAs can be in one group, but not the other way around. */ + uint8_t port_mask; + /* Encoded, 4GB = 0, 8GB = 1, 16GB = 3, 32GB = 7 ... */ + uint8_t group_size; +}; + +/* Without proper documentation it's hard to tell if this is correct. */ +/* The following array is MCS_MCFGP for MCA0 and MCS_MCFGPM for MCA1: + * MCS_MCFGP // undocumented, 0x0501080A + * [all] 0 + * [0] VALID + * [1-4] MC_CHANNELS_PER_GROUP (*) + * [5-7] CHANNEL_0_GROUP_MEMBER_IDENTIFICATION (*) + * [8-10] CHANNEL_1_GROUP_MEMBER_IDENTIFICATION (*) + * [13-23] GROUP_SIZE + * [24-47] GROUP_BASE_ADDRESS + * + * MCS_MCFGPM // undocumented, 0x0501080C + * [all] 0 + * [0] VALID + * [13-23] GROUP_SIZE + * [24-47] GROUP_BASE_ADDRESS + * + * Fields marked with (*) are used only when there is more than 1 MCA in a group. + */ +static uint64_t mcfgp_regs[MCS_PER_PROC][MCA_PER_MCS]; + +/* Encodes size and keeps groups[] sorted. */ +static void add_group(struct mc_group groups[MCA_PER_PROC], int size, uint8_t mask) +{ + int i; + /* + * Size calculations are correct for size that is a power of 2. I have no + * idea what is the proper thing to do if it isn't. 
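+	 * For the power-of-2 case the encoding below does check out: a 16 GB
+	 * port gives (16 - 1) >> 2 = 3 and a 32 GB port gives (32 - 1) >> 2 = 7,
+	 * matching the "16GB = 3, 32GB = 7" encoding documented at struct
+	 * mc_group above.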
+ */ + struct mc_group in = {mask, (size - 1) >> 2}; + + if (size & (size - 1)) + die("Size of group %#2.2x (%d GB) is not a power of 2\n", mask, size); + + for (i = 0; i < MCA_PER_PROC; i++) { + struct mc_group tmp = groups[i]; + + if (tmp.group_size < in.group_size) { + groups[i] = in; + /* Shift the rest of elements */ + in = tmp; + } + + /* Current element was empty */ + if (tmp.port_mask == 0) + break; + } + + if (in.port_mask != 0) + die("Tried to add more groups than possible\n"); +} + +/* TODO: make groups with > 1 MCA possible */ +static void fill_groups(uint8_t chip) +{ + int mcs_i, mca_i, i; + struct mc_group groups[MCA_PER_PROC] = {0}; + /* This is in 4GB units, as expected by registers. */ + uint32_t cur_ba = PROC_BASE_ADDR(chip, /*msel=*/0x0) >> 32; + + memset(mcfgp_regs, 0, sizeof(mcfgp_regs)); + + for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) { + if (!mem_data[chip].mcs[mcs_i].functional) + continue; + + for (mca_i = 0; mca_i < MCA_PER_MCS; mca_i++) { + mca_data_t *mca = &mem_data[chip].mcs[mcs_i].mca[mca_i]; + + if (!mca->functional) + continue; + + /* + * Use the same format as in Hostboot, in case there can be more + * than 2 MCAs per MCS. + * mask = (MCS0/MCA0, MCS0/MCA1, 0, 0, MCS1/MCA0, MCS1/MCA1, 0, 0) + */ + uint8_t mask = PPC_BIT(mcs_i * 4 + mca_i) >> 56; + /* Non-present DIMM will have a size of 0. */ + add_group(groups, mca->dimm[0].size_gb + mca->dimm[1].size_gb, mask); + } + } + + /* Now that all the groups are sorted by size, we can set base addresses. */ + for (i = 0; i < MCA_PER_PROC; i++) { + uint8_t mask = groups[i].port_mask; + if (mask == 0) + break; + + /* A reminder for whoever implements this in add_group() but not here. */ + if (mask & (mask - 1)) + die("Multiple MCs in a group are not supported yet\n"); + + /* + * Get MCS and MCA from mask, we expect bigger groups in the future. No + * else-ifs, bigger groups must set multiple registers (though that is + * not enough, there are also IDs to be set in MCS_MCFGP). + */ + if (mask & 0x80) { + /* MCS = 0, MCA = 0 */ + mcfgp_regs[0][0] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); + } + if (mask & 0x40) { + /* MCS = 0, MCA = 1 */ + mcfgp_regs[0][1] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); + } + if (mask & 0x08) { + /* MCS = 1, MCA = 0 */ + mcfgp_regs[1][0] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); + } + if (mask & 0x04) { + /* MCS = 1, MCA = 1 */ + mcfgp_regs[1][1] = PPC_BIT(0) | PPC_PLACE(groups[i].group_size, 13, 11) | + PPC_PLACE(cur_ba, 24, 24); + } + + cur_ba += groups[i].group_size + 1; + } + /* + * This would be a good place to check if we passed the start of PCIe MMIO + * range (2TB). In that case we probably should configure this memory hole + * somehow (MCFGPA?). + */ +} + +/* + * This function is different than all previous FIR unmasking. It doesn't touch + * Action0 register. It also doesn't modify Action1, it just writes the value + * discarding the old one. As these registers are not documented, I can't even + * tell whether it sets checkstop, recoverable error or something else. + */ +static void fir_unmask(uint8_t chip, int mcs_i) +{ + chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]]; + /* Stride discovered by trial and error due to lack of documentation. 
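+	   With mul = 0x80 the MCS1 copies sit 0x80 above the MCS0 addresses,
+	   matching the address pairs (e.g. 0x05010805/0x05010885) listed in
+	   revert_mc_hb_dcbz_config() above.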
+	*/
+	uint64_t mul = 0x80;
+
+	/* MCS_MCFIRACT1 // undocumented, 0x05010807
+	    [all] 0
+	    [0] MC_INTERNAL_RECOVERABLE_ERROR = 1
+	    [8] COMMAND_LIST_TIMEOUT = 1
+	*/
+	write_scom_for_chiplet(chip, nest, 0x05010807 + mcs_i * mul,
+			       PPC_BIT(0) | PPC_BIT(8));
+
+	/* MCS_MCFIRMASK (AND) // undocumented, 0x05010804
+	    [all] 1
+	    [0] MC_INTERNAL_RECOVERABLE_ERROR = 0
+	    [1] MC_INTERNAL_NONRECOVERABLE_ERROR = 0
+	    [2] POWERBUS_PROTOCOL_ERROR = 0
+	    [4] MULTIPLE_BAR = 0
+	    [5] INVALID_ADDRESS = 0
+	    [8] COMMAND_LIST_TIMEOUT = 0
+	*/
+	write_scom_for_chiplet(chip, nest, 0x05010804 + mcs_i * mul,
+			       ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) |
+				 PPC_BIT(4) | PPC_BIT(5) | PPC_BIT(8)));
+}
+
+static void mcd_fir_mask(uint8_t chip)
+{
+	/* These are always set for the N1 chiplet only. */
+	write_scom_for_chiplet(chip, N1_CHIPLET_ID, MCD1_FIR_MASK_REG, ~0);
+	write_scom_for_chiplet(chip, N1_CHIPLET_ID, MCD0_FIR_MASK_REG, ~0);
+}
+
+static void proc_setup_bars(uint8_t chip)
+{
+	int mcs_i;
+
+	fill_groups(chip);
+
+	for (mcs_i = 0; mcs_i < MCS_PER_PROC; mcs_i++) {
+		chiplet_id_t nest = mcs_to_nest[mcs_ids[mcs_i]];
+
+		if (!mem_data[chip].mcs[mcs_i].functional)
+			continue;
+
+		fir_unmask(chip, mcs_i);
+
+		/*
+		 * More undocumented registers. First two are described before
+		 * 'mcfgp_regs', last two are for setting up memory hole and SMF, they
+		 * are unused now.
+		 */
+		write_scom_for_chiplet(chip, nest, 0x0501080A, mcfgp_regs[mcs_i][0]);
+		write_scom_for_chiplet(chip, nest, 0x0501080C, mcfgp_regs[mcs_i][1]);
+		write_scom_for_chiplet(chip, nest, 0x0501080B, 0);
+		write_scom_for_chiplet(chip, nest, 0x0501080D, 0);
+	}
+
+	mcd_fir_mask(chip);
+}
+
+void istep_14_5(uint8_t chips)
+{
+	uint8_t chip;
+
+	report_istep(14, 5);
+
+	/* Start MCS reset */
+	revert_mc_hb_dcbz_config(/*chip=*/0);
+
+	for (chip = 0; chip < MAX_CHIPS; chip++) {
+		if (chips & (1 << chip))
+			proc_setup_bars(chip);
+	}
+}
diff --git a/src/soc/ibm/power9/istep_18_11.c b/src/soc/ibm/power9/istep_18_11.c
new file mode 100644
index 00000000000..98f392066cb
--- /dev/null
+++ b/src/soc/ibm/power9/istep_18_11.c
@@ -0,0 +1,464 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include
+
+#include
+#include
+#include
+#include
+
+#define MDMT_TOD_GRID_CYCLE_STAGING_DELAY 6
+#define TOD_GRID_PS 400
+
+#define PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC 21
+#define PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE 4
+
+#define M_PATH_0_OSC_NOT_VALID 0
+#define M_PATH_1_OSC_NOT_VALID 1
+#define M_PATH_0_STEP_ALIGN_DISABLE 2
+
+#define M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET 24
+#define M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN 2
+#define M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET 8
+#define M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN 4
+#define M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET 5
+#define M_PATH_SYNC_CREATE_SPS_SELECT_LEN 3
+#define M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET 13
+#define M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN 3
+
+#define BUS_DELAY_63 PPC_BITMASK(52, 63)
+#define BUS_DELAY_47 PPC_BITMASK(36, 47)
+
+// Power Bus Electrical Round Trip Delay Control Register
+// [0:5] WO_1P PB_ELINK_RT_DELAY_CTL_SET:
+//       Setting a bit to 1 (auto reset to 0) causes the matching link
+//       to attempt to do a round-trip delay calculation.
+//       Results end up in the PB_ELINK_DLY_*_REG regs.
+#define PU_PB_ELINK_RT_DELAY_CTL_REG 0x05013419
+// Processor bus Electrical Link Delay 0123 Register
+// [36:47] ROX Reserved.
+// [52:63] ROX Reserved.
+// Note: Documentation describes these bits as reserved; however, they are
+// used to get the bus_delay value.
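+// (calculate_topology_delay() below reads both 12-bit fields and averages
+// them; that averaged value is the only thing this code uses the register
+// for.)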
+#define PU_PB_ELINK_DLY_0123_REG 0x0501340E +// Root Control 8 Register +// [21] RW ROOT_CTRL8_21_SPARE_PLL_CONTROL: +#define PERV_ROOT_CTRL8_SCOM 0x00050018 +// Master/slave select: master path select: slave path select: step check setup +// [1] RWX PRI_M_S_TOD_SELECT: Primary configuration: master-slave TOD select. +// 0 = TOD is slave. +// 1 = TOD is master. +// [2] RWX PRI_M_S_DRAWER_SELECT: Primary configuration: master-slave drawer select. +// 0 = Drawer is slave. +// 1 = Drawer is master It is just used for TOD internal power gating. +// [9] RWX SEC_M_S_TOD_SELECT: Secondary configuration: master-slave TOD select. +// 0 = TOD is slave. +// 1 = TOD is master. +// [10] RWX SEC_M_S_DRAWER_SELECT: Secondary configuration: master-slave drawer select. +// 0 = Drawer is slave. +// 1 = Drawer is master. It is used for TOD internal power gating. +#define PERV_TOD_PSS_MSS_CTRL_REG 0x00040007 +// Control register 1 for the secondary configuration distribution port. +#define PERV_TOD_SEC_PORT_1_CTRL_REG 0x00040004 +// Control register 1 for the primary configuration distribution port. +#define PERV_TOD_PRI_PORT_1_CTRL_REG 0x00040002 +// TOD: Setup for Master Paths Control Register +// Used for oscillator validity, step alignment, sync pulse frequency, and step check. +// [0] RW M_PATH_0_OSC_NOT_VALID: Indicates whether the oscillator attached to master path-0 +// is not valid. +// 0 = Valid oscillator is attached to master path-0. +// 1 = No valid oscillator is attached to master path-0. +// [1] RW M_PATH_1_OSC_NOT_VALID: Indicates whether the oscillator attached to master path-1 +// is not valid. +// 0 = Valid oscillator is attached to master path-1. +// 1 = No valid oscillator is attached to master path-1. +// [2] RW M_PATH_0_STEP_ALIGN_DISABLE: Master path-0. Indicates alignment of master path-0 +// step to master path-1 step is active +// 0 = Alignment of master path-0 step to master path-1 step is active. +// 1 = Alignment of master path-0 step to master path-1 Step is not active. +// [5:7] RW M_PATH_SYNC_CREATE_SPS_SELECT: Master path: sync create: steps per sync (SPS) +// select: number of STEP pulses per SYNC pulse. +// [8:11] RW M_PATH_0_STEP_CHECK_CPS_DEVIATION: Master path-0: step check: CPS deviation. +// [13:15] RW M_PATH_0_STEP_CHECK_VALIDITY_COUNT: Master path-0 step check. Specifies the number +// of received steps before the step is declared as valid. +// [24:25] RW M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR: Master path-01: step check: CPS deviation +// factor. +#define PERV_TOD_M_PATH_CTRL_REG 0x00040000 +// TOD: Internal Path Control Register +// [8:11] RW I_PATH_STEP_CHECK_CPS_DEVIATION: Internal path: step check: CPS deviation. +// [13:15] RW I_PATH_STEP_CHECK_VALIDITY_COUNT: Internal path: step check: validity count. +// Defines the number of received steps before the step is declared as valid. +// [32:39] RWX I_PATH_CPS: Internal path: CPS +// In write mode, the value is used to load the CPS for the constant CPS for the step +// checker. In read mode the value shows the actual CPS in the internal path. +#define PERV_TOD_I_PATH_CTRL_REG 0x00040006 +// TOD: Secondary Configuration Distribution Port Control Register 0 +// [32:39] RW SEC_I_PATH_DELAY_VALUE: Internal path: secondary configuration: delay value. 
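+// (configure_tod() below programs the topology delay computed by
+// calculate_topology_delay() into bits [32:39] of the port-0 control
+// registers - PRI for the primary topology, SEC for the secondary one.)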
+#define PERV_TOD_SEC_PORT_0_CTRL_REG 0x00040003
+// TOD: Primary Configuration Distribution Port Control Register 0
+// Same bit purpose as in PERV_TOD_SEC_PORT_0_CTRL_REG
+#define PERV_TOD_PRI_PORT_0_CTRL_REG 0x00040001
+// TOD: Chip Control Register
+// [10:15] LOW_ORDER_STEP_VALUE: Low-order step value needed
+//         for USE_TB_STEP_SYNC as the programmable
+//         cycle counter for creating a step.
+#define PERV_TOD_CHIP_CTRL_REG 0x00040010
+// TOD: Slave Path Control Register
+// [26:27] RW S_PATH_REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR:
+//         Slave path-01: remote sync: sync-step check: CPS deviation factor.
+// [28:31] RW S_PATH_REMOTE_SYNC_CHECK_CPS_DEVIATION:
+//         Slave path-01: remote sync: sync-step check: CPS deviation.
+// [32:39] RW S_PATH_REMOTE_SYNC_MISS_COUNT_MAX:
+//         Slave path-01: remote sync: maximum of SYNC miss counts: 0 - 255 syncs.
+#define PERV_TOD_S_PATH_CTRL_REG 0x00040005
+
+/*
+ * 2 CPU topology
+ *
+ * Primary (CHIP0 is MDMT):
+ *
+ *   CHIP0 --TX--> XBUS1 --> XBUS1 --RX--> CHIP1
+ *
+ * Secondary (CHIP1 is MDMT):
+ *
+ *   CHIP0 <--RX-- XBUS1 <-- XBUS1 <--TX-- CHIP1
+ */
+
+static uint32_t calculate_topology_delay(uint8_t chip, uint8_t chips, uint8_t mdmt)
+{
+	enum { XBUS_LINK_FACTOR = 8 };
+
+	uint32_t delay;
+
+	/*
+	 * In a simple topology with one proc it is enough to set the node delay
+	 * to 0. With multiple processors this will get more complicated,
+	 * see calculate_node_link_delay() in Hostboot
+	 */
+	if (chips == 0x01)
+		return MDMT_TOD_GRID_CYCLE_STAGING_DELAY;
+
+	/* The only non-trivial topology supported is a two-node one */
+	if (chips != 0x03)
+		die("%s() supports only two-chip configuration\n", __func__);
+
+	/* There are no nodes connected to the non-MDMT chip, hence no delay */
+	if (chip != mdmt)
+		return 0;
+
+	/*
+	 * Find the most-delayed path in the topology; this is the MDMT's delay.
+	 *
+	 * More advanced topologies would require building a tree and finding
+	 * the longest/slowest path from the top node to one of the leaves, which
+	 * in our special case equals the delay to the next chip.
+	 */
+
+	/* For some reason this is a write, not RMW */
+	write_scom(chip, PU_PB_ELINK_RT_DELAY_CTL_REG, PPC_BITMASK(2, 3));
+	uint64_t bus_mode_reg = read_scom(chip, PU_PB_ELINK_DLY_0123_REG);
+
+	uint32_t bus_delay = (((bus_mode_reg & BUS_DELAY_47) >> 16) +
+			      (bus_mode_reg & BUS_DELAY_63)) / 2;
+
+	/*
+	 * By default, the TOD grid runs at 400ps; TOD counts its delay based on this.
+	 *
+	 * Example: Bus round trip delay is 35 cycles and the bus is running at 4800MHz
+	 * - Divide by 2 to get one-way delay time
+	 * - Divide by 4800 * 10^6 to get delay in seconds
+	 * - Multiply by 10^12 to get delay in picoseconds
+	 * - Divide by 400ps to get TOD-grid-cycles
+	 * - (To avoid including math.h) Add 1 and cast to uint32_t to round up to nearest TOD-grid-cycle
+	 * - (To avoid including math.h) 10^12/10^6=1000000
+	 * - (uint32_t)(( 35 / 2 / (4800 * 10^6) * 10^12 / 400 ) + 1) = 10 TOD-grid-cycles
+	 */
+	/*
+	 * FIXME: floating point wasn't fully configured, see if we can skip it.
+	 * Testing requires a bigger topology, i.e. more CPUs.
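+	 *
+	 * A possible integer-only equivalent (a sketch, untested; it should round
+	 * the same way as the cast below, since all terms are positive):
+	 *
+	 *	delay = (uint32_t)((uint64_t)bus_delay * 1000000
+	 *			   / (2 * XBUS_LINK_FACTOR * FREQ_X_MHZ * TOD_GRID_PS)
+	 *			   + 1);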
+ */ + delay = (uint32_t)(1 + ((double)(bus_delay * 1000000) + / (double)(2 * XBUS_LINK_FACTOR * FREQ_X_MHZ * TOD_GRID_PS))); + + /* The MDMT delay must include additional TOD-grid-cycles to account for + * staging latches in slaves */ + if (chip == mdmt) + delay += MDMT_TOD_GRID_CYCLE_STAGING_DELAY; + + return delay; +} + +static void calculate_m_path(uint8_t chip, uint8_t mdmt) +{ + uint64_t dual_edge_disable = + (read_scom(chip, PERV_ROOT_CTRL8_SCOM) & + PPC_BIT(PERV_ROOT_CTRL8_TP_PLL_CLKIN_SEL9_DC)) + ? PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) + : 0; + + if (chip == mdmt) { + scom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, + ~(PPC_BIT(M_PATH_0_OSC_NOT_VALID) | + PPC_BIT(M_PATH_0_STEP_ALIGN_DISABLE) | + PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE) | + PPC_PLACE(0x7, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + PPC_PLACE(0x7, M_PATH_SYNC_CREATE_SPS_SELECT_OFFSET, + M_PATH_SYNC_CREATE_SPS_SELECT_LEN) | + PPC_PLACE(0xF, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_OFFSET, + M_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR_LEN)), + PPC_BIT(M_PATH_1_OSC_NOT_VALID) | + PPC_PLACE(0x8, M_PATH_0_STEP_CHECK_CPS_DEVIATION_OFFSET, + M_PATH_0_STEP_CHECK_CPS_DEVIATION_LEN) | + PPC_PLACE(0x3, M_PATH_0_STEP_CHECK_VALIDITY_COUNT_OFFSET, + M_PATH_0_STEP_CHECK_VALIDITY_COUNT_LEN) | + dual_edge_disable); + } else { + scom_and_or(chip, PERV_TOD_M_PATH_CTRL_REG, + ~PPC_BIT(PERV_TOD_M_PATH_CTRL_REG_STEP_CREATE_DUAL_EDGE_DISABLE), + dual_edge_disable); + } +} + +static void configure_tod(uint8_t chip, uint8_t chips, uint8_t pri_mdmt, uint8_t sec_mdmt) +{ + uint32_t topology_delay = calculate_topology_delay(chip, chips, pri_mdmt); + + /* Clear previous primary topology */ + write_scom(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, 0); + write_scom(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, 0); + + /* Workaround for HW480181: Init remote sync checker tolerance to maximum + * [26-27] REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0x3 (factor 8) + * [28-31] REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xF (93.75%) + */ + scom_or(chip, PERV_TOD_S_PATH_CTRL_REG, PPC_PLACE(0x3, 26, 2) | PPC_PLACE(0xF, 28, 4)); + + /* + * Set PSS_MSS_CTRL_REG for primary configuration, assumption: + * - valid oscillator is attached to master path-0, but not path-1 + * [0] PRI_M_PATH_SELECT = 0 (path-0 selected) + * [1] PRI_M_S_TOD_SELECT = 1 (TOD is master) + * [2] PRI_M_S_DRAWER_SELECT = 1 (drawer is master) + */ + if (chip == pri_mdmt) { + scom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, + ~PPC_BIT(0), PPC_BIT(1) | PPC_BIT(2)); + } else { + scom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(1)); + } + + /* + * Set CPS deviation to 75% (CPS deviation bits = 0xC, factor=1), + * 8 valid steps to enable step check. 
+ * + * [0] S_PATH_CTRL_REG_PRI_SELECT = 0 (slave path 0) + * [6-7] S_PATH_CTRL_REG_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [8-11] S_PATH_CTRL_REG_0_STEP_CHECK_CPS_DEVIATION = 0xC (75%) + * [13-15] S_PATH_CTRL_REG_0_STEP_CHECK_VALIDITY_COUNT = 3 (8 valid steps) + * [26-27] S_PATH_CTRL_REG_REMOTE_SYNC_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [28-31] S_PATH_CTRL_REG_REMOTE_SYNC_CHECK_CPS_DEVIATION = 0xC (75%) + * [32-39] TOD_S_PATH_CTRL_REG_REMOTE_SYNC_MISS_COUNT_2 = 0x5 + */ + if (chip != pri_mdmt) { + scom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15) | + PPC_BITMASK(26, 39)), + PPC_PLACE(0xC, 8, 4) | PPC_PLACE(0x3, 13, 3) | + PPC_PLACE(0xC, 28, 4) | PPC_PLACE(0x5, 32, 8)); + } + + /* + * Configure PORT_CTRL_REGs (primary), assumptions: + * - XBUS1 link is used on both chips + * - oscillator connected to OSC0 and not to OSC1 + * + * [0-2] PRI_PORT_0_CTRL_REG_RX_SELECT = 1 (link number) + * [6-7] TOD_PORT_CTRL_REG_TX_X1_SEL = TX sel + * [21] TOD_PORT_CTRL_REG_TX_X1_EN = enable + */ + if (chips != 0x01) { + /* MDMT has no RX */ + int rx_sel = (chip == pri_mdmt ? 0 : 1); + /* Only MDMT has TX */ + int tx_sel = (chip == pri_mdmt ? 2 /*m_path_0*/ : 0); + uint64_t tx_en = (chip == pri_mdmt ? PPC_BIT(21) : 0); + + scom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + scom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + } + + /* Configure M_PATH_CTRL_REG (primary) */ + /* + * TODO: check this again. Value is correct, not sure whether fields are + * correctly cleared. Also comment the values written. + */ + calculate_m_path(chip, pri_mdmt); + + /* Configure I_PATH_CTRL_REG (primary) */ + /* PERV_TOD_PRI_PORT_0_CTRL_REG: + * [32-39] PRI_I_PATH_DELAY_VALUE = calculate + * PERV_TOD_I_PATH_CTRL_REG: + * [0] I_PATH_DELAY_DISABLE = 0 + * [1] I_PATH_DELAY_ADJUST_DISABLE = 0 + * [6-7] I_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) + * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) + */ + scom_and_or(chip, PERV_TOD_PRI_PORT_0_CTRL_REG, + ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); + scom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); + + /* Configure INIT_CHIP_CTRL_REG (primary) */ + /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) + * [4] I_PATH_SYNC_CHECK_DISABLE = 0 (enable internal path sync check) + * [7] MOVE_TOD_TO_TB_ON_2X_SYNC_ENABLE = 0 (1x sync boundaries) + * [8] USE_TB_SYNC_MECHANISM = 0 + * [9] USE_TB_STEP_SYNC = 0 (use TB step sync from internal path) + * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) + * [30] XSTOP_GATE = 0 (stop TOD on checkstop) + */ + scom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_PLACE(0x3F, 10, 6)); + + /* TODO: test if we can skip repeated writes (M_PATH, I_PATH, CHIP) */ + + topology_delay = calculate_topology_delay(chip, chips, sec_mdmt); + + /* Clear previous secondary topology */ + /* NOTE: order is swapped wrt primary, does it matter? 
*/ + write_scom(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, 0); + write_scom(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, 0); + + /* + * Set PSS_MSS_CTRL_REG for secondary configuration, assumptions as before + * [0] PRI_M_PATH_SELECT = 0 (path-0 selected) (SW440224) + * [8] SEC_M_PATH_SELECT = 0 (path-0 selected) + * [9] SEC_M_S_TOD_SELECT = 1 (TOD is master) + * [10] SEC_M_S_DRAWER_SELECT = 1 (drawer is master) + */ + if (chip == sec_mdmt) { + scom_and_or(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~(PPC_BIT(0) | PPC_BIT(8)), + PPC_BIT(9) | PPC_BIT(10)); + } else { + scom_and(chip, PERV_TOD_PSS_MSS_CTRL_REG, ~PPC_BIT(9)); + } + + /* + * Set CPS deviation to 75% (CPS deviation bits = 0xC, factor=1), + * 8 valid steps to enable step check. + * + * [0] S_PATH_CTRL_REG_PRI_SELECT = 0 (slave path 0) + * [4] S_PATH_CTRL_REG_SEC_SELECT = 1 (slave path 1) + * [6-7] S_PATH_CTRL_REG_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [16-19] S_PATH_CTRL_REG_1_STEP_CHECK_CPS_DEVIATION = 0xC (75%) + * [21-23] PERV_TOD_S_PATH_CTRL_REG_1_STEP_CHECK_VALIDITY_COUNT = 3 (8 valid steps) + * [32-39] TOD_S_PATH_CTRL_REG_REMOTE_SYNC_MISS_COUNT_2 = 0x5 + */ + if (chip != sec_mdmt) { + scom_and_or(chip, PERV_TOD_S_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BITMASK(6, 7) | PPC_BITMASK(16, 19) | + PPC_BITMASK(21, 23) | PPC_BITMASK(32, 39)), + PPC_BIT(4) | PPC_PLACE(0xC, 16, 4) | PPC_PLACE(0x3, 21, 3) | + PPC_PLACE(0x5, 32, 8)); + } + + /* Configure PORT_CTRL_REGs (secondary), assumptions as above. + * + * [0-2] PRI_PORT_0_CTRL_REG_RX_SELECT = 1 (link number) + * [6-7] TOD_PORT_CTRL_REG_TX_X1_SEL = TX sel + * [21] TOD_PORT_CTRL_REG_TX_X1_EN = enable + */ + if (chips != 0x01) { + /* MDMT has no RX */ + int rx_sel = (chip == sec_mdmt ? 0 : 1); + /* Only MDMT has TX */ + int tx_sel = (chip == sec_mdmt ? 2 /*m_path_0*/ : 0); + uint64_t tx_en = (chip == sec_mdmt ? 
PPC_BIT(21) : 0); + + scom_and_or(chip, PERV_TOD_PRI_PORT_1_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + scom_and_or(chip, PERV_TOD_SEC_PORT_1_CTRL_REG, + ~(PPC_BITMASK(0, 2) | PPC_BITMASK(6, 7) | PPC_BIT(21)), + PPC_PLACE(rx_sel, 0, 3) | PPC_PLACE(tx_sel, 6, 2) | tx_en); + } + + /* Configure M_PATH_CTRL_REG (secondary) */ + calculate_m_path(chip, sec_mdmt); + + /* Configure I_PATH_CTRL_REG (secondary) */ + /* PERV_TOD_SEC_PORT_0_CTRL_REG: + * [32-39] SEC_I_PATH_DELAY_VALUE = calculate + * PERV_TOD_I_PATH_CTRL_REG: + * [0] I_PATH_DELAY_DISABLE = 0 + * [1] I_PATH_DELAY_ADJUST_DISABLE = 0 + * [6-7] I_PATH_STEP_CHECK_CPS_DEVIATION_FACTOR = 0 (factor = 1) + * [8-11] I_PATH_STEP_CHECK_CPS_DEVIATION = 0xF (93.75%) + * [13-15] I_PATH_STEP_CHECK_VALIDITY_COUNT = 0x3 (count = 8) + */ + scom_and_or(chip, PERV_TOD_SEC_PORT_0_CTRL_REG, + ~PPC_BITMASK(32, 39), PPC_PLACE(topology_delay, 32, 8)); + scom_and_or(chip, PERV_TOD_I_PATH_CTRL_REG, + ~(PPC_BIT(0) | PPC_BIT(1) | PPC_BITMASK(6, 11) | PPC_BITMASK(13, 15)), + PPC_PLACE(0xF, 8, 4) | PPC_PLACE(0x3, 13, 3)); + + /* Configure INIT_CHIP_CTRL_REG (secondary) */ + /* [1-3] I_PATH_CORE_SYNC_PERIOD_SELECT = 0 (core sync period is 8us) + * [4] I_PATH_SYNC_CHECK_DISABLE = 0 (enable internal path sync check) + * [7] MOVE_TOD_TO_TB_ON_2X_SYNC_ENABLE = 0 (1x sync boundaries) + * [8] USE_TB_SYNC_MECHANISM = 0 + * [9] USE_TB_STEP_SYNC = 0 (use TB step sync from internal path) + * [10-15] LOW_ORDER_STEP_VALUE = 0x3F (4-bit WOF counter is incremented with each 200MHz clock cycle) + * [30] XSTOP_GATE = 0 (stop TOD on checkstop) + */ + scom_and_or(chip, PERV_TOD_CHIP_CTRL_REG, + ~(PPC_BITMASK(1, 4) | PPC_BITMASK(7, 15) | PPC_BIT(30)), + PPC_PLACE(0x3F, 10, 6)); +} + +static int core_count(uint8_t chip) +{ + uint64_t cores = read_scom(chip, 0x0006C090); + return __builtin_popcount((uint32_t)cores) + __builtin_popcount(cores >> 32); +} + +void istep_18_11(uint8_t chips, uint8_t *mdmt) +{ + uint8_t pri_mdmt; + uint8_t sec_mdmt; + uint8_t chip; + + report_istep(18, 11); + + if (chips != 0x01 && chips != 0x03) + die("Unsupported number of chips for TOD: 0x%02x\n", chips); + + pri_mdmt = 0; + sec_mdmt = 0; + + if (chips == 0x3) { + uint64_t cores[2] = { core_count(0), core_count(1) }; + /* CPU with max amount of cores is primary MDMT */ + pri_mdmt = (cores[0] <= cores[1] ? 0 : 1); + /* The other one will be secondary MDMT */ + sec_mdmt = (cores[0] <= cores[1] ? 1 : 0); + } + + *mdmt = pri_mdmt; + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + configure_tod(chip, chips, pri_mdmt, sec_mdmt); + } +} diff --git a/src/soc/ibm/power9/istep_18_12.c b/src/soc/ibm/power9/istep_18_12.c new file mode 100644 index 00000000000..c58c4725d85 --- /dev/null +++ b/src/soc/ibm/power9/istep_18_12.c @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#define PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 38 + +#define PERV_TOD_ERROR_REG_RX_TTYPE_0 38 + +#define PERV_TOD_FSM_REG_IS_RUNNING 4 +#define PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER 0 +#define PERV_TOD_START_TOD_REG_FSM_TRIGGER 0 +#define PERV_TOD_TX_TTYPE_REG_TRIGGER 0 + +// TOD: TX TTYPE +// TX TType triggering register. +// [0] TX_TTYPE_2_TRIGGER: TX TTYPE trigger. +#define PERV_TOD_TX_TTYPE_2_REG 0x00040013 +#define PERV_TOD_TX_TTYPE_4_REG 0x00040015 +#define PERV_TOD_TX_TTYPE_5_REG 0x00040016 + +// TOD: Load +// TOD-mod triggering register. 
+// This register sets the FSM to the NOT_SET state.
+// [0] FSM_LOAD_TOD_MOD_TRIGGER: FSM: LOAD_TOD_MOD trigger.
+#define PERV_TOD_LOAD_TOD_MOD_REG 0x00040018
+// TOD: Load Register. TOD incrementer: 60-bit TOD and 4-bit WOF.
+// On read: returns all 0s when the TOD is not running.
+// On write: go to wait for sync state when data bit 6) = '0' (load TOD).
+// Otherwise, go to stopped state (load TOD data63).
+// [0-59]  LOAD_TOD_VALUE: Internal path: load TOD value.
+// [60-63] WOF: who's-on-first (WOF) incrementer.
+#define PERV_TOD_LOAD_TOD_REG 0x00040021
+// TOD: Start TOD Triggering Register
+// Goes to running state when data bit [02] = '0'.
+// Otherwise, go to wait for sync state.
+// [0] FSM_START_TOD_TRIGGER: FSM: Start TOD trigger.
+#define PERV_TOD_START_TOD_REG 0x00040022
+// TOD: FSM Register
+// [0:3] RWX I_PATH_FSM_STATE: Internal path.
+//       TOD FSM state (TOD is running in the following states:
+//       x'02', x'0A', x'0E'). 0000 = Error.
+// [4] ROX TOD_IS_RUNNING: TOD running indicator.
+#define PERV_TOD_FSM_REG 0x00040024
+// TOD: Error and Interrupt Register
+// [38] RWX RX_TTYPE_0: Status: received TType-0.
+// [39] RWX RX_TTYPE_1: Status: received TType-1.
+// [40] RWX RX_TTYPE_2: Status: received TType-2.
+// [41] RWX RX_TTYPE_3: Status: received TType-3.
+// [42] RWX RX_TTYPE_4: Status: received TType-4.
+// [43] RWX RX_TTYPE_5: Status: received TType-5 when FSM is in running state.
+#define PERV_TOD_ERROR_REG 0x00040030
+// TOD: Error Mask Register: mask of the error reporting component (c_err_rpt).
+// This register holds masks for the same bits
+// as in the previous (PERV_TOD_ERROR_REG) register
+#define PERV_TOD_ERROR_MASK_REG 0x00040032
+
+/* See istep 18.11 for 2 CPU topology diagram */
+
+static void init_tod_node(uint8_t chips, uint8_t mdmt)
+{
+	uint8_t chip;
+
+	/* Clear the TOD error register by writing all bits to 1 */
+	/*
+	 * Probably a documentation issue: all bits in this register are described
+	 * as RW, but code treats them as if they were write-1-to-clear.
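+	 * (Hence the ~0 below: under W1C semantics writing all-ones clears every
+	 * latched error bit, while writing 0 would leave them all untouched.)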
+ */ + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + write_scom(chip, PERV_TOD_ERROR_REG, ~0); + } + + /* Configure MDMT */ + + /* Chip TOD step checkers enable */ + write_scom(mdmt, PERV_TOD_TX_TTYPE_2_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* Switch local Chip TOD to 'Not Set' state */ + write_scom(mdmt, PERV_TOD_LOAD_TOD_MOD_REG, + PPC_BIT(PERV_TOD_LOAD_TOD_MOD_REG_FSM_TRIGGER)); + + /* Switch all Chip TOD in the system to 'Not Set' state */ + write_scom(mdmt, PERV_TOD_TX_TTYPE_5_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* Chip TOD load value (move TB to TOD) */ + write_scom(mdmt, PERV_TOD_LOAD_TOD_REG, + PPC_PLACE(0x3FF, 0, 60) | PPC_PLACE(0xC, 60, 4)); + + /* Chip TOD start_tod (switch local Chip TOD to 'Running' state) */ + write_scom(mdmt, PERV_TOD_START_TOD_REG, + PPC_BIT(PERV_TOD_START_TOD_REG_FSM_TRIGGER)); + + /* Send local Chip TOD value to all Chip TODs */ + write_scom(mdmt, PERV_TOD_TX_TTYPE_4_REG, + PPC_BIT(PERV_TOD_TX_TTYPE_REG_TRIGGER)); + + /* In case of larger topology, replace loops with a recursion */ + for (chip = 0; chip < MAX_CHIPS; chip++) { + uint64_t error_reg; + + if (!(chips & (1 << chip))) + continue; + + /* Wait until TOD is running */ + if (!wait_us(1000, read_scom(chip, PERV_TOD_FSM_REG) & + PPC_BIT(PERV_TOD_FSM_REG_IS_RUNNING))) { + printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", + read_scom(chip, PERV_TOD_ERROR_REG)); + die("Error on chip#%d: TOD is not running!\n", chip); + } + + /* Clear TTYPE#2, TTYPE#4, and TTYPE#5 status */ + write_scom(chip, PERV_TOD_ERROR_REG, + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 2) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 4) | + PPC_BIT(PERV_TOD_ERROR_REG_RX_TTYPE_0 + 5)); + + /* Check for real errors */ + error_reg = read_scom(chip, PERV_TOD_ERROR_REG); + if (error_reg != 0) { + printk(BIOS_ERR, "PERV_TOD_ERROR_REG = %#16.16llx\n", error_reg); + die("Error: TOD initialization failed!\n"); + } + + /* Set error mask to runtime configuration (mask TTYPE informational bits) */ + write_scom(chip, PERV_TOD_ERROR_MASK_REG, + PPC_BITMASK(PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0, + PERV_TOD_ERROR_MASK_REG_RX_TTYPE_0 + 5)); + } +} + +void istep_18_12(uint8_t chips, uint8_t mdmt) +{ + report_istep(18, 12); + init_tod_node(chips, mdmt); +} diff --git a/src/soc/ibm/power9/istep_8_1.c b/src/soc/ibm/power9/istep_8_1.c new file mode 100644 index 00000000000..1cd0c25d70d --- /dev/null +++ b/src/soc/ibm/power9/istep_8_1.c @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fsi.h" +#include "scratch.h" + +/* + * 8.1 host_slave_sbe_config + * + * Need to run this from master processor to all slave processors for + * Secureboot hole (need to ensure that SP didn't leave compromised P8 Slave). 
+ * + * - Update SBE config data area with any configs/parameters required by SBE + * (see step 0 for more details) + * - This includes the nest (and memory frequency if in synchronous mode) + * - Configuration flags (MPIPL, etc) + */ + +/* + * Legend for constant names: + * - *_FSI is a CFAM address (in 4 byte words, that's how it is in Hostboot) + * - *_FSI_BYTE is an FSI address + */ + +/* Used to read SBE Boot Side from processor */ +const uint64_t SBE_BOOT_SELECT_MASK = 0x0000400000000000; + +static void compute_chip_gards(uint8_t chip, + uint8_t *eq_gard, uint32_t *ec_gard) +{ + const uint64_t cores = mvpd_get_available_cores(chip); + + *eq_gard = 0xFF; + *ec_gard = 0xFFFFFFFF; + + for (int quad = 0; quad < MAX_QUADS_PER_CHIP; quad++) { + if (IS_EQ_FUNCTIONAL(quad, cores)) + *eq_gard &= ~(0x80 >> quad); + } + + for (int core = 0; core < MAX_CORES_PER_CHIP; core++) { + if (IS_EC_FUNCTIONAL(core, cores)) + *ec_gard &= ~(0x80000000 >> core); + } + + /* Shift the first meaningful bit to LSB position */ + *eq_gard >>= 2; + *ec_gard >>= 8; +} + +static void setup_sbe_config(uint8_t chip) +{ + /* These aren't defined in scratch.h because they are used only here and + * this allows for much shorter names. */ + enum { + /* SCRATCH_REGISTER_1 */ + EQ_GARD_START = 0, + EQ_GARD_LEN = 6, + EC_GARD_START = 8, + EC_GARD_LEN = 24, + + /* SCRATCH_REGISTER_2 */ + I2C_BUS_DIV_REF_START = 0, + I2C_BUS_DIV_REF_LEN = 16, + OPTICS_CONFIG_MODE_OBUS0 = 16, + OPTICS_CONFIG_MODE_OBUS1 = 17, + OPTICS_CONFIG_MODE_OBUS2 = 18, + OPTICS_CONFIG_MODE_OBUS3 = 19, + MC_PLL_BUCKET_START = 21, + MC_PLL_BUCKET_LEN = 3, + OB0_PLL_BUCKET_START = 24, + OB0_PLL_BUCKET_LEN = 2, + OB1_PLL_BUCKET_START = 26, + OB1_PLL_BUCKET_LEN = 2, + OB2_PLL_BUCKET_START = 28, + OB2_PLL_BUCKET_LEN = 2, + OB3_PLL_BUCKET_START = 30, + OB3_PLL_BUCKET_LEN = 2, + + /* SCRATCH_REGISTER_3 */ + BOOT_FLAGS_START = 0, + BOOT_FLAGS_LEN = 32, + RISK_LEVEL_START = 28, + RISK_LEVEL_LEN = 4, + + /* SCRATCH_REGISTER_4 */ + BOOT_FREQ_MULT_START = 0, + BOOT_FREQ_MULT_LEN = 16, + CP_FILTER_BYPASS = 16, + SS_FILTER_BYPASS = 17, + IO_FILTER_BYPASS = 18, + DPLL_BYPASS = 19, + NEST_MEM_X_O_PCI_BYPASS = 20, + OBUS_RATIO_VALUE = 21, + NEST_PLL_BUCKET_START = 29, + NEST_PLL_BUCKET_LEN = 3, + + /* SCRATCH_REGISTER_5 */ + PLL_MUX_START = 12, + PLL_MUX_LEN = 20, + CC_IPL = 0, + INIT_ALL_CORES = 1, + RISK_LEVEL_BIT_DEPRECATED = 2, + DISABLE_HBBL_VECTORS = 3, + MC_SYNC_MODE = 4, + SLOW_PCI_REF_CLOCK = 5, + + /* SCRATCH_REGISTER_6 */ + SMF_CONFIG = 16, + PROC_EFF_FABRIC_GROUP_ID_START = 17, + PROC_EFF_FABRIC_GROUP_ID_LEN = 3, + PROC_EFF_FABRIC_CHIP_ID_START = 20, + PROC_EFF_FABRIC_CHIP_ID_LEN = 3, + PUMP_CHIP_IS_GROUP = 23, + SLAVE_CHIP_SBE = 24, + PROC_FABRIC_GROUP_ID_START = 26, + PROC_FABRIC_GROUP_ID_LEN = 3, + PROC_FABRIC_CHIP_ID_START = 29, + PROC_FABRIC_CHIP_ID_LEN = 3, + PROC_MEM_TO_USE_START = 1, + PROC_MEM_TO_USE_LEN = 6, + }; + + uint64_t scratch; + + uint32_t boot_flags; + uint8_t risk_level; + + uint8_t eq_gard; + uint32_t ec_gard; + + compute_chip_gards(chip, &eq_gard, &ec_gard); + + /* SCRATCH_REGISTER_1 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI), 31); + + /* ATTR_EQ_GARD (computed at runtime) */ + PPC_INSERT(scratch, eq_gard, EQ_GARD_START, EQ_GARD_LEN); + /* ATTR_EC_GARD (computed at runtime)*/ + PPC_INSERT(scratch, ec_gard, EC_GARD_START, EC_GARD_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI, scratch >> 32); + + /* SCRATCH_REGISTER_2 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 1), 31); + + /* 
ATTR_I2C_BUS_DIV_REF (default, talos.xml) */ + PPC_INSERT(scratch, 0x0003, I2C_BUS_DIV_REF_START, I2C_BUS_DIV_REF_LEN); + /* ATTR_MC_PLL_BUCKET (seems not to be relevant for Nimbus) */ + PPC_INSERT(scratch, 0x00, MC_PLL_BUCKET_START, MC_PLL_BUCKET_LEN); + + /* + * TODO: if OBUS support is needed: + * + * - set OPTICS_CONFIG_MODE_OBUS[0-3] bits + * - set OB[0-3]_PLL_BUCKET bits + * see getObusPllBucket() in Hostboot for values of ATTR_OB*_PLL_BUCKET + */ + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 1, scratch >> 32); + + /* SCRATCH_REGISTER_3 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 2), 31); + + boot_flags = (read_scom(0, MBOX_SCRATCH_REG1 + 2) >> 32); + risk_level = (get_dd() < 0x23 ? 0 : 4); + + /* Note that the two fields overlap (boot flags include the risk level), so + the order in which they are set is important. */ + + /* ATTR_BOOT_FLAGS (computed) */ + PPC_INSERT(scratch, boot_flags, BOOT_FLAGS_START, BOOT_FLAGS_LEN); + /* ATTR_RISK_LEVEL (computed) */ + PPC_INSERT(scratch, risk_level, RISK_LEVEL_START, RISK_LEVEL_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 2, scratch >> 32); + + /* SCRATCH_REGISTER_4 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 3), 31); + + /* ATTR_BOOT_FREQ_MULT (talos.xml) */ + PPC_INSERT(scratch, 96, BOOT_FREQ_MULT_START, BOOT_FREQ_MULT_LEN); + /* ATTR_NEST_PLL_BUCKET (index of nest frequency (1866 MHz) in the + ATTR_NEST_PLL_FREQ_LIST array, base 1, see setFrequencyAttributes() in + Hostboot) */ + PPC_INSERT(scratch, 2, NEST_PLL_BUCKET_START, NEST_PLL_BUCKET_LEN); + + /* ATTR_CP_FILTER_BYPASS (default, talos.xml) */ + PPC_INSERT(scratch, 0, CP_FILTER_BYPASS, 1); + /* ATTR_SS_FILTER_BYPASS (default, talos.xml) */ + PPC_INSERT(scratch, 0, SS_FILTER_BYPASS, 1); + /* ATTR_IO_FILTER_BYPASS (default, talos.xml) */ + PPC_INSERT(scratch, 0, IO_FILTER_BYPASS, 1); + /* ATTR_DPLL_BYPASS (default, talos.xml) */ + PPC_INSERT(scratch, 0, DPLL_BYPASS, 1); + /* ATTR_NEST_MEM_X_O_PCI_BYPASS (default, talos.xml) */ + PPC_INSERT(scratch, 0, NEST_MEM_X_O_PCI_BYPASS, 1); + + /* ATTR_OBUS_RATIO_VALUE (empty default in talos.xml) */ + PPC_INSERT(scratch, 0, OBUS_RATIO_VALUE, 1); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 3, scratch >> 32); + + /* SCRATCH_REGISTER_5 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 4), 31); + + /* ATTR_SYSTEM_IPL_PHASE (default, talos.xml) == HB_IPL, not CACHE_CONTAINED + XXX: but we're not Hostboot and we run in ROM stage, so set this bit to 1?
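+ For now it is left at 0 below, which matches what Hostboot programs for a + normal (non-cache-contained) IPL.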
*/ + PPC_INSERT(scratch, 0, CC_IPL, 1); + /* ATTR_SYS_FORCE_ALL_CORES (talos.xml) */ + PPC_INSERT(scratch, 0, INIT_ALL_CORES, 1); + /* Risk level flag is deprecated here, moved to SCRATCH_REG_3 */ + PPC_INSERT(scratch, 0, RISK_LEVEL_BIT_DEPRECATED, 1); + /* ATTR_DISABLE_HBBL_VECTORS (default, talos.xml) */ + PPC_INSERT(scratch, 0, DISABLE_HBBL_VECTORS, 1); + /* Hostboot reads it from SBE, but we assume it's 0 in p9n_mca_scom() */ + PPC_INSERT(scratch, 0, MC_SYNC_MODE, 1); + /* ATTR_DD1_SLOW_PCI_REF_CLOCK (we're not DD1, but Hostboot sets this bit) */ + PPC_INSERT(scratch, 1, SLOW_PCI_REF_CLOCK, 1); + + /* ATTR_CLOCK_PLL_MUX (talos.xml) */ + PPC_INSERT(scratch, 0x80030, PLL_MUX_START, PLL_MUX_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 4, scratch >> 32); + + /* SCRATCH_REGISTER_6 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 5), 31); + + /* ATTR_PROC_SBE_MASTER_CHIP is always zero here */ + PPC_INSERT(scratch, 1, SLAVE_CHIP_SBE, 1); + /* ATTR_SMF_CONFIG */ + PPC_INSERT(scratch, 0, SMF_CONFIG, 1); + /* ATTR_PROC_FABRIC_PUMP_MODE (talos.xml) */ + PPC_INSERT(scratch, 1, PUMP_CHIP_IS_GROUP, 1); + + /* ATTR_PROC_FABRIC_GROUP_ID */ + PPC_INSERT(scratch, chip, PROC_FABRIC_GROUP_ID_START, PROC_FABRIC_GROUP_ID_LEN); + /* ATTR_PROC_FABRIC_CHIP_ID */ + PPC_INSERT(scratch, 0, PROC_FABRIC_CHIP_ID_START, PROC_FABRIC_CHIP_ID_LEN); + + /* ATTR_PROC_EFF_FABRIC_GROUP_ID */ + PPC_INSERT(scratch, chip, PROC_EFF_FABRIC_GROUP_ID_START, PROC_EFF_FABRIC_GROUP_ID_LEN); + /* ATTR_PROC_EFF_FABRIC_CHIP_ID */ + PPC_INSERT(scratch, 0, PROC_EFF_FABRIC_CHIP_ID_START, PROC_EFF_FABRIC_CHIP_ID_LEN); + + /* Not documented what this is */ + scratch |= PPC_BIT(0); + + /* ATTR_PROC_MEM_TO_USE (talos.xml; each CPU uses its own memory) */ + PPC_INSERT(scratch, chip << 3, PROC_MEM_TO_USE_START, PROC_MEM_TO_USE_LEN); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 5, scratch >> 32); + + /* SCRATCH_REGISTER_7 is left as is (it's related to DRTM payload) */ + + /* SCRATCH_REGISTER_8 */ + + scratch = PPC_SHIFT(read_cfam(chip, MBOX_SCRATCH_REG1_FSI + 7), 31); + + /* Indicate validity of SCRATCH_REGISTER_[1-6] */ + scratch |= PPC_BITMASK(0, 5); + + write_cfam(chip, MBOX_SCRATCH_REG1_FSI + 7, scratch >> 32); +} + +static int get_master_sbe_boot_seeprom(void) +{ + enum { PERV_SB_CS_SCOM = 0x00050008 }; + return (read_scom(0, PERV_SB_CS_SCOM) & SBE_BOOT_SELECT_MASK) ? 
1 : 0; +} + +static void set_sbe_boot_seeprom(uint8_t chip, int seeprom_side) +{ + enum { PERV_SB_CS_FSI_BYTE = 0x00002820 }; + + const uint32_t sbe_boot_select_mask = SBE_BOOT_SELECT_MASK >> 32; + + uint32_t sb_cs = read_fsi(chip, PERV_SB_CS_FSI_BYTE); + + if (seeprom_side == 0) + sb_cs &= ~sbe_boot_select_mask; + else + sb_cs |= sbe_boot_select_mask; + + write_fsi(chip, PERV_SB_CS_FSI_BYTE, sb_cs); +} + +void istep_8_1(uint8_t chips) +{ + int boot_seeprom_side; + + report_istep(8, 1); + + boot_seeprom_side = get_master_sbe_boot_seeprom(); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + setup_sbe_config(chip); + set_sbe_boot_seeprom(chip, boot_seeprom_side); + } + } +} diff --git a/src/soc/ibm/power9/istep_8_10.c b/src/soc/ibm/power9/istep_8_10.c new file mode 100644 index 00000000000..d581042052c --- /dev/null +++ b/src/soc/ibm/power9/istep_8_10.c @@ -0,0 +1,376 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#include "xbus.h" + +static void xbus_scom(uint8_t chip, uint8_t group) +{ + /* ATTR_IO_XBUS_CHAN_EQ is 0 by default and Hostboot logs seem to confirm this*/ + + /* ATTR_IO_XBUS_MASTER_MODE */ + const bool xbus_master_mode = (chip == 0); + /* + * Offset for group. + * + * Note that several registers are initialized for both groups and don't + * use the offset. Some other writes are group-specific and don't need + * it either. + */ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + int i; + + /* *_RX_DATA_DAC_SPARE_MODE_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = xbus_addr(0x8000000006010C3F + offset + 0x100000000 * i); + // 53 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_5_OFF + // 54 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_6_OFF + // 55 - *_RX_DAC_REGS_RX_DAC_REGS_RX_PL_DATA_DAC_SPARE_MODE_7_OFF + scom_and(chip, addr, ~PPC_BITMASK(53, 55)); + } + + /* *_RX_DAC_CNTL1_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = xbus_addr(0x8000080006010C3F + offset + 0x100000000 * i); + // 54 - *_RX_DAC_REGS_RX_DAC_REGS_RX_LANE_ANA_PDWN_{OFF,ON} + if (i < 17) + scom_and(chip, addr, ~PPC_BIT(54)); + else + scom_or(chip, addr, PPC_BIT(54)); + } + + /* *_RX_DAC_CNTL5_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = xbus_addr(0x8000280006010C3F + offset + 0x100000000 * i); + scom_and(chip, addr, + ~(PPC_BITMASK(48, 51) | PPC_BITMASK(52, 56) | PPC_BITMASK(57, 61))); + } + + /* *_RX_DAC_CNTL6_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = xbus_addr(0x8000300006010C3F + offset + 0x100000000 * i); + scom_and_or(chip, addr, + ~(PPC_BITMASK(53, 56) | PPC_BITMASK(48, 52)), + PPC_PLACE(0x7, 53, 4) | PPC_PLACE(0x0C, 48, 5)); + } + + /* *_RX_DAC_CNTL9_E_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = xbus_addr(0x8000C00006010C3F + offset + 0x100000000 * i); + scom_and(chip, addr, ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 60))); + } + + /* *_RX_BIT_MODE1_EO_PL */ + for (i = 0; i < 18; i++) { + uint64_t addr = xbus_addr(0x8002200006010C3F + offset + 0x100000000 * i); + // 48 - *_RX_BIT_REGS_RX_LANE_DIG_PDWN_{OFF,ON} + if (i < 17) + scom_and(chip, addr, ~PPC_BIT(48)); + else + scom_or(chip, addr, PPC_BIT(48)); + } + + /* *_RX_BIT_MODE1_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = xbus_addr(0x8002C00006010C3F + offset + 0x100000000 * i); + const uint16_t data[17] = { + 0x1000, 0xF03E, 0x07BC, 0x07C7, 0x03EF, 0x1F0F, 0x1800, 0x9C00, + 0x1000, 0x9C00, 0x1800, 0x1F0F, 0x03EF, 0x07C7, 0x07BC, 0xF03E, + 0x1000 + }; + 
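+ /* Note: the per-lane data tables in this function are symmetric around the middle lane. */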
scom_and_or(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); + } + + /* *_RX_BIT_MODE2_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = xbus_addr(0x8002C80006010C3F + offset + 0x100000000 * i); + const uint8_t data[17] = { + 0x42, 0x3E, 0x00, 0x60, 0x40, 0x40, 0x03, 0x03, + 0x42, 0x03, 0x03, 0x40, 0x40, 0x60, 0x00, 0x3E, + 0x42 + }; + scom_and_or(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); + } + + /* *_TX_MODE1_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = xbus_addr(0x8004040006010C3F + offset + 0x100000000 * i); + scom_and(chip, addr, ~PPC_BIT(48)); + } + + /* *_TX_MODE2_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = xbus_addr(0x80040C0006010C3F + offset + 0x100000000 * i); + scom_or(chip, addr, PPC_BIT(62)); + } + + /* *_TX_BIT_MODE1_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = xbus_addr(0x80043C0006010C3F + offset + 0x100000000 * i); + const uint16_t data[17] = { + 0x000, 0x000, 0x01E, 0x01F, 0x00F, 0x07C, 0xC63, 0xE73, + 0x000, 0xE73, 0xC63, 0x07C, 0x00F, 0x01F, 0x01E, 0x000, + 0x000, + }; + scom_and_or(chip, addr, ~PPC_BITMASK(48, 63), PPC_PLACE(data[i], 48, 16)); + } + + /* *_TX_BIT_MODE2_E_PL */ + for (i = 0; i < 17; i++) { + uint64_t addr = xbus_addr(0x8004440006010C3F + offset + 0x100000000 * i); + const uint8_t data[17] = { + 0x01, 0x7C, 0x7B, 0x0C, 0x5E, 0x10, 0x0C, 0x4E, + 0x01, 0x4E, 0x0C, 0x10, 0x5E, 0x0C, 0x7B, 0x7C, + 0x01, + }; + scom_and_or(chip, addr, ~PPC_BITMASK(48, 54), PPC_PLACE(data[i], 48, 7)); + } + + // P9A_XBUS_0_RX[01]_RX_SPARE_MODE_PG + // 49 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PG_SPARE_MODE_1_ON + scom_or(chip, xbus_addr(0x8008000006010C3F + offset), PPC_BIT(49)); + + // P9A_XBUS_0_RX[01]_RX_ID1_PG + scom_and_or(chip, xbus_addr(0x8008080006010C3F + offset), + ~PPC_BITMASK(48, 53), + PPC_PLACE((group == 0 ? 
0x00 : 0x01), 48, 6)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE1_EO_PG + // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_CLKDIST_PDWN_OFF + scom_and(chip, xbus_addr(0x8008100006010C3F + offset), ~PPC_BIT(48)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE5_EO_PG + // 51-53 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RECAL_INTERVAL_TIMEOUT_SEL_TAP5 + // 54-55 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RECAL_STATUS_RPT_TIMEOUT_SEL_TAP1 + scom_and_or(chip, xbus_addr(0x8008300006010C3F + offset), + ~(PPC_BITMASK(51, 53) | PPC_BITMASK(54, 55)), + PPC_PLACE(0x5, 51, 3) | PPC_PLACE(0x1, 54, 2)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE7_EO_PG + scom_and_or(chip, xbus_addr(0x8008400006010C3F + offset), + ~PPC_BITMASK(60, 63), + PPC_PLACE(0xA, 60, 4)); + + // P9A_XBUS_0_RX0_RX_CTL_MODE23_EO_PG (same address for both groups) + // 55 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PEAK_TUNE_OFF + // 56 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_LTE_EN_ON + // 59 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFEHISPD_EN_ON + // 60 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFE12_EN_ON + if (group == 0) { + scom_and_or(chip, xbus_addr(0x8008C00006010C3F), + ~(PPC_BITMASK(48, 49) | PPC_BITMASK(55, 60)), + PPC_PLACE(0x1, 48, 2) | PPC_BIT(56) | PPC_PLACE(0x3, 57, 2) | + PPC_BIT(59) | PPC_BIT(60)); + } else { + scom_and_or(chip, xbus_addr(0x8008C00006010C3F), + ~PPC_BITMASK(48, 49), PPC_PLACE(0x1, 48, 2)); + } + + // P9A_XBUS_0_RX0_RX_CTL_MODE23_EO_PG + // 55 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PEAK_TUNE_OFF + // 56 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_LTE_EN_ON + // 57 - 0b11 + // 59 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFEHISPD_EN_ON + // 60 - IOF1_RX_RX1_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DFE12_EN_ON + if (group == 1) { + scom_and_or(chip, xbus_addr(0x8008C02006010C3F), + ~PPC_BITMASK(55, 60), + PPC_BIT(56) | PPC_PLACE(0x3, 57, 2) | PPC_BIT(59) | PPC_BIT(60)); + } + + // P9A_XBUS_0_RX0_RX_CTL_MODE29_EO_PG (identical for both groups) + scom_and_or(chip, xbus_addr(0x8008D00006010C3F + offset), + ~(PPC_BITMASK(48, 55) | PPC_BITMASK(56, 63)), + PPC_PLACE(0x66, 48, 8) | PPC_PLACE(0x44, 56, 8)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE27_EO_PG + // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_RC_ENABLE_CTLE_1ST_LATCH_OFFSET_CAL_ON + scom_or(chip, xbus_addr(0x8009700006010C3F + offset), PPC_BIT(48)); + + // P9A_XBUS_0_RX[01]_RX_ID2_PG + scom_and_or(chip, xbus_addr(0x8009800006010C3F + offset), + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(0x00, 49, 7) | PPC_PLACE(0x10, 57, 7)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE1_E_PG + // 48 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_MASTER_MODE_MASTER + // 57 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_FENCE_FENCED + // 58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_PDWN_LITE_DISABLE_ON + scom_or(chip, xbus_addr(0x8009900006010C3F + offset), + (xbus_master_mode ? 
PPC_BIT(48) : 0) | PPC_BIT(57) | PPC_BIT(58)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE2_E_PG + scom_and_or(chip, xbus_addr(0x8009980006010C3F + offset), + ~PPC_BITMASK(48, 52), PPC_PLACE(0x01, 48, 5)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE3_E_PG + scom_and_or(chip, xbus_addr(0x8009A00006010C3F + offset), + ~PPC_BITMASK(48, 51), PPC_PLACE(0xB, 48, 4)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE5_E_PG + scom_and_or(chip, xbus_addr(0x8009B00006010C3F + offset), + ~PPC_BITMASK(52, 55), PPC_PLACE(0x1, 52, 4)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE6_E_PG + scom_and_or(chip, xbus_addr(0x8009B80006010C3F + offset), + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 61)), + PPC_PLACE(0x11, 48, 7) | PPC_PLACE(0x11, 55, 7)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE8_E_PG + // 55-58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RPR_ERR_CNTR1_DURATION_TAP5 + scom_and_or(chip, xbus_addr(0x8009C80006010C3F + offset), + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 58) | PPC_BITMASK(61, 63)), + PPC_PLACE(0xF, 48, 7) | PPC_PLACE(0x5, 55, 4) | PPC_PLACE(0x5, 61, 3)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE9_E_PG + // 55-58 - IOF1_RX_RX0_RXCTL_CTL_REGS_RX_CTL_REGS_RX_DYN_RPR_ERR_CNTR2_DURATION_TAP5 + scom_and_or(chip, xbus_addr(0x8009D00006010C3F + offset), + ~(PPC_BITMASK(48, 54) | PPC_BITMASK(55, 58)), + PPC_PLACE(0x3F, 48, 7) | PPC_PLACE(0x5, 55, 4)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE11_E_PG + scom_and(chip, xbus_addr(0x8009E00006010C3F + offset), ~PPC_BITMASK(48, 63)); + + // P9A_XBUS_0_RX[01]_RX_CTL_MODE12_E_PG + scom_and_or(chip, xbus_addr(0x8009E80006010C3F + offset), + ~PPC_BITMASK(48, 55), PPC_PLACE(0x7F, 48, 8)); + + // P9A_XBUS_0_RX[01]_RX_GLBSM_SPARE_MODE_PG + // 50 - IOF1_RX_RX0_RXCTL_GLBSM_REGS_RX_PG_GLBSM_SPARE_MODE_2_ON + // 56 - IOF1_RX_RX0_RXCTL_GLBSM_REGS_RX_DESKEW_BUMP_AFTER_AFTER + scom_or(chip, xbus_addr(0x800A800006010C3F + offset), PPC_BIT(50) | PPC_BIT(56)); + + // P9A_XBUS_0_RX[01]_RX_GLBSM_CNTL3_EO_PG + scom_and_or(chip, xbus_addr(0x800AE80006010C3F + offset), + ~PPC_BITMASK(56, 57), PPC_PLACE(0x2, 56, 2)); + + // P9A_XBUS_0_RX[01]_RX_GLBSM_MODE1_EO_PG + scom_and_or(chip, xbus_addr(0x800AF80006010C3F + offset), + ~(PPC_BITMASK(48, 51) | PPC_BITMASK(52, 55)), + PPC_PLACE(0xC, 48, 4) | PPC_PLACE(0xC, 52, 4)); + + // P9A_XBUS_0_RX[01]_RX_DATASM_SPARE_MODE_PG + // 60 - IOF1_RX_RX0_RXCTL_DATASM_DATASM_REGS_RX_CTL_DATASM_CLKDIST_PDWN_OFF + scom_and(chip, xbus_addr(0x800B800006010C3F + offset), ~PPC_BIT(60)); + + // P9A_XBUS_0_TX[01]_TX_SPARE_MODE_PG + scom_and(chip, xbus_addr(0x800C040006010C3F + offset), ~PPC_BITMASK(56, 57)); + + // P9A_XBUS_0_TX[01]_TX_ID1_PG + scom_and_or(chip, xbus_addr(0x800C0C0006010C3F + offset), + ~PPC_BITMASK(48, 53), + PPC_PLACE((group == 0 ? 
0x00 : 0x01), 48, 6)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE1_EO_PG + // 48 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_CLKDIST_PDWN_OFF + // 59 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_PDWN_LITE_DISABLE_ON + scom_and_or(chip, xbus_addr(0x800C140006010C3F + offset), + ~(PPC_BIT(48) | PPC_BITMASK(53, 57) | PPC_BIT(59)), + PPC_PLACE(0x01, 53, 5) | PPC_BIT(59)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE2_EO_PG + scom_and_or(chip, xbus_addr(0x800C1C0006010C3F + offset), + ~PPC_BITMASK(56, 62), PPC_PLACE(0x11, 56, 7)); + + // P9A_XBUS_0_TX[01]_TX_CTL_CNTLG1_EO_PG + // 48-49 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DRV_CLK_PATTERN_GCRMSG_DRV_0S + scom_and(chip, xbus_addr(0x800C240006010C3F + offset), ~PPC_BITMASK(48, 49)); + + // P9A_XBUS_0_TX[01]_TX_ID2_PG + scom_and_or(chip, xbus_addr(0x800C840006010C3F + offset), + ~(PPC_BITMASK(49, 55) | PPC_BITMASK(57, 63)), + PPC_PLACE(0x0, 49, 7) | PPC_PLACE(0x10, 57, 7)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE1_E_PG + // 55-57 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DYN_RECAL_INTERVAL_TIMEOUT_SEL_TAP5 + // 58-59 - IOF1_TX_WRAP_TX0_TXCTL_CTL_REGS_TX_CTL_REGS_TX_DYN_RECAL_STATUS_RPT_TIMEOUT_SEL_TAP1 + scom_and_or(chip, xbus_addr(0x800C8C0006010C3F + offset), + ~(PPC_BITMASK(55, 57) | PPC_BITMASK(58, 59)), + PPC_PLACE(0x5, 55, 3) | PPC_PLACE(0x1, 58, 2)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE2_E_PG + scom_and(chip, xbus_addr(0x800CEC0006010C3F + offset), ~PPC_BITMASK(48, 63)); + + // P9A_XBUS_0_TX[01]_TX_CTL_MODE3_E_PG + scom_and_or(chip, xbus_addr(0x800CF40006010C3F + offset), + ~PPC_BITMASK(48, 55), PPC_PLACE(0x7F, 48, 8)); + + // P9A_XBUS_0_TX[01]_TX_CTLSM_MODE1_EO_PG + // 59 - IOF1_TX_WRAP_TX0_TXCTL_TX_CTL_SM_REGS_TX_FFE_BOOST_EN_ON + scom_and(chip, xbus_addr(0x800D2C0006010C3F + offset), PPC_BIT(59)); + + // P9A_XBUS_0_TX_IMPCAL_P_4X_PB (identical for both groups) + scom_and_or(chip, xbus_addr(0x800F1C0006010C3F), + ~PPC_BITMASK(48, 54), PPC_PLACE(0x0E, 48, 5)); +} + +static void set_msb_swap(uint8_t chip, int group) +{ + enum { + TX_CTL_MODE1_EO_PG = 0x800C140006010C3F, + EDIP_TX_MSBSWAP = 58, + }; + + const uint64_t addr = xbus_addr(TX_CTL_MODE1_EO_PG + group * XBUS_LINK_GROUP_OFFSET); + + /* ATTR_EI_BUS_TX_MSBSWAP seems to be 0x80 which is GROUP_0_SWAP */ + if (group == 0) + scom_or(chip, addr, PPC_BIT(EDIP_TX_MSBSWAP)); + else + scom_and(chip, addr, ~PPC_BIT(EDIP_TX_MSBSWAP)); +} + +static void xbus_scominit(int group) +{ + enum { + PU_PB_CENT_SM0_PB_CENT_FIR_REG = 0x05011C00, + + XBUS_PHY_FIR_ACTION0 = 0x0000000000000000ULL, + XBUS_FIR_ACTION0_REG = 0x06010C06, + XBUS_PHY_FIR_ACTION1 = 0x2068680000000000ULL, + XBUS_FIR_ACTION1_REG = 0x06010C07, + XBUS_PHY_FIR_MASK = 0xDF9797FFFFFFC000ULL, + XBUS_FIR_MASK_REG = 0x06010C03, + + EDIP_RX_IORESET = 0x8009F80006010C3F, + EDIP_TX_IORESET = 0x800C9C0006010C3F, + }; + + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* Assert IO reset to power-up bus endpoint logic */ + scom_or(0, xbus_addr(EDIP_RX_IORESET + offset), PPC_BIT(52)); + scom_or(1, xbus_addr(EDIP_RX_IORESET + offset), PPC_BIT(52)); + udelay(50); + scom_or(0, xbus_addr(EDIP_TX_IORESET + offset), PPC_BIT(48)); + scom_or(1, xbus_addr(EDIP_TX_IORESET + offset), PPC_BIT(48)); + udelay(50); + + set_msb_swap(/*chip=*/0, group); + set_msb_swap(/*chip=*/1, group); + + xbus_scom(/*chip=*/0, group); + xbus_scom(/*chip=*/1, group); + + /* PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 */ + if (!(read_scom(/*chip=*/0, PU_PB_CENT_SM0_PB_CENT_FIR_REG) & PPC_BIT(13))) { + write_scom(/*chip=*/0, xbus_addr(XBUS_FIR_ACTION0_REG), 
XBUS_PHY_FIR_ACTION0); + write_scom(/*chip=*/0, xbus_addr(XBUS_FIR_ACTION1_REG), XBUS_PHY_FIR_ACTION1); + write_scom(/*chip=*/0, xbus_addr(XBUS_FIR_MASK_REG), XBUS_PHY_FIR_MASK); + } +} + +void istep_8_10(uint8_t chips) +{ + report_istep(8,10); + + if (chips != 0x01) { + xbus_scominit(/*group=*/0); + xbus_scominit(/*group=*/1); + } +} diff --git a/src/soc/ibm/power9/istep_8_11.c b/src/soc/ibm/power9/istep_8_11.c new file mode 100644 index 00000000000..08ffeb782d8 --- /dev/null +++ b/src/soc/ibm/power9/istep_8_11.c @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#include "xbus.h" + +static void xbus_enable_ridi(uint8_t chip) +{ + enum { + PERV_NET_CTRL0 = 0x060F0040, + PERV_NET_CTRL0_WOR = 0x060F0042, + }; + + /* Getting NET_CTRL0 register value and checking its CHIPLET_ENABLE bit */ + if (read_scom(chip, xbus_addr(PERV_NET_CTRL0)) & PPC_BIT(0)) { + /* Enable Receivers, Drivers DI1 & DI2 */ + uint64_t val = 0; + val |= PPC_BIT(19); // NET_CTRL0.RI_N = 1 + val |= PPC_BIT(20); // NET_CTRL0.DI1_N = 1 + val |= PPC_BIT(21); // NET_CTRL0.DI2_N = 1 + write_scom(chip, xbus_addr(PERV_NET_CTRL0_WOR), val); + } +} + +void istep_8_11(uint8_t chips) +{ + uint8_t chip; + + report_istep(8,11); + + for (chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + xbus_enable_ridi(chip); + } +} diff --git a/src/soc/ibm/power9/istep_8_2.c b/src/soc/ibm/power9/istep_8_2.c new file mode 100644 index 00000000000..51a4e6eb740 --- /dev/null +++ b/src/soc/ibm/power9/istep_8_2.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +#include "fsi.h" + +/* + * 8.2 host_setup_sbe + * + * - Done for all boots - some settings will change based on system type and IPL + * type + * - Set the GP bits to default state + * - Needs to take into account to not change values set up in + * p9_set_clock_term.C procedure + */ + +static void set_fsi_gp_shadow(uint8_t chip) +{ + enum { + PERV_PERV_CTRL0_COPY_FSI = 0x0000291A, + PERV_PERV_CTRL0_TP_OTP_SCOM_FUSED_CORE_MODE = 23, + }; + + uint32_t ctrl0_copy = read_cfam(chip, PERV_PERV_CTRL0_COPY_FSI); + + /* ATTR_FUSED_CORE_MODE (seems to be zero by default) */ + PPC_INSERT(ctrl0_copy, 0, PERV_PERV_CTRL0_TP_OTP_SCOM_FUSED_CORE_MODE, 1); + + write_cfam(chip, PERV_PERV_CTRL0_COPY_FSI, ctrl0_copy); +} + +void istep_8_2(uint8_t chips) +{ + report_istep(8, 2); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + set_fsi_gp_shadow(chip); + } +} diff --git a/src/soc/ibm/power9/istep_8_3.c b/src/soc/ibm/power9/istep_8_3.c new file mode 100644 index 00000000000..2cfcb871280 --- /dev/null +++ b/src/soc/ibm/power9/istep_8_3.c @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include + +#include "fsi.h" + +/* + * 8.3 host_cbs_start + * + * - Set a bit to start the SBE engine on slave chips. Located in FSI GP region. + * - This same bit performs the scan0 flush of pervasive. 
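 + * + * Rough sequence implemented by start_cbs() below: clear the selfboot message + * register, set the "prevent SBE start" option, toggle the boot sequencer + * start bit, poll for the CBS internal state machine to reach idle (up to + * 20 polls of 640 us each, i.e. roughly 13 ms), then pulse hreset to let the + * SBE run.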
 + */ + +static void send_fifo_reset(uint8_t chip) +{ + enum { SBE_FIFO_DNFIFO_RESET = 0x00002450 }; + + /* Perform a write to the DNFIFO Reset to clean up the FIFO */ + write_fsi(chip, SBE_FIFO_DNFIFO_RESET, 0xDEAD); +} + +static void start_cbs(uint8_t chip) +{ + enum { + PERV_SB_MSG_FSI = 0x00002809, + + PERV_CBS_CS_FSI = 0x00002801, + PERV_CBS_CS_START_BOOT_SEQUENCER = 0, + PERV_CBS_CS_OPTION_SKIP_SCAN0_CLOCKSTART = 2, + PERV_CBS_CS_OPTION_PREVENT_SBE_START = 3, + + PERV_SB_CS_FSI = 0x00002808, + PERV_SB_CS_START_RESTART_VECTOR0 = 12, + PERV_SB_CS_START_RESTART_VECTOR1 = 13, + + PERV_CBS_ENVSTAT_FSI = 0x00002804, + PERV_CBS_ENVSTAT_C4_VDN_GPOOD = 2, + + /* Observed number of times CBS is read for CBS_INTERNAL_STATE_VECTOR */ + P9_CFAM_CBS_POLL_COUNT = 20, + /* + * unit is microseconds [min : 64k x (1/100MHz) = 64k x 10(-8) = 640 us + * max : 64k x (1/50MHz) = 128k x 10(-8) = 1280 us] + */ + P9_CBS_IDLE_HW_US_DELAY = 640, + + CBS_IDLE_VALUE = 0x0002, + + PERV_FSB_FSB_DOWNFIFO_RESET_FSI = 0x00002414, + FIFO_RESET = 0x80000000, + + PERV_FSI2PIB_STATUS_FSI = 0x00001007, + PERV_FSI2PIB_STATUS_VDD_NEST_OBSERVE = 16, + }; + + int poll_count; + uint64_t tmp; + + /* Clear Selfboot message register before every boot */ + write_cfam(chip, PERV_SB_MSG_FSI, 0); + + /* Configure Prevent SBE start option */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_CBS_CS_FSI), 31); + tmp |= PPC_BIT(PERV_CBS_CS_OPTION_PREVENT_SBE_START); + write_cfam(chip, PERV_CBS_CS_FSI, tmp >> 32); + + /* Set hreset to 0 */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_SB_CS_FSI), 31); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR1); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + + /* Check for VDN_PGOOD */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_CBS_ENVSTAT_FSI), 31); + if (!(tmp & PPC_BIT(PERV_CBS_ENVSTAT_C4_VDN_GPOOD))) + die("CBS startup: VDN_PGOOD is OFF, can't proceed\n"); + + /* Reset CFAM Boot Sequencer (CBS) to flush value */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_CBS_CS_FSI), 31); + tmp &= ~PPC_BIT(PERV_CBS_CS_START_BOOT_SEQUENCER); + tmp &= ~PPC_BIT(PERV_CBS_CS_OPTION_SKIP_SCAN0_CLOCKSTART); + write_cfam(chip, PERV_CBS_CS_FSI, tmp >> 32); + + /* Trigger CFAM Boot Sequencer (CBS) to start (no read, we know the register's contents) */ + tmp |= PPC_BIT(PERV_CBS_CS_START_BOOT_SEQUENCER); + write_cfam(chip, PERV_CBS_CS_FSI, tmp >> 32); + + for (poll_count = 0; poll_count < P9_CFAM_CBS_POLL_COUNT; poll_count++) { + /* + * PERV_CBS_CS_INTERNAL_STATE_VECTOR_START = 16 + * PERV_CBS_CS_INTERNAL_STATE_VECTOR_LEN = 16 + */ + uint16_t cbs_state = (read_cfam(chip, PERV_CBS_CS_FSI) & 0xFFFF); + if (cbs_state == CBS_IDLE_VALUE) + break; + + udelay(P9_CBS_IDLE_HW_US_DELAY); + } + + if (poll_count == P9_CFAM_CBS_POLL_COUNT) + die("CBS startup: CBS has not reached idle state!\n"); + + /* Reset FIFO (ATTR_START_CBS_FIFO_RESET_SKIP is set only for some specific test) */ + write_cfam(chip, PERV_FSB_FSB_DOWNFIFO_RESET_FSI, FIFO_RESET); + + /* Set up hreset (clear -> set -> clear again) */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_SB_CS_FSI), 31); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + tmp |= PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + tmp &= ~PPC_BIT(PERV_SB_CS_START_RESTART_VECTOR0); + write_cfam(chip, PERV_SB_CS_FSI, tmp >> 32); + + /* Check for VDD status */ + tmp = PPC_SHIFT(read_cfam(chip, PERV_FSI2PIB_STATUS_FSI), 31); + if (!(tmp & PPC_BIT(PERV_FSI2PIB_STATUS_VDD_NEST_OBSERVE))) + die("CBS
startup: VDD is OFF!\n"); +} + +void istep_8_3(uint8_t chips) +{ + report_istep(8, 3); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + /* + * Before starting the CBS (and thus the SBE) on slave + * procs make sure the SBE FIFO is clean by doing a full + * reset of the FIFO + */ + send_fifo_reset(chip); + start_cbs(chip); + } + } +} diff --git a/src/soc/ibm/power9/istep_8_4.c b/src/soc/ibm/power9/istep_8_4.c new file mode 100644 index 00000000000..2a3521141b5 --- /dev/null +++ b/src/soc/ibm/power9/istep_8_4.c @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include + +#include "fsi.h" + +/* + * 8.4 proc_check_slave_sbe_seeprom_complete : Check Slave SBE Complete + * + * - Check to make sure that the slave SBE engines have completed their IPL + * - FW will poll for up to 1 second to see if the "done" signature is in + * the status reg (not tied to istep number) + * - If "done" signature is not found then FW must extract FFDC from the SBE + */ + +static bool sbe_run_extract_msg_reg(uint8_t chip) +{ + enum { + PERV_SB_MSG_FSI = 0x00002809, + + /* SBE is in its operational (runtime) state */ + SBE_STATE_RUNTIME = 0x4, + + /* + * Much higher frequency of polling buys us about 333ms here. + * Can also wait with second precision at first (4 seconds) as SBE boots in + * 4.7s every time. + */ + SBE_RETRY_TIMEOUT_HW_MS = 60 * 1000, + SBE_RETRY_NUM_LOOPS = 60 * 100, // 100 times per second + }; + + /* Each sbe gets 60s to respond with the fact that it's booted and at + * runtime (stable state). */ + uint64_t SBE_WAIT_SLEEP_MS = (SBE_RETRY_TIMEOUT_HW_MS / SBE_RETRY_NUM_LOOPS); + + /* + * Layout of the register: + * [0] = SBE control loop initialized + * [1] = async FFDC present on SBE + * [2-3] = reserved + * [4-7] = previous SBE state + * [8-11] = current SBE state + * [12-19] = last major istep executed by the SBE + * [20-25] = last minor istep executed by the SBE + * [26-31] = reserved + */ + uint32_t msg_reg; + + for (uint64_t i = 0; i < SBE_RETRY_NUM_LOOPS; i++) { + uint8_t curr_state; + + msg_reg = read_cfam(chip, PERV_SB_MSG_FSI); + + curr_state = (msg_reg >> 20) & 0xF; + if (curr_state == SBE_STATE_RUNTIME) + return true; + + /* Check async FFDC bit (indicates SBE is failing to boot) */ + if (msg_reg & (1 << 30)) + break; + + if ((i * SBE_WAIT_SLEEP_MS) % 1000 == 0) + printk(BIOS_NOTICE, "SBE for chip #%d is booting...\n", chip); + + /* Hostboot resets watchdog before sleeping, we might want to + do it too or just increase timer after experimenting */ + mdelay(SBE_WAIT_SLEEP_MS); + } + + /* We reach this line only if something is wrong with SBE */ + + printk(BIOS_ERR, "Message register: 0x%08x\n", msg_reg); + + if (msg_reg & (1 << 30)) + printk(BIOS_ERR, "SBE reports an error.\n"); + else + printk(BIOS_ERR, "SBE takes too long to boot.\n"); + + printk(BIOS_ERR, "SBE for chip #%d failed to boot!\n", chip); + + /* If SBE did boot (started its control loop) and then failed, can read + some debug information from it (p9_extract_sbe_rc() in Hostboot) */ + + /* + * Might want to restart SBE here if boot failure is something that can + * happen under normal circumstances. Hostboot gives current SBE side + * two tries, switches sides and gives up if it also fails twice. 
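 + * + * Switching the boot side could reuse set_sbe_boot_seeprom() from istep 8.1, + * which flips SBE_BOOT_SELECT_MASK over FSI.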
+ */ + + return false; +} + +void istep_8_4(uint8_t chips) +{ + report_istep(8, 4); + + /* Skipping master chip */ + for (uint8_t chip = 1; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) { + if (!sbe_run_extract_msg_reg(chip)) + die("SBE for chip #%d did not boot properly.\n", chip); + } + } +} diff --git a/src/soc/ibm/power9/istep_8_9.c b/src/soc/ibm/power9/istep_8_9.c new file mode 100644 index 00000000000..6e2abafc8df --- /dev/null +++ b/src/soc/ibm/power9/istep_8_9.c @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include + +#include "xbus.h" + +/* + * This code omits initialization of OBus which isn't present. It also assumes + * there is only one XBus (X1). Both of these statements are true for Nimbus + * Sforza. + * + * For consistency with Hostboot some read values are unused, written + * unmodified or ANDed with 0, this simplifies verification that the code + * operates correctly by comparing against Hostboot logs. + */ + +static void p9_fbc_no_hp_scom(bool is_xbus_active, uint8_t chip) +{ + enum { + /* Power Bus PB West Mode Configuration Register */ + PB_WEST_MODE = 0x501180A, + /* Power Bus PB CENT Mode Register */ + PB_CENT_MODE = 0x5011C0A, + /* Power Bus PB CENT GP command RATE DP0 Register */ + PB_CENT_GP_CMD_RATE_DP0 = 0x5011C26, + /* Power Bus PB CENT GP command RATE DP1 Register */ + PB_CENT_GP_CMD_RATE_DP1 = 0x5011C27, + /* Power Bus PB CENT RGP command RATE DP0 Register */ + PB_CENT_RGP_CMD_RATE_DP0 = 0x5011C28, + /* Power Bus PB CENT RGP command RATE DP1 Register */ + PB_CENT_RGP_CMD_RATE_DP1 = 0x5011C29, + /* Power Bus PB CENT SP command RATE DP0 Register */ + PB_CENT_SP_CMD_RATE_DP0 = 0x5011C2A, + /* Power Bus PB CENT SP command RATE DP1 Register */ + PB_CENT_SP_CMD_RATE_DP1 = 0x5011C2B, + /* Power Bus PB East Mode Configuration Register */ + PB_EAST_MODE = 0x501200A, + + PB_CFG_CHIP_IS_SYSTEM = 4, + + PB_CFG_SP_HW_MARK = 16, + PB_CFG_SP_HW_MARK_LEN = 7, + + PB_CFG_GP_HW_MARK = 23, + PB_CFG_GP_HW_MARK_LEN = 7, + + PB_CFG_LCL_HW_MARK = 30, + PB_CFG_LCL_HW_MARK_LEN = 6, + }; + + /* + * ATTR_PROC_FABRIC_X_LINKS_CNFG + * Number of active XBus links: 1 for two CPUs, 0 for one CPU. + */ + const int num_x_links_cfg = (is_xbus_active ? 1 : 0); + + uint64_t pb_west_mode, pb_cent_mode, pb_east_mode; + uint64_t pb_cent_rgp_cmd_rate_dp0, pb_cent_rgp_cmd_rate_dp1; + uint64_t pb_cent_sp_cmd_rate_dp0, pb_cent_sp_cmd_rate_dp1; + + pb_west_mode = read_scom(chip, PB_WEST_MODE); + PPC_INSERT(pb_west_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); + PPC_INSERT(pb_west_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); + PPC_INSERT(pb_west_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); + PPC_INSERT(pb_west_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); + write_scom(chip, PB_WEST_MODE, pb_west_mode); + + pb_cent_mode = read_scom(chip, PB_CENT_MODE); + PPC_INSERT(pb_cent_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); + PPC_INSERT(pb_cent_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); + PPC_INSERT(pb_cent_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); + PPC_INSERT(pb_cent_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); + write_scom(chip, PB_CENT_MODE, pb_cent_mode); + + scom_and(chip, PB_CENT_GP_CMD_RATE_DP0, 0); + scom_and(chip, PB_CENT_GP_CMD_RATE_DP1, 0); + + (void)read_scom(chip, PB_CENT_RGP_CMD_RATE_DP0); + pb_cent_rgp_cmd_rate_dp0 = (num_x_links_cfg == 0 ? 
0 : 0x030406080A0C1218); + write_scom(chip, PB_CENT_RGP_CMD_RATE_DP0, pb_cent_rgp_cmd_rate_dp0); + + (void)read_scom(chip, PB_CENT_RGP_CMD_RATE_DP1); + pb_cent_rgp_cmd_rate_dp1 = (num_x_links_cfg == 0 ? 0 : 0x040508080A0C1218); + write_scom(chip, PB_CENT_RGP_CMD_RATE_DP1, pb_cent_rgp_cmd_rate_dp1); + + pb_cent_sp_cmd_rate_dp0 = read_scom(chip, PB_CENT_SP_CMD_RATE_DP0); + pb_cent_sp_cmd_rate_dp0 = (num_x_links_cfg == 0 ? 0 : 0x030406080A0C1218); + write_scom(chip, PB_CENT_SP_CMD_RATE_DP0, pb_cent_sp_cmd_rate_dp0); + + pb_cent_sp_cmd_rate_dp1 = read_scom(chip, PB_CENT_SP_CMD_RATE_DP1); + pb_cent_sp_cmd_rate_dp1 = (num_x_links_cfg == 0 ? 0 : 0x030406080A0C1218); + write_scom(chip, PB_CENT_SP_CMD_RATE_DP1, pb_cent_sp_cmd_rate_dp1); + + pb_east_mode = read_scom(chip, PB_EAST_MODE); + PPC_INSERT(pb_east_mode, (num_x_links_cfg == 0), PB_CFG_CHIP_IS_SYSTEM, 1); + PPC_INSERT(pb_east_mode, 0x3F, PB_CFG_SP_HW_MARK, PB_CFG_SP_HW_MARK_LEN); + PPC_INSERT(pb_east_mode, 0x3F, PB_CFG_GP_HW_MARK, PB_CFG_GP_HW_MARK_LEN); + PPC_INSERT(pb_east_mode, 0x2A, PB_CFG_LCL_HW_MARK, PB_CFG_LCL_HW_MARK_LEN); + write_scom(chip, PB_EAST_MODE, pb_east_mode); +} + +static void p9_fbc_ioe_tl_scom(bool is_xbus_active, uint8_t chip) +{ + enum { + /* Processor bus Electrical Framer/Parser 01 configuration register */ + PB_FP01_CFG = 0x501340A, + /* Power Bus Electrical Framer/Parser 23 Configuration Register */ + PB_FP23_CFG = 0x501340B, + /* Power Bus Electrical Framer/Parser 45 Configuration Register */ + PB_FP45_CFG = 0x501340C, + /* Power Bus Electrical Link Data Buffer 01 Configuration Register */ + PB_ELINK_DATA_01_CFG_REG = 0x5013410, + /* Power Bus Electrical Link Data Buffer 23 Configuration Register */ + PB_ELINK_DATA_23_CFG_REG = 0x5013411, + /* Power Bus Electrical Link Data Buffer 45 Configuration Register */ + PB_ELINK_DATA_45_CFG_REG = 0x5013412, + /* Power Bus Electrical Miscellaneous Configuration Register */ + PB_MISC_CFG = 0x5013423, + /* Power Bus Electrical Link Trace Configuration Register */ + PB_TRACE_CFG = 0x5013424, + + FP0_FMR_DISABLE = 20, + FP0_PRS_DISABLE = 25, + FP1_FMR_DISABLE = 52, + FP1_PRS_DISABLE = 57, + + FP2_FMR_DISABLE = 20, + FP2_PRS_DISABLE = 25, + FP3_FMR_DISABLE = 52, + FP3_PRS_DISABLE = 57, + + FP4_FMR_DISABLE = 20, + FP4_PRS_DISABLE = 25, + FP5_FMR_DISABLE = 52, + FP5_PRS_DISABLE = 57, + + IOE01_IS_LOGICAL_PAIR = 0, + IOE23_IS_LOGICAL_PAIR = 1, + IOE45_IS_LOGICAL_PAIR = 2, + }; + + /* + * According to schematics we only support one XBus with + * ATTR_PROC_FABRIC_X_ATTACHED_CHIP_CNFG = { false, true, false } + * Meaning that X1 is present and X0 and X2 aren't. 
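 + * + * This is why the FP01 and FP45 framer/parser pairs are unconditionally + * disabled below, while the FP23 pair, which serves the X1 link, is enabled + * only when the second CPU is present.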
+ */ + + const uint64_t pb_freq_mhz = powerbus_cfg(chip)->fabric_freq; + + const uint64_t dd2_lo_limit_d = (FREQ_X_MHZ * 10); + const uint64_t dd2_lo_limit_n = pb_freq_mhz * 82; + + uint64_t pb_fp01_cfg, pb_fp23_cfg, pb_fp45_cfg; + uint64_t pb_elink_data_23_cfg_reg; + uint64_t pb_misc_cfg, pb_trace_cfg; + + pb_fp01_cfg = read_scom(chip, PB_FP01_CFG); + pb_fp01_cfg |= PPC_BIT(FP0_FMR_DISABLE); + pb_fp01_cfg |= PPC_BIT(FP0_PRS_DISABLE); + pb_fp01_cfg |= PPC_BIT(FP1_FMR_DISABLE); + pb_fp01_cfg |= PPC_BIT(FP1_PRS_DISABLE); + write_scom(chip, PB_FP01_CFG, pb_fp01_cfg); + + pb_fp23_cfg = read_scom(chip, PB_FP23_CFG); + + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP2_FMR_DISABLE, 1); + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP2_PRS_DISABLE, 1); + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP3_FMR_DISABLE, 1); + PPC_INSERT(pb_fp23_cfg, !is_xbus_active, FP3_PRS_DISABLE, 1); + + if (is_xbus_active) { + PPC_INSERT(pb_fp23_cfg, 0x01, 22, 2); + PPC_INSERT(pb_fp23_cfg, 0x20, 12, 8); + PPC_INSERT(pb_fp23_cfg, 0x15 - (dd2_lo_limit_n / dd2_lo_limit_d), 4, 8); + PPC_INSERT(pb_fp23_cfg, 0x20, 44, 8); + PPC_INSERT(pb_fp23_cfg, 0x15 - (dd2_lo_limit_n / dd2_lo_limit_d), 36, 8); + } + + write_scom(chip, PB_FP23_CFG, pb_fp23_cfg); + + pb_fp45_cfg = read_scom(chip, PB_FP45_CFG); + pb_fp45_cfg |= PPC_BIT(FP4_FMR_DISABLE); + pb_fp45_cfg |= PPC_BIT(FP4_PRS_DISABLE); + pb_fp45_cfg |= PPC_BIT(FP5_FMR_DISABLE); + pb_fp45_cfg |= PPC_BIT(FP5_PRS_DISABLE); + write_scom(chip, PB_FP45_CFG, pb_fp45_cfg); + + write_scom(chip, PB_ELINK_DATA_01_CFG_REG, read_scom(chip, PB_ELINK_DATA_01_CFG_REG)); + + pb_elink_data_23_cfg_reg = read_scom(chip, PB_ELINK_DATA_23_CFG_REG); + if (is_xbus_active) { + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x1F, 24, 5); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x40, 1, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x40, 33, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 9, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 41, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 17, 7); + PPC_INSERT(pb_elink_data_23_cfg_reg, 0x3C, 49, 7); + } + write_scom(chip, PB_ELINK_DATA_23_CFG_REG, pb_elink_data_23_cfg_reg); + + write_scom(chip, PB_ELINK_DATA_45_CFG_REG, read_scom(chip, PB_ELINK_DATA_45_CFG_REG)); + + pb_misc_cfg = read_scom(chip, PB_MISC_CFG); + PPC_INSERT(pb_misc_cfg, 0x00, IOE01_IS_LOGICAL_PAIR, 1); + PPC_INSERT(pb_misc_cfg, is_xbus_active, IOE23_IS_LOGICAL_PAIR, 1); + PPC_INSERT(pb_misc_cfg, 0x00, IOE45_IS_LOGICAL_PAIR, 1); + write_scom(chip, PB_MISC_CFG, pb_misc_cfg); + + pb_trace_cfg = read_scom(chip, PB_TRACE_CFG); + if (is_xbus_active) { + PPC_INSERT(pb_trace_cfg, 0x4, 16, 4); + PPC_INSERT(pb_trace_cfg, 0x4, 24, 4); + PPC_INSERT(pb_trace_cfg, 0x1, 20, 4); + PPC_INSERT(pb_trace_cfg, 0x1, 28, 4); + } + write_scom(chip, PB_TRACE_CFG, pb_trace_cfg); +} + +static void p9_fbc_ioe_dl_scom(uint8_t chip) +{ + enum { + /* ELL Configuration Register */ + IOEL_CONFIG = 0x601180A, + /* ELL Replay Threshold Register */ + IOEL_REPLAY_THRESHOLD = 0x6011818, + /* ELL SL ECC Threshold Register */ + IOEL_SL_ECC_THRESHOLD = 0x6011819, + + LL1_CONFIG_LINK_PAIR = 0, + LL1_CONFIG_CRC_LANE_ID = 2, + LL1_CONFIG_SL_UE_CRC_ERR = 4, + }; + + /* ATTR_LINK_TRAIN == fapi2::ENUM_ATTR_LINK_TRAIN_BOTH (from logs) */ + + uint64_t ioel_config, ioel_replay_threshold, ioel_sl_ecc_threshold; + + ioel_config = read_scom(chip, xbus_addr(IOEL_CONFIG)); + ioel_config |= PPC_BIT(LL1_CONFIG_LINK_PAIR); + ioel_config |= PPC_BIT(LL1_CONFIG_CRC_LANE_ID); + ioel_config |= PPC_BIT(LL1_CONFIG_SL_UE_CRC_ERR); + PPC_INSERT(ioel_config, 0xF, 11, 5); + 
PPC_INSERT(ioel_config, 0xF, 28, 4); + write_scom(chip, xbus_addr(IOEL_CONFIG), ioel_config); + + ioel_replay_threshold = read_scom(chip, xbus_addr(IOEL_REPLAY_THRESHOLD)); + PPC_INSERT(ioel_replay_threshold, 0x7, 8, 3); + PPC_INSERT(ioel_replay_threshold, 0xF, 4, 4); + PPC_INSERT(ioel_replay_threshold, 0x6, 0, 4); + write_scom(chip, xbus_addr(IOEL_REPLAY_THRESHOLD), ioel_replay_threshold); + + ioel_sl_ecc_threshold = read_scom(chip, xbus_addr(IOEL_SL_ECC_THRESHOLD)); + PPC_INSERT(ioel_sl_ecc_threshold, 0x7, 8, 3); + PPC_INSERT(ioel_sl_ecc_threshold, 0xF, 4, 4); + PPC_INSERT(ioel_sl_ecc_threshold, 0x7, 0, 4); + write_scom(chip, xbus_addr(IOEL_SL_ECC_THRESHOLD), ioel_sl_ecc_threshold); +} + +static void chiplet_fabric_scominit(bool is_xbus_active, uint8_t chip) +{ + enum { + PU_PB_CENT_SM0_FIR_REG = 0x05011C00, + PU_PB_CENT_SM0_FIR_MASK_REG_SPARE_13 = 13, + + PU_PB_IOE_FIR_ACTION0_REG = 0x05013406, + FBC_IOE_TL_FIR_ACTION0 = 0x0000000000000000, + + PU_PB_IOE_FIR_ACTION1_REG = 0x05013407, + FBC_IOE_TL_FIR_ACTION1 = 0x0049000000000000, + + PU_PB_IOE_FIR_MASK_REG = 0x05013403, + FBC_IOE_TL_FIR_MASK = 0xFF24F0303FFFF11, + FBC_IOE_TL_FIR_MASK_X0_NF = 0x00C00C0C00000880, + FBC_IOE_TL_FIR_MASK_X2_NF = 0x000300C0C0000220, + + XBUS_LL0_IOEL_FIR_ACTION0_REG = 0x06011806, + FBC_IOE_DL_FIR_ACTION0 = 0x0000000000000000, + + XBUS_LL0_IOEL_FIR_ACTION1_REG = 0x06011807, + FBC_IOE_DL_FIR_ACTION1 = 0x0303C00000001FFC, + + XBUS_LL0_IOEL_FIR_MASK_REG = 0x06011803, + FBC_IOE_DL_FIR_MASK = 0xFCFC3FFFFFFFE003, + }; + + bool init_firs; + uint64_t fbc_cent_fir; + + /* Apply FBC non-hotplug initfile */ + p9_fbc_no_hp_scom(is_xbus_active, chip); + + /* Setup IOE (XBUS FBC IO) TL SCOMs */ + p9_fbc_ioe_tl_scom(is_xbus_active, chip); + + /* TL/DL FIRs are configured by us only if not already setup by SBE */ + fbc_cent_fir = read_scom(chip, PU_PB_CENT_SM0_FIR_REG); + init_firs = !(fbc_cent_fir & PPC_BIT(PU_PB_CENT_SM0_FIR_MASK_REG_SPARE_13)); + + if (init_firs) { + uint64_t fir_mask; + + write_scom(chip, PU_PB_IOE_FIR_ACTION0_REG, FBC_IOE_TL_FIR_ACTION0); + write_scom(chip, PU_PB_IOE_FIR_ACTION1_REG, FBC_IOE_TL_FIR_ACTION1); + + fir_mask = FBC_IOE_TL_FIR_MASK + | FBC_IOE_TL_FIR_MASK_X0_NF + | FBC_IOE_TL_FIR_MASK_X2_NF; + write_scom(chip, PU_PB_IOE_FIR_MASK_REG, fir_mask); + } + + /* Setup IOE (XBUS FBC IO) DL SCOMs */ + p9_fbc_ioe_dl_scom(chip); + + if (init_firs) { + write_scom(chip, xbus_addr(XBUS_LL0_IOEL_FIR_ACTION0_REG), + FBC_IOE_DL_FIR_ACTION0); + write_scom(chip, xbus_addr(XBUS_LL0_IOEL_FIR_ACTION1_REG), + FBC_IOE_DL_FIR_ACTION1); + write_scom(chip, xbus_addr(XBUS_LL0_IOEL_FIR_MASK_REG), + FBC_IOE_DL_FIR_MASK); + } +} + +void istep_8_9(uint8_t chips) +{ + report_istep(8,9); + + /* Not skipping master chip and initializing it even if we don't have a second chip */ + for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) { + if (chips & (1 << chip)) + chiplet_fabric_scominit(/*is_xbus_active=*/chips == 0x03, chip); + } +} diff --git a/src/soc/ibm/power9/istep_9_2.c b/src/soc/ibm/power9/istep_9_2.c new file mode 100644 index 00000000000..a16cac2dcd4 --- /dev/null +++ b/src/soc/ibm/power9/istep_9_2.c @@ -0,0 +1,313 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#include "xbus.h" + +struct edip_data_t { + uint32_t en_margin_pu; + uint32_t en_margin_pd; + uint32_t en_main; + uint32_t sel_pre; +}; + +static void compute_margin_data(uint32_t margin, struct edip_data_t *d) +{ + /* ATTR_IO_XBUS_TX_FFE_PRECURSOR = 6 (default, talos.xml) */ + const uint8_t ffe_pre_coef = 6; 
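 + /* + * Worked example for the computation below (hypothetical input): margin = 100 + * in 8R units -> val = 50, val_4r = 50 - 18 = 32; since 32 % 4 == 0, + * en_margin_pd = en_margin_pu = 16 and en_main stays 0; sel_pre comes out as + * (50 * 6) / 128 = 2, well under the en_pre cap of 18. + */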
+ + /* Need to convert the 8R value to a 4R equivalent */ + + const uint32_t val = margin >> 1; + const uint32_t en_pre = 18; + + uint32_t val_4r = val - en_pre; + + d->en_margin_pu = 32; + d->en_margin_pd = 32; + d->en_main = 0; + d->sel_pre = 0; + + if (val_4r < 64) { + if (val_4r % 4 != 0) { + d->en_main = 2; + val_4r -= d->en_main; + } + d->en_margin_pd = val_4r / 2; + d->en_margin_pu = val_4r - d->en_margin_pd; + } + + d->en_main += val_4r - d->en_margin_pu - d->en_margin_pd; + d->en_main = MIN(d->en_main, 50); + d->sel_pre = (val * ffe_pre_coef) / 128; + d->sel_pre = MIN(d->sel_pre, en_pre); +} + +/* Converts a 4R decimal value to a 1R thermometer code */ +static uint32_t convert_4r(uint32_t val_4r) +{ + /* + * 1. Add 2 for averaging since we will truncate the last 2 bits. + * 2. Divide by 4 to bring back to a 1r value. + * 3. Convert the decimal number to number of bits set by shifting a 0x1 + * over by the amount and subtracting 1. + */ + return (0x1 << ((val_4r + 2) / 4)) - 1; +} + +static uint32_t convert_4r_with_2r(uint32_t val_4r, uint8_t width) +{ + /* Add 1 for rounding, then shift the 4r bit off. We now have a 2r equivalent */ + const uint32_t val_2r = (val_4r + 1) >> 1; + + /* If the LSB of the 2r equivalent is on, then we need to set the 2r bit (MSB) */ + const uint32_t on_2r = val_2r & 0x1; + + /* Shift the 2r equivalent to a 1r value and convert to a thermometer code */ + const uint32_t val_1r = (1 << (val_2r >> 0x1)) - 1; + + /* Combine 1r equivalent thermometer code + the 2r MSB value */ + return (on_2r << (width - 1)) | val_1r; +} + +static void config_run_bus_group_mode(uint8_t chip, int group) +{ + enum { + P9A_XBUS_TX_IMPCAL_PVAL_PB = 0x800F140006010C3F, + P9A_XBUS_TX_IMPCAL_NVAL_PB = 0x800F0C0006010C3F, + }; + + /* ATTR_IO_XBUS_TX_MARGIN_RATIO = 0 (default) */ + const uint8_t margin_ratio = 0; + + const uint8_t PRE_WIDTH = 5; + /* 4R Total = (1R * 4) + (2R * 2); */ + const uint32_t PRE_4R_TOTAL = ( 4 * 4) + ( 1 * 2); + + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* Same registers are read for both groups */ + uint32_t pval = (read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PVAL_PB)) >> 7) & 0x1FF; + uint32_t nval = (read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_NVAL_PB)) >> 7) & 0x1FF; + + uint32_t sel_margin_pu; + uint32_t sel_margin_pd; + + struct edip_data_t p; + struct edip_data_t n; + + uint64_t val; + + compute_margin_data(pval, &p); + compute_margin_data(nval, &n); + + sel_margin_pu = (pval * margin_ratio) / 256; + sel_margin_pu = MIN(sel_margin_pu, MIN(p.en_margin_pu, n.en_margin_pu)); + + sel_margin_pd = (nval * margin_ratio) / 256; + sel_margin_pd = MIN(sel_margin_pd, + MIN(p.en_margin_pd, MIN(n.en_margin_pd, sel_margin_pu))); + + val = read_scom(chip, xbus_addr(0x800D340006010C3F + offset)); + + /* EDIP_TX_PSEG_PRE_EN (pre bank pseg enable) */ + PPC_INSERT(val, convert_4r_with_2r(PRE_4R_TOTAL, PRE_WIDTH), 51, 5); + /* EDIP_TX_PSEG_PRE_SEL (pre bank pseg mode selection) */ + PPC_INSERT(val, convert_4r_with_2r(p.sel_pre, PRE_WIDTH), 56, 5); + + write_scom(chip, xbus_addr(0x800D340006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D3C0006010C3F + offset)); + + /* EDIP_TX_NSEG_PRE_EN (pre bank nseg enable) */ + PPC_INSERT(val, convert_4r_with_2r(PRE_4R_TOTAL, PRE_WIDTH), 51, 5); + /* EDIP_TX_NSEG_PRE_SEL (pre bank nseg mode selection) */ + PPC_INSERT(val, convert_4r_with_2r(n.sel_pre, PRE_WIDTH), 56, 5); + + write_scom(chip, xbus_addr(0x800D3C0006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D440006010C3F + 
offset)); + + /* EDIP_TX_PSEG_MARGINPD_EN (margin pull-down bank pseg enable) */ + PPC_INSERT(val, convert_4r(p.en_margin_pd), 56, 8); + /* EDIP_TX_PSEG_MARGINPU_EN (margin pull-up bank pseg enable) */ + PPC_INSERT(val, convert_4r(p.en_margin_pu), 48, 8); + + write_scom(chip, xbus_addr(0x800D440006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D4C0006010C3F + offset)); + + /* EDIP_TX_NSEG_MARGINPD_EN (margin pull-down bank nseg enable) */ + PPC_INSERT(val, convert_4r(n.en_margin_pd), 56, 8); + /* EDIP_TX_NSEG_MARGINPU_EN (margin pull-up bank nseg enable) */ + PPC_INSERT(val, convert_4r(n.en_margin_pu), 48, 8); + + write_scom(chip, xbus_addr(0x800D4C0006010C3F + offset), val); + val = read_scom(chip, xbus_addr(0x800D540006010C3F + offset)); + + /* EDIP_TX_MARGINPD_SEL (margin pull-down bank mode selection) */ + PPC_INSERT(val, convert_4r(sel_margin_pd), 56, 8); + /* EDIP_TX_MARGINPU_SEL (margin pull-up bank mode selection) */ + PPC_INSERT(val, convert_4r(sel_margin_pu), 48, 8); + + write_scom(chip, xbus_addr(0x800D540006010C3F + offset), val); + + /* EDIP_TX_PSEG_MAIN_EN (main bank pseg enable) */ + val = read_scom(chip, xbus_addr(0x800D5C0006010C3F + offset)); + PPC_INSERT(val, convert_4r_with_2r(p.en_main, 13), 51, 13); + write_scom(chip, xbus_addr(0x800D5C0006010C3F + offset), val); + + /* EDIP_TX_NSEG_MAIN_EN (main bank nseg enable) */ + val = read_scom(chip, xbus_addr(0x800D640006010C3F + offset)); + PPC_INSERT(val, convert_4r_with_2r(n.en_main, 13), 51, 13); + write_scom(chip, xbus_addr(0x800D640006010C3F + offset), val); +} + +static void config_run_bus_mode(uint8_t chip) +{ + enum { + P9A_XBUS_TX_IMPCAL_PB = 0x800F040006010C3F, + EDIP_TX_ZCAL_DONE = 50, + EDIP_TX_ZCAL_ERROR = 51, + }; + + long time; + + /* Set EDIP_TX_ZCAL_REQ to start Tx Impedance Calibration */ + scom_or(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PB), PPC_BIT(49)); + mdelay(20); + + time = wait_us(200 * 10, read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PB)) & + (PPC_BIT(EDIP_TX_ZCAL_DONE) | PPC_BIT(EDIP_TX_ZCAL_ERROR))); + if (!time) + die("Timed out waiting for I/O EDI+ Xbus Tx Z Calibration\n"); + + if (read_scom(chip, xbus_addr(P9A_XBUS_TX_IMPCAL_PB)) & PPC_BIT(EDIP_TX_ZCAL_ERROR)) + die("I/O EDI+ Xbus Tx Z Calibration failed\n"); + + config_run_bus_group_mode(chip, /*group=*/0); + config_run_bus_group_mode(chip, /*group=*/1); +} + +static void rx_dc_calibration_start(uint8_t chip, int group) +{ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* Must set lane invalid bit to 0 to run rx dccal, this enables us to + * run dccal on the specified lane. These bits are normally set by + * wiretest although we are not running that now. 
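 + * + * Each lane has its own copy of the register: the 5-bit lane number is + * encoded into the SCOM address at MSB-0 bits 27-31, hence the + * PPC_PLACE(i, 27, 5) below.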
*/ + for (int i = 0; i < XBUS_LANE_COUNT; i++) { + uint64_t lane_offset = PPC_PLACE(i, 27, 5); + /* EDIP_RX_LANE_INVALID */ + scom_and(chip, xbus_addr(0x8002400006010C3F | offset | lane_offset), + ~PPC_BIT(50)); + } + + /* Start Cleanup Pll */ + + /* + * EDIP_RX_CTL_CNTL4_E_PG + * 50 - EDIP_RX_WT_PLL_REFCLKSEL (0 - io clock, 1 - bist) + * 51 - EDIP_RX_PLL_REFCLKSEL_SCOM_EN (0 - pll controls selects refclk, + * 1 - and gcr register does it) + */ + scom_or(chip, xbus_addr(0x8009F80006010C3F + offset), PPC_BIT(50) | PPC_BIT(51)); + udelay(150); + + /* + * EDIP_RX_CTL_CNTL4_E_PG + * 48 - EDIP_RX_WT_CU_PLL_PGOOD (0 - places rx pll in reset, + * 1 - sets pgood on rx pll for locking) + */ + scom_or(chip, xbus_addr(0x8009F80006010C3F + offset), PPC_BIT(48)); + udelay(5); + + /* + * EDIP_RX_DC_CALIBRATE_DONE + * (when this bit is read as a 1, the dc calibration steps have been completed) + */ + scom_and(chip, xbus_addr(0x800A380006010C3F + offset), ~PPC_BIT(53)); + + /* + * EDIP_RX_START_DC_CALIBRATE + * (when this register is written to a 1 the training state machine will run the dc + * calibrate substeps defined in eye optimizations) + */ + scom_or(chip, xbus_addr(0x8009F00006010C3F + offset), PPC_BIT(53)); +} + +static void rx_dc_calibration_poll(uint8_t chip, int group) +{ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + long time; + + /* + * EDIP_RX_DC_CALIBRATE_DONE + * (when this bit is read as a 1, the dc calibration steps have been completed) + */ + time = wait_ms(200 * 10, + read_scom(chip, xbus_addr(0x800A380006010C3F + offset)) & PPC_BIT(53)); + if (!time) + die("Timed out waiting for Rx Dc Calibration\n"); + + /* + * EDIP_RX_START_DC_CALIBRATE + * (when this register is written to a 1 the training state machine will run the dc + * calibrate substeps defined in eye optimizations) + */ + scom_and(chip, xbus_addr(0x8009F00006010C3F + offset), ~PPC_BIT(53)); + + /* + * EDIP_RX_CTL_CNTL4_E_PG + * 48 - EDIP_RX_WT_CU_PLL_PGOOD (0 - places rx pll in reset, + * 1 - sets pgood on rx pll for locking) + * 50 - EDIP_RX_WT_PLL_REFCLKSEL (0 - io clock, 1 - bist) + * 51 - EDIP_RX_PLL_REFCLKSEL_SCOM_EN (0 - pll controls selects refclk, + * 1 - and gcr register does it) + */ + scom_and(chip, xbus_addr(0x8009F80006010C3F + offset), + ~(PPC_BIT(48) | PPC_BIT(50) | PPC_BIT(51))); + udelay(111); + + /* Restore the invalid bits, Wiretest will modify these as training is run */ + for (int i = 0; i < XBUS_LANE_COUNT; i++) { + uint64_t lane_offset = PPC_PLACE(i, 27, 5); + /* EDIP_RX_LANE_INVALID */ + scom_or(chip, xbus_addr(0x8002400006010C3F | offset | lane_offset), + PPC_BIT(50)); + } +} + +static void config_bus_mode(void) +{ + /* Initiate Dc calibration in parallel */ + rx_dc_calibration_start(/*chip=*/0, /*group=*/0); + rx_dc_calibration_start(/*chip=*/1, /*group=*/0); + rx_dc_calibration_start(/*chip=*/0, /*group=*/1); + rx_dc_calibration_start(/*chip=*/1, /*group=*/1); + + /* HB does this delay inside rx_dc_calibration_poll(), but doing it + * once instead of four times should be enough */ + mdelay(100); + + /* Then wait for each combination of chip and group */ + rx_dc_calibration_poll(/*chip=*/0, /*group=*/0); + rx_dc_calibration_poll(/*chip=*/1, /*group=*/0); + rx_dc_calibration_poll(/*chip=*/0, /*group=*/1); + rx_dc_calibration_poll(/*chip=*/1, /*group=*/1); +} + +void istep_9_2(uint8_t chips) +{ + report_istep(9,2); + + if (chips != 0x01) { + config_run_bus_mode(/*chip=*/0); + config_run_bus_mode(/*chip=*/1); + + config_bus_mode(); + } +} diff --git a/src/soc/ibm/power9/istep_9_4.c 
b/src/soc/ibm/power9/istep_9_4.c new file mode 100644 index 00000000000..48fac5d49d6 --- /dev/null +++ b/src/soc/ibm/power9/istep_9_4.c @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include + +#include "xbus.h" + +static void tx_serializer_sync_power_on(uint8_t master_chip, uint8_t slave_chip, int group) +{ + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + /* + * EDIP_TX_CLK_UNLOAD_CLK_DISABLE + * (set to 0 to clock off sync logic on the clock slice and save power; + * it should not be necessary to use the sync logic on the clock slice + * since it has no fifo but control is available just in case) + */ + scom_and(master_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(50)); + scom_and(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(50)); + + /* + * EDIP_TX_CLK_RUN_COUNT + * (set to 1 to enable the tx clock slice serializer; this should be + * enabled at all times but control is available just in case) + */ + scom_and(master_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(51)); + scom_and(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), ~PPC_BIT(51)); + + /* EDIP_TX_CLK_RUN_COUNT (see above) */ + scom_or(master_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(51)); + scom_or(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(51)); + + /* EDIP_TX_CLK_UNLOAD_CLK_DISABLE (see above) */ + scom_or(master_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(50)); + scom_or(slave_chip, xbus_addr(0x800C1C0006010C3F + offset), PPC_BIT(50)); + + for (int i = 0; i < XBUS_LANE_COUNT; ++i) { + uint64_t lane_offset = PPC_PLACE(i, 27, 5); + /* + * EDIP_TX_UNLOAD_CLK_DISABLE + * (set to 0 to enable sync of tx custom serializer via tx_fifo_init register, + * set to 1 to clock off sync logic and save power) + */ + scom_and(master_chip, xbus_addr(0x80040C0006010C3F | offset | lane_offset), + ~PPC_BIT(56)); + scom_and(slave_chip, xbus_addr(0x80040C0006010C3F | offset | lane_offset), + ~PPC_BIT(56)); + } +} + +static void xbus_linktrain(uint8_t master_chip, uint8_t slave_chip, int group) +{ + enum { + /* I/O EDI+ Training Substeps */ + NONE = 0x00000000, + WIRETEST = 0x00000001, + DESKEW = 0x00000002, + EYEOPT = 0x00000004, + REPAIR = 0x00000008, + FUNCTIONAL = 0x00000010, + WDERF = 0x0000001F, // all of the above + }; + + const uint64_t offset = group * XBUS_LINK_GROUP_OFFSET; + + uint64_t tmp; + + /* Hostboot collects bad lane information here, we don't */ + + /* + * Clock Serializer Init + * Isn't strictly necessary but does line up the clock serializer + * counter with the data slices. + */ + tx_serializer_sync_power_on(master_chip, slave_chip, group); + + /* Start Slave/Master Target Link Training */ + + /* + * EDIP_RX_START_WDERF_ALIAS (alias for rx_start_* bits) + * Slave training must start first. 
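 + * (Presumably this is so that the slave side is already listening when the + * master starts driving training patterns.)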
+ */ + scom_and_or(slave_chip, xbus_addr(0x8009F00006010C3F + offset), + ~PPC_BITMASK(48, 52), PPC_PLACE(WDERF, 48, 5)); + scom_and_or(master_chip, xbus_addr(0x8009F00006010C3F + offset), + ~PPC_BITMASK(48, 52), PPC_PLACE(WDERF, 48, 5)); + + /* + * 48-52 EDIP_RX_WDERF_DONE_ALIAS (alias for rx_*_done bits) + * 56-60 EDIP_RX_WDERF_FAILED_ALIAS (alias for rx_*_failed bits) + */ + wait_ms(100 * 1, + (tmp = read_scom(master_chip, xbus_addr(0x800A380006010C3F + offset)), + (((tmp >> 11) & 0x1F) == WDERF || ((tmp >> 3) & 0x1F) != 0))); + if (((tmp >> 3) & 0x1F) != 0) + die("I/O EDI+ Xbus link training failed.\n"); + if (((tmp >> 11) & 0x1F) != WDERF) + die("I/O EDI+ Xbus link training timeout.\n"); + +} + +void istep_9_4(uint8_t chips) +{ + report_istep(9,4); + + if (chips != 0x01) { + xbus_linktrain(/*master_chip=*/0, /*slave_chip=*/1, /*group=*/0); + xbus_linktrain(/*master_chip=*/0, /*slave_chip=*/1, /*group=*/1); + } +} diff --git a/src/soc/ibm/power9/istep_9_6.c b/src/soc/ibm/power9/istep_9_6.c new file mode 100644 index 00000000000..befa14feb3c --- /dev/null +++ b/src/soc/ibm/power9/istep_9_6.c @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include + +#include "xbus.h" + +static void smp_link_layer(uint8_t chip) +{ + enum { + /* dl_control_addr */ + XBUS_LL1_IOEL_CONTROL = 0x0000000006011C0B, + + XBUS_LL0_IOEL_CONTROL_LINK0_STARTUP = 1, + XBUS_LL0_IOEL_CONTROL_LINK1_STARTUP = 33, + }; + + /* Hostboot uses PUTSCOMMASK operation of SBE IO. Assuming that it's + * equivalent to a RMW sequence. */ + scom_or(chip, xbus_addr(XBUS_LL1_IOEL_CONTROL), + PPC_BIT(XBUS_LL0_IOEL_CONTROL_LINK0_STARTUP) | + PPC_BIT(XBUS_LL0_IOEL_CONTROL_LINK1_STARTUP)); +} + +void istep_9_6(uint8_t chips) +{ + report_istep(9,6); + + if (chips != 0x01) { + smp_link_layer(/*chip=*/0); + smp_link_layer(/*chip=*/1); + } +} diff --git a/src/soc/ibm/power9/istep_9_7.c b/src/soc/ibm/power9/istep_9_7.c new file mode 100644 index 00000000000..9281ef42682 --- /dev/null +++ b/src/soc/ibm/power9/istep_9_7.c @@ -0,0 +1,96 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include + +#include "xbus.h" + +static void p9_fab_iovalid_link_validate(uint8_t chip) +{ + enum { + XBUS_LL1_IOEL_FIR_REG = 0x06011C00, + DL_FIR_LINK0_TRAINED_BIT = 0, + DL_FIR_LINK1_TRAINED_BIT = 1, + }; + + int i; + + for (i = 0; i < 100; ++i) { + /* Only OBus seems to be retrained, so this XBus-only code is + * much simpler compared to corresponding code in Hostboot */ + + uint64_t dl_fir_reg = read_scom(chip, xbus_addr(XBUS_LL1_IOEL_FIR_REG)); + + bool dl_trained = (dl_fir_reg & PPC_BIT(DL_FIR_LINK0_TRAINED_BIT)) + && (dl_fir_reg & PPC_BIT(DL_FIR_LINK1_TRAINED_BIT)); + if (dl_trained) + break; + + mdelay(1); + } + + if (i == 100) + die("XBus link DL training failed\n"); +} + +static void p9_fab_iovalid(uint8_t chip) +{ + enum { + PERV_XB_CPLT_CONF1_OR = 0x06000019, + PERV_CPLT_CONF1_IOVALID_6D = 6, + + PU_PB_CENT_SM0_PB_CENT_FIR_REG = 0x05011C00, + PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13 = 13, + + PU_PB_CENT_SM1_EXTFIR_ACTION0_REG = 0x05011C34, + PU_PB_CENT_SM1_EXTFIR_ACTION1_REG = 0x05011C35, + + PU_PB_CENT_SM1_EXTFIR_MASK_REG_AND = 0x05011C32, + }; + + uint64_t fbc_cent_fir_data; + + p9_fab_iovalid_link_validate(chip); + + /* Clear RAS FIR mask for link if not already set up by SBE */ + fbc_cent_fir_data = read_scom(chip, PU_PB_CENT_SM0_PB_CENT_FIR_REG); + if (!(fbc_cent_fir_data & PPC_BIT(PU_PB_CENT_SM0_PB_CENT_FIR_MASK_REG_SPARE_13))) { + scom_and(chip, 
PU_PB_CENT_SM1_EXTFIR_ACTION0_REG, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + scom_and(chip, PU_PB_CENT_SM1_EXTFIR_ACTION1_REG, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + write_scom(chip, PU_PB_CENT_SM1_EXTFIR_MASK_REG_AND, + ~PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D)); + } + + /* + * Use AND/OR mask registers to atomically update link specific fields + * in iovalid control register. + */ + write_scom(chip, xbus_addr(PERV_XB_CPLT_CONF1_OR), + PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D) | + PPC_BIT(PERV_CPLT_CONF1_IOVALID_6D + 1)); +} + +void istep_9_7(uint8_t chips) +{ + report_istep(9,7); + + if (chips != 0x01) { + /* + * Add delay for DD1.1+ procedure to compensate for lack of lane + * lock polls. + * + * HB does this inside p9_fab_iovalid(), which doubles the + * delay, which is probably unnecessary. + */ + mdelay(100); + + p9_fab_iovalid(/*chip=*/0); + p9_fab_iovalid(/*chip=*/1); + } +} diff --git a/src/soc/ibm/power9/mcbist.c b/src/soc/ibm/power9/mcbist.c new file mode 100644 index 00000000000..02ff6aed3ac --- /dev/null +++ b/src/soc/ibm/power9/mcbist.c @@ -0,0 +1,228 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* Debug is too slow here, hits timeouts */ +#define SKIP_SCOM_DEBUG + +#include +#include +#include + +#include "istep_13_scom.h" +#include "mcbist.h" + +#define MCBIST_TESTS_PER_REG 4 +/* 32 total, but last register is under non-consecutive SCOM address */ +#define MAX_MCBIST_TESTS 28 +#define MAX_MCBIST_TEST_REGS (MAX_MCBIST_TESTS / MCBIST_TESTS_PER_REG) + +/* + * TODO: if we were to run both MCBISTs in parallel, we would need separate + * instances of those... + */ +static uint64_t mcbist_memreg_cache[MAX_CHIPS]; +static unsigned tests[MAX_CHIPS]; + +#define ECC_MODE 0x0008 +#define DONE 0x0004 + +enum data_mode +{ + // MCBIST test data modes + FIXED_DATA_MODE = 0x0000, + RAND_FWD_MODE = 0x0010, + RAND_REV_MODE = 0x0020, + RAND_FWD_MAINT = 0x0030, + RAND_REV_MAINT = 0x0040, + DATA_EQ_ADDR = 0x0050, + ROTATE_LEFT_MODE = 0x0060, + ROTATE_RIGHT_MODE = 0x0070, +}; + +enum op_type +{ + WRITE = 0x0000, // fast, with no concurrent traffic + READ = 0x1000, // fast, with no concurrent traffic + READ_WRITE = 0x2000, + WRITE_READ = 0x3000, + READ_WRITE_READ = 0x4000, + READ_WRITE_WRITE = 0x5000, + RAND_SEQ = 0x6000, + READ_READ_WRITE = 0x8000, + SCRUB_RRWR = 0x9000, + STEER_RW = 0xA000, + ALTER = 0xB000, // (W) + DISPLAY = 0xC000, // (R, slow) + CCS_EXECUTE = 0xF000, + + // if bits 9:11 (Data Mode bits) = 000 (bits 4:8 used to specify which subtest to go to) + // Refresh only cmd if bits 9:11 (Data Mode bits) /= 000 + GOTO_SUBTEST_N = 0x7000, +}; + +static void commit_mcbist_memreg_cache(uint8_t chip, int mcs_i) +{ + chiplet_id_t id = mcs_ids[mcs_i]; + int reg = (tests[chip] - 1) / MCBIST_TESTS_PER_REG; + + if (reg < 0) + die("commit_mcbist_memreg_cache() called without adding tests first!\n"); + + if (reg >= MAX_MCBIST_TEST_REGS) + die("Too many MCBIST instructions added\n"); + + /* MC01.MCBIST.MBA_SCOMFIR.MCBMRQ */ + write_scom_for_chiplet(chip, id, MCBMR0Q + reg, mcbist_memreg_cache[chip]); + mcbist_memreg_cache[chip] = 0; +} + +static void add_mcbist_test(uint8_t chip, int mcs_i, uint16_t test) +{ + int test_i = tests[chip] % MCBIST_TESTS_PER_REG; + if (test_i == 0 && tests[chip] != 0) + commit_mcbist_memreg_cache(chip, mcs_i); + + /* This assumes cache is properly cleared. */ + mcbist_memreg_cache[chip] |= PPC_PLACE(test, test_i*16, 16); + tests[chip]++; +} + +/* + * ECC Scrubbing - theory + * + * RAM cannot hold the data indefinitely. 
It uses capacitors to hold the bits,
+ * which are constantly being drained by leakage. To counteract this, memory
+ * has to be periodically refreshed, which recharges the capacitors. However,
+ * sometimes this happens too late, when the state of a capacitor has already
+ * changed (either its charge was depleted, or the capacitor gained additional
+ * potential from outside - rowhammer, radiation) up to the point where it
+ * passes the threshold and 0 becomes 1 or vice versa. A refresh command in
+ * that case would only make a "borderline 1" into a "strong 1", so it won't
+ * be able to fix the problem. This is where ECC comes in.
+ *
+ * ECC is limited in the number of flipped bits it can fix and detect. Because
+ * of that it is important that ECC is checked and possible errors are
+ * corrected before too many bits have flipped, and corrected values are
+ * written back to RAM. This is done by hardware, without software
+ * interaction, but software can be informed that an ECC error has happened
+ * (machine check exception).
+ *
+ * ECC is checked every time data in RAM is accessed. To check every part of
+ * RAM even when the CPU doesn't need to read it, the memory controller does
+ * the accesses in the background. This is called ECC scrubbing.
+ *
+ * Note that it is enough for the MC to just send read commands. When
+ * everything is correct, data is still written back to DRAM because the read
+ * operation is destructive - capacitors are discharged when read and have to
+ * be charged again. This happens internally in DRAM; there is no need to send
+ * that data through the memory bus when DRAM already has it. If there was an
+ * error, the MC automatically sends corrected data to be written.
+ *
+ * ECC scrubbing happens between RAM and the MC. The CPU doesn't participate
+ * in this process, but it may be slowed down on memory-intensive operations
+ * because some of the bandwidth is used for scrubbing.
+ *
+ * TL;DR: an ECC scrub is a read operation with discarded results.
+ */
+void add_scrub(uint8_t chip, int mcs_i, int port_dimm)
+{
+	uint16_t test = READ | ECC_MODE | (port_dimm << 9);
+	add_mcbist_test(chip, mcs_i, test);
+}
+
+void add_fixed_pattern_write(uint8_t chip, int mcs_i, int port_dimm)
+{
+	/* Use ALTER instead of WRITE to use a maintenance pattern. ALTER is slow. */
+	uint16_t test = WRITE | FIXED_DATA_MODE | ECC_MODE | (port_dimm << 9);
+	add_mcbist_test(chip, mcs_i, test);
+}
+
+/*
+static void add_random_pattern_write(uint8_t chip, int port_dimm)
+{
+	uint16_t test = WRITE | RAND_FWD_MAINT | ECC_MODE | (port_dimm << 9);
+	add_mcbist_test(chip, test);
+}
+*/
+
+/* TODO: calculate initial delays and timeouts */
+void mcbist_execute(uint8_t chip, int mcs_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	/* This is the index of the last instruction, not the new one. */
+	int test_i = (tests[chip] - 1) % MCBIST_TESTS_PER_REG;
+	uint64_t val;
+
+	/*
+	 * Nothing to do. Note that the status register won't report "done",
+	 * or will report the state of a previous program instead. According
+	 * to the docs these bits are writable; do we want to set them to
+	 * simplify things?
+	 *
+	 * Another possibility would be to start MCBIST with a single no-op
+	 * test (goto with DONE bit set), but this may unnecessarily make
+	 * things slower.
+	 */
+	if (tests[chip] == 0)
+		return;
+
+	/* Check if in progress */
+	/* TODO: we could force it to stop, but dying will help with debugging */
+	if ((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) &
+	    PPC_BIT(MCB_CNTLSTATQ_MCB_IP))
+		die("MCBIST in progress already (%#16.16llx), this shouldn't happen\n", val);
+
+	/*
+	 * Contrary to CCS, we don't add a no-op instruction here. The DONE
+	 * bit has to be set for an instruction that is already present.
+	 * Perhaps DONE is a poor name; it tells MCBIST to stop after this
+	 * test, but this is how it is named in the documentation.
+	 */
+	mcbist_memreg_cache[chip] |= PPC_BIT(13 + test_i*16);
+	commit_mcbist_memreg_cache(chip, mcs_i);
+
+	/* MC01.MCBIST.MBA_SCOMFIR.MCB_CNTLQ
+	 * [0] MCB_CNTLQ_MCB_START
+	 */
+	scom_and_or_for_chiplet(chip, id, MCB_CNTLQ, ~0, PPC_BIT(MCB_CNTLQ_MCB_START));
+
+	/* Wait for MCBIST to start. Test for IP and DONE, it may finish early. */
+	if (((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) &
+	     (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0) {
+		/*
+		 * TODO: how long do we want to wait? Hostboot uses 10*100us polling,
+		 * but so far it seems to always be already started on the first read.
+		 */
+		udelay(1);
+		if (((val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ)) &
+		     (PPC_BIT(MCB_CNTLSTATQ_MCB_IP) | PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))) == 0)
+			die("MCBIST failed to start (%#16.16llx)\n", val);
+
+		/* Check if this is needed. Do not move it before the test, it impacts the delay! */
+		printk(BIOS_INFO, "MCBIST started after delay\n");
+	}
+
+	tests[chip] = 0;
+}
+
+/*
+ * FIXME: 0x07012300[10] MCBIST_PROGRAM_COMPLETE should be checked instead. It
+ * gets set when MCBIST is paused, while 0x070123DC[0] IP stays on in that
+ * case. This may become a problem for 3DS DIMMs.
+ */
+int mcbist_is_done(uint8_t chip, int mcs_i)
+{
+	chiplet_id_t id = mcs_ids[mcs_i];
+	uint64_t val = read_scom_for_chiplet(chip, id, MCB_CNTLSTATQ);
+
+	/* Still in progress */
+	if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_IP))
+		return 0;
+
+	/* Not sure if DONE and FAIL can be set at the same time, check FAIL first */
+	if ((val & PPC_BIT(MCB_CNTLSTATQ_MCB_FAIL)) || val == 0)
+		die("MCBIST error (%#16.16llx)\n", val);
+
+	/* Finished */
+	if (val & PPC_BIT(MCB_CNTLSTATQ_MCB_DONE))
+		return 1;
+
+	/* Is it even possible to get here?
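+	 * Neither IP, FAIL nor DONE is set at this point; report "not done"
+	 * and let the caller keep polling.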
*/ + return 0; +} diff --git a/src/soc/ibm/power9/mcbist.h b/src/soc/ibm/power9/mcbist.h new file mode 100644 index 00000000000..cd10890724b --- /dev/null +++ b/src/soc/ibm/power9/mcbist.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_MCBIST_H +#define __SOC_IBM_POWER9_MCBIST_H + +#include + +void add_scrub(uint8_t chip, int mcs_i, int port_dimm); +void add_fixed_pattern_write(uint8_t chip, int mcs_i, int port_dimm); + +void mcbist_execute(uint8_t chip, int mcs_i); +int mcbist_is_done(uint8_t chip, int mcs_i); + +#endif /* __SOC_IBM_POWER9_MCBIST_H */ diff --git a/src/soc/ibm/power9/mvpd.c b/src/soc/ibm/power9/mvpd.c new file mode 100644 index 00000000000..55b008e56dd --- /dev/null +++ b/src/soc/ibm/power9/mvpd.c @@ -0,0 +1,537 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tor.h" + +#define MVPD_TOC_ENTRIES 32 +#define MVPD_TOC_SIZE (MVPD_TOC_ENTRIES*sizeof(struct mvpd_toc_entry)) + +#define EEPROM_CHIP_SIZE (64 * KiB) + +/* Each entry points to a VPD record */ +struct mvpd_toc_entry { + char name[4]; // Name without trailing NUL byte + uint16_t offset; // Offset from the beginning of partition in LE + uint8_t reserved[2]; // Unused +} __attribute__((packed)); + +struct pt_record { + char record_name[4]; + /* All of these fields are in little endian */ + uint16_t record_type; + uint16_t record_offset; + uint16_t record_length; + uint16_t ecc_offset; + uint16_t ecc_length; +} __attribute__((packed)); + +/* + * Configuration of EEPROM with VPD data in talos.xml: + * + * + * EEPROM_VPD_PRIMARY_INFO + * + * + * i2cMasterPath + * + * /sys-0/node-0/motherboard-0/proc_socket-0/sforza-0/p9_proc_s/i2c-master-prom0-mvpd-primary/ + * + * + * port0 + * devAddr0xA0 + * engine1 + * byteAddrOffset0x02 + * maxMemorySizeKB0x80 + * chipCount0x02 + * writePageSize0x80 + * writeCycleTime0x0A + * + * + */ + +/* Reads from a single EEPROM chip, which is deduced from offset. Returns zero + on success. */ +static int read_eeprom_chip(uint8_t cpu, uint32_t offset, void *data, uint16_t len) +{ + const unsigned int bus = (cpu == 0 ? 1 : FSI_I2C_BUS); + uint16_t addr = 0xA0; + uint16_t slave = 0; + uint16_t actual_offset = 0; + + struct i2c_msg seg[2]; + + /* Two chips at two different addresses */ + if (offset >= EEPROM_CHIP_SIZE) { + offset -= EEPROM_CHIP_SIZE; + addr += 0x02; + } + + assert(offset < EEPROM_CHIP_SIZE); + actual_offset = offset; + + /* Most-significant bit is port number */ + slave = addr >> 1; + + seg[0].flags = 0; + seg[0].slave = slave; + seg[0].buf = (uint8_t *)&actual_offset; + seg[0].len = sizeof(actual_offset); + seg[1].flags = I2C_M_RD; + seg[1].slave = slave; + seg[1].buf = data; + seg[1].len = len; + + return i2c_transfer(bus, seg, ARRAY_SIZE(seg)); +} + +/* Reads from EEPROM handling accesses across chip boundaries (64 KiB). Returns + zero on success. 
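+   A read that crosses the 64 KiB chip boundary is split into two chip
+   reads below.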
*/ +static int read_eeprom(uint8_t cpu, uint32_t offset, void *data, uint32_t len) +{ + uint16_t len1 = 0; + uint16_t len2 = 0; + + assert(len != 0); + if (offset / EEPROM_CHIP_SIZE == (offset + len - 1) / EEPROM_CHIP_SIZE) + return read_eeprom_chip(cpu, offset, data, len); + + len1 = EEPROM_CHIP_SIZE - offset; + len2 = len - len1; + + if (read_eeprom_chip(cpu, offset, data, len1)) + return 1; + if (read_eeprom_chip(cpu, EEPROM_CHIP_SIZE, (uint8_t *)data + len1, len2)) + return 1; + + return 0; +} + +/* Finds and extracts i-th keyword (`index` specifies which one) from a record + in EEPROM that starts at specified offset */ +static bool eeprom_extract_kwd(uint8_t cpu, uint64_t offset, uint8_t index, + const char *record_name, const char *kwd_name, + uint8_t *buf, size_t *size) +{ + uint16_t record_size = 0; + uint8_t name[VPD_RECORD_NAME_LEN]; + + if (strlen(record_name) != VPD_RECORD_NAME_LEN) + die("Record name has wrong length: %s!\n", record_name); + if (strlen(kwd_name) != VPD_KWD_NAME_LEN) + die("Keyword name has wrong length: %s!\n", kwd_name); + + if (read_eeprom(cpu, offset, &record_size, sizeof(record_size))) + die("Failed to read record size from EEPROM\n"); + + offset += VPD_RECORD_SIZE_LEN; + record_size = le16toh(record_size); + + /* Skip mandatory "RT" and one byte of keyword size (always 4) */ + offset += VPD_KWD_NAME_LEN + 1; + + if (read_eeprom(cpu, offset, name, sizeof(name))) + die("Failed to read record name from EEPROM\n"); + + if (memcmp(name, record_name, VPD_RECORD_NAME_LEN)) + die("Expected to be working with %s record, got %.4s!\n", + record_name, name); + + offset += VPD_RECORD_NAME_LEN; + + while (offset < record_size) { + uint8_t name_buf[VPD_KWD_NAME_LEN]; + uint16_t kwd_size = 0; + + if (read_eeprom(cpu, offset, name_buf, sizeof(name_buf))) + die("Failed to read keyword name from EEPROM\n"); + + /* This is always the last keyword */ + if (!memcmp(name_buf, "PF", VPD_KWD_NAME_LEN)) + break; + + offset += VPD_KWD_NAME_LEN; + + if (name_buf[0] == '#') { + /* This is a large (two-byte size) keyword */ + if (read_eeprom(cpu, offset, &kwd_size, sizeof(kwd_size))) + die("Failed to read large keyword size from EEPROM\n"); + kwd_size = le16toh(kwd_size); + offset += 2; + } else { + uint8_t small_size; + if (read_eeprom(cpu, offset, &small_size, sizeof(small_size))) + die("Failed to read small keyword size from EEPROM\n"); + kwd_size = small_size; + offset += 1; + } + + if (!memcmp(name_buf, kwd_name, VPD_KWD_NAME_LEN) && index-- == 0) { + if (*size < kwd_size) { + die("Keyword buffer is too small: %llu instead of %llu\n", + (unsigned long long)*size, (unsigned long long)kwd_size); + } + + if (read_eeprom(cpu, offset, buf, kwd_size)) + die("Failed to read keyword body from EEPROM\n"); + + *size = kwd_size; + return true; + } + + offset += kwd_size; + } + + return false; +} + +/* Builds MVPD partition for a single processor (64 KiB per chip) or returns an + already built one */ +static const uint8_t *mvpd_get(uint8_t cpu) +{ + enum { MAX_MVPD_SIZE = 64 * KiB }; + + static const char *const mvpd_records[] = { + "CRP0", "CP00", "VINI", + "LRP0", "LRP1", "LRP2", "LRP3", "LRP4", "LRP5", + "LWP0", "LWP1", "LWP2", "LWP3", "LWP4", "LWP5", + "VRML", "VWML", "VER0", "MER0", "VMSC", + }; + + uint8_t *mvpd_buf = &_mvpd_cache[cpu * MAX_MVPD_SIZE]; + + struct mvpd_toc_entry *toc = (void *)mvpd_buf; + uint16_t mvpd_offset = MVPD_TOC_SIZE; + + uint8_t pt_buf[256]; + struct pt_record *pt_record = (void *)pt_buf; + size_t pt_size = sizeof(struct pt_record); + + uint8_t i = 0; + + /* 
Skip the ECC data + "large resource" byte (0x84) in the VHDR */
+	uint64_t offset = 12;
+
+	if (cpu >= 2)
+		die("Unsupported CPU number for MVPD query: %d.\n", cpu);
+
+	/* Partition is already constructed (a filled one can't be empty) */
+	if (mvpd_buf[0] != '\0')
+		return mvpd_buf;
+
+	if (!eeprom_extract_kwd(cpu, offset, 0, "VHDR", "PT", pt_buf, &pt_size))
+		die("Failed to find PT keyword of VHDR record in EEPROM.\n");
+
+	if (memcmp(pt_record->record_name, "VTOC", VPD_RECORD_NAME_LEN))
+		die("VHDR in EEPROM is invalid (got %.4s instead of VTOC).\n",
+		    pt_record->record_name);
+
+	/* Move to the TOC record, skip "large resource" byte (0x84) */
+	offset = le16toh(pt_record->record_offset) + 1;
+
+	/* Fill whole TOC with 0xFF */
+	memset(toc, 0xFF, MVPD_TOC_SIZE);
+
+	/* Up to three PT keywords in VTOC record */
+	for (i = 0; i < 3; ++i) {
+		uint8_t j;
+		uint8_t entry_count;
+
+		pt_size = sizeof(pt_buf);
+		if (!eeprom_extract_kwd(cpu, offset, i, "VTOC", "PT", pt_buf, &pt_size)) {
+			if (i == 0)
+				die("Failed to find any PT keyword of VTOC record in EEPROM\n");
+			break;
+		}
+
+		entry_count = pt_size / sizeof(struct pt_record);
+
+		for (j = 0; j < entry_count; ++j) {
+			const char *record_name = pt_record[j].record_name;
+			/* Skip "large resource" byte (0x84) */
+			const uint16_t record_offset = le16toh(pt_record[j].record_offset) + 1;
+			const uint16_t record_size = le16toh(pt_record[j].record_length);
+
+			uint8_t k;
+			for (k = 0; k < ARRAY_SIZE(mvpd_records); ++k) {
+				if (!memcmp(record_name, mvpd_records[k], 4))
+					break;
+			}
+
+			if (k == ARRAY_SIZE(mvpd_records))
+				continue;
+
+			if (mvpd_offset + record_size > MAX_MVPD_SIZE) {
+				die("MVPD section doesn't have space for %.4s record of "
+				    "size %d\n", record_name, record_size);
+			}
+
+			/* Store this record to MVPD */
+
+			memcpy(toc->name, record_name, VPD_RECORD_NAME_LEN);
+			toc->offset = htole16(mvpd_offset);
+			toc->reserved[0] = 0x5A;
+			toc->reserved[1] = 0x5A;
+
+			if (read_eeprom(cpu, record_offset, mvpd_buf + mvpd_offset,
+					record_size))
+				die("Failed to read %.4s record from EEPROM\n", record_name);
+
+			++toc;
+			mvpd_offset += record_size;
+		}
+	}
+
+	return mvpd_buf;
+}
+
+static struct mvpd_toc_entry *find_record(struct mvpd_toc_entry *toc,
+					  const char *name)
+{
+	int i = 0;
+	for (i = 0; i < MVPD_TOC_ENTRIES; ++i) {
+		if (memcmp(toc[i].name, name, VPD_RECORD_NAME_LEN) == 0)
+			return &toc[i];
+	}
+	return NULL;
+}
+
+/* Checks if the ring list ends here. The end is marked by an "END" string.
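+   Fewer than 3 bytes of buffer left also counts as the end, since no
+   "END" marker can fit there.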
*/ +static bool is_end_of_rings(const uint8_t *buf_left, uint32_t len_left) +{ + return (len_left < 3 || memcmp(buf_left, "END", 3) == 0); +} + +/* Finds specific ring by combination of chiplet and ring ids */ +static struct ring_hdr *find_ring_step(uint8_t chiplet_id, + uint8_t even_odd, + uint16_t ring_id, + const uint8_t **buf_left, + uint32_t *len_left) +{ + uint32_t even_odd_mask = 0; + struct ring_hdr *hdr = (struct ring_hdr *)*buf_left; + + if (*len_left < sizeof(struct ring_hdr) || hdr->magic != RS4_MAGIC) + return NULL; + + *buf_left += hdr->size; + *len_left -= hdr->size; + + switch (ring_id) { + case EX_L3_REPR: + even_odd_mask = 0x00001000; + break; + case EX_L2_REPR: + even_odd_mask = 0x00000400; + break; + case EX_L3_REFR_TIME: + case EX_L3_REFR_REPR: + even_odd_mask = 0x00000040; + break; + default: + even_odd_mask = 0; + break; + } + + even_odd_mask >>= even_odd; + + if (hdr->ring_id != ring_id) + return NULL; + if (((hdr->scan_addr >> 24) & 0xFF) != chiplet_id) + return NULL; + if (even_odd_mask != 0 && !(hdr->scan_addr & even_odd_mask)) + return NULL; + + return hdr; +} + +/* Searches for a specific ring in a keyword */ +static struct ring_hdr *find_ring(uint8_t chiplet_id, uint8_t even_odd, + uint16_t ring_id, const uint8_t *buf, + uint32_t buf_len) +{ + /* Skip version number */ + --buf_len; + ++buf; + + while (!is_end_of_rings(buf, buf_len)) { + struct ring_hdr *ring = + find_ring_step(chiplet_id, even_odd, ring_id, &buf, &buf_len); + if (ring != NULL) + return ring; + } + + return NULL; +} + +static const uint8_t *mvpd_get_keyword(uint8_t cpu, const char *record_name, + const char *kwd_name, size_t *kwd_size) +{ + const uint8_t *mvpd = mvpd_get(cpu); + struct mvpd_toc_entry *mvpd_toc = (void *)mvpd; + + struct mvpd_toc_entry *toc_entry = NULL; + const uint8_t *record_data = NULL; + + const uint8_t *kwd = NULL; + + toc_entry = find_record(mvpd_toc, record_name); + if (toc_entry == NULL) + die("Failed to find %s MVPD record!\n", record_name); + + record_data = mvpd + le16toh(toc_entry->offset); + + kwd = vpd_find_kwd(record_data, record_name, kwd_name, kwd_size); + if (kwd == NULL) + die("Failed to find %s keyword in %s!\n", kwd_name, record_name); + + return kwd; +} + +void mvpd_get_mcs_pg(uint8_t chip, uint16_t *pg) +{ + enum { + VPD_CP00_PG_HDR_LENGTH = 1, + VPD_CP00_PG_DATA_LENGTH = 128, + VPD_CP00_PG_DATA_ENTRIES = VPD_CP00_PG_DATA_LENGTH / 2, + }; + + uint8_t raw_pg_data[VPD_CP00_PG_HDR_LENGTH + VPD_CP00_PG_DATA_LENGTH]; + uint16_t pg_data[VPD_CP00_PG_DATA_ENTRIES]; + uint32_t size = sizeof(raw_pg_data); + + if (!mvpd_extract_keyword(chip, "CP00", "PG", raw_pg_data, &size)) + die("Failed to read CPU%d/MVPD/CP00/PG", chip); + + memcpy(pg_data, raw_pg_data + VPD_CP00_PG_HDR_LENGTH, sizeof(pg_data)); + + pg[0] = pg_data[MC01_CHIPLET_ID]; + pg[1] = pg_data[MC23_CHIPLET_ID]; +} + +bool mvpd_extract_keyword(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t *buf, uint32_t *size) +{ + const uint8_t *kwd = NULL; + size_t kwd_size = 0; + bool copied_data = false; + + kwd = mvpd_get_keyword(chip, record_name, kwd_name, &kwd_size); + if (kwd == NULL) + die("Failed to find %s keyword in %s!\n", kwd_name, + record_name); + + if (*size >= kwd_size) { + memcpy(buf, kwd, kwd_size); + copied_data = true; + } + + *size = kwd_size; + + return copied_data; +} + +const struct voltage_kwd *mvpd_get_voltage_data(uint8_t chip, int lrp) +{ + static int inited_chip = -1; + static int inited_lrp = -1; + static uint8_t buf[sizeof(struct voltage_kwd)]; + + char 
record_name[] = { 'L', 'R', 'P', '0' + lrp, '\0' }; + uint32_t buf_size = sizeof(buf); + struct voltage_kwd *voltage = (void *)buf; + + assert(lrp >= 0 && lrp < 6); + if (inited_chip == chip && inited_lrp == lrp) + return voltage; + + inited_chip = -1; + inited_lrp = -1; + + if (!mvpd_extract_keyword(chip, record_name, "#V", buf, &buf_size)) { + printk(BIOS_ERR, "Failed to read LRP0 record from MVPD\n"); + return NULL; + } + + if (voltage->version != VOLTAGE_DATA_VERSION) { + printk(BIOS_ERR, "Only version %d of voltage data is supported, got: %d\n", + VOLTAGE_DATA_VERSION, voltage->version); + return NULL; + } + + inited_chip = chip; + inited_lrp = lrp; + return voltage; +} + +uint64_t mvpd_get_available_cores(uint8_t chip) +{ + enum { + VPD_CP00_PG_HDR_LENGTH = 1, + VPD_CP00_PG_DATA_LENGTH = 128, + VPD_CP00_PG_DATA_ENTRIES = VPD_CP00_PG_DATA_LENGTH / 2, + + ALL_ON_PG_MASK = 0xFFFF, + EC_AG_MASK = 0xE1FF, + }; + + uint64_t cores = 0; + + uint8_t raw_pg_data[VPD_CP00_PG_HDR_LENGTH + VPD_CP00_PG_DATA_LENGTH]; + uint16_t pg_data[VPD_CP00_PG_DATA_ENTRIES]; + uint32_t size = sizeof(raw_pg_data); + + if (!mvpd_extract_keyword(chip, "CP00", "PG", raw_pg_data, &size)) + die("Failed to read CPU%d/MVPD/CP00/PG", chip); + + memcpy(pg_data, raw_pg_data + VPD_CP00_PG_HDR_LENGTH, sizeof(pg_data)); + + for (int core = 0; core < MAX_CORES_PER_CHIP; core++) { + chiplet_id_t core_chiplet = EC00_CHIPLET_ID + core; + if ((pg_data[core_chiplet] & ALL_ON_PG_MASK) == EC_AG_MASK) + cores |= PPC_BIT(core); + } + + return cores; +} + +/* Finds a specific ring in MVPD partition and extracts it */ +bool mvpd_extract_ring(uint8_t chip, const char *record_name, + const char *kwd_name, uint8_t chiplet_id, + uint8_t even_odd, uint16_t ring_id, + uint8_t *buf, uint32_t buf_size) +{ + const uint8_t *rings = NULL; + size_t rings_size = 0; + + struct ring_hdr *ring = NULL; + uint32_t ring_size = 0; + + rings = mvpd_get_keyword(chip, record_name, kwd_name, &rings_size); + if (rings == NULL) + die("Failed to find %s keyword in %s!\n", kwd_name, record_name); + + ring = find_ring(chiplet_id, even_odd, ring_id, rings, rings_size); + if (ring == NULL) + return false; + + ring_size = ring->size; + if (buf_size >= ring_size) + memcpy(buf, ring, ring_size); + + return (buf_size >= ring_size); +} diff --git a/src/soc/ibm/power9/occ.c b/src/soc/ibm/power9/occ.c new file mode 100644 index 00000000000..a5a2837f933 --- /dev/null +++ b/src/soc/ibm/power9/occ.c @@ -0,0 +1,1412 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include // memset, memcpy +#include +#include + +#include + +#include "homer.h" +#include "ops.h" + +#define OCB_PIB_OCBCSR0_OCB_STREAM_MODE 4 +#define OCB_PIB_OCBCSR0_OCB_STREAM_TYPE 5 + +#define OCB_OCI_OCBSHCS0_PUSH_ENABLE 31 +#define OCB_OCI_OCBSHCS0_PUSH_FULL 0 + +#define PU_OCB_PIB_OCBCSR0_RO 0x0006D011 +#define PU_OCB_PIB_OCBCSR1_RO 0x0006D031 + +#define PU_OCB_OCI_OCBSHCS0_SCOM 0x0006C204 +#define PU_OCB_OCI_OCBSHCS1_SCOM 0x0006C214 + +#define EX_PPM_SPWKUP_OCC 0x200F010C +#define PU_OCB_PIB_OCBAR0 0x0006D010 + +#define PU_OCB_PIB_OCBDR0 0x0006D015 +#define PU_OCB_PIB_OCBDR1 0x0006D035 + +#define PU_OCB_PIB_OCBCSR0_OR 0x0006D013 +#define PU_OCB_PIB_OCBCSR0_CLEAR 0x0006D012 + +#define OCC_CMD_ADDR 0x000E0000 +#define OCC_RSP_ADDR 0x000E1000 + +#define OCC_CMD_POLL 0x00 +#define OCC_CMD_CLEAR_ERROR_LOG 0x12 +#define OCC_CMD_SET_STATE 0x20 +#define OCC_CMD_SETUP_CFG_DATA 0x21 +#define OCC_CMD_SET_POWER_CAP 0x22 + +#define OCC_RC_SUCCESS 0x00 +#define 
OCC_RC_INIT_FAILURE 0xE5 +#define OCC_RC_OCC_INIT_CHECKPOINT 0xE1 + +#define OCC_CFGDATA_FREQ_POINT 0x02 +#define OCC_CFGDATA_OCC_ROLE 0x03 +#define OCC_CFGDATA_APSS_CONFIG 0x04 +#define OCC_CFGDATA_MEM_CONFIG 0x05 +#define OCC_CFGDATA_PCAP_CONFIG 0x07 +#define OCC_CFGDATA_SYS_CONFIG 0x0F +#define OCC_CFGDATA_TCT_CONFIG 0x13 +#define OCC_CFGDATA_AVSBUS_CONFIG 0x14 +#define OCC_CFGDATA_GPU_CONFIG 0x15 + +/* FIR register offset from base */ +enum fir_offset { + BASE_WAND_INCR = 1, + BASE_WOR_INCR = 2, + MASK_INCR = 3, + MASK_WAND_INCR = 4, + MASK_WOR_INCR = 5, + ACTION0_INCR = 6, + ACTION1_INCR = 7 +}; + +struct occ_cfg_inputs { + struct homer_st *homer; + uint8_t chip; + bool is_master_occ; +}; + +struct occ_cfg_info { + const char *name; + void (*func)(const struct occ_cfg_inputs *inputs, uint8_t *data, uint16_t *size); + bool to_master_only; +}; + +struct occ_poll_response { + uint8_t status; + uint8_t ext_status; + uint8_t occs_present; + uint8_t requested_cfg; + uint8_t state; + uint8_t mode; + uint8_t ips_status; + uint8_t error_id; + uint32_t error_address; + uint16_t error_length; + uint8_t error_source; + uint8_t gpu_cfg; + uint8_t code_level[16]; + uint8_t sensor[6]; + uint8_t num_blocks; + uint8_t version; + uint8_t sensor_data[]; // 4049 bytes +} __attribute__((packed)); + +static void pm_ocb_setup(uint8_t chip, uint32_t ocb_bar) +{ + write_scom(chip, PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + write_scom(chip, PU_OCB_PIB_OCBCSR0_CLEAR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE)); + write_scom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)ocb_bar << 32); +} + +static void check_ocb_mode(uint8_t chip, uint64_t ocb_csr_address, uint64_t ocb_shcs_address) +{ + uint64_t ocb_pib = read_scom(chip, ocb_csr_address); + + /* + * The following check for circular mode is an additional check + * performed to ensure a valid data access. + */ + if ((ocb_pib & PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)) && + (ocb_pib & PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_TYPE))) { + /* + * Check if push queue is enabled. If not, let the store occur + * anyway to let the PIB error response return occur. (That is + * what will happen if this checking code were not here.) 
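+	 * If the push queue stays full after a few polls below, we die()
+	 * rather than hang on the store.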
+ */ + uint64_t stream_push_ctrl = read_scom(chip, ocb_shcs_address); + + if (stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_ENABLE)) { + uint8_t counter = 0; + for (counter = 0; counter < 4; counter++) { + /* Proceed if the OCB_OCI_OCBSHCS0_PUSH_FULL is clear */ + if (!(stream_push_ctrl & PPC_BIT(OCB_OCI_OCBSHCS0_PUSH_FULL))) + break; + + stream_push_ctrl = read_scom(chip, ocb_shcs_address); + } + + if (counter == 4) + die("Failed to write to circular buffer.\n"); + } + } +} + +static void put_ocb_indirect(uint8_t chip, uint32_t ocb_req_length, + uint32_t oci_address, uint64_t *ocb_buffer) +{ + write_scom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + + check_ocb_mode(chip, PU_OCB_PIB_OCBCSR0_RO, PU_OCB_OCI_OCBSHCS0_SCOM); + + for (uint32_t index = 0; index < ocb_req_length; index++) + write_scom(chip, PU_OCB_PIB_OCBDR0, ocb_buffer[index]); +} + +static void get_ocb_indirect(uint8_t chip, uint32_t ocb_req_length, + uint32_t oci_address, uint64_t *ocb_buffer) +{ + write_scom(chip, PU_OCB_PIB_OCBAR0, (uint64_t)oci_address << 32); + for (uint32_t loopCount = 0; loopCount < ocb_req_length; loopCount++) + ocb_buffer[loopCount] = read_scom(chip, PU_OCB_PIB_OCBDR0); +} + +static void write_occ_sram(uint8_t chip, uint32_t address, uint64_t *buffer, size_t data_length) +{ + pm_ocb_setup(chip, address); + put_ocb_indirect(chip, data_length / 8, address, buffer); +} + +static void read_occ_sram(uint8_t chip, uint32_t address, uint64_t *buffer, size_t data_length) +{ + pm_ocb_setup(chip, address); + get_ocb_indirect(chip, data_length / 8, address, buffer); +} + +static void write_occ_command(uint8_t chip, uint64_t write_data) +{ + check_ocb_mode(chip, PU_OCB_PIB_OCBCSR1_RO, PU_OCB_OCI_OCBSHCS1_SCOM); + write_scom(chip, PU_OCB_PIB_OCBDR1, write_data); +} + +void clear_occ_special_wakeups(uint8_t chip, uint64_t cores) +{ + for (size_t i = 0; i < MAX_CORES_PER_CHIP; i += 2) { + if (!IS_EX_FUNCTIONAL(i, cores)) + continue; + scom_and_for_chiplet(chip, EC00_CHIPLET_ID + i, EX_PPM_SPWKUP_OCC, + ~PPC_BIT(0)); + } +} + +void special_occ_wakeup_disable(uint8_t chip, uint64_t cores) +{ + enum { PPM_SPWKUP_FSP = 0x200F010B }; + + for (int i = 0; i < MAX_CORES_PER_CHIP; ++i) { + if (!IS_EC_FUNCTIONAL(i, cores)) + continue; + + write_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP, 0); + /* This puts an inherent delay in the propagation of the reset transition */ + (void)read_scom_for_chiplet(chip, EC00_CHIPLET_ID + i, PPM_SPWKUP_FSP); + } +} + +/* Sets up boot loader in SRAM and returns 32-bit jump instruction to it */ +static uint64_t setup_memory_boot(uint8_t chip) +{ + enum { + OCC_BOOT_OFFSET = 0x40, + CTR = 9, + OCC_SRAM_BOOT_ADDR = 0xFFF40000, + OCC_SRAM_BOOT_ADDR2 = 0xFFF40002, + }; + + uint64_t sram_program[2]; + + /* lis r1, 0x8000 */ + sram_program[0] = ((uint64_t)ppc_lis(1, 0x8000) << 32); + + /* ori r1, r1, OCC_BOOT_OFFSET */ + sram_program[0] |= ppc_ori(1, 1, OCC_BOOT_OFFSET); + + /* mtctr (mtspr r1, CTR) */ + sram_program[1] = ((uint64_t)ppc_mtspr(1, CTR) << 32); + + /* bctr */ + sram_program[1] |= ppc_bctr(); + + /* Write to SRAM */ + write_occ_sram(chip, OCC_SRAM_BOOT_ADDR, sram_program, sizeof(sram_program)); + + return ((uint64_t)ppc_b(OCC_SRAM_BOOT_ADDR2) << 32); +} + +void occ_start_from_mem(uint8_t chip) +{ + enum { + OCB_PIB_OCR_CORE_RESET_BIT = 0, + JTG_PIB_OJCFG_DBG_HALT_BIT = 6, + + PU_SRAM_SRBV0_SCOM = 0x0006A004, + + PU_JTG_PIB_OJCFG_AND = 0x0006D005, + PU_OCB_PIB_OCR_CLEAR = 0x0006D001, + PU_OCB_PIB_OCR_OR = 0x0006D002, + }; + + write_scom(chip, 
PU_OCB_PIB_OCBCSR0_OR, PPC_BIT(OCB_PIB_OCBCSR0_OCB_STREAM_MODE)); + + /* + * Set up Boot Vector Registers in SRAM: + * - set bv0-2 to all 0's (illegal instructions) + * - set bv3 to proper branch instruction + */ + write_scom(chip, PU_SRAM_SRBV0_SCOM, 0); + write_scom(chip, PU_SRAM_SRBV0_SCOM + 1, 0); + write_scom(chip, PU_SRAM_SRBV0_SCOM + 2, 0); + write_scom(chip, PU_SRAM_SRBV0_SCOM + 3, setup_memory_boot(chip)); + + write_scom(chip, PU_JTG_PIB_OJCFG_AND, ~PPC_BIT(JTG_PIB_OJCFG_DBG_HALT_BIT)); + write_scom(chip, PU_OCB_PIB_OCR_OR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); + write_scom(chip, PU_OCB_PIB_OCR_CLEAR, PPC_BIT(OCB_PIB_OCR_CORE_RESET_BIT)); +} + +/* Wait for OCC to reach communications checkpoint */ +static void wait_for_occ_checkpoint(uint8_t chip) +{ + enum { + /* Wait up to 15 seconds for OCC to be ready (1500 * 10ms = 15s) */ + US_BETWEEN_READ = 10000, + READ_RETRY_LIMIT = 1500, + + OCC_COMM_INIT_COMPLETE = 0x0EFF, + OCC_INIT_FAILURE = 0xE000, + + OCC_RSP_SRAM_ADDR = 0xFFFBF000, + }; + + int retry_count = 0; + + while (retry_count++ < READ_RETRY_LIMIT) { + uint8_t response[8] = { 0x0 }; + uint8_t status; + uint16_t checkpoint; + + udelay(US_BETWEEN_READ); + + /* Read SRAM response buffer to check for OCC checkpoint */ + read_occ_sram(chip, OCC_RSP_SRAM_ADDR, (uint64_t *)response, sizeof(response)); + + /* Pull status from response (byte 2) */ + status = response[2]; + + /* Pull checkpoint from response (bytes 6-7) */ + checkpoint = (response[6] << 8) | response[7]; + + if (status == OCC_RC_OCC_INIT_CHECKPOINT && + checkpoint == OCC_COMM_INIT_COMPLETE) + /* Success */ + return; + + if ((checkpoint & OCC_INIT_FAILURE) == OCC_INIT_FAILURE || + status == OCC_RC_INIT_FAILURE) + die("OCC initialization has failed\n"); + } + + die("Waiting for OCC initialization checkpoint has timed out.\n"); +} + +static void build_occ_cmd(struct homer_st *homer, uint8_t occ_cmd, uint8_t seq_num, + const uint8_t *data, uint16_t data_len) +{ + uint8_t *cmd_buf = &homer->occ_host_area[OCC_CMD_ADDR]; + uint16_t cmd_len = 0; + uint16_t checksum = 0; + uint16_t i = 0; + + cmd_buf[cmd_len++] = seq_num; + cmd_buf[cmd_len++] = occ_cmd; + cmd_buf[cmd_len++] = (data_len >> 8) & 0xFF; + cmd_buf[cmd_len++] = data_len & 0xFF; + memcpy(&cmd_buf[cmd_len], data, data_len); + cmd_len += data_len; + + for (i = 0; i < cmd_len; ++i) + checksum += cmd_buf[i]; + cmd_buf[cmd_len++] = (checksum >> 8) & 0xFF; + cmd_buf[cmd_len++] = checksum & 0xFF; + + /* + * When the P8 processor writes to memory (such as the HOMER) there is + * no certainty that the writes happen in order or that they have + * actually completed by the time the instructions complete. 'sync' + * is a memory barrier to ensure the HOMER data has actually been made + * consistent with respect to memory, so that if the OCC were to read + * it they would see all of the data. Otherwise, there is potential + * for them to get stale or incomplete data. + */ + asm volatile("sync" ::: "memory"); +} + +static void wait_for_occ_response(struct homer_st *homer, uint32_t timeout_sec, + uint8_t seq_num) +{ + enum { + /* + * With two CPUs OCC polls were failing with this set to 10 or 20 us. + * Apparently, checks performed by the code might not guarantee + * that poll data is available in full (checksum doesn't match). + * + * With one CPU wait_for_occ_status() reports OCC is asking for PCAP + * configuration data if *this* delay (not the one in wait_for_occ_status) + * is small (50 us or smaller), 100 us seems fine. 
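+		 * (wait_for_occ_status() itself polls at 50 ms intervals, see
+		 * DELAY_BETWEEN_POLLS_US below.)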
+ * + * Something is wrong with synchronization and huge delays in Hostboot + * might be hiding the issue. + */ + OCC_RSP_SAMPLE_TIME_US = 100, + OCC_COMMAND_IN_PROGRESS = 0xFF, + }; + + const uint8_t *rsp_buf = &homer->occ_host_area[OCC_RSP_ADDR]; + + long timeout_us = timeout_sec * USECS_PER_SEC; + if (timeout_sec == 0) + timeout_us = OCC_RSP_SAMPLE_TIME_US; + + while (timeout_us >= 0) { + /* + * 1. When OCC receives the command, it will set the status to + * COMMAND_IN_PROGRESS. + * 2. When the response is ready OCC will update the full + * response buffer (except the status) + * 3. The status field is updated last to indicate response ready + * + * Note: Need to check the sequence number to be sure we are + * processing the expected response + */ + if (rsp_buf[2] != OCC_COMMAND_IN_PROGRESS && rsp_buf[0] == seq_num) { + /* + * Need an 'isync' here to ensure that previous instructions + * have completed before the code continues on. This is a type + * of read-barrier. Without this the processor can do + * speculative reads of the HOMER data and you can actually + * get stale data as part of the instructions that happen + * afterwards. Another 'weak consistency' issue. + */ + asm volatile("isync" ::: "memory"); + + /* OCC must have processed the command */ + break; + } + + if (timeout_us > 0) { + /* Delay before the next check */ + long sleep_us = OCC_RSP_SAMPLE_TIME_US; + if (timeout_us < sleep_us) + sleep_us = timeout_us; + + udelay(sleep_us); + timeout_us -= sleep_us; + } else { + /* Time expired */ + die("Timed out while waiting for OCC response\n"); + } + } +} + +static bool parse_occ_response(struct homer_st *homer, uint8_t occ_cmd, + uint8_t *status, uint8_t *seq_num, + uint8_t *response, uint32_t *response_len) +{ + uint16_t index = 0; + uint16_t data_len = 0; + uint16_t checksum = 0; + uint16_t i = 0; + + const uint8_t *rsp_buf = &homer->occ_host_area[OCC_RSP_ADDR]; + + *seq_num = rsp_buf[index++]; + index += 1; /* command */ + *status = rsp_buf[index++]; + + data_len = *(uint16_t *)&rsp_buf[index]; + index += 2; + + if (data_len > 0) { + uint16_t copy_size = data_len; + if (copy_size > *response_len) + copy_size = *response_len; + + memcpy(response, &rsp_buf[index], copy_size); + *response_len = copy_size; + + index += data_len; + } + + for (i = 0; i < index; ++i) + checksum += rsp_buf[i]; + + if (checksum != *(uint16_t *)&rsp_buf[index]) { + printk(BIOS_WARNING, "OCC response for 0x%02x has invalid checksum\n", + occ_cmd); + return false; + } + + if (*status != OCC_RC_SUCCESS) { + printk(BIOS_WARNING, "0x%02x OCC command failed with an error code: 0x%02x\n", + occ_cmd, *status); + return false; + } + + return true; +} + +static bool write_occ_cmd(uint8_t chip, struct homer_st *homer, uint8_t occ_cmd, + const uint8_t *data, uint16_t data_len, + uint8_t *response, uint32_t *response_len) +{ + static uint8_t cmd_seq_num; + + uint8_t status = 0; + uint8_t rsp_seq_num = 0; + + ++cmd_seq_num; + /* Do not use 0 for sequence number */ + if (cmd_seq_num == 0) + ++cmd_seq_num; + + build_occ_cmd(homer, occ_cmd, cmd_seq_num, data, data_len); + /* Sender: HTMGT; command: Command Write Attention */ + write_occ_command(chip, 0x1001000000000000); + + /* Wait for OCC to process command and send response (timeout is the + same for all commands) */ + wait_for_occ_response(homer, 20, cmd_seq_num); + + if (!parse_occ_response(homer, occ_cmd, &status, &rsp_seq_num, response, + response_len)) { + /* Statuses of 0xE0-EF are reserved for OCC exceptions */ + if ((status & 0xF0) == 0xE0) { + 
printk(BIOS_WARNING, + "OCC exception occurred while running 0x%02x command\n", + occ_cmd); + } + + if (console_log_level(BIOS_WARNING)) { + printk(BIOS_WARNING, "Received OCC response:\n"); + hexdump(response, *response_len); + printk(BIOS_WARNING, "Failed to parse OCC response\n"); + } + return false; + } + + if (rsp_seq_num != cmd_seq_num) { + printk(BIOS_WARNING, + "Received OCC response for a wrong command while running 0x%02x\n", + occ_cmd); + return false; + } + + return true; +} + +static void send_occ_cmd(uint8_t chip, struct homer_st *homer, uint8_t occ_cmd, + const uint8_t *data, uint16_t data_len, + uint8_t *response, uint32_t *response_len) +{ + enum { MAX_TRIES = 2 }; + + uint8_t i = 0; + + for (i = 0; i < MAX_TRIES; ++i) { + if (write_occ_cmd(chip, homer, occ_cmd, data, data_len, response, response_len)) + break; + + if (i < MAX_TRIES - 1) + printk(BIOS_DEBUG, "Retrying running OCC command 0x%02x\n", occ_cmd); + } + + if (i == MAX_TRIES) + die("Failed running OCC command 0x%02x %d times\n", occ_cmd, MAX_TRIES); +} + +/* Reports OCC error to the user and clears it on OCC's side */ +static void handle_occ_error(uint8_t chip, struct homer_st *homer, + const struct occ_poll_response *response) +{ + static uint8_t error_log_buf[4096]; + + uint16_t error_length = response->error_length; + + const uint8_t clear_log_data[4] = { + 0x01, // Version + response->error_id, + response->error_source, + 0x00 // Reserved + }; + uint32_t response_len = 0; + + if (error_length > sizeof(error_log_buf)) { + printk(BIOS_WARNING, "Truncating OCC error log from %d to %ld bytes\n", + error_length, sizeof(error_log_buf)); + error_length = sizeof(error_log_buf); + } + + read_occ_sram(chip, response->error_address, (uint64_t *)error_log_buf, error_length); + + if (console_log_level(BIOS_WARNING)) { + printk(BIOS_WARNING, "OCC error log:\n"); + hexdump(error_log_buf, error_length); + } + + /* Confirm to OCC that we've read the log */ + send_occ_cmd(chip, homer, OCC_CMD_CLEAR_ERROR_LOG, + clear_log_data, sizeof(clear_log_data), + NULL, &response_len); +} + +static void poll_occ(uint8_t chip, struct homer_st *homer, bool flush_all_errors, + struct occ_poll_response *response) +{ + enum { OCC_POLL_DATA_MIN_SIZE = 40 }; + + uint8_t max_more_errors = 10; + while (true) { + const uint8_t poll_data[1] = { 0x20 /*version*/ }; + uint32_t response_len = sizeof(*response); + + send_occ_cmd(chip, homer, OCC_CMD_POLL, poll_data, sizeof(poll_data), + (uint8_t *)response, &response_len); + + if (response_len < OCC_POLL_DATA_MIN_SIZE) + die("Invalid data length"); + + if (!flush_all_errors) + break; + + if (response->error_id == 0) + break; + + handle_occ_error(chip, homer, response); + + --max_more_errors; + if (max_more_errors == 0) { + if (console_log_level(BIOS_WARNING)) { + printk(BIOS_WARNING, "Last OCC poll response:\n"); + hexdump(response, response_len); + } + die("Hit too many errors on polling OCC\n"); + } + } +} + +static void get_freq_point_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_FREQ_POINT_VERSION = 0x20 }; + OCCPstateParmBlock *oppb = (void *)inputs->homer->ppmr.occ_parm_block; + + const struct voltage_bucket_data *bucket = get_voltage_data(inputs->chip); + + uint16_t index = 0; + uint16_t min_freq = 0; + + data[index++] = OCC_CFGDATA_FREQ_POINT; + data[index++] = OCC_CFGDATA_FREQ_POINT_VERSION; + + /* Nominal Frequency in MHz */ + memcpy(&data[index], &bucket->nominal.freq, 2); + index += 2; + + /* Turbo Frequency in MHz */ + 
memcpy(&data[index], &bucket->turbo.freq, 2); + index += 2; + + /* Minimum Frequency in MHz */ + min_freq = oppb->frequency_min_khz / 1000; + memcpy(&data[index], &min_freq, 2); + index += 2; + + /* Ultra Turbo Frequency in MHz */ + memcpy(&data[index], &bucket->ultra_turbo.freq, 2); + index += 2; + + /* Reserved (Static Power Save in PowerVM) */ + memset(&data[index], 0, 2); + index += 2; + + /* Reserved (FFO in PowerVM) */ + memset(&data[index], 0, 2); + index += 2; + + *size = index; +} + +static void get_occ_role_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { + OCC_ROLE_SLAVE = 0x00, + OCC_ROLE_MASTER = 0x01, + }; + + data[0] = OCC_CFGDATA_OCC_ROLE; + data[1] = (inputs->is_master_occ ? OCC_ROLE_MASTER : OCC_ROLE_SLAVE); + + *size = 2; +} + +static void get_apss_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_APSS_VERSION = 0x20 }; + + /* ATTR_APSS_GPIO_PORT_PINS */ + uint8_t function[16] = { 0x0 }; + + /* ATTR_ADC_CHANNEL_GNDS */ + uint8_t ground[16] = { 0x0 }; + + /* ATTR_ADC_CHANNEL_GAINS */ + uint32_t gain[16] = { 0x0 }; + + /* ATTR_ADC_CHANNEL_OFFSETS */ + uint32_t offset[16] = { 0x0 }; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_APSS_CONFIG; + data[index++] = OCC_CFGDATA_APSS_VERSION; + data[index++] = 0; + data[index++] = 0; + + for (uint64_t channel = 0; channel < sizeof(function); ++channel) { + data[index++] = function[channel]; // ADC Channel assignment + + memset(&data[index], 0, sizeof(uint32_t)); // Sensor ID + index += 4; + + data[index++] = ground[channel]; // Ground Select + + memcpy(&data[index], &gain[channel], sizeof(uint32_t)); + index += 4; + + memcpy(&data[index], &offset[channel], sizeof(uint32_t)); + index += 4; + } + + /* ATTR_APSS_GPIO_PORT_MODES */ + uint8_t gpio_mode[2] = { 0x0 }; + /* ATTR_APSS_GPIO_PORT_PINS */ + uint8_t gpio_pin[16] = { 0x0 }; + + uint64_t pins_per_port = sizeof(gpio_pin) / sizeof(gpio_mode); + uint64_t pin_idx = 0; + + for (uint64_t port = 0; port < sizeof(gpio_mode); ++port) { + data[index++] = gpio_mode[port]; + data[index++] = 0; + + memcpy(&data[index], gpio_pin + pin_idx, pins_per_port); + index += pins_per_port; + + pin_idx += pins_per_port; + } + + *size = index; +} + +static void get_mem_cfg_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_MEM_CONFIG_VERSION = 0x21 }; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_MEM_CONFIG; + data[index++] = OCC_CFGDATA_MEM_CONFIG_VERSION; + + /* If OPAL then no "Power Control Default" support */ + + /* Byte 3: Memory Power Control Default */ + data[index++] = 0xFF; + /* Byte 4: Idle Power Memory Power Control */ + data[index++] = 0xFF; + + /* Byte 5: Number of data sets */ + data[index++] = 0; // Monitoring is disabled + + *size = index; +} + +static void add_sensor_id(uint8_t *data, uint16_t *index, uint32_t sensor_id) +{ + data[*index + 0] = sensor_id >> 24 & 0xFF; + data[*index + 1] = sensor_id >> 16 & 0xFF; + data[*index + 2] = sensor_id >> 8 & 0xFF; + data[*index + 3] = sensor_id >> 0 & 0xFF; + *index += 4; +} + +/* + * Sensors IDs listed here are valid for Talos II. Values come from talos.xml + * and may or may not be different for other boards. 
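+ * Per-core temperature and frequency sensor IDs are consecutive; the
+ * chip * MAX_CORES_PER_CHIP + i arithmetic in get_sys_cfg_msg_data()
+ * relies on that.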
+ */ +#define PROC0_CALLOUT_ID 0x08 +#define PROC0_CORE0_TEMP_ID 0x5B +#define PROC0_CORE0_FREQ_ID 0xA0 +#define BACKPLANE_CALLOUT_ID 0x8C +#define APSS_CALLOUT_ID 0x93 +/* Same as Backplane Callout ID */ +#define VRM_VDD_CALLOUT_ID 0x8C +#define VRM_VDD_TEMP_ID 0xFF + +static void get_sys_cfg_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { + OCC_CFGDATA_SYS_CONFIG_VERSION = 0x21, + + /* KVM or OPAL mode + single node */ + OCC_CFGDATA_OPENPOWER_OPALVM = 0x81, + + OCC_CFGDATA_NON_REDUNDANT_PS = 0x02, + OCC_REPORT_THROTTLE_BELOW_NOMINAL = 0x08, + }; + + uint8_t system_type = OCC_CFGDATA_OPENPOWER_OPALVM; + uint16_t index = 0; + int i = 0; + + data[index++] = OCC_CFGDATA_SYS_CONFIG; + data[index++] = OCC_CFGDATA_SYS_CONFIG_VERSION; + + /* System Type */ + + /* ATTR_REPORT_THROTTLE_BELOW_NOMINAL == 0 */ + + /* 0 = OCC report throttling when max frequency lowered below turbo */ + system_type &= ~OCC_REPORT_THROTTLE_BELOW_NOMINAL; + /* Power supply policy is redundant */ + system_type &= ~OCC_CFGDATA_NON_REDUNDANT_PS; + data[index++] = system_type; + + /* Processor Callout Sensor ID */ + add_sensor_id(data, &index, PROC0_CALLOUT_ID + inputs->chip); + + /* Next 24*2 IDs are for core sensors */ + for (i = 0; i < MAX_CORES_PER_CHIP; ++i) { + /* Core Temp Sensor ID */ + add_sensor_id(data, &index, + PROC0_CORE0_TEMP_ID + inputs->chip * MAX_CORES_PER_CHIP + i); + + /* Core Frequency Sensor ID */ + add_sensor_id(data, &index, + PROC0_CORE0_FREQ_ID + inputs->chip * MAX_CORES_PER_CHIP + i); + } + + /* Backplane Callout Sensor ID */ + add_sensor_id(data, &index, BACKPLANE_CALLOUT_ID); + + /* APSS Callout Sensor ID */ + add_sensor_id(data, &index, APSS_CALLOUT_ID); + + /* Format 21 - VRM VDD Callout Sensor ID */ + add_sensor_id(data, &index, VRM_VDD_CALLOUT_ID); + + /* Format 21 - VRM VDD Temperature Sensor ID */ + add_sensor_id(data, &index, VRM_VDD_TEMP_ID); + + *size = index; +} + +static void get_thermal_ctrl_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { + OCC_CFGDATA_TCT_CONFIG_VERSION = 0x20, + + CFGDATA_FRU_TYPE_PROC = 0x00, + CFGDATA_FRU_TYPE_MEMBUF = 0x01, + CFGDATA_FRU_TYPE_DIMM = 0x02, + CFGDATA_FRU_TYPE_VRM = 0x03, + CFGDATA_FRU_TYPE_GPU_CORE = 0x04, + CFGDATA_FRU_TYPE_GPU_MEMORY = 0x05, + CFGDATA_FRU_TYPE_VRM_VDD = 0x06, + + OCC_NOT_DEFINED = 0xFF, + }; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_TCT_CONFIG; + data[index++] = OCC_CFGDATA_TCT_CONFIG_VERSION; + + /* Processor Core Weight, ATTR_OPEN_POWER_PROC_WEIGHT, from talos.xml */ + data[index++] = 9; + + /* Processor Quad Weight, ATTR_OPEN_POWER_QUAD_WEIGHT, from talos.xml */ + data[index++] = 1; + + /* Data sets following (proc, DIMM, etc.), and each will get a FRU type, + DVS temp, error temp and max read timeout */ + data[index++] = 5; + + /* + * Note: Bytes 4 and 5 of each data set represent the PowerVM DVFS and ERROR + * Resending the regular DVFS and ERROR for now. 
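+	 * Each data set below is six bytes: FRU type, DVFS setpoint, error
+	 * setpoint, their two PowerVM counterparts and a read timeout in
+	 * seconds.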
+ */ + + /* Processor */ + data[index++] = CFGDATA_FRU_TYPE_PROC; + data[index++] = 85; // DVFS, ATTR_OPEN_POWER_PROC_DVFS_TEMP_DEG_C, from talos.xml + data[index++] = 95; // ERROR, ATTR_OPEN_POWER_PROC_ERROR_TEMP_DEG_C, from talos.xml + data[index++] = OCC_NOT_DEFINED; // PM_DVFS + data[index++] = OCC_NOT_DEFINED; // PM_ERROR + data[index++] = 5; // ATTR_OPEN_POWER_PROC_READ_TIMEOUT_SEC, from talos.xml + + /* DIMM */ + data[index++] = CFGDATA_FRU_TYPE_DIMM; + data[index++] = 84; // DVFS, ATTR_OPEN_POWER_DIMM_THROTTLE_TEMP_DEG_C, from talos.xml + data[index++] = 84; // ERROR, ATTR_OPEN_POWER_DIMM_ERROR_TEMP_DEG_C, from talos.xml + data[index++] = OCC_NOT_DEFINED; // PM_DVFS + data[index++] = OCC_NOT_DEFINED; // PM_ERROR + data[index++] = 30; // TIMEOUT, ATTR_OPEN_POWER_DIMM_READ_TIMEOUT_SEC, from talos.xml + + /* VRM OT monitoring is disabled, because ATTR_OPEN_POWER_VRM_READ_TIMEOUT_SEC == 0 + (default) */ + + /* GPU Cores */ + data[index++] = CFGDATA_FRU_TYPE_GPU_CORE; + // DVFS + data[index++] = OCC_NOT_DEFINED; + // ERROR, ATTR_OPEN_POWER_GPU_ERROR_TEMP_DEG_C, not set + data[index++] = OCC_NOT_DEFINED; + // PM_DVFS + data[index++] = OCC_NOT_DEFINED; + // PM_ERROR + data[index++] = OCC_NOT_DEFINED; + // TIMEOUT, ATTR_OPEN_POWER_GPU_READ_TIMEOUT_SEC, default + data[index++] = OCC_NOT_DEFINED; + + /* GPU Memory */ + data[index++] = CFGDATA_FRU_TYPE_GPU_MEMORY; + data[index++] = OCC_NOT_DEFINED; // DVFS + // ERROR, ATTR_OPEN_POWER_GPU_MEM_ERROR_TEMP_DEG_C, not set + data[index++] = OCC_NOT_DEFINED; + // PM_DVFS + data[index++] = OCC_NOT_DEFINED; + // PM_ERROR + data[index++] = OCC_NOT_DEFINED; + // TIMEOUT, ATTR_OPEN_POWER_GPU_MEM_READ_TIMEOUT_SEC, not set + data[index++] = OCC_NOT_DEFINED; + + /* VRM Vdd */ + data[index++] = CFGDATA_FRU_TYPE_VRM_VDD; + // DVFS, ATTR_OPEN_POWER_VRM_VDD_DVFS_TEMP_DEG_C, default + data[index++] = OCC_NOT_DEFINED; + // ERROR, ATTR_OPEN_POWER_VRM_VDD_ERROR_TEMP_DEG_C, default + data[index++] = OCC_NOT_DEFINED; + // PM_DVFS + data[index++] = OCC_NOT_DEFINED; + // PM_ERROR + data[index++] = OCC_NOT_DEFINED; + // TIMEOUT, ATTR_OPEN_POWER_VRM_VDD_READ_TIMEOUT_SEC, default + data[index++] = OCC_NOT_DEFINED; + + *size = index; +} + +static void get_power_cap_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_PCAP_CONFIG_VERSION = 0x20 }; + + uint16_t index = 0; + + /* Values of the following attributes were taken from Hostboot's log */ + + /* Minimum HARD Power Cap (ATTR_OPEN_POWER_MIN_POWER_CAP_WATTS) */ + uint16_t min_pcap = 2000; + + /* Minimum SOFT Power Cap (ATTR_OPEN_POWER_SOFT_MIN_PCAP_WATTS) */ + uint16_t soft_pcap = 2000; + + /* Quick Power Drop Power Cap (ATTR_OPEN_POWER_N_BULK_POWER_LIMIT_WATTS) */ + uint16_t qpd_pcap = 2000; + + /* System Maximum Power Cap (ATTR_OPEN_POWER_N_PLUS_ONE_HPC_BULK_POWER_LIMIT_WATTS) */ + uint16_t max_pcap = 3000; + + data[index++] = OCC_CFGDATA_PCAP_CONFIG; + data[index++] = OCC_CFGDATA_PCAP_CONFIG_VERSION; + + memcpy(&data[index], &soft_pcap, 2); + index += 2; + + memcpy(&data[index], &min_pcap, 2); + index += 2; + + memcpy(&data[index], &max_pcap, 2); + index += 2; + + memcpy(&data[index], &qpd_pcap, 2); + index += 2; + + *size = index; +} + +static void get_avs_bus_cfg_msg_data(const struct occ_cfg_inputs *inputs, + uint8_t *data, uint16_t *size) +{ + enum { OCC_CFGDATA_AVSBUS_CONFIG_VERSION = 0x01 }; + + /* ATTR_NO_APSS_PROC_POWER_VCS_VIO_WATTS, from talos.xml */ + const uint16_t power_adder = 19; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_AVSBUS_CONFIG; + 
data[index++] = OCC_CFGDATA_AVSBUS_CONFIG_VERSION; + data[index++] = 0; // Vdd Bus, ATTR_VDD_AVSBUS_BUSNUM + data[index++] = 0; // Vdd Rail Sel, ATTR_VDD_AVSBUS_RAIL + data[index++] = 0xFF; // reserved + data[index++] = 0xFF; // reserved + data[index++] = 1; // Vdn Bus, ATTR_VDN_AVSBUS_BUSNUM, from talos.xml + data[index++] = 0; // Vdn Rail sel, ATTR_VDN_AVSBUS_RAIL, from talos.xml + + data[index++] = (power_adder >> 8) & 0xFF; + data[index++] = power_adder & 0xFF; + + /* ATTR_VDD_CURRENT_OVERFLOW_WORKAROUND_ENABLE == 0 */ + + *size = index; +} + +static void get_power_data(const struct occ_cfg_inputs *inputs, + uint16_t *power_max, uint16_t *power_drop) +{ + const struct voltage_bucket_data *bucket = get_voltage_data(inputs->chip); + + /* All processor chips (do not have to be functional) */ + const uint8_t num_procs = 2; // from Hostboot log + + const uint16_t proc_socket_power = 250; // ATTR_PROC_SOCKET_POWER_WATTS, default + const uint16_t misc_power = 0; // ATTR_MISC_SYSTEM_COMPONENTS_MAX_POWER_WATTS, default + + const uint16_t mem_power_min_throttles = 36; // from Hostboot log + const uint16_t mem_power_max_throttles = 23; // from Hostboot log + + /* + * Calculate Total non-GPU maximum power (Watts): + * Maximum system power excluding GPUs when CPUs are at maximum frequency + * (ultra turbo) and memory at maximum power (least throttled) plus + * everything else (fans...) excluding GPUs. + */ + *power_max = proc_socket_power * num_procs; + *power_max += mem_power_min_throttles + misc_power; + + OCCPstateParmBlock *oppb = (void *)inputs->homer->ppmr.occ_parm_block; + uint16_t min_freq_mhz = oppb->frequency_min_khz / 1000; + const uint16_t mhz_per_watt = 28; // ATTR_PROC_MHZ_PER_WATT, from talos.xml + /* Drop is always calculated from Turbo to Min (not ultra) */ + uint32_t proc_drop = (bucket->turbo.freq - min_freq_mhz) / mhz_per_watt; + proc_drop *= num_procs; + const uint16_t memory_drop = mem_power_min_throttles - mem_power_max_throttles; + + *power_drop = proc_drop + memory_drop; +} + +static void get_gpu_msg_data(const struct occ_cfg_inputs *inputs, uint8_t *data, uint16_t *size) +{ + enum { + OCC_CFGDATA_GPU_CONFIG_VERSION = 0x01, + MAX_GPUS = 3, + }; + + uint16_t power_max = 0; + uint16_t power_drop = 0; + + uint16_t index = 0; + + data[index++] = OCC_CFGDATA_GPU_CONFIG; + data[index++] = OCC_CFGDATA_GPU_CONFIG_VERSION; + + get_power_data(inputs, &power_max, &power_drop); + + memcpy(&data[index], &power_max, 2); // Total non-GPU max power (W) + index += 2; + + memcpy(&data[index], &power_drop, 2); // Total proc/mem power drop (W) + index += 2; + data[index++] = 0; // reserved + data[index++] = 0; // reserved + + /* No sensors ID. Might require OBus or just be absent. 
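+	   All three per-GPU sensor ID sets below are therefore left zeroed.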
*/ + uint32_t gpu_func_sensors[MAX_GPUS] = {0}; + uint32_t gpu_temp_sensors[MAX_GPUS] = {0}; + uint32_t gpu_memtemp_sensors[MAX_GPUS] = {0}; + + /* GPU0 */ + memcpy(&data[index], &gpu_temp_sensors[0], 4); + index += 4; + memcpy(&data[index], &gpu_memtemp_sensors[0], 4); + index += 4; + memcpy(&data[index], &gpu_func_sensors[0], 4); + index += 4; + + /* GPU1 */ + memcpy(&data[index], &gpu_temp_sensors[1], 4); + index += 4; + memcpy(&data[index], &gpu_memtemp_sensors[1], 4); + index += 4; + memcpy(&data[index], &gpu_func_sensors[1], 4); + index += 4; + + /* GPU2 */ + memcpy(&data[index], &gpu_temp_sensors[2], 4); + index += 4; + memcpy(&data[index], &gpu_memtemp_sensors[2], 4); + index += 4; + memcpy(&data[index], &gpu_func_sensors[2], 4); + index += 4; + + *size = index; +} + +static void send_occ_config_data(uint8_t chip, struct homer_st *homer) +{ + enum { + TO_ALL = 0, /* to_master_only = false */ + TO_MASTER = 1, /* to_master_only = true */ + }; + + /* + * Order in which these are sent is important! + * Not every order works. + */ + struct occ_cfg_info cfg_info[] = { + { "System config", &get_sys_cfg_msg_data, TO_ALL }, + { "APSS config", &get_apss_msg_data, TO_ALL }, + { "OCC role", &get_occ_role_msg_data, TO_ALL }, + { "Frequency points", &get_freq_point_msg_data, TO_MASTER }, + { "Memory config", &get_mem_cfg_msg_data, TO_ALL }, + { "Power cap", &get_power_cap_msg_data, TO_MASTER }, + { "Thermal control", &get_thermal_ctrl_msg_data, TO_ALL }, + { "AVS", &get_avs_bus_cfg_msg_data, TO_ALL }, + { "GPU", &get_gpu_msg_data, TO_ALL }, + }; + + const struct occ_cfg_inputs inputs = { + .homer = homer, + .chip = chip, + .is_master_occ = (chip == 0), + }; + + uint8_t i; + + for (i = 0; i < ARRAY_SIZE(cfg_info); ++i) { + /* All our messages are short */ + uint8_t data[256]; + uint16_t data_len = 0; + uint32_t response_len = 0; + + /* Poll is sent between configuration packets to flush errors */ + struct occ_poll_response poll_response; + + /* + * Certain kinds of configuration data is broadcasted to slave + * OCCs by the master and must not be sent to them directly + */ + if (cfg_info[i].to_master_only && !inputs.is_master_occ) + continue; + + cfg_info[i].func(&inputs, data, &data_len); + if (data_len > sizeof(data)) + die("Buffer for OCC data is too small!\n"); + + send_occ_cmd(chip, homer, OCC_CMD_SETUP_CFG_DATA, data, data_len, + NULL, &response_len); + poll_occ(chip, homer, /*flush_all_errors=*/false, &poll_response); + } +} + +static void send_occ_user_power_cap(uint8_t chip, struct homer_st *homer) +{ + /* No power limit */ + const uint8_t data[2] = { 0x00, 0x00 }; + uint32_t response_len = 0; + send_occ_cmd(chip, homer, OCC_CMD_SET_POWER_CAP, data, sizeof(data), + NULL, &response_len); +} + +static void wait_for_occ_status(uint8_t chip, struct homer_st *homer, uint8_t status_bit) +{ + enum { + MAX_POLLS = 200, + DELAY_BETWEEN_POLLS_US = 50000, + }; + + uint8_t num_polls = 0; + struct occ_poll_response poll_response; + + for (num_polls = 0; num_polls < MAX_POLLS; ++num_polls) { + poll_occ(chip, homer, /*flush_all_errors=*/false, &poll_response); + if (poll_response.status & status_bit) + break; + + if (poll_response.requested_cfg != 0x00) { + die("OCC requests 0x%02x configuration data\n", + poll_response.requested_cfg); + } + + if (num_polls < MAX_POLLS) + udelay(DELAY_BETWEEN_POLLS_US); + } + + if (num_polls == MAX_POLLS) + die("Failed to wait until OCC has reached state 0x%02x\n", status_bit); +} + +static void set_occ_state(uint8_t chip, struct homer_st *homer, uint8_t state) +{ + 
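+	/* Flush stale errors, request the new state, then poll to confirm
+	   that the OCC actually reached it */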
struct occ_poll_response poll_response;
+
+ /* Fields: version, state, reserved */
+ const uint8_t data[3] = { 0x00, state, 0x00 };
+ uint32_t response_len = 0;
+
+ /* Send poll cmd to confirm communication has been established and flush old errors */
+ poll_occ(chip, homer, /*flush_all_errors=*/true, &poll_response);
+
+ /* Try to switch to a new state */
+ send_occ_cmd(chip, homer, OCC_CMD_SET_STATE, data, sizeof(data), NULL, &response_len);
+
+ /* Send poll to query the state of all OCCs and flush any errors */
+ poll_occ(chip, homer, /*flush_all_errors=*/true, &poll_response);
+
+ if (poll_response.state != state)
+ die("State of OCC is 0x%02x instead of 0x%02x.\n",
+ poll_response.state, state);
+}
+
+static void set_occ_active_state(uint8_t chip, struct homer_st *homer)
+{
+ enum {
+ OCC_STATUS_ACTIVE_READY = 0x01,
+ OCC_STATE_ACTIVE = 0x03,
+ };
+
+ wait_for_occ_status(chip, homer, OCC_STATUS_ACTIVE_READY);
+ set_occ_state(chip, homer, OCC_STATE_ACTIVE);
+}
+
+void activate_occ(uint8_t chips, struct homer_st *homers)
+{
+ /* Make sure OCCs are ready for communication */
+ for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
+ if (chips & (1 << chip))
+ wait_for_occ_checkpoint(chip);
+ }
+
+ /* Send an initial poll to all OCCs to establish communication */
+ for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
+ if (chips & (1 << chip)) {
+ struct occ_poll_response poll_response;
+ poll_occ(chip, &homers[chip], /*flush_all_errors=*/false,
+ &poll_response);
+ }
+ }
+
+ /* Send config data to each OCC */
+ for (uint8_t chip = 0; chip < MAX_CHIPS; chip++) {
+ if (chips & (1 << chip))
+ send_occ_config_data(chip, &homers[chip]);
+ }
+
+ /* Set the User PCAP (sent only to master OCC) */
+ send_occ_user_power_cap(/*chip=*/0, &homers[0]);
+
+ /* Switch OCCs to the active state (sent only to master OCC) */
+ set_occ_active_state(/*chip=*/0, &homers[0]);
+
+ /*
+ * Hostboot sets active sensors for all OCCs via IPMI here, so BMC can
+ * start communication with them. However, in practice this seems to
+ * make no difference, and Hostboot appears to have a bug: it uses a
+ * wrong operation code when it tries to change a sensor's state,
+ * likely turning the interaction into a no-op.
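+ *
+ * At this point every functional OCC has been polled, configured, and,
+ * via the master OCC, power-capped and switched to the active state.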
+ */ +} + +void pm_occ_fir_init(uint8_t chip) +{ + enum { + PERV_TP_OCC_SCOM_OCCLFIR = 0x01010800, + + /* Bits of OCC LFIR */ + OCC_FW0 = 0, + OCC_FW1 = 1, + CME_ERR_NOTIFY = 2, + STOP_RCV_NOTIFY_PRD = 3, + OCC_HB_NOTIFY = 4, + GPE0_WD_TIMEOUT = 5, + GPE1_WD_TIMEOUT = 6, + GPE2_WD_TIMEOUT = 7, + GPE3_WD_TIMEOUT = 8, + GPE0_ERR = 9, + GPE1_ERR = 10, + GPE2_ERR = 11, + GPE3_ERR = 12, + OCB_ERR = 13, + SRAM_UE = 14, + SRAM_CE = 15, + SRAM_READ_ERR = 16, + SRAM_WRITE_ERR = 17, + SRAM_DATAOUT_PERR = 18, + SRAM_OCI_WDATA_PARITY = 19, + SRAM_OCI_BE_PARITY_ERR = 20, + SRAM_OCI_ADDR_PARITY_ERR = 21, + GPE0_HALTED = 22, + GPE1_HALTED = 23, + GPE2_HALTED = 24, + GPE3_HALTED = 25, + EXT_TRAP = 26, + PPC405_CORE_RESET = 27, + PPC405_CHIP_RESET = 28, + PPC405_SYS_RESET = 29, + PPC405_WAIT_STATE = 30, + PPC405_DBGSTOPACK = 31, + OCB_DB_OCI_TIMEOUT = 32, + OCB_DB_OCI_RDATA_PARITY = 33, + OCB_DB_OCI_SLVERR = 34, + OCB_PIB_ADDR_PARITY_ERR = 35, + OCB_DB_PIB_DATA_PARITY_ERR = 36, + OCB_IDC0_ERR = 37, + OCB_IDC1_ERR = 38, + OCB_IDC2_ERR = 39, + OCB_IDC3_ERR = 40, + SRT_FSM_ERR = 41, + JTAGACC_ERR = 42, + SPARE_ERR_38 = 43, + C405_ECC_UE = 44, + C405_ECC_CE = 45, + C405_OCI_MC_CHK = 46, + SRAM_SPARE_DIRERR0 = 47, + SRAM_SPARE_DIRERR1 = 48, + SRAM_SPARE_DIRERR2 = 49, + SRAM_SPARE_DIRERR3 = 50, + GPE0_OCISLV_ERR = 51, + GPE1_OCISLV_ERR = 52, + GPE2_OCISLV_ERR = 53, + GPE3_OCISLV_ERR = 54, + C405ICU_M_TIMEOUT = 55, + C405DCU_M_TIMEOUT = 56, + OCC_CMPLX_FAULT = 57, + OCC_CMPLX_NOTIFY = 58, + SPARE_59 = 59, + SPARE_60 = 60, + SPARE_61 = 61, + FIR_PARITY_ERR_DUP = 62, + FIR_PARITY_ERR = 63, + }; + + const uint64_t action0_bits = 0; + const uint64_t action1_bits = + PPC_BIT(C405_ECC_CE) | PPC_BIT(C405_OCI_MC_CHK) + | PPC_BIT(C405DCU_M_TIMEOUT) | PPC_BIT(GPE0_ERR) + | PPC_BIT(GPE0_OCISLV_ERR) | PPC_BIT(GPE1_ERR) + | PPC_BIT(GPE1_OCISLV_ERR) | PPC_BIT(GPE2_OCISLV_ERR) + | PPC_BIT(GPE3_OCISLV_ERR) | PPC_BIT(JTAGACC_ERR) + | PPC_BIT(OCB_DB_OCI_RDATA_PARITY) | PPC_BIT(OCB_DB_OCI_SLVERR) + | PPC_BIT(OCB_DB_OCI_TIMEOUT) | PPC_BIT(OCB_DB_PIB_DATA_PARITY_ERR) + | PPC_BIT(OCB_IDC0_ERR) | PPC_BIT(OCB_IDC1_ERR) + | PPC_BIT(OCB_IDC2_ERR) | PPC_BIT(OCB_IDC3_ERR) + | PPC_BIT(OCB_PIB_ADDR_PARITY_ERR) | PPC_BIT(OCC_CMPLX_FAULT) + | PPC_BIT(OCC_CMPLX_NOTIFY) | PPC_BIT(SRAM_CE) + | PPC_BIT(SRAM_DATAOUT_PERR) | PPC_BIT(SRAM_OCI_ADDR_PARITY_ERR) + | PPC_BIT(SRAM_OCI_BE_PARITY_ERR) | PPC_BIT(SRAM_OCI_WDATA_PARITY) + | PPC_BIT(SRAM_READ_ERR) | PPC_BIT(SRAM_SPARE_DIRERR0) + | PPC_BIT(SRAM_SPARE_DIRERR1) | PPC_BIT(SRAM_SPARE_DIRERR2) + | PPC_BIT(SRAM_SPARE_DIRERR3) | PPC_BIT(SRAM_UE) + | PPC_BIT(SRAM_WRITE_ERR) | PPC_BIT(SRT_FSM_ERR) + | PPC_BIT(STOP_RCV_NOTIFY_PRD) | PPC_BIT(C405_ECC_UE); + + uint64_t mask = read_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_INCR); + mask &= ~action0_bits; + mask &= ~action1_bits; + + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR, 0); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION0_INCR, action0_bits); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + ACTION1_INCR, action1_bits); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WOR_INCR, mask); + write_scom(chip, PERV_TP_OCC_SCOM_OCCLFIR + MASK_WAND_INCR, mask); +} + +void pm_pba_fir_init(uint8_t chip) +{ + enum { + PU_PBAFIR = 0x05012840, + + /* Bits of PBA LFIR. 
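+ The action0/action1/mask handling below follows the same pattern as
+ the OCC LFIR setup above.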
*/ + PBAFIR_OCI_APAR_ERR = 0, + PBAFIR_PB_RDADRERR_FW = 1, + PBAFIR_PB_RDDATATO_FW = 2, + PBAFIR_PB_SUE_FW = 3, + PBAFIR_PB_UE_FW = 4, + PBAFIR_PB_CE_FW = 5, + PBAFIR_OCI_SLAVE_INIT = 6, + PBAFIR_OCI_WRPAR_ERR = 7, + PBAFIR_SPARE = 8, + PBAFIR_PB_UNEXPCRESP = 9, + PBAFIR_PB_UNEXPDATA = 10, + PBAFIR_PB_PARITY_ERR = 11, + PBAFIR_PB_WRADRERR_FW = 12, + PBAFIR_PB_BADCRESP = 13, + PBAFIR_PB_ACKDEAD_FW_RD = 14, + PBAFIR_PB_CRESPTO = 15, + PBAFIR_BCUE_SETUP_ERR = 16, + PBAFIR_BCUE_PB_ACK_DEAD = 17, + PBAFIR_BCUE_PB_ADRERR = 18, + PBAFIR_BCUE_OCI_DATERR = 19, + PBAFIR_BCDE_SETUP_ERR = 20, + PBAFIR_BCDE_PB_ACK_DEAD = 21, + PBAFIR_BCDE_PB_ADRERR = 22, + PBAFIR_BCDE_RDDATATO_ERR = 23, + PBAFIR_BCDE_SUE_ERR = 24, + PBAFIR_BCDE_UE_ERR = 25, + PBAFIR_BCDE_CE = 26, + PBAFIR_BCDE_OCI_DATERR = 27, + PBAFIR_INTERNAL_ERR = 28, + PBAFIR_ILLEGAL_CACHE_OP = 29, + PBAFIR_OCI_BAD_REG_ADDR = 30, + PBAFIR_AXPUSH_WRERR = 31, + PBAFIR_AXRCV_DLO_ERR = 32, + PBAFIR_AXRCV_DLO_TO = 33, + PBAFIR_AXRCV_RSVDATA_TO = 34, + PBAFIR_AXFLOW_ERR = 35, + PBAFIR_AXSND_DHI_RTYTO = 36, + PBAFIR_AXSND_DLO_RTYTO = 37, + PBAFIR_AXSND_RSVTO = 38, + PBAFIR_AXSND_RSVERR = 39, + PBAFIR_PB_ACKDEAD_FW_WR = 40, + PBAFIR_RESERVED_41 = 41, + PBAFIR_RESERVED_42 = 42, + PBAFIR_RESERVED_43 = 43, + PBAFIR_FIR_PARITY_ERR2 = 44, + PBAFIR_FIR_PARITY_ERR = 45, + }; + + const uint64_t action0_bits = 0; + const uint64_t action1_bits = + PPC_BIT(PBAFIR_OCI_APAR_ERR) | PPC_BIT(PBAFIR_PB_UE_FW) + | PPC_BIT(PBAFIR_PB_CE_FW) | PPC_BIT(PBAFIR_OCI_SLAVE_INIT) + | PPC_BIT(PBAFIR_OCI_WRPAR_ERR) | PPC_BIT(PBAFIR_PB_UNEXPCRESP) + | PPC_BIT(PBAFIR_PB_UNEXPDATA) | PPC_BIT(PBAFIR_PB_PARITY_ERR) + | PPC_BIT(PBAFIR_PB_WRADRERR_FW) | PPC_BIT(PBAFIR_PB_BADCRESP) + | PPC_BIT(PBAFIR_PB_CRESPTO) | PPC_BIT(PBAFIR_INTERNAL_ERR) + | PPC_BIT(PBAFIR_ILLEGAL_CACHE_OP) | PPC_BIT(PBAFIR_OCI_BAD_REG_ADDR); + + uint64_t mask = PPC_BITMASK(0, 63); + mask &= ~action0_bits; + mask &= ~action1_bits; + + write_scom(chip, PU_PBAFIR, 0); + write_scom(chip, PU_PBAFIR + ACTION0_INCR, action0_bits); + write_scom(chip, PU_PBAFIR + ACTION1_INCR, action1_bits); + write_scom(chip, PU_PBAFIR + MASK_WOR_INCR, mask); + write_scom(chip, PU_PBAFIR + MASK_WAND_INCR, mask); +} diff --git a/src/soc/ibm/power9/ops.h b/src/soc/ibm/power9/ops.h new file mode 100644 index 00000000000..e66df58a90a --- /dev/null +++ b/src/soc/ibm/power9/ops.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef CPU_PPC64_OPS_H +#define CPU_PPC64_OPS_H + +#include + +static const uint32_t ATTN_OP = 0x00000200; +static const uint32_t BLR_OP = 0x4E800020; +static const uint32_t BR_OP = 0x48000000; +static const uint32_t BCCTR_OP = 0x4C000420; +static const uint32_t ORI_OP = 0x60000000; +static const uint32_t LIS_OP = 0x3C000000; +static const uint32_t MTSPR_OP = 0x7C0003A6; +static const uint32_t SKIP_SPR_REST_INST = 0x4800001C; +static const uint32_t MR_R0_TO_R10_OP = 0x7C0A0378; +static const uint32_t MR_R0_TO_R21_OP = 0x7C150378; +static const uint32_t MR_R0_TO_R9_OP = 0x7C090378; +static const uint32_t MTLR_R30_OP = 0x7FC803A6; +static const uint32_t MFLR_R30_OP = 0x7FC802A6; + +static inline uint32_t ppc_lis(uint16_t rt, uint16_t data) +{ + uint32_t inst; + inst = LIS_OP; + inst |= rt << (31 - 10); + inst |= data; + return inst; +} + +static inline uint32_t ppc_ori(uint16_t rs, uint16_t ra, uint16_t data) +{ + uint32_t inst; + inst = ORI_OP; + inst |= rs << (31 - 10); + inst |= ra << (31 - 15); + inst |= data; + return inst; +} + +static inline uint32_t ppc_mtspr(uint16_t rs, uint16_t spr) +{ + uint32_t temp 
= ((spr & 0x03FF) << (31 - 20)); + + uint32_t inst; + inst = MTSPR_OP; + inst |= rs << (31 - 10); + inst |= (temp & 0x0000F800) << 5; // Perform swizzle + inst |= (temp & 0x001F0000) >> 5; // Perform swizzle + return inst; +} + +static inline uint32_t ppc_bctr(void) +{ + uint32_t inst; + inst = BCCTR_OP; + inst |= 20 << (31 - 10); // BO + return inst; +} + +static inline uint32_t ppc_b(uint32_t target_addr) +{ + uint32_t inst; + inst = BR_OP; + inst |= (target_addr & 0x03FFFFFF); + return inst; +} + +#endif /* CPU_PPC64_OPS_H */ diff --git a/src/soc/ibm/power9/pci.h b/src/soc/ibm/power9/pci.h new file mode 100644 index 00000000000..6f90ca40130 --- /dev/null +++ b/src/soc/ibm/power9/pci.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_PCI_H +#define __SOC_IBM_POWER9_PCI_H + +#define MAX_PEC_PER_PROC 3 +#define MAX_PHB_PER_PROC 6 + +/* Enum giving bitmask values for enabled PHBs */ +enum phb_active_mask { + PHB_MASK_NA = 0x00, // Sentinel mask (loop terminations) + PHB0_MASK = 0x80, // PHB0 enabled + PHB1_MASK = 0x40, // PHB1 enabled + PHB2_MASK = 0x20, // PHB2 enabled + PHB3_MASK = 0x10, // PHB3 enabled + PHB4_MASK = 0x08, // PHB4 enabled + PHB5_MASK = 0x04, // PHB5 enabled +}; + +struct pci_info { + /* Combination of values from phb_active_mask enumeration */ + uint8_t phb_active_mask; + + /* + * Mask of functional PHBs for each PEC, corresponds to + * ATTR_PROC_PCIE_IOVALID_ENABLE in Hostboot. + * + * LSB is the PHB with the highest number for the given PEC. + */ + uint8_t iovalid_enable[MAX_PEC_PER_PROC]; +}; + +#endif /* __SOC_IBM_POWER9_PCI_H */ diff --git a/src/soc/ibm/power9/powerbus.c b/src/soc/ibm/power9/powerbus.c new file mode 100644 index 00000000000..68ce0178b2d --- /dev/null +++ b/src/soc/ibm/power9/powerbus.c @@ -0,0 +1,249 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include + +#include "scratch.h" + +#define EPSILON_MAX_VALUE 0xFFFFFFFF + +#define EPS_GUARDBAND 20 + +/* From src/import/chips/p9/procedures/hwp/nest/p9_fbc_eff_config.C */ +/* LE epsilon (2 chips per-group) */ +static const uint32_t EPSILON_R_T0_LE[] = { 7, 7, 8, 8, 10, 22 }; +static const uint32_t EPSILON_R_T1_LE[] = { 7, 7, 8, 8, 10, 22 }; +static const uint32_t EPSILON_R_T2_LE[] = { 67, 69, 71, 74, 79, 103 }; +static const uint32_t EPSILON_W_T0_LE[] = { 0, 0, 0, 0, 0, 5 }; +static const uint32_t EPSILON_W_T1_LE[] = { 15, 16, 17, 19, 21, 33 }; + +/* See get_first_valid_pdV_pbFreq() in Hostboot */ + +static bool read_voltage_data(uint8_t chip, struct powerbus_cfg *cfg) +{ + int i = 0; + const struct voltage_kwd *voltage = NULL; + + /* + * ATTR_FREQ_PB_MHZ + * + * It's equal to the first non-zero PowerBus frequency, unless its + * value is fixed for the platform, which is the case for Talos II. 
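+ * This is why pb_freq is simply hardcoded to 1866 MHz below instead of
+ * being read back from the #V buckets.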
+ */
+ const uint32_t pb_freq = 1866;
+ /* ATTR_FREQ_CORE_CEILING_MHZ, equal to the minimum of turbo frequencies */
+ uint32_t freq_ceiling = 0;
+ /* ATTR_FREQ_CORE_FLOOR_MHZ, equal to the maximum of powersave frequencies */
+ uint32_t freq_floor = 0;
+
+ /* Using LRP0 because frequencies are the same in all LRP records */
+ voltage = mvpd_get_voltage_data(chip, /*lrp=*/0);
+
+ for (i = 0; i < VOLTAGE_BUCKET_COUNT; ++i) {
+ const struct voltage_bucket_data *bucket = &voltage->buckets[i];
+ if (bucket->id == 0)
+ continue;
+
+ if (bucket->powersave.freq != 0 &&
+ (freq_floor == 0 || bucket->powersave.freq > freq_floor)) {
+ freq_floor = bucket->powersave.freq;
+ }
+
+ if (bucket->turbo.freq != 0 &&
+ (freq_ceiling == 0 || bucket->turbo.freq < freq_ceiling)) {
+ freq_ceiling = bucket->turbo.freq;
+ }
+ }
+
+ cfg->fabric_freq = pb_freq;
+ cfg->freq_core_floor = freq_floor;
+ cfg->freq_core_ceiling = freq_ceiling;
+
+ return true;
+}
+
+static bool calculate_frequencies(struct powerbus_cfg *cfg)
+{
+ const uint32_t pb_freq = cfg->fabric_freq;
+ const uint32_t freq_floor = cfg->freq_core_floor;
+ const uint32_t freq_ceiling = cfg->freq_core_ceiling;
+
+ enum FABRIC_CORE_FLOOR_RATIO floor_ratio;
+ enum FABRIC_CORE_CEILING_RATIO ceiling_ratio;
+
+ /* breakpoint ratio: core floor 4.0, pb 2.0 (cache floor :: pb = 8/8) */
+ if (freq_floor >= (2 * pb_freq)) {
+ floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_8_8;
+ /* breakpoint ratio: core floor 3.5, pb 2.0 (cache floor :: pb = 7/8) */
+ } else if ((4 * freq_floor) >= (7 * pb_freq)) {
+ floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_7_8;
+ /* breakpoint ratio: core floor 3.0, pb 2.0 (cache floor :: pb = 6/8) */
+ } else if ((2 * freq_floor) >= (3 * pb_freq)) {
+ floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_6_8;
+ /* breakpoint ratio: core floor 2.5, pb 2.0 (cache floor :: pb = 5/8) */
+ } else if ((4 * freq_floor) >= (5 * pb_freq)) {
+ floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_5_8;
+ /* breakpoint ratio: core floor 2.0, pb 2.0 (cache floor :: pb = 4/8) */
+ } else if (freq_floor >= pb_freq) {
+ floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_4_8;
+ /* breakpoint ratio: core floor 1.0, pb 2.0 (cache floor :: pb = 2/8) */
+ } else if ((2 * freq_floor) >= pb_freq) {
+ floor_ratio = FABRIC_CORE_FLOOR_RATIO_RATIO_2_8;
+ } else {
+ printk(BIOS_ERR, "Unsupported core floor/PB frequency ratio = (%d/%d)\n",
+ freq_floor, pb_freq);
+ return false;
+ }
+
+ /* breakpoint ratio: core ceiling 4.0, pb 2.0 (cache ceiling :: pb = 8/8) */
+ if (freq_ceiling >= (2 * pb_freq)) {
+ ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_8_8;
+ /* breakpoint ratio: core ceiling 3.5, pb 2.0 (cache ceiling :: pb = 7/8) */
+ } else if ((4 * freq_ceiling) >= (7 * pb_freq)) {
+ ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_7_8;
+ /* breakpoint ratio: core ceiling 3.0, pb 2.0 (cache ceiling :: pb = 6/8) */
+ } else if ((2 * freq_ceiling) >= (3 * pb_freq)) {
+ ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_6_8;
+ /* breakpoint ratio: core ceiling 2.5, pb 2.0 (cache ceiling :: pb = 5/8) */
+ } else if ((4 * freq_ceiling) >= (5 * pb_freq)) {
+ ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_5_8;
+ /* breakpoint ratio: core ceiling 2.0, pb 2.0 (cache ceiling :: pb = 4/8) */
+ } else if (freq_ceiling >= pb_freq) {
+ ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_4_8;
+ /* breakpoint ratio: core ceiling 1.0, pb 2.0 (cache ceiling :: pb = 2/8) */
+ } else if ((2 * freq_ceiling) >= pb_freq) {
+ ceiling_ratio = FABRIC_CORE_CEILING_RATIO_RATIO_2_8;
+ } else {
+ printk(BIOS_ERR, "Unsupported core 
ceiling/PB frequency ratio = (%d/%d)\n", + freq_ceiling, pb_freq); + return false; + } + + cfg->core_floor_ratio = floor_ratio; + cfg->core_ceiling_ratio = ceiling_ratio; + return true; +} + +static void config_guardband_epsilon(uint8_t gb_percentage, uint32_t *target_value) +{ + uint32_t delta = (*target_value * gb_percentage) / 100; + delta += ((*target_value * gb_percentage) % 100) ? 1 : 0; + + /* Clamp to maximum value if necessary */ + if (delta > (EPSILON_MAX_VALUE - *target_value)) { + printk(BIOS_DEBUG, "Guardband application generated out-of-range target value," + " clamping to maximum value!\n"); + *target_value = EPSILON_MAX_VALUE; + } else { + *target_value += delta; + } +} + +static void dump_epsilons(struct powerbus_cfg *cfg) +{ + uint32_t i; + + for (i = 0; i < NUM_EPSILON_READ_TIERS; i++) + printk(BIOS_DEBUG, " R_T[%d] = %d\n", i, cfg->eps_r[i]); + + for (i = 0; i < NUM_EPSILON_WRITE_TIERS; i++) + printk(BIOS_DEBUG, " W_T[%d] = %d\n", i, cfg->eps_w[i]); +} + +static void calculate_epsilons(struct powerbus_cfg *cfg) +{ + const enum FABRIC_CORE_FLOOR_RATIO floor_ratio = cfg->core_floor_ratio; + const enum FABRIC_CORE_CEILING_RATIO ceiling_ratio = cfg->core_ceiling_ratio; + const uint32_t pb_freq = cfg->fabric_freq; + const uint32_t freq_ceiling = cfg->freq_core_ceiling; + + uint32_t *eps_r = cfg->eps_r; + uint32_t *eps_w = cfg->eps_w; + + uint32_t i; + + uint64_t scratch_reg6 = read_scom(0, MBOX_SCRATCH_REG1 + 5); + /* ATTR_PROC_FABRIC_PUMP_MODE, it's either node or group pump mode */ + bool node_pump_mode = !(scratch_reg6 & PPC_BIT(MBOX_SCRATCH_REG6_GROUP_PUMP_MODE)); + + /* Assuming that ATTR_PROC_EPS_TABLE_TYPE = EPS_TYPE_LE in talos.xml is always correct */ + + eps_r[0] = EPSILON_R_T0_LE[floor_ratio]; + + if (node_pump_mode) + eps_r[1] = EPSILON_R_T1_LE[floor_ratio]; + else + eps_r[1] = EPSILON_R_T0_LE[floor_ratio]; + + eps_r[2] = EPSILON_R_T2_LE[floor_ratio]; + + eps_w[0] = EPSILON_W_T0_LE[floor_ratio]; + eps_w[1] = EPSILON_W_T1_LE[floor_ratio]; + + /* Dump base epsilon values */ + printk(BIOS_DEBUG, "Base epsilon values read from table:\n"); + dump_epsilons(cfg); + + /* Scale base epsilon values if core is running 2x nest frequency */ + if (ceiling_ratio == FABRIC_CORE_CEILING_RATIO_RATIO_8_8) { + uint8_t scale_percentage = 100 * freq_ceiling / (2 * pb_freq); + if (scale_percentage < 100) + die("scale_percentage is too small!"); + scale_percentage -= 100; + + printk(BIOS_DEBUG, "Scaling based on ceiling frequency\n"); + + for (i = 0; i < NUM_EPSILON_READ_TIERS; i++) + config_guardband_epsilon(scale_percentage, &eps_r[i]); + + for (i = 0; i < NUM_EPSILON_WRITE_TIERS; i++) + config_guardband_epsilon(scale_percentage, &eps_w[i]); + } + + for (i = 0; i < NUM_EPSILON_READ_TIERS; i++) + config_guardband_epsilon(EPS_GUARDBAND, &eps_r[i]); + + for (i = 0; i < NUM_EPSILON_WRITE_TIERS; i++) + config_guardband_epsilon(EPS_GUARDBAND, &eps_w[i]); + + /* Dump final epsilon values */ + printk(BIOS_DEBUG, "Scaled epsilon values based on %s%d percent guardband:\n", + (EPS_GUARDBAND >= 0 ? 
"+" : "-"), EPS_GUARDBAND); + dump_epsilons(cfg); + + /* + * Check relationship of epsilon counters: + * read tier values are strictly increasing + * write tier values are strictly increasing + */ + if (eps_r[0] > eps_r[1] || eps_r[1] > eps_r[2] || eps_w[0] > eps_w[1]) + printk(BIOS_WARNING, "Invalid relationship between base epsilon values\n"); +} + +const struct powerbus_cfg *powerbus_cfg(uint8_t chip) +{ + static struct powerbus_cfg cfg[2]; + static bool init_done[2]; + + if (chip >= MAX_CHIPS) + die("Unsupported CPU number for powerbus config query: %d.\n", chip); + + if (init_done[chip]) + return &cfg[chip]; + + if (!read_voltage_data(chip, &cfg[chip])) + die("Failed to read voltage data"); + + if (!calculate_frequencies(&cfg[chip])) + die("Incorrect core or PowerBus frequency"); + + calculate_epsilons(&cfg[chip]); + + init_done[chip] = true; + return &cfg[chip]; +} diff --git a/src/soc/ibm/power9/pstates.c b/src/soc/ibm/power9/pstates.c new file mode 100644 index 00000000000..e8db9e3c6a1 --- /dev/null +++ b/src/soc/ibm/power9/pstates.c @@ -0,0 +1,1076 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "homer.h" +#include "wof.h" +#include +#include +#include +#include +#include +#include +#include // memcpy +#include +#include + +#define IDDQ_MEASUREMENTS 6 +#define MAX_UT_PSTATES 64 // Oversized +#define FREQ_STEP_KHZ 16666 + +#define SYSTEM_VFRT_SIZE 128 + +#ifndef _BIG_ENDIAN +#error "_BIG_ENDIAN not defined" +#endif + +/* Comes from p9_resclk_defines.H */ +static const int resclk_freq_mhz[] = + {0, 1500, 2000, 3000, 3400, 3700, 3900, 4100}; + +static ResonantClockingSetup resclk = +{ + { }, // pstates - filled by code + { 3, 3, 21, 23, 24, 22, 20, 19}, // idx + { + {0x2000}, {0x3000}, {0x1000}, {0x0000}, + {0x0010}, {0x0030}, {0x0020}, {0x0060}, + {0x0070}, {0x0050}, {0x0040}, {0x00C0}, + {0x00D0}, {0x00F0}, {0x00E0}, {0x00A0}, + {0x00B0}, {0x0090}, {0x0080}, {0x8080}, + {0x9080}, {0xB080}, {0xA080}, {0xE080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080}, + {0xF080}, {0xF080}, {0xF080}, {0xF080} + }, // Array containing the transition steps + 0, // Delay between steps (in nanoseconds) + { 0, 1, 3, 2}, // L3 clock stepping array + 580 // L3 voltage threshold +}; + +#define WOF_IMAGE_MAGIC_VALUE ((uint32_t)0x57544948) // "WTIH" +#define WOF_IMAGE_VERSION ((uint32_t)1) + +#define WOF_TABLES_MAGIC_VALUE ((uint32_t)0x57465448) // "WFTH" +#define WOF_TABLES_VERSION ((uint32_t)2) +#define WOF_TABLES_MAX_VERSION WOF_TABLES_VERSION + +/* + * WOF image: + * - header (struct wof_image_hdr) + * - section table + * - array of WOF tables + * + * WOF table: + * - wof_tables_hdr for header + * - data begins with vfrt_hdr + */ + +/* Top-level header for WOF */ +struct wof_image_hdr { + uint32_t magic_number; // WOF_IMAGE_MAGIC_VALUE + uint8_t version; // WOF_IMAGE_VERSION + uint8_t entry_count; // Number of entries in section table + uint32_t offset; // BE offset to section table from image start +} __attribute__((__packed__)); + +/* Entry of WOF's section table */ +struct wof_image_entry { + uint32_t offset; // BE offset to section from image start + uint32_t size; // BE size of the section +} __attribute__((__packed__)); + +/* Header of WOF's section */ 
+struct wof_tables_hdr { + uint32_t magic_number; // WOF_TABLES_MAGIC_VALUE + + uint16_t reserved; + uint8_t mode; // version 1 = 0; version 2 = 1 or 2; WOF_MODE_* + uint8_t version; + + uint16_t vfrt_block_size; + uint16_t vfrt_block_header_size; + uint16_t vfrt_data_size; + uint8_t quads_active_size; + uint8_t core_count; + uint16_t vdn_start; // CeffVdn value represented by index 0 (in 0.01%) + uint16_t vdn_step; // CeffVdn step value for each CeffVdn index (in 0.01%) + uint16_t vdn_size; // Number of CeffVdn indexes + uint16_t vdd_start; // CeffVdd value represented by index 0 (in 0.01%) + uint16_t vdd_step; // CeffVdd step value for each CeffVdd index (in 0.01%) + uint16_t vdd_size; // Number of CeffVdd indexes + uint16_t vratio_start; // Vratio value represented by index 0 (in 0.01%) + uint16_t vratio_step; // Vratio step value for each CeffVdd index (in 0.01%) + uint16_t vratio_size; // Number of Vratio indexes + uint16_t fratio_start; // Fratio value represented by index 0 (in 0.01%) + uint16_t fratio_step; // Fratio step value for each CeffVdd index (in 0.01%) + uint16_t fratio_size; // Number of Fratio indexes + + uint16_t vdn_percent[8]; // Currently unused + + uint16_t socket_power_w; + uint16_t nest_frequency_mhz; + uint16_t sort_power_freq_mhz; // Either the Nominal or Turbo #V frequency + uint16_t rdp_capacity; // Regulator Design Point Capacity (in Amps) + + char wof_table_source_tag[8]; + char package_name[16]; +} __attribute__((packed, aligned(128))); + +#define VFRT_HDR_MAGIC 0x5654 // "VT" +#define VFRT_HDR_VERSION 2 + +/* Header of data within a WOF table */ +struct vfrt_hdr { + uint16_t magic_number; // VFRT_HDR_MAGIC + uint16_t reserved; + // bits 4-7 are type: 0 -- "System", 1 -- "Homer" + // bits 0-3 are version: 1 -- 12 row(voltage) X 11 column(freq) + // 2 -- 24 row(Voltage) X 5 column (Freq) + uint8_t type_version; + uint8_t res_vdnId; // Vdn assumptions + uint8_t vddId_QAId; // Vdd assumptions + uint8_t rsvd_QAId; // bits 0-2: Quad Active assumptions +} __attribute__((packed)); + +/* Data is provided in 1/24ths granularity with adjustments for integer representation */ +#define VFRT_VRATIO_SIZE 24 +/* 5 steps down from 100% is Fratio_step sizes */ +#define VFRT_FRATIO_SIZE 5 + +/* Form of VFRT data as stored in HOMER */ +struct homer_vfrt_entry { + struct vfrt_hdr vfrt_hdr; + uint8_t pstate[VFRT_FRATIO_SIZE * VFRT_VRATIO_SIZE]; +} __attribute__((packed, aligned(256))); + +static void copy_poundW_v2_to_v3(PoundW_data_per_quad *v3, PoundW_data *v2) +{ + memset(v3, 0, sizeof(PoundW_data_per_quad)); + + /* Copy poundW */ + for (int i = 0; i < NUM_OP_POINTS; i++) { + v3->poundw[i].ivdd_tdp_ac_current_10ma = + v2->poundw[i].ivdd_tdp_ac_current_10ma; + v3->poundw[i].ivdd_tdp_dc_current_10ma = + v2->poundw[i].ivdd_tdp_dc_current_10ma; + v3->poundw[i].vdm_overvolt_small_thresholds = + v2->poundw[i].vdm_overvolt_small_thresholds; + v3->poundw[i].vdm_large_extreme_thresholds = + v2->poundw[i].vdm_large_extreme_thresholds; + v3->poundw[i].vdm_normal_freq_drop = + v2->poundw[i].vdm_normal_freq_drop; + v3->poundw[i].vdm_normal_freq_return = + v2->poundw[i].vdm_normal_freq_return; + v3->poundw[i].vdm_vid_compare_per_quad[0] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[1] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[2] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[3] = + v2->poundw[i].vdm_vid_compare_ivid; + v3->poundw[i].vdm_vid_compare_per_quad[4] = + 
v2->poundw[i].vdm_vid_compare_ivid;
+ v3->poundw[i].vdm_vid_compare_per_quad[5] =
+ v2->poundw[i].vdm_vid_compare_ivid;
+ }
+
+ /* Copy resistance data */
+ memcpy(&v3->resistance_data, &v2->resistance_data,
+ sizeof(v2->resistance_data));
+
+ v3->resistance_data.r_undervolt_allowed = v2->undervolt_tested;
+}
+
+static void check_valid_poundV(struct voltage_bucket_data *bucket, int wof_enabled)
+{
+ int num_op_points = NUM_OP_POINTS; // skip powerbus
+ struct voltage_data *data;
+
+ assert(bucket != NULL);
+ data = &bucket->nominal;
+
+ /* Skip UltraTurbo if WOF is disabled */
+ if (!wof_enabled)
+ --num_op_points;
+
+ for (int i = 0; i < num_op_points; i++) {
+ if (data[i].freq == 0 || data[i].vdd_voltage == 0 ||
+ data[i].idd_current == 0 || data[i].vcs_voltage == 0 ||
+ data[i].ics_current == 0)
+ die("Bad #V data\n");
+ }
+ // TODO: check if values increase with operating points
+ // (skipping UltraTurbo if WOF is disabled)
+}
+
+static void check_valid_poundW(PoundW_data_per_quad *poundW_bucket,
+ uint64_t functional_cores,
+ int wof_enabled)
+{
+ uint8_t prev_vid_compare_per_quad[MAXIMUM_QUADS] = {};
+ /*
+ * TODO: If the #W version is less than 3, validate that the Turbo VDM
+ * large threshold is not larger than -32mV to filter out parts that
+ * have bad VPD.
+ */
+
+ for (int op = 0; op < NUM_OP_POINTS; op++) {
+ if (wof_enabled) {
+ /* Check that TDP VDD currents are nonzero */
+ if (poundW_bucket->poundw[op].ivdd_tdp_ac_current_10ma == 0 ||
+ poundW_bucket->poundw[op].ivdd_tdp_dc_current_10ma == 0)
+ die("TDP VDD current equals zero\n");
+ }
+
+ /* Assuming VDM is enabled - validate threshold values */
+ for (int quad = 0; quad < MAXIMUM_QUADS; quad++) {
+ if (!IS_EQ_FUNCTIONAL(quad, functional_cores))
+ continue;
+
+ if (poundW_bucket->poundw[op].vdm_vid_compare_per_quad[quad] == 0)
+ die("VID compare per quad is zero for quad %d\n", quad);
+
+ if (poundW_bucket->poundw[op].vdm_vid_compare_per_quad[quad] <
+ prev_vid_compare_per_quad[quad])
+ die("VID compare per quad is decreasing for quad %d\n", quad);
+
+ prev_vid_compare_per_quad[quad] =
+ poundW_bucket->poundw[op].vdm_vid_compare_per_quad[quad];
+ }
+
+ /* For the thresholds to be valid...
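+ all of the following checks (annotated inline below, and presumably
+ mirroring Hostboot's VPD validation) must pass: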
*/
+ if (/* overvolt threshold must be <= 7 or == 0xC */
+ ((poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0xF0) > 0x70 &&
+ (poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0xF0) != 0xC0) ||
+ /* small threshold must be != 8 and != 9 */
+ ((poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0x0F) == 0x08 ||
+ (poundW_bucket->poundw[op].vdm_overvolt_small_thresholds & 0x0F) == 0x09) ||
+ /* large threshold must be != 8 and != 9 */
+ ((poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0xF0) == 0x80 ||
+ (poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0xF0) == 0x90) ||
+ /* extreme threshold must be != 8 and != 9 */
+ ((poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0x0F) == 0x08 ||
+ (poundW_bucket->poundw[op].vdm_large_extreme_thresholds & 0x0F) == 0x09) ||
+ /* N_L must be <= 7 */
+ (poundW_bucket->poundw[op].vdm_normal_freq_drop & 0x0F) > 7 ||
+ /* N_S must be <= N_L */
+ (((poundW_bucket->poundw[op].vdm_normal_freq_drop & 0xF0) >> 4) >
+ (poundW_bucket->poundw[op].vdm_normal_freq_drop & 0x0F)) ||
+ /* S_N must be <= N_S */
+ ((poundW_bucket->poundw[op].vdm_normal_freq_return & 0x0F) >
+ ((poundW_bucket->poundw[op].vdm_normal_freq_drop & 0xF0) >> 4)) ||
+ /* L_S must be <= N_L - S_N */
+ (((poundW_bucket->poundw[op].vdm_normal_freq_return & 0xF0) >> 4) >
+ ((poundW_bucket->poundw[op].vdm_normal_freq_drop & 0x0F) -
+ (poundW_bucket->poundw[op].vdm_normal_freq_return & 0x0F))))
+ die("Bad #W threshold values\n");
+ }
+}
+
+static void check_valid_iddq(IddqTable *iddq)
+{
+ if (iddq->iddq_version == 0 ||
+ iddq->good_quads_per_sort == 0 ||
+ iddq->good_normal_cores_per_sort == 0 ||
+ iddq->good_caches_per_sort == 0)
+ die("Bad IDDQ data\n");
+
+ for (int i = 0; i < IDDQ_MEASUREMENTS; i++) {
+ if (iddq->ivdd_all_cores_off_caches_off[i] & 0x8000)
+ iddq->ivdd_all_cores_off_caches_off[i] = 0;
+ }
+}
+
+static inline
+uint32_t sysp_mv_offset(uint32_t i_100ma, SysPowerDistParms sysparams)
+{
+ // 100mA*uOhm/10 -> uV
+ return (i_100ma * (sysparams.loadline_uohm + sysparams.distloss_uohm) / 10
+ + sysparams.distoffset_uv) / 1000;
+}
+
+static const
+uint8_t grey_map[] =
+{
+ /* 0mV 0x00*/ 0,
+ /* - 8mV 0x01*/ 1,
+ /* -24mV 0x02*/ 3,
+ /* -16mV 0x03*/ 2,
+ /* -56mV 0x04*/ 7,
+ /* -48mV 0x05*/ 6,
+ /* -32mV 0x06*/ 4,
+ /* -40mV 0x07*/ 5,
+ /* -96mV 0x08*/ 12,
+ /* -96mV 0x09*/ 12,
+ /* -96mV 0x0a*/ 12,
+ /* -96mV 0x0b*/ 12,
+ /* -64mV 0x0c*/ 8,
+ /* -72mV 0x0d*/ 9,
+ /* -88mV 0x0e*/ 11,
+ /* -80mV 0x0f*/ 10
+};
+
+/*
+ * Hostboot has two versions of this function - one for unsigned values and one
+ * for signed. Usually we are passing smaller types; the only time uint32_t is
+ * passed is for 'vdd_mv'. As long as these voltages are below 2^31 mV (~2 MV),
+ * the signed type doesn't matter.
+ */
+static int16_t calc_slope(int32_t y1, int32_t y0, int32_t x1, int32_t x0)
+{
+ int32_t half = (x1 - x0) / 2;
+ return (((y1 - y0) << 12) + half) / (x1 - x0);
+}
+
+static void calculate_slopes(GlobalPstateParmBlock *gppb,
+ PoundW_data_per_quad *pW)
+{
+ VpdOperatingPoint *ops = gppb->operating_points_set[VPD_PT_SET_BIASED];
+
+ for (int op = 0; op < NUM_OP_POINTS; op++) {
+ /*
+ * Even though vid_point_set doesn't have space for per-quad data,
+ * Hostboot still writes to the same field for each functional quad.
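+ * Only quad 0's value is used below, which matches what Hostboot
+ * effectively does as long as all quads carry the same data.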
+ */ + gppb->vid_point_set[op] = pW->poundw[op].vdm_vid_compare_per_quad[0]; + + gppb->threshold_set[op][VDM_OVERVOLT_INDEX] = + grey_map[(pW->poundw[op].vdm_overvolt_small_thresholds >> 4) & 0x0F]; + gppb->threshold_set[op][VDM_SMALL_INDEX] = + grey_map[pW->poundw[op].vdm_overvolt_small_thresholds & 0x0F]; + gppb->threshold_set[op][VDM_LARGE_INDEX] = + grey_map[(pW->poundw[op].vdm_large_extreme_thresholds >> 4) & 0x0F]; + gppb->threshold_set[op][VDM_XTREME_INDEX] = + grey_map[pW->poundw[op].vdm_large_extreme_thresholds & 0x0F]; + + gppb->jump_value_set[op][VDM_N_S_INDEX] = + (pW->poundw[op].vdm_normal_freq_drop >> 4) & 0x0F; + gppb->jump_value_set[op][VDM_N_L_INDEX] = + pW->poundw[op].vdm_normal_freq_drop & 0x0F; + gppb->jump_value_set[op][VDM_L_S_INDEX] = + (pW->poundw[op].vdm_normal_freq_return >> 4) & 0x0F; + gppb->jump_value_set[op][VDM_S_N_INDEX] = + pW->poundw[op].vdm_normal_freq_return & 0x0F; + } + + /* Slopes are saved in 4.12 fixed point format */ + for (int sl = 0; sl < VPD_NUM_SLOPES_REGION; sl++) { + gppb->PsVIDCompSlopes[sl] = calc_slope(gppb->vid_point_set[sl+1], + gppb->vid_point_set[sl], + ops[sl].pstate, + ops[sl+1].pstate); + + for (int i = 0; i < NUM_THRESHOLD_POINTS; i++) { + gppb->PsVDMThreshSlopes[sl][i] = calc_slope(gppb->threshold_set[sl+1][i], + gppb->threshold_set[sl][i], + ops[sl].pstate, + ops[sl+1].pstate); + } + + for (int i = 0; i < NUM_JUMP_VALUES; i++) { + gppb->PsVDMJumpSlopes[sl][i] = calc_slope(gppb->jump_value_set[sl+1][i], + gppb->jump_value_set[sl][i], + ops[sl].pstate, + ops[sl+1].pstate); + } + } + + #define OPS gppb->operating_points_set + for (int set = 0; set < NUM_VPD_PTS_SET; set++) { + for (int sl = 0; sl < VPD_NUM_SLOPES_REGION; sl++) { + gppb->PStateVSlopes[set][sl] = calc_slope(OPS[set][sl+1].vdd_mv, + OPS[set][sl].vdd_mv, + OPS[set][sl].pstate, + OPS[set][sl+1].pstate); + gppb->VPStateSlopes[set][sl] = calc_slope(OPS[set][sl].pstate, + OPS[set][sl+1].pstate, + OPS[set][sl+1].vdd_mv, + OPS[set][sl].vdd_mv); + } + } + #undef OPS +} + +static uint32_t calculate_sm_voltage(uint8_t sm_pstate, + GlobalPstateParmBlock *gppb) +{ + int op = NUM_OP_POINTS - 1; + VpdOperatingPoint *ops = gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP]; + uint16_t *slopes = gppb->PStateVSlopes[VPD_PT_SET_BIASED_SYSP]; + + while (op >= 0 && (ops[op].pstate < sm_pstate)) + op--; + + assert(ops[op].pstate >= sm_pstate); + + /* sm_pstate is somewhere between op and op+1 */ + return ops[op].vdd_mv + ((ops[op].pstate - sm_pstate) * slopes[op] >> 12); +} + +/* resclk is always sorted */ +static void update_resclk(int ref_freq_khz) +{ + uint8_t prev_idx = resclk.resclk_index[0]; + for (int i = 0; i < RESCLK_FREQ_REGIONS; i++) { + /* If freq == 0 round pstate down - can't have negative frequency */ + if (resclk_freq_mhz[i] == 0) { + resclk.resclk_freq[i] = ref_freq_khz / FREQ_STEP_KHZ; + continue; + } + + /* If freq > ref_freq - cap and use previous index */ + if (resclk_freq_mhz[i] * 1000 > ref_freq_khz) { + resclk.resclk_freq[i] = 0; + resclk.resclk_index[i] = prev_idx; + continue; + } + + /* Otherwise always round pstate up */ + resclk.resclk_freq[i] = (ref_freq_khz - resclk_freq_mhz[i] * 1000 + + FREQ_STEP_KHZ - 1) / FREQ_STEP_KHZ; + + prev_idx = resclk.resclk_index[i]; + } +} + +static int32_t wof_find(struct wof_image_entry *entries, uint8_t entry_count, + uint32_t core_count, + const struct voltage_bucket_data *poundV_bucket) +{ + const struct region_device *wof_device = wof_device_ro(); + + const uint16_t socket_power_w = poundV_bucket->sort_power_turbo; + const 
uint16_t sort_power_freq_mhz = poundV_bucket->turbo.freq; + + int32_t i = 0; + + for (i = 0; i < entry_count; ++i) { + uint8_t tbl_hdr_buf[sizeof(struct wof_tables_hdr)]; + struct wof_tables_hdr *tbl_hdr = (void *)tbl_hdr_buf; + uint8_t mode = 0; + + if (rdev_readat(wof_device, tbl_hdr_buf, entries[i].offset, + sizeof(tbl_hdr_buf)) != sizeof(tbl_hdr_buf)) + die("Failed to read a WOF tables header!\n"); + + if (tbl_hdr->magic_number != WOF_TABLES_MAGIC_VALUE) + die("Incorrect magic value of WOF table header!\n"); + + if (tbl_hdr->version == 0 || tbl_hdr->version > WOF_TABLES_MAX_VERSION) + die("Unsupported version of WOF table header: %d!\n", + tbl_hdr->version); + + mode = (tbl_hdr->mode & 0x0F); + if (tbl_hdr->version >= WOF_TABLES_VERSION && + mode != WOF_MODE_UNKNOWN && + mode != WOF_MODE_TURBO) + continue; + + if (tbl_hdr->core_count == core_count && + tbl_hdr->socket_power_w == socket_power_w && + tbl_hdr->sort_power_freq_mhz == sort_power_freq_mhz) + /* Found a suitable WOF tables entry */ + return i; + } + + return -1; +} + +static void import_vfrt(const struct vfrt_hdr *src, struct homer_vfrt_entry *dst, + const OCCPstateParmBlock *oppb) +{ + const uint32_t ref_freq = oppb->frequency_max_khz; + const uint32_t freq_step = oppb->frequency_step_khz; + + uint16_t i = 0; + uint8_t *freq = NULL; + + if (src->magic_number != VFRT_HDR_MAGIC) + die("Invalid magic value of a VFRT header: %d!\n", src->magic_number); + + if ((src->type_version & 0x0F) != VFRT_HDR_VERSION) { + die("Expected VFRT header version %d, got %d!", + VFRT_HDR_VERSION, (src->type_version & 0x0F)); + } + + dst->vfrt_hdr = *src; + /* Flip type from "System" to "Homer" */ + dst->vfrt_hdr.type_version |= 0x10; + + freq = (uint8_t *)src + sizeof(*src); + for (i = 0; i < VFRT_FRATIO_SIZE * VFRT_VRATIO_SIZE; ++i) { + const uint32_t freq_khz = freq[i] * freq_step + 1000000; + + /* Round towards zero */ + dst->pstate[i] = (ref_freq - freq_khz) / freq_step; + } +} + +static void wof_extract(uint8_t *buf, struct wof_image_entry entry, + const OCCPstateParmBlock *oppb) +{ + const struct region_device *wof_device = wof_device_ro(); + + struct wof_tables_hdr *tbl_hdr = NULL; + + uint32_t i; + + uint8_t *table_data = NULL; + uint8_t *wof_vfrt_entry = NULL; + struct homer_vfrt_entry *homer_vfrt_entry = NULL; + + table_data = rdev_mmap(wof_device, entry.offset, entry.size); + if (!table_data) + die("Failed to map WOF section!\n"); + + tbl_hdr = (void *)table_data; + memcpy(buf, tbl_hdr, sizeof(*tbl_hdr)); + + wof_vfrt_entry = table_data + sizeof(*tbl_hdr); + homer_vfrt_entry = (struct homer_vfrt_entry *)(buf + sizeof(*tbl_hdr)); + + for (i = 0; i < tbl_hdr->vdn_size * tbl_hdr->vdd_size * MAX_QUADS_PER_CHIP; ++i) { + import_vfrt((const struct vfrt_hdr *)wof_vfrt_entry, homer_vfrt_entry, oppb); + + wof_vfrt_entry += SYSTEM_VFRT_SIZE; + ++homer_vfrt_entry; + } + + if (rdev_munmap(wof_device, table_data)) + die("Failed to unmap WOF section!\n"); +} + +/* Returns WOF state */ +static uint8_t wof_init(uint8_t *buf, uint32_t core_count, + const OCCPstateParmBlock *oppb, + const struct voltage_bucket_data *poundV_bucket) +{ + const struct region_device *wof_device = NULL; + + uint8_t hdr_buf[sizeof(struct wof_image_hdr)]; + struct wof_image_hdr *hdr = (void *)hdr_buf; + + struct wof_image_entry *entries = NULL; + int32_t entry_idx = 0; + + wof_device_init(); + wof_device = wof_device_ro(); + + if (rdev_readat(wof_device, hdr_buf, 0, sizeof(hdr_buf)) != sizeof(hdr_buf)) + die("Failed to read WOF header!\n"); + + if (hdr->magic_number != 
WOF_IMAGE_MAGIC_VALUE)
+ die("Incorrect magic value of WOF header!\n");
+
+ if (hdr->version != WOF_IMAGE_VERSION) {
+ die("Expected WOF header version %d, got %d!",
+ WOF_IMAGE_VERSION, hdr->version);
+ }
+
+ entries = rdev_mmap(wof_device, hdr->offset, hdr->entry_count * sizeof(*entries));
+ if (!entries)
+ die("Failed to map section table of WOF!\n");
+
+ entry_idx = wof_find(entries, hdr->entry_count, core_count, poundV_bucket);
+ if (entry_idx == -1)
+ printk(BIOS_NOTICE, "Matching WOF tables section not found, disabling WOF\n");
+ else
+ wof_extract(buf, entries[entry_idx], oppb);
+
+ if (rdev_munmap(wof_device, entries))
+ die("Failed to unmap section table of WOF!\n");
+
+ return (entry_idx == -1 ? 0 : 1);
+}
+
+/* Assumption: no bias is applied to operating points */
+void build_parameter_blocks(uint8_t chip, struct homer_st *homer, uint64_t functional_cores)
+{
+ uint8_t buf[512];
+ uint32_t size = sizeof(buf);
+ struct voltage_kwd *poundV = (struct voltage_kwd *)&buf;
+ struct voltage_bucket_data *bucket = NULL;
+ struct voltage_bucket_data poundV_bucket = {};
+ PoundW_data_per_quad poundW_bucket = {};
+ char record[] = "LRP0";
+ struct cme_img_header *cme_hdr = (struct cme_img_header *)
+ &homer->cpmr.cme_sram_region[INT_VECTOR_SIZE];
+ cme_hdr->pstate_offset = cme_hdr->core_spec_ring_offset +
+ cme_hdr->max_spec_ring_len;
+ cme_hdr->custom_length =
+ ALIGN_UP(cme_hdr->max_spec_ring_len * 32 + sizeof(LocalPstateParmBlock), 32) /
+ 32;
+
+ /*
+ * OCC Pstate Parameter Block and Global Pstate Parameter Block are filled
+ * directly in their final place as we go.
+ *
+ * Local Pstate Parameter Block in Hostboot uses an array with an entry for
+ * each quad (note there are two CMEs per quad, those are written with the
+ * same entry). Nevertheless, data written to LPPB for each quad (and CME)
+ * is identical - the only field that could have per-quad data is VID comp,
+ * but it is filled with data for quad 0. It looks as if the code was made
+ * in anticipation of #W v3, but that version is not yet used.
+ *
+ * Here, we use CME 0 on quad 0 as a template that is filled as we go. This
+ * structure is then copied to other functional CMEs. Note that the first
+ * CME doesn't have to be functional, but always writing to its region is
+ * much easier than finding out the proper source for memcpy later.
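+ *
+ * The actual copy to the remaining functional CMEs happens at the very
+ * end of this function.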
+ */
+ OCCPstateParmBlock *oppb = (OCCPstateParmBlock *)homer->ppmr.occ_parm_block;
+ GlobalPstateParmBlock *gppb = (GlobalPstateParmBlock *)
+ &homer->ppmr.pgpe_sram_img[homer->ppmr.header.hcode_len];
+ LocalPstateParmBlock *lppb = (LocalPstateParmBlock *)
+ &homer->cpmr.cme_sram_region[cme_hdr->pstate_offset * 32];
+
+ /* Start with the assumption that WOF will work if it's supported by the chip */
+ oppb->wof.wof_enabled = get_dd() > 0x20;
+
+ /* OPPB - constant fields */
+
+ oppb->magic = OCC_PARMSBLOCK_MAGIC; // "OCCPPB00"
+ oppb->frequency_step_khz = FREQ_STEP_KHZ;
+
+ /* Default values are from talos.xml */
+ oppb->vdd_sysparm.loadline_uohm = 254;
+ oppb->vdd_sysparm.distloss_uohm = 0;
+ oppb->vdd_sysparm.distoffset_uv = 0;
+
+ oppb->vcs_sysparm.loadline_uohm = 0;
+ oppb->vcs_sysparm.distloss_uohm = 64;
+ oppb->vcs_sysparm.distoffset_uv = 0;
+
+ oppb->vdn_sysparm.loadline_uohm = 0;
+ oppb->vdn_sysparm.distloss_uohm = 50;
+ oppb->vdn_sysparm.distoffset_uv = 0;
+
+ /* GPPB - constant fields */
+
+ gppb->magic = PSTATE_PARMSBLOCK_MAGIC; // "PSTATE00"
+ gppb->options.options = 0;
+ gppb->frequency_step_khz = FREQ_STEP_KHZ;
+
+ /*
+ * VpdBias External and Internal Biases for Global and Local parameter
+ * blocks - assumed no bias, filled with 0. HOMER was already cleared so
+ * no need to repeat it.
+ */
+
+ /* Default values are from talos.xml */
+ gppb->vdd_sysparm.loadline_uohm = 254;
+ gppb->vdd_sysparm.distloss_uohm = 0;
+ gppb->vdd_sysparm.distoffset_uv = 0;
+
+ gppb->vcs_sysparm.loadline_uohm = 0;
+ gppb->vcs_sysparm.distloss_uohm = 64;
+ gppb->vcs_sysparm.distoffset_uv = 0;
+
+ gppb->vdn_sysparm.loadline_uohm = 0;
+ gppb->vdn_sysparm.distloss_uohm = 50;
+ gppb->vdn_sysparm.distoffset_uv = 0;
+
+ /* External VRM parameters - values are internal defaults */
+ gppb->ext_vrm_transition_start_ns = 8000;
+ gppb->ext_vrm_transition_rate_inc_uv_per_us = 10000;
+ gppb->ext_vrm_transition_rate_dec_uv_per_us = 10000;
+ gppb->ext_vrm_stabilization_time_us = 5;
+ gppb->ext_vrm_step_size_mv = 50;
+
+ /* WOV parameters - values are internal defaults */
+ gppb->wov_sample_125us = 2;
+ gppb->wov_max_droop_pct = 125;
+ gppb->wov_underv_perf_loss_thresh_pct = 5;
+ gppb->wov_underv_step_incr_pct = 5;
+ gppb->wov_underv_step_decr_pct = 5;
+ gppb->wov_underv_max_pct = 100;
+ gppb->wov_overv_vmax_mv = 1150;
+ gppb->wov_overv_step_incr_pct = 5;
+ gppb->wov_overv_step_decr_pct = 5;
+ gppb->wov_overv_max_pct = 0;
+
+ /* AVS bus topology - values come from talos.xml */
+ gppb->avs_bus_topology.vdd_avsbus_num = 0;
+ gppb->avs_bus_topology.vdd_avsbus_rail = 0;
+ gppb->avs_bus_topology.vdn_avsbus_num = 1;
+ gppb->avs_bus_topology.vdn_avsbus_rail = 0;
+ gppb->avs_bus_topology.vcs_avsbus_num = 0;
+ gppb->avs_bus_topology.vcs_avsbus_rail = 1;
+
+ /* LPPB - constant fields */
+ lppb->magic = LOCAL_PARMSBLOCK_MAGIC; // "CMEPPB00"
+
+ /* Default values are from talos.xml */
+ lppb->vdd_sysparm.loadline_uohm = 254;
+ lppb->vdd_sysparm.distloss_uohm = 0;
+ lppb->vdd_sysparm.distoffset_uv = 0;
+
+
+ /* Read and validate #V */
+
+ for (int quad = 0; quad < MAXIMUM_QUADS; quad++) {
+ if (!IS_EQ_FUNCTIONAL(quad, functional_cores))
+ continue;
+
+ record[3] = '0' + quad;
+ size = sizeof(buf);
+ if (!mvpd_extract_keyword(chip, record, "#V", buf, &size)) {
+ die("Failed to read %s record from MVPD", record);
+ }
+
+ assert(poundV->version == VOLTAGE_DATA_VERSION);
+ assert(size >= sizeof(struct voltage_kwd));
+
+ /*
+ * Q: How does Hostboot decide which bucket to use?
+ * A: It checks whether the bucket's PB freq equals the PB freq saved in the attribute.
+ * Q: Where does the PB freq attribute value come from?
+ * A: From #V - it is the first non-zero value.
+ *
+ * Given that in any case we would have to iterate over all buckets,
+ * there is no need to read PB freq again.
+ */
+ for (int i = 0; i < VOLTAGE_BUCKET_COUNT; i++) {
+ if (poundV->buckets[i].powerbus.freq != 0) {
+ bucket = &poundV->buckets[i];
+ break;
+ }
+ }
+
+ check_valid_poundV(bucket, oppb->wof.wof_enabled);
+
+ if (poundV_bucket.id == 0) {
+ memcpy(&poundV_bucket, bucket, sizeof(poundV_bucket));
+ continue;
+ }
+
+ /* Frequencies must match */
+ if (bucket->nominal.freq != poundV_bucket.nominal.freq ||
+ bucket->powersave.freq != poundV_bucket.powersave.freq ||
+ bucket->turbo.freq != poundV_bucket.turbo.freq ||
+ bucket->ultra_turbo.freq != poundV_bucket.ultra_turbo.freq ||
+ bucket->powerbus.freq != poundV_bucket.powerbus.freq)
+ die("Frequency mismatch in #V MVPD between quads\n");
+
+ /*
+ * Voltages don't have to match, but we want to know the bucket ID for
+ * the highest voltage. Note: vdd_voltage in powerbus is actually VDN.
+ */
+ if (bucket->nominal.vdd_voltage > poundV_bucket.nominal.vdd_voltage ||
+ bucket->powersave.vdd_voltage > poundV_bucket.powersave.vdd_voltage ||
+ bucket->turbo.vdd_voltage > poundV_bucket.turbo.vdd_voltage ||
+ bucket->ultra_turbo.vdd_voltage > poundV_bucket.ultra_turbo.vdd_voltage ||
+ bucket->powerbus.vdd_voltage > poundV_bucket.powerbus.vdd_voltage)
+ memcpy(&poundV_bucket, bucket, sizeof(poundV_bucket));
+ }
+
+ assert(poundV_bucket.id != 0);
+ struct voltage_data *vd = &poundV_bucket.nominal;
+
+
+ /* OPPB - #V data */
+
+ /* Save UltraTurbo frequency as reference */
+ oppb->frequency_max_khz = vd[VPD_PV_ULTRA].freq * 1000;
+ oppb->nest_frequency_mhz = vd[VPD_PV_POWERBUS].freq;
+
+ /* If WOF is supported, try initializing it. Disable WOF if initialization fails. */
+ if (oppb->wof.wof_enabled) {
+ uint32_t core_count = __builtin_popcount((uint32_t)functional_cores) +
+ __builtin_popcount(functional_cores >> 32);
+ /* wof_init() only needs two fields of oppb, both of which are
+ * initialized by now. */
+ oppb->wof.wof_enabled = wof_init(homer->ppmr.wof_tables, core_count, oppb,
+ &poundV_bucket);
+ }
+
+ if (oppb->wof.wof_enabled) {
+ /* ATTR_TDP_RDP_CURRENT_FACTOR from talos.xml */
+ oppb->wof.tdp_rdp_factor = 0;
+ /* ATTR_NEST_LEAKAGE_PERCENT from hb_temp_defaults.xml */
+ oppb->nest_leakage_percent = 60;
+
+ /*
+ * As the Vdn dimension is not supported in the WOF tables, hardcoding this
+ * value to the OCC as non-zero to keep it happy.
+ */
+ oppb->ceff_tdp_vdn = 1;
+ }
+
+ for (int op = 0; op < NUM_OP_POINTS; op++) {
+ /* Assuming no bias */
+ oppb->operating_points[op].frequency_mhz = vd[op].freq;
+ oppb->operating_points[op].vdd_mv = vd[op].vdd_voltage;
+ oppb->operating_points[op].idd_100ma = vd[op].idd_current;
+ oppb->operating_points[op].vcs_mv = vd[op].vcs_voltage;
+ oppb->operating_points[op].ics_100ma = vd[op].ics_current;
+ /* Integer math rounds pstates down (i.e. 
towards higher frequency) */
+ oppb->operating_points[op].pstate =
+ (oppb->frequency_max_khz - vd[op].freq * 1000) / oppb->frequency_step_khz;
+ }
+
+ /* Sort operating points - swap power saving with nominal */
+ {
+ VpdOperatingPoint nom;
+ nom = oppb->operating_points[VPD_PV_NOMINAL];
+ oppb->operating_points[POWERSAVE] =
+ oppb->operating_points[VPD_PV_POWERSAVE];
+ oppb->operating_points[NOMINAL] = nom;
+ }
+
+ /* GPPB - #V data */
+
+ gppb->reference_frequency_khz = oppb->frequency_max_khz;
+ gppb->nest_frequency_mhz = oppb->nest_frequency_mhz;
+ /* This is the Pstate value that would be assigned to a frequency of 0 */
+ gppb->dpll_pstate0_value = gppb->reference_frequency_khz /
+ gppb->frequency_step_khz;
+
+ update_resclk(gppb->reference_frequency_khz);
+ memcpy(&gppb->resclk, &resclk, sizeof(ResonantClockingSetup));
+
+ /*
+ * Global PPB VDM and iVRM are set based on attributes, but all of them
+ * are 0 by default. HOMER was memset to 0, so no need to do anything more.
+ *
+ * For Local PPBs, VDM is explicitly set to 0 even when attributes have
+ * different values. iVRM are still set based on attributes.
+ */
+
+ memcpy(gppb->operating_points, oppb->operating_points,
+ sizeof(gppb->operating_points));
+ {
+
+ memcpy(gppb->operating_points_set[VPD_PT_SET_RAW], oppb->operating_points,
+ sizeof(gppb->operating_points));
+ memcpy(gppb->operating_points_set[VPD_PT_SET_SYSP], oppb->operating_points,
+ sizeof(gppb->operating_points));
+ /* Assuming no bias */
+ memcpy(gppb->operating_points_set[VPD_PT_SET_BIASED],
+ oppb->operating_points, sizeof(gppb->operating_points));
+ memcpy(gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP],
+ oppb->operating_points, sizeof(gppb->operating_points));
+
+ for (int op = 0; op < NUM_OP_POINTS; op++) {
+ gppb->operating_points_set[VPD_PT_SET_SYSP][op].vdd_mv +=
+ sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_SYSP][op].idd_100ma,
+ gppb->vdd_sysparm);
+ gppb->operating_points_set[VPD_PT_SET_SYSP][op].vcs_mv +=
+ sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_SYSP][op].ics_100ma,
+ gppb->vcs_sysparm);
+ gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].vdd_mv +=
+ sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].idd_100ma,
+ gppb->vdd_sysparm);
+ gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].vcs_mv +=
+ sysp_mv_offset(gppb->operating_points_set[VPD_PT_SET_BIASED_SYSP][op].ics_100ma,
+ gppb->vcs_sysparm);
+ }
+ }
+
+ /* LPPB - #V data */
+
+ /* LPPB has neither reference frequency nor step size, so use GPPB values */
+ lppb->dpll_pstate0_value = gppb->reference_frequency_khz /
+ gppb->frequency_step_khz;
+ memcpy(lppb->operating_points, oppb->operating_points,
+ sizeof(lppb->operating_points));
+ memcpy(&lppb->resclk, &resclk, sizeof(ResonantClockingSetup));
+
+ /*
+ * #W is in CRP0; there is no CRP1..5 for other quads. Format of #W:
+ * - Version: 1 byte
+ * - #V Bucket ID #1: 1 byte
+ * - VDM Data for bucket 1: varies by version
+ * - #V Bucket ID #2: 1 byte
+ * - ...
+ * - #V Bucket ID #6: 1 byte
+ * - VDM Data for bucket 6: varies by version
+ *
+ * Size of each VDM data (excluding bucket ID) by version:
+ * - 0x1 - 0x28 bytes
+ * - 0x2-0xF - 0x3C bytes
+ * - 0x30 - 0x87 bytes
+ *
+ * The following code supports only the second and third versions.
+ *
+ * HOSTBUG: Hostboot reads #W for each (functional) quad, does all the
+ * parsing and then writes it to one output buffer, overwriting data written
+ * previously. As there is only one #W, this doesn't make any sense. 
It also
+ * first parses/writes, then tests whether the bucket IDs even match.
+ */
+ size = sizeof(buf);
+ if (!mvpd_extract_keyword(chip, "CRP0", "#W", buf, &size)) {
+ die("Failed to read %s record from MVPD", "CRP0");
+ }
+
+ if ((buf[0] < 0x2 || buf[0] > 0xF) && buf[0] != 0x30)
+ die("Unsupported version (%#x) of #W MVPD\n", buf[0]);
+
+ if (buf[0] == 0x30) {
+ /* Version 3, just find the proper bucket and copy data */
+ assert(size >= 1 + VOLTAGE_BUCKET_COUNT *
+ (1 + sizeof(PoundW_data_per_quad)));
+ for (int i = 0; i < VOLTAGE_BUCKET_COUNT; i++) {
+ /* Version + i * (bucket ID + bucket data) */
+ int offset = 1 + i * (1 + sizeof(PoundW_data_per_quad));
+ if (buf[offset] == poundV_bucket.id) {
+ memcpy(&poundW_bucket, &buf[offset + 1], sizeof(poundW_bucket));
+ break;
+ }
+ }
+ }
+ else {
+ /* Version 2, different data size (0x3C) and format */
+ /*
+ * HOSTBUG: we should be able to use sizeof(PoundW_data), but we can't.
+ * #W is packed in MVPD, but not in the type's definition.
+ */
+ assert(size >= 1 + VOLTAGE_BUCKET_COUNT * (1 + 0x3C));
+ for (int i = 0; i < VOLTAGE_BUCKET_COUNT; i++) {
+ /* Version + i * (bucket ID + bucket data) */
+ int offset = 1 + i * (1 + 0x3C);
+ if (buf[offset] == poundV_bucket.id) {
+ copy_poundW_v2_to_v3(&poundW_bucket,
+ (PoundW_data *)&buf[offset + 1]);
+ break;
+ }
+ }
+ }
+
+ /* Sort operating points - swap power saving with nominal */
+ {
+ poundw_entry_per_quad_t nom;
+ nom = poundW_bucket.poundw[VPD_PV_NOMINAL];
+ poundW_bucket.poundw[POWERSAVE] =
+ poundW_bucket.poundw[VPD_PV_POWERSAVE];
+ poundW_bucket.poundw[NOMINAL] = nom;
+ }
+
+ check_valid_poundW(&poundW_bucket, functional_cores, oppb->wof.wof_enabled);
+
+ /* OPPB - #W data */
+
+ if (oppb->wof.wof_enabled) {
+ oppb->lac_tdp_vdd_turbo_10ma =
+ poundW_bucket.poundw[TURBO].ivdd_tdp_ac_current_10ma;
+ oppb->lac_tdp_vdd_nominal_10ma =
+ poundW_bucket.poundw[NOMINAL].ivdd_tdp_ac_current_10ma;
+ }
+
+ /* Calculate safe mode frequency/pstate/voltage */
+ {
+ /*
+ * Assumption: N_L values are the same for PS and N operating points.
+ * Not sure if this is always true, so assert just in case.
+ *
+ * This makes calculation of the jump value much easier.
+ */
+ assert((poundW_bucket.poundw[POWERSAVE].vdm_normal_freq_drop & 0x0F) ==
+ (poundW_bucket.poundw[NOMINAL].vdm_normal_freq_drop & 0x0F));
+ uint8_t jump_value =
+ poundW_bucket.poundw[POWERSAVE].vdm_normal_freq_drop & 0x0F;
+
+ uint32_t sm_freq = (oppb->frequency_max_khz -
+ (oppb->operating_points[POWERSAVE].pstate *
+ oppb->frequency_step_khz))
+ * 32 / (32 - jump_value);
+
+ uint8_t sm_pstate = (oppb->frequency_max_khz - sm_freq) /
+ oppb->frequency_step_khz;
+
+ assert(sm_pstate < oppb->operating_points[POWERSAVE].pstate);
+
+ oppb->pstate_min = sm_pstate;
+ /* Reverse the calculation to deal with rounding caused by integer math */
+ oppb->frequency_min_khz = oppb->frequency_max_khz -
+ sm_pstate * oppb->frequency_step_khz;
+
+ assert(oppb->frequency_min_khz < oppb->frequency_max_khz);
+ }
+
+ /* GPPB - #W data */
+
+ calculate_slopes(gppb, &poundW_bucket);
+ gppb->safe_frequency_khz = oppb->frequency_min_khz;
+ gppb->safe_voltage_mv = calculate_sm_voltage(oppb->pstate_min, gppb);
+ gppb->wov_underv_vmin_mv = gppb->safe_voltage_mv;
+
+ printk(BIOS_DEBUG, "Safe mode freq = %d kHz, voltage = %d mV\n",
+ gppb->safe_frequency_khz, gppb->safe_voltage_mv);
+
+ /* LPPB - #W data */
+ /*
+ * This basically repeats calculate_slopes() for LPPB. Unfortunately, the
+ * structures aren't compatible.
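+ * Instead, the arrays already computed for the GPPB are copied field by
+ * field below.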
+ */
+ memcpy(lppb->vid_point_set, gppb->vid_point_set,
+ sizeof(lppb->vid_point_set));
+ memcpy(lppb->threshold_set, gppb->threshold_set,
+ sizeof(lppb->threshold_set));
+ memcpy(lppb->jump_value_set, gppb->jump_value_set,
+ sizeof(lppb->jump_value_set));
+ memcpy(lppb->PsVIDCompSlopes, gppb->PsVIDCompSlopes,
+ sizeof(lppb->PsVIDCompSlopes));
+ memcpy(lppb->PsVDMThreshSlopes, gppb->PsVDMThreshSlopes,
+ sizeof(lppb->PsVDMThreshSlopes));
+ memcpy(lppb->PsVDMJumpSlopes, gppb->PsVDMJumpSlopes,
+ sizeof(lppb->PsVDMJumpSlopes));
+
+ if (oppb->wof.wof_enabled) {
+ /*
+ * IDDQ - can't read straight into IddqTable; see the comment before
+ * the spare bytes in the struct definition.
+ */
+ size = sizeof(buf);
+ if (!mvpd_extract_keyword(chip, "CRP0", "IQ", buf, &size))
+ die("Failed to read %s record from MVPD", "CRP0");
+ assert(size >= sizeof(IddqTable));
+ memcpy(&oppb->iddq, buf, sizeof(IddqTable));
+
+ check_valid_iddq(&oppb->iddq);
+ }
+
+ /*
+ * The pad was re-purposed; Hostboot developers created an additional
+ * union. The new union is in the same header file, a few lines above the
+ * structure, but the original field still uses 'uint32_t pad' instead of
+ * the new type. This leads to the following monstrosity.
+ */
+ ((GPPBOptionsPadUse *)&gppb->options.pad)->fields.good_cores_in_sort =
+ oppb->iddq.good_normal_cores_per_sort;
+
+ /* Copy LPPB to functional CMEs */
+ for (int cme = 1; cme < MAX_CMES_PER_CHIP; cme++) {
+ if (!IS_EX_FUNCTIONAL(cme, functional_cores))
+ continue;
+
+ memcpy(&homer->cpmr.cme_sram_region[cme * cme_hdr->custom_length * 32 +
+ cme_hdr->pstate_offset * 32],
+ lppb, sizeof(LocalPstateParmBlock));
+ }
+
+ /* Finally, update headers */
+ homer->ppmr.header.gppb_offset = homer->ppmr.header.hcode_offset +
+ homer->ppmr.header.hcode_len;
+ homer->ppmr.header.gppb_len = ALIGN_UP(sizeof(GlobalPstateParmBlock), 8);
+
+ homer->ppmr.header.oppb_offset = offsetof(struct ppmr_st, occ_parm_block);
+ homer->ppmr.header.oppb_len = ALIGN_UP(sizeof(OCCPstateParmBlock), 8);
+
+ /* Assuming >= CPMR_2.0 */
+ homer->ppmr.header.lppb_offset = 0;
+ homer->ppmr.header.lppb_len = 0;
+
+ homer->ppmr.header.pstables_offset = offsetof(struct ppmr_st, pstate_table);
+ homer->ppmr.header.pstables_len = PSTATE_OUTPUT_TABLES_SIZE; // 16 KiB
+
+ homer->ppmr.header.wof_table_offset = OCC_WOF_TABLES_OFFSET;
+ homer->ppmr.header.wof_table_len = OCC_WOF_TABLES_SIZE;
+
+ homer->ppmr.header.sram_img_size = homer->ppmr.header.hcode_len +
+ homer->ppmr.header.gppb_len;
+}
diff --git a/src/soc/ibm/power9/rom_media.c b/src/soc/ibm/power9/rom_media.c
new file mode 100644
index 00000000000..eb38e388473
--- /dev/null
+++ b/src/soc/ibm/power9/rom_media.c
@@ -0,0 +1,533 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "../../../../3rdparty/ffs/ffs/ffs.h"
+#include "wof.h"
+
+#define LPC_FLASH_MIN (MMIO_GROUP0_CHIP0_LPC_BASE_ADDR + LPCHC_FW_SPACE)
+#define LPC_FLASH_TOP (LPC_FLASH_MIN + FW_SPACE_SIZE)
+
+#define CBFS_PARTITION_NAME "HBI"
+#define MEMD_PARTITION_NAME "MEMD"
+#define MVPD_PARTITION_NAME "MVPD"
+#define WOF_PARTITION_NAME "WOFDATA"
+
+/* ffs_entry is not complete in the included ffs.h; it lacks the user data
+ * layout. See https://github.com/open-power/skiboot/blob/master/libflash/ffs.h */
+
+/* Data integrity flags */
+#define FFS_ENRY_INTEG_ECC 0x8000
+
+/* Version Checking : 1 byte */
+#define FFS_VERS_SHA512 0x80
+
+enum ecc_status {
+ CLEAN=0, //< No ECC error was detected. 
+ CORRECTED=1, //< ECC error detected and corrected. + UNCORRECTABLE=2 //< ECC error detected and uncorrectable. +}; +typedef enum ecc_status ecc_status_t; + +enum ecc_bitfields { + GD = 0xff, //< Good, ECC matches. + UE = 0xfe, //< Uncorrectable. + E0 = 71, //< Error in ECC bit 0 + E1 = 70, //< Error in ECC bit 1 + E2 = 69, //< Error in ECC bit 2 + E3 = 68, //< Error in ECC bit 3 + E4 = 67, //< Error in ECC bit 4 + E5 = 66, //< Error in ECC bit 5 + E6 = 65, //< Error in ECC bit 6 + E7 = 64 //< Error in ECC bit 7 +}; + +/* +static uint64_t ecc_matrix[] = { + //0000000000000000111010000100001000111100000011111001100111111111 + 0x0000e8423c0f99ff, + //0000000011101000010000100011110000001111100110011111111100000000 + 0x00e8423c0f99ff00, + //1110100001000010001111000000111110011001111111110000000000000000 + 0xe8423c0f99ff0000, + //0100001000111100000011111001100111111111000000000000000011101000 + 0x423c0f99ff0000e8, + //0011110000001111100110011111111100000000000000001110100001000010 + 0x3c0f99ff0000e842, + //0000111110011001111111110000000000000000111010000100001000111100 + 0x0f99ff0000e8423c, + //1001100111111111000000000000000011101000010000100011110000001111 + 0x99ff0000e8423c0f, + //1111111100000000000000001110100001000010001111000000111110011001 + 0xff0000e8423c0f99 +}; +*/ + +/* + * Compressed version of table above, saves 48 bytes. Rotating value in register + * results in exactly the same size as full table, due to cost of loading values + * into registers. + */ +static uint8_t ecc_matrix[] = { + 0x00, 0x00, 0xe8, 0x42, 0x3c, 0x0f, 0x99, 0xff, + 0x00, 0x00, 0xe8, 0x42, 0x3c, 0x0f, 0x99 +}; + +static uint8_t syndrome_matrix[] = { + GD, E7, E6, UE, E5, UE, UE, 47, E4, UE, UE, 37, UE, 35, 39, UE, + E3, UE, UE, 48, UE, 30, 29, UE, UE, 57, 27, UE, 31, UE, UE, UE, + E2, UE, UE, 17, UE, 18, 40, UE, UE, 58, 22, UE, 21, UE, UE, UE, + UE, 16, 49, UE, 19, UE, UE, UE, 23, UE, UE, UE, UE, 20, UE, UE, + E1, UE, UE, 51, UE, 46, 9, UE, UE, 34, 10, UE, 32, UE, UE, 36, + UE, 62, 50, UE, 14, UE, UE, UE, 13, UE, UE, UE, UE, UE, UE, UE, + UE, 61, 8, UE, 41, UE, UE, UE, 11, UE, UE, UE, UE, UE, UE, UE, + 15, UE, UE, UE, UE, UE, UE, UE, UE, UE, 12, UE, UE, UE, UE, UE, + E0, UE, UE, 55, UE, 45, 43, UE, UE, 56, 38, UE, 1, UE, UE, UE, + UE, 25, 26, UE, 2, UE, UE, UE, 24, UE, UE, UE, UE, UE, 28, UE, + UE, 59, 54, UE, 42, UE, UE, 44, 6, UE, UE, UE, UE, UE, UE, UE, + 5, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, + UE, 63, 53, UE, 0, UE, UE, UE, 33, UE, UE, UE, UE, UE, UE, UE, + 3, UE, UE, 52, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, + 7, UE, UE, UE, UE, UE, UE, UE, UE, 60, UE, UE, UE, UE, UE, UE, + UE, UE, UE, UE, 4, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, UE, +}; + +static inline uint32_t rd32(void *addr) +{ + uint64_t ret; + + /* Cache-inhibited load word */ + asm volatile("lwzcix %0, 0, %1" + : "=r" (ret) + : "r" (addr) + : ); + + return ret; +} + +static uint64_t rd64_unaligned(void *addr, int first_read) +{ + static uint64_t tmp1; /* static is used to reduce number of PNOR reads */ + uint64_t tmp2; + uint64_t ret; + uint64_t addr_aligned = ALIGN_DOWN((uint64_t)addr, 8); + unsigned shift = 8 * ((uint64_t) addr - addr_aligned); + + if (shift == 0 /* Previous tmp2 ended with ECC byte */ + || first_read) { /* or it is the first invocation from remove_ecc */ + asm volatile("ldcix %0, 0, %1" + : "=r" (tmp1) + : "r" (addr_aligned) + : ); + } + + asm volatile("ldcix %0, 0, %1" + : "=r" (tmp2) + : "r" (addr_aligned+8) + : ); + + ret = (tmp1 << shift) | (tmp2 >> (64 - shift)); + tmp1 = tmp2; + 
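+	/*
+	 * In the unaligned case ret now holds the 8 bytes that start at the
+	 * original address: the tail of the previous aligned doubleword joined
+	 * with the head of the one fetched above. Keeping tmp1 in a static
+	 * variable means each aligned doubleword is read from PNOR only once
+	 * per pass.
+	 */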
return ret;
+}
+
+/*
+ * memcpy from cache-inhibited source
+ *
+ * Assume src is 8B-aligned and does not overlap with dest. Copies ALIGN(n,8)
+ * bytes, make sure dest is big enough.
+ */
+static inline void memcpy_ci_src(void *dest, const void *src, size_t n)
+{
+	int i;
+	uint64_t tmp;
+	for (i = 0; i < n; i += 8) {
+		asm volatile("ldcix %0, %1, %2"
+			     : "=r" (tmp)
+			     : "b"(src), "r" (i));
+		asm volatile("stdx %0, %1, %2"
+			     :: "r" (tmp), "b"(dest), "r" (i)
+			     : "memory");
+	}
+}
+
+static uint8_t generate_ecc(uint64_t i_data)
+{
+	uint8_t result = 0;
+
+	for (int i = 0; i < 8; i++) {
+		result |= __builtin_parityll((*(uint64_t *)&ecc_matrix[i]) & i_data) << i;
+	}
+	return result;
+}
+
+static uint8_t verify_ecc(uint64_t i_data, uint8_t i_ecc)
+{
+	return syndrome_matrix[generate_ecc(i_data) ^ i_ecc];
+}
+
+static uint8_t correct_ecc(uint64_t *io_data, uint8_t *io_ecc)
+{
+	uint8_t bad_bit = verify_ecc(*io_data, *io_ecc);
+
+	if ((bad_bit != GD) && (bad_bit != UE)) { /* Good is done, UE is hopeless */
+		/* Determine if the ECC or data part is bad, do bit flip. */
+		if (bad_bit >= E7) {
+			*io_ecc ^= (1 << (bad_bit - E7));
+		} else {
+			*io_data ^= (1ull << (63 - bad_bit));
+		}
+	}
+	return bad_bit;
+}
+
+static ecc_status_t remove_ecc(uint8_t *io_src, size_t i_srcSz,
+			       uint8_t *o_dst, size_t i_dstSz)
+{
+	ecc_status_t rc = CLEAN;
+	int first_read = 1;
+
+	for (size_t i = 0, o = 0; i < i_srcSz;
+	     i += sizeof(uint64_t) + sizeof(uint8_t), o += sizeof(uint64_t)) {
+		/*
+		 * Read data and ECC parts. Reads from cache-inhibited storage always
+		 * have to be aligned!
+		 */
+		uint64_t data = rd64_unaligned(&io_src[i], first_read);
+		first_read = 0;
+
+		uint8_t ecc = io_src[i + sizeof(uint64_t)];
+
+		/* Calculate failing bit and fix data */
+		uint8_t bad_bit = correct_ecc(&data, &ecc);
+
+		/* Perform correction and status update */
+		if (bad_bit == UE)
+		{
+			rc = UNCORRECTABLE;
+		}
+		/* Unused, our source is not writable */
+		/*
+		else if (bad_bit != GD)
+		{
+			if (rc != UNCORRECTABLE)
+			{
+				rc = CORRECTED;
+			}
+
+			*(uint64_t*)(&io_src[i]) = data;
+			io_src[i + sizeof(uint64_t)] = ecc;
+		}
+		*/
+
+		/* Copy fixed data to destination buffer */
+		*(uint64_t*)(&o_dst[o]) = data;
+	}
+	return rc;
+}
+
+static char *pnor_base;
+
+/*
+ * PNOR has to be accessed with Cache Inhibited forms of instructions, and they
+ * require that the address is aligned, so we can't just memcpy the data.
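+ *
+ * The readat() helpers below therefore work in up to three phases: a partial
+ * head doubleword, a run of whole aligned doublewords, and a partial tail
+ * doubleword, staging the partial ones through a small bounce buffer.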
+ */ +static ssize_t no_ecc_readat(const struct region_device *rd, void *b, + size_t offset, size_t size) +{ + uint8_t tmp[8]; + offset -= rd->region.offset; + size_t off_a = ALIGN_DOWN(offset, 8); + size_t size_left = size; + char *part_base = pnor_base + rd->region.offset; + + /* If offset is not 8B-aligned */ + if (offset & 0x7) { + int i; + memcpy_ci_src(tmp, &part_base[off_a], 8); + for (i = 8 - (offset & 7); i < 8; i++) { + *((uint8_t *)(b++)) = tmp[i]; + if (!--size_left) + return size; + } + off_a += 8; + } + + /* Align down size_left to 8B */ + memcpy_ci_src(b, &part_base[off_a], ALIGN_DOWN(size_left, 8)); + + /* Copy the rest of requested unaligned data, if any */ + if (size_left & 7) { + off_a += ALIGN_DOWN(size_left, 8); + b += ALIGN_DOWN(size_left, 8); + int i; + memcpy_ci_src(tmp, &part_base[off_a], 8); + for (i = 0; i < (size_left & 7); i++) { + *((uint8_t *)(b++)) = tmp[i]; + } + } + + return size; +} + +static ssize_t ecc_readat(const struct region_device *rd, void *b, + size_t offset, size_t size) +{ + uint8_t tmp[8]; + offset -= rd->region.offset; + size_t off_a = ALIGN_DOWN(offset, 8); + size_t size_left = size; + char *part_base = pnor_base + rd->region.offset; + + /* If offset is not 8B-aligned */ + if (offset & 0x7) { + int i; + remove_ecc((uint8_t *) &part_base[(off_a * 9)/8], 9, tmp, 8); + for (i = 8 - (offset & 7); i < 8; i++) { + *((uint8_t *)(b++)) = tmp[i]; + if (!--size_left) + return size; + } + off_a += 8; + } + + /* Align down size_left to 8B */ + remove_ecc((uint8_t *) &part_base[(off_a * 9)/8], + (ALIGN_DOWN(size_left, 8) * 9) / 8, + b, + ALIGN_DOWN(size_left, 8)); + + /* Copy the rest of requested unaligned data, if any */ + if (size_left & 7) { + off_a += ALIGN_DOWN(size_left, 8); + b += ALIGN_DOWN(size_left, 8); + int i; + remove_ecc((uint8_t *) &part_base[(off_a * 9)/8], 9, tmp, 8); + for (i = 0; i < (size_left & 7); i++) { + *((uint8_t *)(b++)) = tmp[i]; + } + } + + return size; +} + +struct region_device_ops no_rdev_ops = {}; + +struct region_device_ops no_ecc_rdev_ops = { + .mmap = mmap_helper_rdev_mmap, + .munmap = mmap_helper_rdev_munmap, + .readat = no_ecc_readat, +}; + +struct region_device_ops ecc_rdev_ops = { + .mmap = mmap_helper_rdev_mmap, + .munmap = mmap_helper_rdev_munmap, + .readat = ecc_readat, +}; + +void mount_part_from_pnor(const char *part_name, + struct mmap_helper_region_device *mdev) +{ + size_t base, size; + unsigned int i, block_size, entry_count = 0; + struct ffs_hdr *hdr_pnor = (struct ffs_hdr *)LPC_FLASH_TOP; + + /* This loop could be skipped if we may assume that PNOR is always 64M */ + while (hdr_pnor > (struct ffs_hdr *)LPC_FLASH_MIN) { + uint32_t csum = 0; + /* Size is aligned up to 8 because of how memcpy_ci_src works */ + uint8_t buffer[ALIGN(FFS_HDR_SIZE, 8)]; + struct ffs_hdr *hdr = (struct ffs_hdr *)buffer; + + /* Assume block_size = 4K */ + hdr_pnor = (struct ffs_hdr *)(((char *)hdr_pnor) - 0x1000); + + if (FFS_MAGIC != rd32(&hdr_pnor->magic)) + continue; + + if (FFS_VERSION_1 != rd32(&hdr_pnor->version)) + continue; + + /* Copy the header so we won't have to rd32() for further accesses */ + memcpy_ci_src(buffer, hdr_pnor, FFS_HDR_SIZE); + csum = hdr->magic ^ hdr->version ^ hdr->size ^ hdr->entry_size ^ + hdr->entry_count ^ hdr->block_size ^ hdr->block_count ^ + hdr->resvd[0] ^ hdr->resvd[1] ^ hdr->resvd[2] ^ hdr->resvd[3] ^ + hdr->checksum; + if (csum != 0) continue; + + pnor_base = (char *) LPC_FLASH_TOP - hdr->block_size * hdr->block_count; + entry_count = hdr->entry_count; + block_size = hdr->block_size; + + 
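+		/*
+		 * Note that the checksum verified above is a plain XOR over all
+		 * header words including the checksum field itself, so a valid
+		 * header XORs to zero.
+		 */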
/* Every byte counts when building for SEEPROM */ +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + printk(BIOS_DEBUG, "FFS header at %p\n", hdr_pnor); + printk(BIOS_SPEW, " size %x\n", hdr->size); + printk(BIOS_SPEW, " entry_size %x\n", hdr->entry_size); + printk(BIOS_SPEW, " entry_count %x\n", hdr->entry_count); + printk(BIOS_SPEW, " block_size %x\n", hdr->block_size); + printk(BIOS_SPEW, " block_count %x\n", hdr->block_count); + printk(BIOS_DEBUG, "PNOR base at %p\n", pnor_base); +#endif + + break; + } + + if (hdr_pnor <= (struct ffs_hdr *)LPC_FLASH_MIN) + die("FFS header not found!\n"); + + for (i = 0; i < entry_count; i++) { + uint32_t *val, csum = 0; + int j; + /* Size is aligned up to 8 because of how memcpy_ci_src works */ + uint8_t buffer[ALIGN(FFS_ENTRY_SIZE, 8)]; + struct ffs_entry *e = (struct ffs_entry *)buffer; + + /* Copy the entry so we won't have to rd32() for further accesses */ + memcpy_ci_src(buffer, &hdr_pnor->entries[i], FFS_ENTRY_SIZE); + + /* Every byte counts when building for SEEPROM */ +#if !CONFIG(BOOTBLOCK_IN_SEEPROM) + printk(BIOS_SPEW, "%s: base %x, size %x (%x)\n\t type %x, flags %x\n", + e->name, e->base, e->size, e->actual, e->type, e->flags); +#endif + + if (strcmp(e->name, part_name) != 0) + continue; + + val = (uint32_t *) e; + for (j = 0; j < (FFS_ENTRY_SIZE / sizeof(uint32_t)); j++) + csum ^= val[j]; + + if (csum != 0) + continue; + + base = block_size * e->base; + /* This is size of the partition, it does not include header or ECC */ + size = e->actual; + + mdev->rdev.ops = &no_ecc_rdev_ops; + + if (e->user.data[0] & FFS_ENRY_INTEG_ECC) { + printk(BIOS_DEBUG, "%s partition has ECC\n", part_name); + mdev->rdev.ops = &ecc_rdev_ops; + size = size / 9 * 8; + } + + if ((e->user.data[1] >> 24) & FFS_VERS_SHA512) { + /* Skip PNOR partition header */ + base += 0x1000; + + /* Possibly skip ECC of the header */ + if (e->user.data[0] & FFS_ENRY_INTEG_ECC) + base += 0x200; + } + + mdev->rdev.region.offset = base; + mdev->rdev.region.size = size; + + break; + } +} + +static struct mmap_helper_region_device memd_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void memd_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(MEMD_PARTITION_NAME, &memd_mdev); + + init_done = 1; +} + +void memd_device_unmount(void) +{ + memd_mdev.rdev.ops = &no_rdev_ops; +} + +const struct region_device *memd_device_ro(void) +{ + return &memd_mdev.rdev; +} + +static struct mmap_helper_region_device mvpd_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void mvpd_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(MVPD_PARTITION_NAME, &mvpd_mdev); + + init_done = 1; +} + +void mvpd_device_unmount(void) +{ + mvpd_mdev.rdev.ops = &no_rdev_ops; +} + +const struct region_device *mvpd_device_ro(void) +{ + return &mvpd_mdev.rdev; +} + +static struct mmap_helper_region_device wof_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void wof_device_init(void) +{ + static int init_done; + if (init_done) + return; + + mount_part_from_pnor(WOF_PARTITION_NAME, &wof_mdev); + + init_done = 1; +} + +void wof_device_unmount(void) +{ + wof_mdev.rdev.ops = &no_rdev_ops; +} + +const struct region_device *wof_device_ro(void) +{ + return &wof_mdev.rdev; +} + +static struct mmap_helper_region_device boot_mdev = MMAP_HELPER_DEV_INIT( + &no_ecc_rdev_ops, 0, CONFIG_ROM_SIZE, &cbfs_cache); + +void boot_device_init(void) +{ + static 
int init_done; + if (init_done) + return; + + mount_part_from_pnor(CBFS_PARTITION_NAME, &boot_mdev); + + init_done = 1; +} + +const struct region_device *boot_device_ro(void) +{ + return &boot_mdev.rdev; +} diff --git a/src/soc/ibm/power9/romstage.c b/src/soc/ibm/power9/romstage.c new file mode 100644 index 00000000000..56a48157191 --- /dev/null +++ b/src/soc/ibm/power9/romstage.c @@ -0,0 +1,497 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fsi.h" +#include "pci.h" + +/* DIMM SPD addresses */ +#define DIMM0 0x50 +#define DIMM1 0x51 +#define DIMM2 0x52 +#define DIMM3 0x53 +#define DIMM4 0x54 +#define DIMM5 0x55 +#define DIMM6 0x56 +#define DIMM7 0x57 + +mcbist_data_t mem_data[MAX_CHIPS]; + +static void dump_mca_data(mca_data_t *mca) +{ + printk(BIOS_SPEW, "\tCL = %d\n", mca->cl); + printk(BIOS_SPEW, "\tCCD_L = %d\n", mca->nccd_l); + printk(BIOS_SPEW, "\tWTR_S = %d\n", mca->nwtr_s); + printk(BIOS_SPEW, "\tWTR_L = %d\n", mca->nwtr_l); + printk(BIOS_SPEW, "\tFAW = %d\n", mca->nfaw); + printk(BIOS_SPEW, "\tRCD = %d\n", mca->nrcd); + printk(BIOS_SPEW, "\tRP = %d\n", mca->nrp); + printk(BIOS_SPEW, "\tRAS = %d\n", mca->nras); + printk(BIOS_SPEW, "\tWR = %d\n", mca->nwr); + printk(BIOS_SPEW, "\tRRD_S = %d\n", mca->nrrd_s); + printk(BIOS_SPEW, "\tRRD_L = %d\n", mca->nrrd_l); + printk(BIOS_SPEW, "\tRFC = %d\n", mca->nrfc); + printk(BIOS_SPEW, "\tRFC_DLR = %d\n", mca->nrfc_dlr); + + int i; + for (i = 0; i < 2; i++) { + if (mca->dimm[i].present) { + printk(BIOS_SPEW, "\tDIMM%d: %dRx%d ", i, mca->dimm[i].mranks, + (mca->dimm[i].width + 1) * 4); + + if (mca->dimm[i].log_ranks != mca->dimm[i].mranks) + printk(BIOS_SPEW, "%dH 3DS ", mca->dimm[i].log_ranks / mca->dimm[i].mranks); + + printk(BIOS_SPEW, "%dGB\n", mca->dimm[i].size_gb); + } else { + printk(BIOS_SPEW, "\tDIMM%d: not installed\n", i); + } + } +} + +/* TODO: add checks for same ranks configuration for both DIMMs under one MCA */ +static inline bool is_proper_dimm(spd_raw_data spd, int slot) +{ + struct dimm_attr_ddr4_st attr; + if (spd == NULL) + return false; + + if (spd_decode_ddr4(&attr, spd) != SPD_STATUS_OK) { + printk(BIOS_ERR, "Malformed SPD for slot %d\n", slot); + return false; + } + + if (attr.dram_type != SPD_MEMORY_TYPE_DDR4_SDRAM || + attr.dimm_type != SPD_DDR4_DIMM_TYPE_RDIMM || + !attr.ecc_extension) { + printk(BIOS_ERR, "Bad DIMM type in slot %d\n", slot); + return false; + } + + return true; +} + +static void mark_nonfunctional(uint8_t chip, int mcs, int mca) +{ + mem_data[chip].mcs[mcs].mca[mca].functional = false; + + /* Propagate upwards */ + if (!mem_data[chip].mcs[mcs].mca[mca ^ 1].functional) { + mem_data[chip].mcs[mcs].functional = false; + if (!mem_data[chip].mcs[mcs ^ 1].functional) + printk(BIOS_INFO, "No functional MCS left on chip %d\n", chip); + } +} + +static uint64_t find_min_mtb_ftb(uint8_t chip, rdimm_data_t *dimm, int mtb_idx, int ftb_idx) +{ + uint64_t val0 = 0, val1 = 0; + + if (dimm[0].present) + val0 = mtb_ftb_to_nck(chip, dimm[0].spd[mtb_idx], (int8_t)dimm[0].spd[ftb_idx]); + if (dimm[1].present) + val1 = mtb_ftb_to_nck(chip, dimm[1].spd[mtb_idx], (int8_t)dimm[1].spd[ftb_idx]); + + return (val0 < val1) ? 
val1 : val0;
+}
+
+static uint64_t find_min_multi_mtb(uint8_t chip, rdimm_data_t *dimm, int mtb_l, int mtb_h,
+				   uint8_t mask, int shift)
+{
+	uint64_t val0 = 0, val1 = 0;
+
+	if (dimm[0].present)
+		val0 = dimm[0].spd[mtb_l] | ((dimm[0].spd[mtb_h] & mask) << shift);
+	if (dimm[1].present)
+		val1 = dimm[1].spd[mtb_l] | ((dimm[1].spd[mtb_h] & mask) << shift);
+
+	return (val0 < val1) ? mtb_ftb_to_nck(chip, val1, 0) : mtb_ftb_to_nck(chip, val0, 0);
+}
+
+/* This is most of step 7 condensed into one function */
+static void prepare_cpu_dimm_data(uint8_t chip)
+{
+	int i, mcs, mca;
+	int tckmin = 0x06;	// Platform limit
+	unsigned int spd_bus = I2C_BUSES_PER_CPU * chip + SPD_I2C_BUS;
+
+	/*
+	 * DIMMs 4-7 are under a different port. This is not the same as bus, but we
+	 * need to pass that information to the I2C function. As there is no easier
+	 * way, use the MSB of the address and mask it out at the receiving side.
+	 * This will print wrong addresses in dump_spd_info(), but that is a small
+	 * price to pay.
+	 */
+	struct spd_block blk = {
+		.addr_map = { DIMM0, DIMM1, DIMM2, DIMM3,
+			      DIMM4 | 0x80, DIMM5 | 0x80, DIMM6 | 0x80, DIMM7 | 0x80 },
+	};
+
+	get_spd_i2c(spd_bus, &blk);
+	dump_spd_info(&blk);
+
+	/*
+	 * We need to find the highest common (for all DIMMs and the platform)
+	 * supported frequency, meaning we need to compare minimum clock cycle times
+	 * and choose the highest value. For the range supported by the platform we
+	 * can check MTB only.
+	 *
+	 * TODO: check if we can have different frequencies across MCSs.
+	 */
+	for (i = 0; i < CONFIG_DIMM_MAX; i++) {
+		if (is_proper_dimm(blk.spd_array[i], i)) {
+			mcs = i / DIMMS_PER_MCS;
+			mca = (i % DIMMS_PER_MCS) / MCA_PER_MCS;
+			int dimm_idx = i % 2;	// (i % DIMMS_PER_MCS) % MCA_PER_MCS
+
+			/* Maximum for 2 DIMMs on one port (channel, MCA) is 2400 MT/s */
+			if (tckmin < 0x07 && mem_data[chip].mcs[mcs].mca[mca].functional)
+				tckmin = 0x07;
+
+			mem_data[chip].mcs[mcs].functional = true;
+			mem_data[chip].mcs[mcs].mca[mca].functional = true;
+
+			rdimm_data_t *dimm = &mem_data[chip].mcs[mcs].mca[mca].dimm[dimm_idx];
+
+			dimm->present = true;
+			dimm->spd = blk.spd_array[i];
+			/* RCD address is the same as SPD, with one additional bit set */
+			dimm->rcd_i2c_addr = blk.addr_map[i] | 0x08;
+			/*
+			 * SPD fields in spd.h are not compatible with DDR4 and those in
+			 * spd_bin.h are just a few of all required.
+			 *
+			 * TODO: add fields that are lacking to either of those files or
+			 * add a file specific to DDR4 SPD.
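+			 *
+			 * For orientation, the decoding below reads raw SPD bytes
+			 * directly: byte 12 = 0x09, for example, means device width
+			 * code 1 (x8) and 2 package ranks, i.e. a 2Rx8 module.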
+ */ + dimm->width = blk.spd_array[i][12] & 7; + dimm->mranks = ((blk.spd_array[i][12] >> 3) & 0x7) + 1; + dimm->log_ranks = dimm->mranks * (((blk.spd_array[i][6] >> 4) & 0x7) + 1); + dimm->density = blk.spd_array[i][4] & 0xF; + dimm->size_gb = (1 << (dimm->density - 2)) * (2 - dimm->width) * + dimm->log_ranks; + + if ((blk.spd_array[i][5] & 0x38) == 0x30) + die("DIMMs with 18 row address bits are not supported\n"); + + if (blk.spd_array[i][18] > tckmin) + tckmin = blk.spd_array[i][18]; + } + } + + switch (tckmin) { + /* For CWL assume 1tCK write preamble */ + case 0x06: + mem_data[chip].speed = 2666; + mem_data[chip].cwl = 14; + break; + case 0x07: + mem_data[chip].speed = 2400; + mem_data[chip].cwl = 12; + break; + case 0x08: + mem_data[chip].speed = 2133; + mem_data[chip].cwl = 11; + break; + case 0x09: + mem_data[chip].speed = 1866; + mem_data[chip].cwl = 10; + break; + default: + die("Unsupported tCKmin: %d ps (+/- 125)\n", tckmin * 125); + } + + /* Now that we know our speed, we can calculate the rest of the data */ + mem_data[chip].nrefi = ns_to_nck(chip, 7800); + mem_data[chip].nrtp = ps_to_nck(chip, 7500); + printk(BIOS_SPEW, "Common memory parameters:\n" + "\tspeed =\t%d MT/s\n" + "\tREFI =\t%d clock cycles\n" + "\tCWL =\t%d clock cycles\n" + "\tRTP =\t%d clock cycles\n", + mem_data[chip].speed, mem_data[chip].nrefi, + mem_data[chip].cwl, mem_data[chip].nrtp); + + for (mcs = 0; mcs < MCS_PER_PROC; mcs++) { + if (!mem_data[chip].mcs[mcs].functional) continue; + for (mca = 0; mca < MCA_PER_MCS; mca++) { + if (!mem_data[chip].mcs[mcs].mca[mca].functional) continue; + + rdimm_data_t *dimm = mem_data[chip].mcs[mcs].mca[mca].dimm; + uint32_t val0, val1, common; + int min; /* Minimum compatible with both DIMMs is the bigger value */ + + /* CAS Latency */ + val0 = dimm[0].present ? le32_to_cpu(*(uint32_t *)&dimm[0].spd[20]) : -1; + val1 = dimm[1].present ? le32_to_cpu(*(uint32_t *)&dimm[1].spd[20]) : -1; + /* Assuming both DIMMs are in low CL range, true for all DDR4 speed bins */ + common = val0 & val1; + + /* tAAmin - minimum CAS latency time */ + min = find_min_mtb_ftb(chip, dimm, 24, 123); + while (min <= 36 && ((common >> (min - 7)) & 1) == 0) + min++; + + if (min > 36) { + /* Maybe just die() instead? */ + printk(BIOS_WARNING, "Cannot find CL supported by all DIMMs under MCS%d, MCA%d." + " Marking as nonfunctional.\n", mcs, mca); + mark_nonfunctional(chip, mcs, mca); + continue; + } + + mem_data[chip].mcs[mcs].mca[mca].cl = min; + + /* + * There are also minimal values in Table 170 of JEDEC Standard No. 79-4C which + * probably should also be honored. Some of them (e.g. RRD) depend on the page + * size, which depends on DRAM width. On tested DIMM they are just right - it is + * either minimal legal value or rounded up to whole clock cycle. Can we rely on + * vendors to put sane values in SPD or do we have to check them for validity? 
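+			 *
+			 * For reference, these timings are encoded in 125 ps MTB units
+			 * plus a signed 1 ps FTB correction: e.g. a tAAmin of 13.75 ns
+			 * is MTB byte 110 (110 * 125 ps) with FTB 0, which
+			 * mtb_ftb_to_nck() then converts to whole clock cycles.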
+ */ + + /* Minimum CAS to CAS Delay Time, Same Bank Group */ + mem_data[chip].mcs[mcs].mca[mca].nccd_l = find_min_mtb_ftb(chip, dimm, 40, 117); + + /* Minimum Write to Read Time, Different Bank Group */ + mem_data[chip].mcs[mcs].mca[mca].nwtr_s = find_min_multi_mtb(chip, dimm, 44, 43, 0x0F, 8); + + /* Minimum Write to Read Time, Same Bank Group */ + mem_data[chip].mcs[mcs].mca[mca].nwtr_l = find_min_multi_mtb(chip, dimm, 45, 43, 0xF0, 4); + + /* Minimum Four Activate Window Delay Time */ + mem_data[chip].mcs[mcs].mca[mca].nfaw = find_min_multi_mtb(chip, dimm, 37, 36, 0x0F, 8); + + /* Minimum RAS to CAS Delay Time */ + mem_data[chip].mcs[mcs].mca[mca].nrcd = find_min_mtb_ftb(chip, dimm, 25, 122); + + /* Minimum Row Precharge Delay Time */ + mem_data[chip].mcs[mcs].mca[mca].nrp = find_min_mtb_ftb(chip, dimm, 26, 121); + + /* Minimum Active to Precharge Delay Time */ + mem_data[chip].mcs[mcs].mca[mca].nras = find_min_multi_mtb(chip, dimm, 28, 27, 0x0F, 8); + + /* Minimum Write Recovery Time */ + mem_data[chip].mcs[mcs].mca[mca].nwr = find_min_multi_mtb(chip, dimm, 42, 41, 0x0F, 8); + + /* Minimum Activate to Activate Delay Time, Different Bank Group */ + mem_data[chip].mcs[mcs].mca[mca].nrrd_s = find_min_mtb_ftb(chip, dimm, 38, 119); + + /* Minimum Activate to Activate Delay Time, Same Bank Group */ + mem_data[chip].mcs[mcs].mca[mca].nrrd_l = find_min_mtb_ftb(chip, dimm, 39, 118); + + /* Minimum Refresh Recovery Delay Time */ + /* Assuming no fine refresh mode. */ + mem_data[chip].mcs[mcs].mca[mca].nrfc = find_min_multi_mtb(chip, dimm, 30, 31, 0xFF, 8); + + /* Minimum Refresh Recovery Delay Time for Different Logical Rank (3DS only) */ + /* + * This one is set per MCA, but it depends on DRAM density, which can be + * mixed between DIMMs under the same channel. We need to choose the bigger + * minimum time, which corresponds to higher density. + * + * Assuming no fine refresh mode. + */ + val0 = dimm[0].present ? dimm[0].spd[4] & 0xF : 0; + val1 = dimm[1].present ? dimm[1].spd[4] & 0xF : 0; + min = (val0 < val1) ? val1 : val0; + + switch (min) { + case 0x4: + mem_data[chip].mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(chip, 90); + break; + case 0x5: + mem_data[chip].mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(chip, 120); + break; + case 0x6: + mem_data[chip].mcs[mcs].mca[mca].nrfc_dlr = ns_to_nck(chip, 185); + break; + default: + die("Unsupported DRAM density\n"); + } + + printk(BIOS_SPEW, "MCS%d, MCA%d times (in clock cycles):\n", mcs, mca); + dump_mca_data(&mem_data[chip].mcs[mcs].mca[mca]); + } + } +} + +/* This is most of step 7 condensed into one function */ +static void prepare_dimm_data(uint8_t chips) +{ + bool have_dimms = false; + + uint8_t chip; + + for (chip = 0; chip < MAX_CHIPS; chip++) { + int mcs; + + if (chips & (1 << chip)) + prepare_cpu_dimm_data(chip); + + for (mcs = 0; mcs < MCS_PER_PROC; mcs++) + have_dimms |= mem_data[chip].mcs[mcs].functional; + } + + /* + * There is one (?) MCBIST per CPU. Fail if there are no supported DIMMs + * connected, otherwise assume it is functional. There is no reason to redo + * this test in the rest of isteps. 
+ */ + if (!have_dimms) + die("No DIMMs detected, aborting\n"); +} + +static void build_mvpds(uint8_t chips) +{ + uint8_t chip; + + printk(BIOS_NOTICE, "Building MVPDs...\n"); + + /* Calling mvpd_get_available_cores() triggers building and caching of MVPD */ + for (chip = 0; chip < MAX_CHIPS; ++chip) { + if (chips & (1 << chip)) + (void)mvpd_get_available_cores(chip); + } +} + +void main(void) +{ + uint8_t chips; + + struct pci_info pci_info[MAX_CHIPS] = { 0 }; + + init_timer(); + + timestamp_add_now(TS_ROMSTAGE_START); + + console_init(); + + if (ipmi_premem_init(CONFIG_BMC_BT_BASE, 0) != CB_SUCCESS) + die("Failed to initialize IPMI\n"); + + /* + * Two minutes to load. + * Not handling return code, because the function itself prints log messages + * and its failure is not a critical error. + */ + (void)ipmi_init_and_start_bmc_wdt(CONFIG_BMC_BT_BASE, 120, TIMEOUT_HARD_RESET); + + printk(BIOS_DEBUG, "Initializing FSI...\n"); + fsi_init(); + chips = fsi_get_present_chips(); + printk(BIOS_DEBUG, "Initialized FSI (chips mask: 0x%02X)\n", chips); + + build_mvpds(chips); + + istep_8_1(chips); + istep_8_2(chips); + istep_8_3(chips); + istep_8_4(chips); + istep_8_9(chips); + istep_8_10(chips); + istep_8_11(chips); + + istep_9_2(chips); + istep_9_4(chips); + istep_9_6(chips); + istep_9_7(chips); + + istep_10_1(chips); + istep_10_6(chips); + istep_10_10(chips, pci_info); + istep_10_12(chips); + istep_10_13(chips); + + timestamp_add_now(TS_INITRAM_START); + + vpd_pnor_main(); + prepare_dimm_data(chips); + + report_istep(13, 1); // no-op + istep_13_2(chips); + istep_13_3(chips); + istep_13_4(chips); + report_istep(13, 5); // no-op + istep_13_6(chips); + report_istep(13, 7); // no-op + istep_13_8(chips); + istep_13_9(chips); + istep_13_10(chips); + istep_13_11(chips); + report_istep(13, 12); // optional, not yet implemented + istep_13_13(chips); + + istep_14_1(chips); + istep_14_2(chips); + istep_14_3(chips, pci_info); + report_istep(14, 4); // no-op + istep_14_5(chips); + + timestamp_add_now(TS_INITRAM_END); + + /* Test if SCOM still works. Maybe should check also indirect access? */ + printk(BIOS_WARNING, "0xF000F = %llx\n", read_scom(0, 0xF000F)); + + /* + * Halt to give a chance to inspect FIRs, otherwise checkstops from + * ramstage may cover up the failure in romstage. 
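+	 * A dead SCOM path reads back as all ones, which is exactly what the
+	 * check below tests for.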
+ */ + if (read_scom(0, 0xF000F) == 0xFFFFFFFFFFFFFFFF) + die("SCOM stopped working, check FIRs, halting now\n"); + + cbmem_initialize_empty(); + run_ramstage(); +} + +/* Stores global mem_data variable into cbmem for future use by ramstage */ +static void store_mem_data(int is_recovery) +{ + const struct cbmem_entry *entry; + uint8_t *data; + int dimm_i; + + (void)is_recovery; /* unused */ + + /* Layout: mem_data itself then SPD data of each dimm which has it */ + entry = cbmem_entry_add(CBMEM_ID_MEMINFO, sizeof(mem_data) + + MAX_CHIPS * DIMMS_PER_PROC * CONFIG_DIMM_SPD_SIZE); + if (entry == NULL) + die("Failed to add mem_data entry to CBMEM in romstage!"); + + data = cbmem_entry_start(entry); + + memcpy(data, &mem_data, sizeof(mem_data)); + data += sizeof(mem_data); + + for (dimm_i = 0; dimm_i < MAX_CHIPS * DIMMS_PER_PROC; dimm_i++) { + int chip = dimm_i / DIMMS_PER_PROC; + int mcs = (dimm_i % DIMMS_PER_PROC) / DIMMS_PER_MCS; + int mca = (dimm_i % DIMMS_PER_MCS) / DIMMS_PER_MCA; + int dimm = dimm_i % DIMMS_PER_MCA; + + rdimm_data_t *dimm_data = &mem_data[chip].mcs[mcs].mca[mca].dimm[dimm]; + if (dimm_data->spd == NULL) + continue; + + memcpy(data, dimm_data->spd, CONFIG_DIMM_SPD_SIZE); + data += CONFIG_DIMM_SPD_SIZE; + } +} +CBMEM_CREATION_HOOK(store_mem_data); diff --git a/src/soc/ibm/power9/sbeio.c b/src/soc/ibm/power9/sbeio.c new file mode 100644 index 00000000000..c9400e9429f --- /dev/null +++ b/src/soc/ibm/power9/sbeio.c @@ -0,0 +1,236 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "fsi.h" + +enum fifo_regs { + SBE_FIFO_UPFIFO_DATA_IN = 0x00002400, + SBE_FIFO_UPFIFO_STATUS = 0x00002404, + SBE_FIFO_UPFIFO_SIG_EOT = 0x00002408, + SBE_FIFO_UPFIFO_REQ_RESET = 0x0000240C, + SBE_FIFO_DNFIFO_DATA_OUT = 0x00002440, + SBE_FIFO_DNFIFO_STATUS = 0x00002444, + SBE_FIFO_DNFIFO_RESET = 0x00002450, + SBE_FIFO_DNFIFO_ACK_EOT = 0x00002454, + SBE_FIFO_DNFIFO_MAX_TSFR = 0x00002458, +}; + +enum { + SBE_FIFO_CLASS_SCOM_ACCESS = 0xA2, + SBE_FIFO_CMD_GET_SCOM = 0x01, + SBE_FIFO_CMD_PUT_SCOM = 0x02, + + FSB_FIFO_SIG_EOT = 0x80000000, + MAX_FIFO_TIMEOUT_US = 2 * 1000 * 1000, // Hostboot waits up to 90s! 
+ + FIFO_STATUS_MAGIC = 0xC0DE, +}; + +struct get_scom_request_t { + uint32_t word_count; // size in uint32_t (4) + uint16_t reserved; // 0 + uint8_t cmd_class; // SBE_FIFO_CLASS_SCOM_ACCESS + uint8_t cmd; // SBE_FIFO_CMD_GET_SCOM + uint64_t addr; +} __attribute__((packed)); + +struct put_scom_request_t { + uint32_t word_count; // size in uint32_t (6) + uint16_t reserved; // 0 + uint8_t cmd_class; // SBE_FIFO_CLASS_SCOM_ACCESS + uint8_t cmd; // SBE_FIFO_CMD_PUT_SCOM + uint64_t addr; + uint64_t data; +} __attribute__((packed)); + +/* This structure is part of every response */ +struct status_hdr_t { + uint16_t magic; // FIFO_STATUS_MAGIC + uint8_t cmd_class; + uint8_t cmd; + uint16_t primary_status; + uint16_t secondary_status; +} __attribute__((packed)); + +static void fifo_push(uint8_t chip, uint32_t addr, uint32_t data) +{ + enum { UPFIFO_STATUS_FIFO_FULL = 0x00200000 }; + + uint64_t elapsed_time_us = 0; + + while (true) { + uint32_t status = read_fsi(chip, SBE_FIFO_UPFIFO_STATUS); + if (!(status & UPFIFO_STATUS_FIFO_FULL)) + break; + + if (elapsed_time_us >= MAX_FIFO_TIMEOUT_US) + die("Timeout waiting for upstream SBE FIFO to be not full"); + + udelay(10); + elapsed_time_us += 10; + } + + write_fsi(chip, addr, data); +} + +static void write_request(uint8_t chip, const void *request, uint32_t word_count) +{ + const uint32_t *words = request; + + /* + * Ensure Downstream Max Transfer Counter is 0 since we have no need for + * it and non-0 can cause protocol issues. + */ + write_fsi(chip, SBE_FIFO_DNFIFO_MAX_TSFR, 0x0); + + for (uint32_t i = 0; i < word_count; i++) + fifo_push(chip, SBE_FIFO_UPFIFO_DATA_IN, words[i]); + + /* Notify SBE that last word has been sent */ + fifo_push(chip, SBE_FIFO_UPFIFO_SIG_EOT, FSB_FIFO_SIG_EOT); +} + +/* Returns true when there is no more data to be read */ +static bool fifo_pop(uint8_t chip, uint32_t *data) +{ + enum { + DNFIFO_STATUS_DEQUEUED_EOT_FLAG = 0x00800000, + DNFIFO_STATUS_FIFO_EMPTY = 0x00100000, + }; + + uint64_t elapsed_time_us = 0; + + while (true) { + uint32_t status = read_fsi(chip, SBE_FIFO_DNFIFO_STATUS); + + /* If we're done receiving response */ + if (status & DNFIFO_STATUS_DEQUEUED_EOT_FLAG) + return false; + + /* If there is more data */ + if (!(status & DNFIFO_STATUS_FIFO_EMPTY)) + break; + + if (elapsed_time_us >= MAX_FIFO_TIMEOUT_US) { + printk(BIOS_INFO, "Last downstream SBE status: 0x%08x\n", status); + die("Timeout waiting for downstream SBE FIFO to be not empty\n"); + } + + udelay(10); + elapsed_time_us += 10; + } + + *data = read_fsi(chip, SBE_FIFO_DNFIFO_DATA_OUT); + return true; +} + +static void read_response(uint8_t chip, void *response, uint32_t word_count) +{ + enum { + MSG_BUFFER_SIZE = 2048, + + STATUS_SIZE_WORDS = sizeof(struct status_hdr_t) / sizeof(uint32_t), + + SBE_PRI_OPERATION_SUCCESSFUL = 0x00, + SBE_SEC_OPERATION_SUCCESSFUL = 0x00, + }; + + /* Large enough to receive FFDC */ + static uint32_t buffer[MSG_BUFFER_SIZE]; + + uint32_t idx; + uint32_t offset_idx; + uint32_t status_idx; + struct status_hdr_t *status_hdr; + + uint32_t *words = response; + + /* + * Message Schema: + * |Return Data (optional)| Status Header | FFDC (optional) + * |Offset to Status Header (starting from EOT) | EOT | + */ + + for (idx = 0; idx < MSG_BUFFER_SIZE; ++idx) { + if (!fifo_pop(chip, &buffer[idx])) + break; + + if (idx < word_count) + words[idx] = buffer[idx]; + } + + if (idx == MSG_BUFFER_SIZE) + die("SBE IO response exceeded maximum allowed size\n"); + + /* Notify SBE that EOT has been received */ + write_fsi(chip, 
SBE_FIFO_DNFIFO_ACK_EOT, FSB_FIFO_SIG_EOT);
+
+	/*
+	 * Final index for a minimum complete message (No return data and no FFDC):
+	 * Word Length of status header + Length of Offset (1) + Length of EOT (1)
+	 */
+	if (idx < STATUS_SIZE_WORDS + 2) {
+		printk(BIOS_INFO, "Response length in words: 0x%08x\n", idx);
+		die("SBE IO response is too short\n");
+	}
+
+	/*
+	 * |offset to header| EOT marker | current insert pos | <- idx
+	 * The offset is how far to move back from the EOT position to get the
+	 * index of the Status Header.
+	 */
+	offset_idx = idx - 2;
+
+	/* Validate the offset to the status header */
+	if (buffer[offset_idx] - 1 > offset_idx)
+		die("SBE response offset is too large\n");
+	else if (buffer[offset_idx] < STATUS_SIZE_WORDS + 1)
+		die("SBE response offset is too small\n");
+
+	status_idx = offset_idx - (buffer[offset_idx] - 1);
+	status_hdr = (struct status_hdr_t *)&buffer[status_idx];
+
+	/* Check status for success */
+	if (status_hdr->magic != FIFO_STATUS_MAGIC ||
+	    status_hdr->primary_status != SBE_PRI_OPERATION_SUCCESSFUL ||
+	    status_hdr->secondary_status != SBE_SEC_OPERATION_SUCCESSFUL)
+		die("Invalid status in SBE IO response\n");
+}
+
+/* Private API used only by SCOM dispatcher, no need to expose it */
+void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data);
+uint64_t read_sbe_scom(uint8_t chip, uint64_t addr);
+
+void write_sbe_scom(uint8_t chip, uint64_t addr, uint64_t data)
+{
+	struct put_scom_request_t request = {
+		.word_count = sizeof(request) / sizeof(uint32_t),
+		.cmd_class = SBE_FIFO_CLASS_SCOM_ACCESS,
+		.cmd = SBE_FIFO_CMD_PUT_SCOM,
+		.addr = addr,
+		.data = data,
+	};
+
+	write_request(chip, &request, request.word_count);
+	read_response(chip, NULL, 0);
+}
+
+uint64_t read_sbe_scom(uint8_t chip, uint64_t addr)
+{
+	uint64_t data;
+	struct get_scom_request_t request = {
+		.word_count = sizeof(request) / sizeof(uint32_t),
+		.cmd_class = SBE_FIFO_CLASS_SCOM_ACCESS,
+		.cmd = SBE_FIFO_CMD_GET_SCOM,
+		.addr = addr,
+	};
+
+	write_request(chip, &request, request.word_count);
+	read_response(chip, &data, sizeof(data) / sizeof(uint32_t));
+
+	return data;
+}
diff --git a/src/soc/ibm/power9/scratch.h b/src/soc/ibm/power9/scratch.h
new file mode 100644
index 00000000000..5b2cc29bad3
--- /dev/null
+++ b/src/soc/ibm/power9/scratch.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __SOC_IBM_POWER9_SCRATCH_H
+#define __SOC_IBM_POWER9_SCRATCH_H
+
+/*
+ * This file is for common definitions related to
+ * TP.TPVSB.FSI.W.FSI_MAILBOX.FSXCOMP.FSXLOG.SCRATCH_REGISTER_1
+ * and the 7 consecutive scratch registers that follow it.
+ */
+
+/* SCOM address of the first scratch register */
+#define MBOX_SCRATCH_REG1 0x00050038
+
+/* CFAM address of the first scratch register (word addressing) */
+#define MBOX_SCRATCH_REG1_FSI 0x00002838
+
+#define MBOX_SCRATCH_REG6_GROUP_PUMP_MODE 23
+
+#endif /* __SOC_IBM_POWER9_SCRATCH_H */
diff --git a/src/soc/ibm/power9/timer.c b/src/soc/ibm/power9/timer.c
new file mode 100644
index 00000000000..162668c028f
--- /dev/null
+++ b/src/soc/ibm/power9/timer.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include
+#include
+#include
+#include
+
+/* Time base frequency is 512 MHz so 512 ticks per usec */
+#define TB_TICKS_PER_USEC 512
+
+#if CONFIG(COLLECT_TIMESTAMPS)
+uint64_t timestamp_get(void)
+{
+	return read_spr(SPR_TB);
+}
+#endif
+
+int timestamp_tick_freq_mhz(void)
+{
+	return TB_TICKS_PER_USEC;
+}
+
+void init_timer(void)
+{
+	/*
+	 * Set both decrementers to the highest possible value.
POWER9 implements + * 56 bits, they decrement with 512MHz frequency. Decrementer exception + * condition exists when the MSB implemented bit gets (HDEC) or is (DEC) + * set, meaning that maximal possible timeout for DEC is one bit less than + * that (this gives roughly 7 * 10^7 s = ~2.2 years for DEC and twice that + * for HDEC). By default DEC uses only 32 bits, this can be changed by + * setting bit 46 (LD) of LPCR (Logical Partitioning Control Register). + * Without it the counter overflows and generates an interrupt after ~4.2 s. + */ + + write_spr(SPR_LPCR, read_spr(SPR_LPCR) | SPR_LPCR_LD); + write_spr(SPR_DEC, SPR_DEC_LONGEST_TIME); + write_spr(SPR_HDEC, SPR_DEC_LONGEST_TIME); +} + +/* TODO: with HDEC we can get ~2ns resolution, may be useful for RAM init. */ +void udelay(unsigned int usec) +{ + uint64_t start = read_spr(SPR_TB); + uint64_t end = start + usec * TB_TICKS_PER_USEC; + + /* + * "When the contents of the DEC0 change from 0 to 1, a Decrementer + * exception will come into existence within a reasonable period of time", + * but this may not be precise enough. Set an interrupt for 1us less than + * requested and busy-loop the rest. + * + * In tests on Talos 2 this gives between 0 and 1/32 us more than requested, + * while interrupt only solution gave between 6/32 and 11/32 us more. + */ + if (usec > 1) { + write_spr(SPR_DEC, (usec - 1) * TB_TICKS_PER_USEC); + asm volatile("or 31,31,31"); // Lower priority + + do { + asm volatile("wait"); + } while(read_spr(SPR_DEC) < SPR_DEC_LONGEST_TIME); + + /* + * "When the contents of DEC0 change from 1 to 0, the existing + * Decrementer exception, if any, will cease to exist within a + * reasonable period of time, but not later than the completion of + * the next context synchronizing instruction or event" - last part + * of sentence doesn't matter, in worst case 'wait' in next udelay() + * will be executed more than once but this is still cheaper than + * synchronizing context explicitly. 
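+	 *
+	 * As a concrete example: udelay(10) arms DEC for 9 us (9 * 512 time
+	 * base ticks) and spins on the time base for the remaining ~1 us.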
+ */ + write_spr(SPR_DEC, SPR_DEC_LONGEST_TIME); + asm volatile("or 2,2,2"); // Back to normal priority + } + + while (end > read_spr(SPR_TB)); +} diff --git a/src/soc/ibm/power9/tor.c b/src/soc/ibm/power9/tor.c new file mode 100644 index 00000000000..876b11b3267 --- /dev/null +++ b/src/soc/ibm/power9/tor.c @@ -0,0 +1,850 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "tor.h" + +#include +#include +#include + +#include +#include +#include +#include + +#define UNDEFINED_PPE_TYPE ((uint8_t)0xFF) + +#define NUM_OF_CORES ((uint8_t)24) +#define NUM_OF_QUADS ((uint8_t)6) +#define CORES_PER_QUAD (NUM_OF_CORES / NUM_OF_QUADS) + +#define TOR_VERSION 7 + +#define TOR_MAGIC ((uint32_t)0x544F52) // "TOR" +#define TOR_MAGIC_HW ((uint32_t)0x544F5248) // "TORH" +#define TOR_MAGIC_SGPE ((uint32_t)0x544F5247) // "TORG" +#define TOR_MAGIC_CME ((uint32_t)0x544F524D) // "TORM" +#define TOR_MAGIC_OVLY ((uint32_t)0x544F524C) // "TORL" + +#define NUM_CHIP_TYPES 4 + +/* + * Structure of a TOR section: + * - Header (tor_hdr) + * - Payload + * - either array of PPE blocks that point to ring sections (HW TOR) + * - or ring section + * + * PPE block: + * - uint32_t -- offset (relative to payload) in BE to a ring section + * - uint32_t -- size in BE + * + * Ring section: + * - Array of chiplet blocks (we assume size of one for non-overlay rings) + * - Chiplet block + * - Array of TOR slots (value of 0 means "no such ring") + * - Array of rings pointed to by TOR slots + * + * Chiplet block: + * - uint32_t -- offset (relative to payload) to slots for common rings in BE + * - uint32_t -- offset (relative to payload) to slots for instance rings in BE + * + * TOR slot (`(max_instance_id - min_instance_id + 1)*ring_count` of them): + * - uint16_t -- offset (relative to chiplet block) to a ring in BE + */ + +enum chiplet_type { + PERV_TYPE, + N0_TYPE, + N1_TYPE, + N2_TYPE, + N3_TYPE, + XB_TYPE, + MC_TYPE, + OB0_TYPE, + OB1_TYPE, + OB2_TYPE, + OB3_TYPE, + PCI0_TYPE, + PCI1_TYPE, + PCI2_TYPE, + EQ_TYPE, + EC_TYPE, + SBE_NOOF_CHIPLETS +}; + +/* Description of a PPE block */ +struct tor_ppe_block { + uint32_t offset; + uint32_t size; +} __attribute__((packed)); + +/* Offsets to different kinds of rings within a section */ +struct tor_chiplet_block { + uint32_t common_offset; + uint32_t instance_offset; +} __attribute__((packed)); + +/* Static information about a ring to be searched for by the name */ +struct ring_info { + enum ring_id ring_id; + uint8_t min_instance_id; // Lower bound of instance id range + uint8_t max_instance_id; // Upper bound of instance id range +}; + +/* Static information about a chiplet */ +struct chiplet_info { + uint8_t common_rings_count; // [0..common_rings_count) + uint8_t instance_rings_count; // [0..instance_rings_count) +}; + +const struct ring_info EQ_COMMON_RING_INFO[] = { + {EQ_FURE , 0x10, 0x10}, + {EQ_GPTR , 0x10, 0x10}, + {EQ_TIME , 0x10, 0x10}, + {EQ_INEX , 0x10, 0x10}, + {EX_L3_FURE , 0x10, 0x10}, + {EX_L3_GPTR , 0x10, 0x10}, + {EX_L3_TIME , 0x10, 0x10}, + {EX_L2_MODE , 0x10, 0x10}, + {EX_L2_FURE , 0x10, 0x10}, + {EX_L2_GPTR , 0x10, 0x10}, + {EX_L2_TIME , 0x10, 0x10}, + {EX_L3_REFR_FURE , 0x10, 0x10}, + {EX_L3_REFR_GPTR , 0x10, 0x10}, + {EQ_ANA_FUNC , 0x10, 0x10}, + {EQ_ANA_GPTR , 0x10, 0x10}, + {EQ_DPLL_FUNC , 0x10, 0x10}, + {EQ_DPLL_GPTR , 0x10, 0x10}, + {EQ_DPLL_MODE , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_0 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_1 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_2 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_3 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_4 , 0x10, 0x10}, 
+ {EQ_ANA_BNDY_BUCKET_5 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_6 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_7 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_8 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_9 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_10 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_11 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_12 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_13 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_14 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_15 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_16 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_17 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_18 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_19 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_20 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_21 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_22 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_23 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_24 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_25 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_L3DCC , 0x10, 0x10}, + {EQ_ANA_MODE , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_26 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_27 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_28 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_29 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_30 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_31 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_32 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_33 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_34 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_35 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_36 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_37 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_38 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_39 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_40 , 0x10, 0x10}, + {EQ_ANA_BNDY_BUCKET_41 , 0x10, 0x10}, + {EQ_INEX_BUCKET_1 , 0x10, 0x10}, + {EQ_INEX_BUCKET_2 , 0x10, 0x10}, + {EQ_INEX_BUCKET_3 , 0x10, 0x10}, + {EQ_INEX_BUCKET_4 , 0x10, 0x10}, +}; + +const struct ring_info EQ_INSTANCE_RING_INFO[] = { + {EQ_REPR , 0x10, 0x1B}, + {EX_L3_REPR , 0x10, 0x1B}, + {EX_L2_REPR , 0x10, 0x1B}, + {EX_L3_REFR_REPR , 0x10, 0x1B}, + {EX_L3_REFR_TIME , 0x10, 0x1B}, +}; + +static const struct chiplet_info EQ_CHIPLET_INFO = { + 66, // 66 common rings for Quad chiplet. 
+ 5, // 5 instance specific rings for each EQ chiplet +}; + +const struct ring_info EC_COMMON_RING_INFO[] = { + {EC_FUNC , 0x20, 0x20}, + {EC_GPTR , 0x20, 0x20}, + {EC_TIME , 0x20, 0x20}, + {EC_MODE , 0x20, 0x20}, + {EC_ABST , 0x20, 0x20}, + {EC_CMSK , 0xFF, 0xFF}, +}; + +const struct ring_info EC_INSTANCE_RING_INFO[] = { + {EC_REPR , 0x20, 0x37}, +}; + +static const struct chiplet_info EC_CHIPLET_INFO = { + 6, // 6 common rings for Core chiplet + 1, // 1 instance specific ring for each Core chiplet +}; + +static const struct ring_query RING_QUERIES_PDG[] = { + /* ring_id ring_class kwd_name instance_id */ + /* min max */ + { PERV_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { PERV_TIME , RING_CLASS_NEST , "#G" , 0x01 , 0x01 }, + { OCC_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { OCC_TIME , RING_CLASS_NEST , "#G" , 0x01 , 0x01 }, + { SBE_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { PERV_ANA_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { PERV_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x01 , 0x01 }, + { N0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x02 , 0x02 }, + { N0_TIME , RING_CLASS_NEST , "#G" , 0x02 , 0x02 }, + { N0_NX_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x02 , 0x02 }, + { N0_NX_TIME , RING_CLASS_NEST , "#G" , 0x02 , 0x02 }, + { N0_CXA0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x02 , 0x02 }, + { N0_CXA0_TIME , RING_CLASS_NEST , "#G" , 0x02 , 0x02 }, + { N1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO0_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_IOO1_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N1_MCS23_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x03 , 0x03 }, + { N1_MCS23_TIME , RING_CLASS_NEST , "#G" , 0x03 , 0x03 }, + { N2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x04 , 0x04 }, + { N2_TIME , RING_CLASS_NEST , "#G" , 0x04 , 0x04 }, + { N2_CXA1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x04 , 0x04 }, + { N2_CXA1_TIME , RING_CLASS_NEST , "#G" , 0x04 , 0x04 }, + { N2_PSI_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x04 , 0x04 }, + { N3_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x05 , 0x05 }, + { N3_TIME , RING_CLASS_NEST , "#G" , 0x05 , 0x05 }, + { N3_MCS01_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x05 , 0x05 }, + { N3_MCS01_TIME , RING_CLASS_NEST , "#G" , 0x05 , 0x05 }, + { N3_NP_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x05 , 0x05 }, + { N3_NP_TIME , RING_CLASS_NEST , "#G" , 0x05 , 0x05 }, + { XB_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO0_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO1_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { XB_IO2_TIME , RING_CLASS_NEST , "#G" , 0x06 , 0x06 }, + { XB_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x06 , 0x06 }, + { MC_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_TIME , RING_CLASS_NEST , "#G" , 0x07 , 0xFF }, + { MC_IOM01_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_IOM23_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMI0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMI1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMI2_GPTR , 
RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMIPPE_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x07 , 0xFF }, + { MC_OMIPPE_TIME , RING_CLASS_NEST , "#G" , 0x07 , 0xFF }, + { OB0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x09 , 0x09 }, + { OB0_TIME , RING_CLASS_NEST , "#G" , 0x09 , 0x09 }, + { OB0_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x09 , 0x09 }, + { OB1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0A , 0x0A }, + { OB1_TIME , RING_CLASS_NEST , "#G" , 0x0A , 0x0A }, + { OB1_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0A , 0x0A }, + { OB2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0B , 0x0B }, + { OB2_TIME , RING_CLASS_NEST , "#G" , 0x0B , 0x0B }, + { OB2_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0B , 0x0B }, + { OB3_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0C , 0x0C }, + { OB3_TIME , RING_CLASS_NEST , "#G" , 0x0C , 0x0C }, + { OB3_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0C , 0x0C }, + { PCI0_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0D , 0x0D }, + { PCI0_TIME , RING_CLASS_NEST , "#G" , 0x0D , 0x0D }, + { PCI0_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0D , 0x0D }, + { PCI1_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0E , 0x0E }, + { PCI1_TIME , RING_CLASS_NEST , "#G" , 0x0E , 0x0E }, + { PCI1_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0E , 0x0E }, + { PCI2_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0F , 0x0F }, + { PCI2_TIME , RING_CLASS_NEST , "#G" , 0x0F , 0x0F }, + { PCI2_PLL_GPTR , RING_CLASS_GPTR_NEST , "#G" , 0x0F , 0x0F }, + { EQ_GPTR , RING_CLASS_GPTR_EQ , "#G" , 0x10 , 0xFF }, + { EQ_TIME , RING_CLASS_EQ , "#G" , 0x10 , 0xFF }, + { EX_L3_GPTR , RING_CLASS_GPTR_EX , "#G" , 0x10 , 0xFF }, + { EX_L3_TIME , RING_CLASS_EX , "#G" , 0x10 , 0xFF }, + { EX_L2_GPTR , RING_CLASS_GPTR_EX , "#G" , 0x10 , 0xFF }, + { EX_L2_TIME , RING_CLASS_EX , "#G" , 0x10 , 0xFF }, + { EX_L3_REFR_GPTR , RING_CLASS_GPTR_EX , "#G" , 0x10 , 0xFF }, + { EQ_ANA_GPTR , RING_CLASS_GPTR_EQ , "#G" , 0x10 , 0xFF }, + { EQ_DPLL_GPTR , RING_CLASS_GPTR_EQ , "#G" , 0x10 , 0xFF }, + { EC_GPTR , RING_CLASS_GPTR_EC , "#G" , 0x20 , 0xFF }, + { EC_TIME , RING_CLASS_EC , "#G" , 0x20 , 0xFF }, +}; + +static const struct ring_query RING_QUERIES_PDR[] = { + /* ring_id ring_class kwd_name instance_id */ + /* min max */ + { PERV_REPR , RING_CLASS_NEST , "#R" , 0x01 , 0x01 }, + { OCC_REPR , RING_CLASS_NEST , "#R" , 0x01 , 0x01 }, + { SBE_REPR , RING_CLASS_NEST , "#R" , 0x01 , 0x01 }, + { N0_REPR , RING_CLASS_NEST , "#R" , 0x02 , 0x02 }, + { N0_NX_REPR , RING_CLASS_NEST , "#R" , 0x02 , 0x02 }, + { N0_CXA0_REPR , RING_CLASS_NEST , "#R" , 0x02 , 0x02 }, + { N1_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N1_IOO0_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N1_IOO1_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N1_MCS23_REPR , RING_CLASS_NEST , "#R" , 0x03 , 0x03 }, + { N2_REPR , RING_CLASS_NEST , "#R" , 0x04 , 0x04 }, + { N2_CXA1_REPR , RING_CLASS_NEST , "#R" , 0x04 , 0x04 }, + { N3_REPR , RING_CLASS_NEST , "#R" , 0x05 , 0x05 }, + { N3_MCS01_REPR , RING_CLASS_NEST , "#R" , 0x05 , 0x05 }, + { N3_NP_REPR , RING_CLASS_NEST , "#R" , 0x05 , 0x05 }, + { XB_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { XB_IO0_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { XB_IO1_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { XB_IO2_REPR , RING_CLASS_NEST , "#R" , 0x06 , 0x06 }, + { MC_REPR , RING_CLASS_NEST , "#R" , 0x07 , 0x08 }, + { MC_IOM23_REPR , RING_CLASS_NEST , "#R" , 0x07 , 0x08 }, + { MC_OMIPPE_REPR , RING_CLASS_NEST , "#R" , 0x07 , 0x08 }, + { OB0_REPR , RING_CLASS_NEST , "#R" , 0x09 , 0x09 }, + { OB1_REPR , RING_CLASS_NEST , "#R" , 0x0A , 
0x0A }, + { OB2_REPR , RING_CLASS_NEST , "#R" , 0x0B , 0x0B }, + { OB3_REPR , RING_CLASS_NEST , "#R" , 0x0C , 0x0C }, + { PCI0_REPR , RING_CLASS_NEST , "#R" , 0x0D , 0x0D }, + { PCI1_REPR , RING_CLASS_NEST , "#R" , 0x0E , 0x0E }, + { PCI2_REPR , RING_CLASS_NEST , "#R" , 0x0F , 0x0F }, + { EQ_REPR , RING_CLASS_EQ_INS , "#R" , 0x10 , 0x15 }, + { EX_L3_REFR_TIME , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EX_L3_REPR , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EX_L2_REPR , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EX_L3_REFR_REPR , RING_CLASS_EX_INS , "#R" , 0x10 , 0x15 }, + { EC_REPR , RING_CLASS_EC_INS , "#R" , 0x20 , 0x37 }, +}; + +/* Retrieves properties for specified kind of TOR */ +static void get_section_properties(uint32_t tor_magic, + uint8_t chiplet_type, + const struct chiplet_info **chiplet_info, + const struct ring_info **common_ring_info, + const struct ring_info **instance_ring_info) +{ + if (tor_magic == TOR_MAGIC_CME) + chiplet_type = EC_TYPE; + else if (tor_magic == TOR_MAGIC_SGPE) + chiplet_type = EQ_TYPE; + else if (tor_magic != TOR_MAGIC_OVLY) + die("Unexpected TOR type\n"); + + switch (chiplet_type) { + case EC_TYPE: + *chiplet_info = &EC_CHIPLET_INFO; + *common_ring_info = EC_COMMON_RING_INFO; + *instance_ring_info = EC_INSTANCE_RING_INFO; + break; + case EQ_TYPE: + *chiplet_info = &EQ_CHIPLET_INFO; + *common_ring_info = EQ_COMMON_RING_INFO; + *instance_ring_info = EQ_INSTANCE_RING_INFO; + break; + default: + *chiplet_info = NULL; + *common_ring_info = NULL; + *instance_ring_info = NULL; + break; + }; +} + +/* Either reads ring into the buffer (on GET_RING_DATA) or treats it as an + instance of ring_put_info (on GET_RING_PUT_INFO) */ +static bool ring_access(struct tor_hdr *ring_section, uint16_t ring_id, + uint8_t ring_variant, uint8_t instance_id, + void *data_buf, uint32_t *data_buf_size, + enum ring_operation operation) +{ + const bool overlay = (be32toh(ring_section->magic) == TOR_MAGIC_OVLY); + uint8_t i = 0; + uint8_t chiplet_count = (overlay ? SBE_NOOF_CHIPLETS : 1); + uint8_t max_variants = (overlay ? 1 : NUM_RING_VARIANTS); + + assert(ring_section->version == TOR_VERSION); + + for (i = 0; i < chiplet_count * 2; ++i) { + const uint8_t chiplet_idx = i / 2; + const bool instance_rings = (i % 2 == 1); + + uint32_t tor_slot_idx = 0; + uint8_t instance = 0; + + const struct ring_info *ring_info; + uint8_t ring_count; + struct tor_chiplet_block *blocks; + uint32_t chiplet_offset; + uint8_t variant_count; + + const struct chiplet_info *chiplet_info; + const struct ring_info *common_ring_info; + const struct ring_info *instance_ring_info; + + get_section_properties(be32toh(ring_section->magic), + chiplet_idx, &chiplet_info, + &common_ring_info, &instance_ring_info); + if (chiplet_info == NULL) + continue; + + ring_info = instance_rings ? instance_ring_info : common_ring_info; + + ring_count = instance_rings + ? chiplet_info->instance_rings_count + : chiplet_info->common_rings_count; + blocks = (void *)ring_section->data; + chiplet_offset = instance_rings + ? be32toh(blocks[chiplet_idx].instance_offset) + : be32toh(blocks[chiplet_idx].common_offset); + /* Instance rings have only BASE variant and both EC and EQ have + * all of them and their order matches enumeration values */ + variant_count = (instance_rings ? 
1 : max_variants); + + for (instance = ring_info->min_instance_id; + instance <= ring_info->max_instance_id; + ++instance) { + uint8_t ring_idx; + for (ring_idx = 0; ring_idx < ring_count; ++ring_idx) { + if (ring_info[ring_idx].ring_id != ring_id || + (instance_rings && instance != instance_id)) { + /* Jump over all variants of the ring */ + tor_slot_idx += variant_count; + continue; + } + + if (variant_count > 1) + /* Skip to the slot with the variant */ + tor_slot_idx += ring_variant; + + uint16_t *tor_slots = + (void *)&ring_section->data[chiplet_offset]; + if (operation == GET_RING_DATA) { + uint16_t slot_value = be16toh(tor_slots[tor_slot_idx]); + uint32_t ring_slot_offset = chiplet_offset + slot_value; + struct ring_hdr *ring = + (void *)&ring_section->data[ring_slot_offset]; + uint32_t ring_size = be16toh(ring->size); + + if (slot_value == 0) + /* Didn't find the ring */ + return false; + + if (ring->magic != htobe16(RS4_MAGIC)) { + printk(BIOS_EMERG, "chiplet_offset = 0x%08x\n", chiplet_offset); + printk(BIOS_EMERG, "tor_slot_idx = 0x%08x\n", tor_slot_idx); + printk(BIOS_EMERG, "slot_value = 0x%08x\n", slot_value); + printk(BIOS_EMERG, "ring_slot_offset = 0x%08x\n", ring_slot_offset); + printk(BIOS_EMERG, "Full section:\n"); + hexdump(ring_section, ring_section->size); + printk(BIOS_EMERG, "Ring:\n"); + hexdump(ring, ring_size); + die("Got junk instead of a ring"); + } + + if (*data_buf_size != 0 && *data_buf_size >= ring_size) + memcpy(data_buf, ring, ring_size); + *data_buf_size = ring_size; + } else if (operation == GET_RING_PUT_INFO) { + struct ring_put_info *put_info = data_buf; + + if (tor_slots[tor_slot_idx] != 0) + die("Slot isn't empty!"); + + if (*data_buf_size != sizeof(struct ring_put_info)) + die("Invalid buffer for GET_RING_PUT_INFO!"); + + put_info->chiplet_offset = sizeof(*ring_section) + + chiplet_offset; + put_info->ring_slot_offset = + (uint8_t*)&tor_slots[tor_slot_idx] - + (uint8_t*)ring_section; + } + return true; + } + } + } + + return false; +} + +/* A wrapper around ring_access() that does safety checks and tor traversal if + necessary */ +bool tor_access_ring(struct tor_hdr *ring_section, uint16_t ring_id, + enum ppe_type ppe_type, uint8_t ring_variant, + uint8_t instance_id, void *data_buf, + uint32_t *data_buf_size, enum ring_operation operation) +{ + if (be32toh(ring_section->magic) >> 8 != TOR_MAGIC || + ring_section->version == 0 || + ring_section->version > TOR_VERSION || + ring_section->chip_type >= NUM_CHIP_TYPES) + die("Invalid call to tor_access_ring()!"); + + if (operation == GET_RING_DATA || operation == GET_RING_PUT_INFO) { + struct tor_hdr *section = ring_section; + if (be32toh(ring_section->magic) == TOR_MAGIC_HW) { + struct tor_ppe_block *tor_ppe_block = (void *)ring_section->data; + const uint32_t section_offset = be32toh(tor_ppe_block[ppe_type].offset); + section = (void *)&ring_section->data[section_offset]; + } + + return ring_access(section, ring_id, ring_variant, instance_id, + data_buf, data_buf_size, operation); + } + + if (operation == GET_PPE_LEVEL_RINGS) { + uint32_t section_size = 0; + uint32_t section_offset = 0; + struct tor_ppe_block *tor_ppe_block = (void *)ring_section->data; + + assert(ring_id == UNDEFINED_RING_ID); + assert(ring_variant == UNDEFINED_RING_VARIANT); + assert(instance_id == UNDEFINED_INSTANCE_ID); + assert(be32toh(ring_section->magic) == TOR_MAGIC_HW); + + section_size = be32toh(tor_ppe_block[ppe_type].size); + section_offset = be32toh(tor_ppe_block[ppe_type].offset); + + if (*data_buf_size != 0 && 
*data_buf_size >= section_size) + memcpy(data_buf, &ring_section->data[section_offset], + section_size); + + *data_buf_size = section_size; + return true; + } + + die("Unhandled TOR ring access operation!"); +} + +/* Retrieves an overlay ring in both compressed and uncompressed forms */ +static bool get_overlays_ring(struct tor_hdr *overlays_section, + uint16_t ring_id, void *rs4_buf, void *raw_buf) +{ + uint32_t uncompressed_bit_size = 0; + uint32_t rs4_buf_size = 0xFFFFFFFF; + + if (!tor_access_ring(overlays_section, ring_id, UNDEFINED_PPE_TYPE, + UNDEFINED_RING_VARIANT, UNDEFINED_INSTANCE_ID, + rs4_buf, &rs4_buf_size, GET_RING_DATA)) + return false; + + rs4_decompress(raw_buf, raw_buf + MAX_RING_BUF_SIZE / 2, + MAX_RING_BUF_SIZE / 2, &uncompressed_bit_size, + (struct ring_hdr *)rs4_buf); + return true; +} + +/* Decompresses a ring, modifies it to leave only the data allowed by the + overlay mask and compresses it back */ +static void apply_overlays_ring(struct ring_hdr *ring, uint8_t *rs4_buf, + const uint8_t *raw_buf) +{ + uint8_t *data = rs4_buf; + uint8_t *care = rs4_buf + MAX_RING_BUF_SIZE / 2; + const uint8_t *overlay = raw_buf + MAX_RING_BUF_SIZE / 2; + uint32_t uncompressed_bit_size; + + rs4_decompress(data, care, MAX_RING_BUF_SIZE / 2, &uncompressed_bit_size, ring); + + /* + * Copies bits from raw_buf into both data and care only if the bit at + * the same index in overlay is set. + */ + for (uint32_t bit = 0; bit < uncompressed_bit_size; ++bit) { + const int byte_idx = bit / 8; + const uint8_t bit_mask = (0x80 >> bit % 8); + if (overlay[byte_idx] & bit_mask) { + if (raw_buf[byte_idx] & bit_mask) { + data[byte_idx] |= bit_mask; + care[byte_idx] |= bit_mask; + } else { + data[byte_idx] &= ~bit_mask; + care[byte_idx] &= ~bit_mask; + } + } + } + + rs4_compress(ring, MAX_RING_BUF_SIZE, data, care, uncompressed_bit_size, + be32toh(ring->scan_addr), be16toh(ring->ring_id)); +} + +static void apply_overlays_to_gptr(struct tor_hdr *overlays_section, + struct ring_hdr *ring, uint8_t *rs4_buf, + uint8_t *raw_buf) +{ + if (get_overlays_ring(overlays_section, be16toh(ring->ring_id), rs4_buf, + raw_buf)) { + /* raw_buf is filled by get_overlays_ring(), rs4_buf is just reused */ + apply_overlays_ring(ring, rs4_buf, raw_buf); + } +} + +static void tor_append_ring(struct tor_hdr *ring_section, + uint32_t *ring_section_size, uint16_t ring_id, + enum ppe_type ppe_type, uint8_t ring_variant, + uint8_t instance_id, struct ring_hdr *ring) +{ + uint16_t ring_offset; + uint32_t ring_size; + + struct ring_put_info put_info; + uint32_t put_info_size = sizeof(put_info); + + if (!tor_access_ring(ring_section, ring_id, ppe_type, ring_variant, + instance_id, &put_info, &put_info_size, GET_RING_PUT_INFO)) + die("Failed to find where to put a ring!"); + + if (*ring_section_size - put_info.chiplet_offset > MAX_TOR_RING_OFFSET) + die("TOR section has reached its maximum size!"); + + ring_offset = htobe16(*ring_section_size - put_info.chiplet_offset); + ring_size = be16toh(ring->size); + + memcpy((uint8_t *)ring_section + put_info.ring_slot_offset, + &ring_offset, sizeof(ring_offset)); + memcpy((uint8_t *)ring_section + *ring_section_size, ring, ring_size); + + *ring_section_size = be32toh(ring_section->size) + ring_size; + ring_section->size = htobe32(*ring_section_size); +} + +/* + * Extracts a ring from the CP00 record of MVPD and appends it to the ring + * section, applying an overlay if necessary. All buffers must be at least + * MAX_RING_BUF_SIZE bytes in length. Indicates the result by setting + * *ring_status.
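 + * (RING_FOUND on success, RING_NOT_FOUND when MVPD does not contain the
 + * ring, RING_REDUNDANT when rs4_redundant() reports the extracted ring as
 + * carrying nothing to apply, in which case it is skipped.)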
+ */ +static void tor_fetch_and_insert_vpd_ring(uint8_t chip, + struct tor_hdr *ring_section, + uint32_t *ring_section_size, + const struct ring_query *query, + uint32_t max_ring_section_size, + struct tor_hdr *overlays_section, + enum ppe_type ppe_type, + uint8_t chiplet_id, + uint8_t even_odd, + uint8_t *buf1, + uint8_t *buf2, + uint8_t *buf3, + enum ring_status *ring_status) +{ + + bool success = false; + uint8_t instance_id = 0; + struct ring_hdr *ring = NULL; + + success = mvpd_extract_ring(chip, "CP00", query->kwd_name, + chiplet_id, even_odd, + query->ring_id, buf1, MAX_RING_BUF_SIZE); + if (!success) { + *ring_status = RING_NOT_FOUND; + return; + } + + ring = (struct ring_hdr *)buf1; + + if (query->ring_class == RING_CLASS_GPTR_NEST || + query->ring_class == RING_CLASS_GPTR_EQ || + query->ring_class == RING_CLASS_GPTR_EX || + query->ring_class == RING_CLASS_GPTR_EC) + apply_overlays_to_gptr(overlays_section, ring, buf2, buf3); + + if (ring->magic == htobe16(RS4_MAGIC)) { + int redundant = 0; + rs4_redundant(ring, &redundant); + if (redundant) { + *ring_status = RING_REDUNDANT; + return; + } + } + + if (*ring_section_size + be16toh(ring->size) > max_ring_section_size) + die("Not enough memory to append the ring: %d > %d", + *ring_section_size + be16toh(ring->size), + max_ring_section_size); + + instance_id = chiplet_id + even_odd; + if (query->ring_class == RING_CLASS_EX_INS) + instance_id += chiplet_id - query->min_instance_id; + + tor_append_ring(ring_section, ring_section_size, query->ring_id, + ppe_type, RV_BASE, instance_id, ring); + + *ring_status = RING_FOUND; +} + +void tor_fetch_and_insert_vpd_rings(uint8_t chip, + struct tor_hdr *ring_section, + uint32_t *ring_section_size, + uint32_t max_ring_section_size, + struct tor_hdr *overlays_section, + enum ppe_type ppe_type, + uint8_t *buf1, uint8_t *buf2, uint8_t *buf3) +{ + const size_t pdg_query_count = + sizeof(RING_QUERIES_PDG) / sizeof(RING_QUERIES_PDG[0]); + const size_t pdr_query_count = + sizeof(RING_QUERIES_PDR) / sizeof(RING_QUERIES_PDR[0]); + const size_t ring_query_count = pdg_query_count + pdr_query_count; + + size_t i = 0; + uint8_t eq = 0; + + const struct ring_query *eq_query = NULL; + const struct ring_query *ec_query = NULL; + + const struct ring_query *ex_queries[4]; + uint8_t ex_query_count = 0; + + /* Add all common rings */ + for (i = 0; i < ring_query_count; ++i) { + uint8_t instance = 0; + uint8_t max_instance_id = 0; + const struct ring_query *query = NULL; + + if (i < pdg_query_count) + query = &RING_QUERIES_PDG[i]; + else + query = &RING_QUERIES_PDR[i - pdg_query_count]; + + if (query->ring_class == RING_CLASS_EQ_INS || + query->ring_class == RING_CLASS_EX_INS || + query->ring_class == RING_CLASS_EC_INS) + continue; + + max_instance_id = query->max_instance_id; + /* 0xff meant multicast in Power8, but doesn't in Power9 */ + if (max_instance_id == 0xff) + max_instance_id = query->min_instance_id; + + if (ppe_type == PT_CME && + query->ring_class != RING_CLASS_EC && + query->ring_class != RING_CLASS_GPTR_EC) + continue; + + if (ppe_type == PT_SGPE && + query->ring_class != RING_CLASS_EX && + query->ring_class != RING_CLASS_EQ && + query->ring_class != RING_CLASS_GPTR_EQ && + query->ring_class != RING_CLASS_GPTR_EX) + continue; + + for (instance = query->min_instance_id; + instance <= max_instance_id; + ++instance) { + enum ring_status ring_status; + tor_fetch_and_insert_vpd_ring(chip, + ring_section, + ring_section_size, + query, + max_ring_section_size, + overlays_section, + ppe_type, + instance, + 
/*even_odd=*/0, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert a common ring."); + } + } + + /* Add all instance rings */ + + for (i = 0; i < pdr_query_count; ++i) { + const struct ring_query *query = &RING_QUERIES_PDR[i]; + const enum ring_class class = query->ring_class; + if (class == RING_CLASS_EQ_INS && eq_query == NULL) { + eq_query = query; + } else if (class == RING_CLASS_EX_INS && ex_query_count < 4) { + ex_queries[ex_query_count] = query; + ++ex_query_count; + } else if (class == RING_CLASS_EC_INS && ec_query == NULL) { + ec_query = query; + } + } + + for (eq = 0; eq < NUM_OF_QUADS; ++eq) { + /* EQ instances */ + if ((ppe_type == PT_SBE || ppe_type == PT_SGPE) && eq_query != NULL) { + const uint8_t instance = eq_query->min_instance_id + eq; + + enum ring_status ring_status; + + tor_fetch_and_insert_vpd_ring(chip, + ring_section, + ring_section_size, + eq_query, + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/0, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert an EQ ring."); + } + + /* EX instances */ + if ((ppe_type == PT_SBE || ppe_type == PT_SGPE) && ex_query_count != 0) { + uint8_t ex = 0; + for (ex = 2 * eq; ex < 2 * (eq + 1); ++ex) { + for (i = 0; i < ex_query_count; ++i) { + const uint8_t instance = ex_queries[i]->min_instance_id + eq; + + enum ring_status ring_status; + + tor_fetch_and_insert_vpd_ring(chip, + ring_section, + ring_section_size, + ex_queries[i], + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/ex % 2, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert an EX ring."); + } + } + } + + /* EC instances */ + if ((ppe_type == PT_SBE || ppe_type == PT_CME) && ec_query != NULL) { + uint8_t ec = 0; + for (ec = 4 * eq; ec < 4 * (eq + 1); ++ec) { + const uint8_t instance = ec_query->min_instance_id + ec; + + enum ring_status ring_status; + + tor_fetch_and_insert_vpd_ring(chip, + ring_section, + ring_section_size, + ec_query, + max_ring_section_size, + overlays_section, + ppe_type, + instance, + /*even_odd=*/0, + buf1, buf2, buf3, + &ring_status); + + if (ring_status == RING_NOT_FOUND) + die("Failed to insert an EC ring."); + } + } + } +} diff --git a/src/soc/ibm/power9/tor.h b/src/soc/ibm/power9/tor.h new file mode 100644 index 00000000000..266ebb74c40 --- /dev/null +++ b/src/soc/ibm/power9/tor.h @@ -0,0 +1,458 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_TOR_H +#define __SOC_IBM_POWER9_TOR_H + +#include <stdbool.h> +#include <stdint.h> + +#define UNDEFINED_RING_ID ((uint16_t)0xFFFF) +#define UNDEFINED_RING_VARIANT ((uint8_t)0xFF) +#define UNDEFINED_INSTANCE_ID ((uint8_t)0xFF) + +#define MAX_RING_BUF_SIZE ((uint32_t)60000) +#define MAX_TOR_RING_OFFSET ((uint16_t)0xFFFF) + +/* List of all Ring IDs as they appear in data */ +enum ring_id { + /* Perv Chiplet Rings */ + PERV_FURE = 0, + PERV_GPTR = 1, + PERV_TIME = 2, + OCC_FURE = 3, + OCC_GPTR = 4, + OCC_TIME = 5, + PERV_ANA_FUNC = 6, + PERV_ANA_GPTR = 7, + PERV_PLL_GPTR = 8, + PERV_PLL_BNDY = 9, + PERV_PLL_BNDY_BUCKET_1 = 10, + PERV_PLL_BNDY_BUCKET_2 = 11, + PERV_PLL_BNDY_BUCKET_3 = 12, + PERV_PLL_BNDY_BUCKET_4 = 13, + PERV_PLL_BNDY_BUCKET_5 = 14, + PERV_PLL_FUNC = 15, + PERV_REPR = 16, + OCC_REPR = 17, + SBE_FURE = 18, + SBE_GPTR = 19, + SBE_REPR = 20, + + /* Nest Chiplet Rings - N0 */ + N0_FURE = 21, + N0_GPTR = 22, + N0_TIME = 23, + N0_NX_FURE = 24, + N0_NX_GPTR = 25, + N0_NX_TIME = 26, +
N0_CXA0_FURE = 27, + N0_CXA0_GPTR = 28, + N0_CXA0_TIME = 29, + N0_REPR = 30, + N0_NX_REPR = 31, + N0_CXA0_REPR = 32, + + /* Nest Chiplet Rings - N1 */ + N1_FURE = 33, + N1_GPTR = 34, + N1_TIME = 35, + N1_IOO0_FURE = 36, + N1_IOO0_GPTR = 37, + N1_IOO0_TIME = 38, + N1_IOO1_FURE = 39, + N1_IOO1_GPTR = 40, + N1_IOO1_TIME = 41, + N1_MCS23_FURE = 42, + N1_MCS23_GPTR = 43, + N1_MCS23_TIME = 44, + N1_REPR = 45, + N1_IOO0_REPR = 46, + N1_IOO1_REPR = 47, + N1_MCS23_REPR = 48, + + /* Nest Chiplet Rings - N2 */ + N2_FURE = 49, + N2_GPTR = 50, + N2_TIME = 51, + N2_CXA1_FURE = 52, + N2_CXA1_GPTR = 53, + N2_CXA1_TIME = 54, + N2_PSI_FURE = 55, + N2_PSI_GPTR = 56, + N2_PSI_TIME = 57, + N2_REPR = 58, + N2_CXA1_REPR = 59, + /* Values 60-61 unused */ + + /* Nest Chiplet Rings - N3 */ + N3_FURE = 62, + N3_GPTR = 63, + N3_TIME = 64, + N3_MCS01_FURE = 65, + N3_MCS01_GPTR = 66, + N3_MCS01_TIME = 67, + N3_NP_FURE = 68, + N3_NP_GPTR = 69, + N3_NP_TIME = 70, + N3_REPR = 71, + N3_MCS01_REPR = 72, + N3_NP_REPR = 73, + N3_BR_FURE = 74, + + /* X-Bus Chiplet Rings */ + /* Common - apply to all instances of X-Bus */ + XB_FURE = 75, + XB_GPTR = 76, + XB_TIME = 77, + XB_IO0_FURE = 78, + XB_IO0_GPTR = 79, + XB_IO0_TIME = 80, + XB_IO1_FURE = 81, + XB_IO1_GPTR = 82, + XB_IO1_TIME = 83, + XB_IO2_FURE = 84, + XB_IO2_GPTR = 85, + XB_IO2_TIME = 86, + XB_PLL_GPTR = 87, + XB_PLL_BNDY = 88, + XB_PLL_FUNC = 89, + + /* X-Bus Chiplet Rings */ + /* X0, X1 and X2 instance specific Rings */ + XB_REPR = 90, + XB_IO0_REPR = 91, + XB_IO1_REPR = 92, + XB_IO2_REPR = 93, + /* Values 94-95 unused */ + + /* MC Chiplet Rings */ + /* Common - apply to all instances of MC */ + MC_FURE = 96, + MC_GPTR = 97, + MC_TIME = 98, + MC_IOM01_FURE = 99, + MC_IOM01_GPTR = 100, + MC_IOM01_TIME = 101, + MC_IOM23_FURE = 102, + MC_IOM23_GPTR = 103, + MC_IOM23_TIME = 104, + MC_PLL_GPTR = 105, + MC_PLL_BNDY = 106, + MC_PLL_BNDY_BUCKET_1 = 107, + MC_PLL_BNDY_BUCKET_2 = 108, + MC_PLL_BNDY_BUCKET_3 = 109, + MC_PLL_BNDY_BUCKET_4 = 110, + MC_PLL_BNDY_BUCKET_5 = 111, + MC_PLL_FUNC = 112, + + /* MC Chiplet Rings */ + /* MC01 and MC23 instance specific Rings */ + MC_REPR = 113, + /* Value 114 unused */ + MC_IOM23_REPR = 115, + + /* OB0 Chiplet Rings */ + OB0_PLL_BNDY = 116, + OB0_PLL_BNDY_BUCKET_1 = 117, + OB0_PLL_BNDY_BUCKET_2 = 118, + OB0_GPTR = 119, + OB0_TIME = 120, + OB0_PLL_GPTR = 121, + OB0_FURE = 122, + OB0_PLL_BNDY_BUCKET_3 = 123, + + /* OB0 Chiplet instance specific Ring */ + OB0_REPR = 124, + + /* OB1 Chiplet Rings */ + OB1_PLL_BNDY = 125, + OB1_PLL_BNDY_BUCKET_1 = 126, + OB1_PLL_BNDY_BUCKET_2 = 127, + OB1_GPTR = 128, + OB1_TIME = 129, + OB1_PLL_GPTR = 130, + OB1_FURE = 131, + OB1_PLL_BNDY_BUCKET_3 = 132, + + /* OB1 Chiplet instance specific Ring */ + OB1_REPR = 133, + + /* OB2 Chiplet Rings */ + OB2_PLL_BNDY = 134, + OB2_PLL_BNDY_BUCKET_1 = 135, + OB2_PLL_BNDY_BUCKET_2 = 136, + OB2_GPTR = 137, + OB2_TIME = 138, + OB2_PLL_GPTR = 139, + OB2_FURE = 140, + OB2_PLL_BNDY_BUCKET_3 = 141, + + /* OB2 Chiplet instance specific Ring */ + OB2_REPR = 142, + + /* OB3 Chiplet Rings */ + OB3_PLL_BNDY = 143, + OB3_PLL_BNDY_BUCKET_1 = 144, + OB3_PLL_BNDY_BUCKET_2 = 145, + OB3_GPTR = 146, + OB3_TIME = 147, + OB3_PLL_GPTR = 148, + OB3_FURE = 149, + OB3_PLL_BNDY_BUCKET_3 = 150, + + /* OB3 Chiplet instance specific Rings */ + OB3_REPR = 151, + + /* Values 152-153 unused */ + + /* PCI Chiplet Rings */ + /* PCI0 Common Rings */ + PCI0_FURE = 154, + PCI0_GPTR = 155, + PCI0_TIME = 156, + PCI0_PLL_BNDY = 157, + PCI0_PLL_GPTR = 158, + /* Instance specific Rings */ + PCI0_REPR = 159, + + /* 
PCI1 Common Rings */ + PCI1_FURE = 160, + PCI1_GPTR = 161, + PCI1_TIME = 162, + PCI1_PLL_BNDY = 163, + PCI1_PLL_GPTR = 164, + /* Instance specific Rings */ + PCI1_REPR = 165, + + /* PCI2 Common Rings */ + PCI2_FURE = 166, + PCI2_GPTR = 167, + PCI2_TIME = 168, + PCI2_PLL_BNDY = 169, + PCI2_PLL_GPTR = 170, + /* Instance specific Rings */ + PCI2_REPR = 171, + + /* Quad Chiplet Rings */ + /* Common - apply to all Quad instances */ + EQ_FURE = 172, + EQ_GPTR = 173, + EQ_TIME = 174, + EQ_INEX = 175, + EX_L3_FURE = 176, + EX_L3_GPTR = 177, + EX_L3_TIME = 178, + EX_L2_MODE = 179, + EX_L2_FURE = 180, + EX_L2_GPTR = 181, + EX_L2_TIME = 182, + EX_L3_REFR_FURE = 183, + EX_L3_REFR_GPTR = 184, + EX_L3_REFR_TIME = 185, + EQ_ANA_FUNC = 186, + EQ_ANA_GPTR = 187, + EQ_DPLL_FUNC = 188, + EQ_DPLL_GPTR = 189, + EQ_DPLL_MODE = 190, + EQ_ANA_BNDY = 191, + EQ_ANA_BNDY_BUCKET_0 = 192, + EQ_ANA_BNDY_BUCKET_1 = 193, + EQ_ANA_BNDY_BUCKET_2 = 194, + EQ_ANA_BNDY_BUCKET_3 = 195, + EQ_ANA_BNDY_BUCKET_4 = 196, + EQ_ANA_BNDY_BUCKET_5 = 197, + EQ_ANA_BNDY_BUCKET_6 = 198, + EQ_ANA_BNDY_BUCKET_7 = 199, + EQ_ANA_BNDY_BUCKET_8 = 200, + EQ_ANA_BNDY_BUCKET_9 = 201, + EQ_ANA_BNDY_BUCKET_10 = 202, + EQ_ANA_BNDY_BUCKET_11 = 203, + EQ_ANA_BNDY_BUCKET_12 = 204, + EQ_ANA_BNDY_BUCKET_13 = 205, + EQ_ANA_BNDY_BUCKET_14 = 206, + EQ_ANA_BNDY_BUCKET_15 = 207, + EQ_ANA_BNDY_BUCKET_16 = 208, + EQ_ANA_BNDY_BUCKET_17 = 209, + EQ_ANA_BNDY_BUCKET_18 = 210, + EQ_ANA_BNDY_BUCKET_19 = 211, + EQ_ANA_BNDY_BUCKET_20 = 212, + EQ_ANA_BNDY_BUCKET_21 = 213, + EQ_ANA_BNDY_BUCKET_22 = 214, + EQ_ANA_BNDY_BUCKET_23 = 215, + EQ_ANA_BNDY_BUCKET_24 = 216, + EQ_ANA_BNDY_BUCKET_25 = 217, + EQ_ANA_BNDY_BUCKET_L3DCC = 218, + EQ_ANA_MODE = 219, + + /* Quad Chiplet Rings */ + /* EQ0 - EQ5 instance specific Rings */ + EQ_REPR = 220, + EX_L3_REPR = 221, + EX_L2_REPR = 222, + EX_L3_REFR_REPR = 223, + + /* Core Chiplet Rings */ + /* Common - apply to all Core instances */ + EC_FUNC = 224, + EC_GPTR = 225, + EC_TIME = 226, + EC_MODE = 227, + + /* Core Chiplet Rings */ + /* EC0 - EC23 instance specific Ring */ + EC_REPR = 228, + + /* Values 229-230 unused */ + + /* Core Chiplet Rings */ + /* ABIST engine mode */ + EC_ABST = 231, + + /* Additional rings for Nimbus DD2 */ + EQ_ANA_BNDY_BUCKET_26 = 232, + EQ_ANA_BNDY_BUCKET_27 = 233, + EQ_ANA_BNDY_BUCKET_28 = 234, + EQ_ANA_BNDY_BUCKET_29 = 235, + EQ_ANA_BNDY_BUCKET_30 = 236, + EQ_ANA_BNDY_BUCKET_31 = 237, + EQ_ANA_BNDY_BUCKET_32 = 238, + EQ_ANA_BNDY_BUCKET_33 = 239, + EQ_ANA_BNDY_BUCKET_34 = 240, + EQ_ANA_BNDY_BUCKET_35 = 241, + EQ_ANA_BNDY_BUCKET_36 = 242, + EQ_ANA_BNDY_BUCKET_37 = 243, + EQ_ANA_BNDY_BUCKET_38 = 244, + EQ_ANA_BNDY_BUCKET_39 = 245, + EQ_ANA_BNDY_BUCKET_40 = 246, + EQ_ANA_BNDY_BUCKET_41 = 247, + + /* EQ Inex ring bucket */ + EQ_INEX_BUCKET_1 = 248, + EQ_INEX_BUCKET_2 = 249, + EQ_INEX_BUCKET_3 = 250, + EQ_INEX_BUCKET_4 = 251, + + /* CMSK ring */ + EC_CMSK = 252, + + /* Perv PLL filter override rings */ + PERV_PLL_BNDY_FLT_1 = 253, + PERV_PLL_BNDY_FLT_2 = 254, + PERV_PLL_BNDY_FLT_3 = 255, + PERV_PLL_BNDY_FLT_4 = 256, + + /* MC OMI rings */ + MC_OMI0_FURE = 257, + MC_OMI0_GPTR = 258, + MC_OMI1_FURE = 259, + MC_OMI1_GPTR = 260, + MC_OMI2_FURE = 261, + MC_OMI2_GPTR = 262, + MC_OMIPPE_FURE = 263, + MC_OMIPPE_GPTR = 264, + MC_OMIPPE_TIME = 265, + /* Instance rings */ + MC_OMIPPE_REPR = 266, + + NUM_RING_IDS = 267 +}; + +/* Supported ring variants. Values match order in ring sections. 
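 + * ring_access() in tor.c depends on this ordering: the raw enum value is
 + * added to the TOR slot index when selecting a variant, so RV_BASE must
 + * stay first.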
*/ +enum ring_variant { + RV_BASE, + RV_CC, + RV_RL, // Kernel and user protection + RV_RL2, // Kernel only protection + RV_RL3, // Rugby v4 + RV_RL4, // Java performance + RV_RL5, // Spare + NUM_RING_VARIANTS +}; + +/* List of groups of rings */ +enum ring_class { + RING_CLASS_NEST, // Common NEST rings except GPTR #G rings + RING_CLASS_GPTR_NEST, // Common GPTR #G rings-NEST + RING_CLASS_GPTR_EQ, // Common GPTR #G rings-EQ + RING_CLASS_GPTR_EX, // Common GPTR #G rings-EX + RING_CLASS_GPTR_EC, // Common GPTR #G rings-EC + RING_CLASS_EQ, // Common EQ rings + RING_CLASS_EX, // Common EX rings + RING_CLASS_EC, // Common EC rings + RING_CLASS_EQ_INS, // Instance EQ rings + RING_CLASS_EX_INS, // Instance EX rings + RING_CLASS_EC_INS, // Instance EC rings +}; + +/* PPE types, enum values match indices inside rings section */ +enum ppe_type { + PT_SBE, + PT_CME, + PT_SGPE, +}; + +/* Available ring access operations */ +enum ring_operation { + GET_RING_DATA, + GET_RING_PUT_INFO, + GET_PPE_LEVEL_RINGS, +}; + +/* Result of calling tor_fetch_and_insert_vpd_rings() */ +enum ring_status { + RING_NOT_FOUND, + RING_FOUND, + RING_REDUNDANT, +}; + +/* Information necessary to put a ring into a ring section */ +struct ring_put_info { + uint32_t chiplet_offset; // Relative to ring section + uint32_t ring_slot_offset; // Relative to ring section +}; + +/* Describes ring search characteristics for tor_fetch_and_insert_vpd_rings() */ +struct ring_query { + enum ring_id ring_id; + enum ring_class ring_class; + char kwd_name[3]; // Keyword name + uint8_t min_instance_id; + uint8_t max_instance_id; +}; + +/* Header of a ring section */ +struct tor_hdr { + uint32_t magic; // One of TOR_MAGIC_* + uint8_t version; + uint8_t chip_type; + uint8_t dd_level; + uint8_t undefined; + uint32_t size; + uint8_t data[]; +} __attribute__((packed)); + +/* + * Either reads a ring into the buffer (on GET_RING_DATA) or treats the buffer + * as an instance of ring_put_info (on GET_RING_PUT_INFO) + */ +bool tor_access_ring(struct tor_hdr *ring_section, uint16_t ring_id, + enum ppe_type ppe_type, uint8_t ring_variant, + uint8_t instance_id, void *data_buf, + uint32_t *data_buf_size, enum ring_operation operation); + +/* + * Extracts rings from the CP00 record of MVPD and appends them to the ring + * section, applying overlays if necessary. All buffers must be at least + * MAX_RING_BUF_SIZE bytes in length.
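 + *
 + * An illustrative call (locating the ring and overlay sections and
 + * allocating the three MAX_RING_BUF_SIZE scratch buffers is the caller's
 + * responsibility; the names below are placeholders):
 + *
 + *   uint32_t size = be32toh(rings->size);
 + *   tor_fetch_and_insert_vpd_rings(0, rings, &size, max_size,
 + *                                  overlays, PT_SBE, buf1, buf2, buf3);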
+ */ +void tor_fetch_and_insert_vpd_rings(uint8_t chip, + struct tor_hdr *ring_section, + uint32_t *ring_section_size, + uint32_t max_ring_section_size, + struct tor_hdr *overlays_section, + enum ppe_type ppe_type, + uint8_t *buf1, + uint8_t *buf2, + uint8_t *buf3); + +#endif // __SOC_IBM_POWER9_TOR_H diff --git a/src/soc/ibm/power9/vpd.c b/src/soc/ibm/power9/vpd.c new file mode 100644 index 00000000000..6581c843e23 --- /dev/null +++ b/src/soc/ibm/power9/vpd.c @@ -0,0 +1,624 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "../../../../3rdparty/ffs/ffs/ffs.h" + +/* Properly rounded size of MEMD header */ +#define MEMD_HDR_SIZE ALIGN(sizeof(struct memd_hdr), 16) +/* Divisor used for section size in MEMD header */ +#define MEMD_SECTION_ROUNDING_DIVISOR 1000 + +/* Supported mapping layout version */ +#define VPD_MAPPING_VERSION 1 +/* Size of entries in MR and MT mappings */ +#define VPD_MAPPING_ENTRY_SIZE 6 + +/* + * Structure of nesting: + * - MEMD + * - VPD blob + * - VPD keyword (VPD name, keyword mapping, attributes, something else) + * + * Either part of a VPD record or of the MEMD header (depending on the source): + * - 11 bytes -- ECC (unimplemented, should be ignored) + * - 0x84 byte -- resource type (this byte is missing from PNOR image) (optional) + * + * VPD record (this is a part of binary VPD which is stored in .rvpd-files): + * - 2 bytes -- size of the record's data in LE (>= 40) + * - RT keyword -- always the first keyword, with 4 bytes of data + * - other keywords (as many as data size allows) + * - PF keyword -- padding, always present + * - 0x78 byte -- closing resource type + * + * Keyword: + * - 2 bytes -- keyword name + * - 1 or 2 bytes -- keyword's data size (little endian) + * (2 bytes if first char of keyword name is #) + * - N bytes -- N == data size + * + * The minimal record size is 40 bytes. Records that exceed it are padded to a + * word boundary (a word being 4 bytes). + * + * Format of MR, MT, Q0 and CK keywords that provide mapping: + * - Header: + * - 1 byte -- version + * - 1 byte -- entry count + * - 1 byte -- entry size in bytes (only for Q0 and CK keywords) + * - 1 byte -- reserved + * - Entry (0xff value in first 5 fields means "matches everything"): + * - 1 byte -- mcs mask (high byte) + * - 1 byte -- mcs mask (low byte) + * - 1 byte -- rank mask (high byte) + * - 1 byte -- rank mask (low byte) + * - 1 byte -- frequency mask + * 0x80 - 1866 + * 0x40 - 2133 + * 0x20 - 2400 + * 0x10 - 2666 + * - 1 byte -- kw ([0-9A-Z]) + * 0x00 for row after last + * 0xff for unsupported configuration + * + * Glossary: + * - MR keyword -- the mapping + * - MR configuration -- one of J#, X#, etc.
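 + *
 + * Worked example of the entry matching arithmetic (values are illustrative;
 + * the masks are computed exactly as in mapping_lookup() below): for
 + * mcs_i = 2, dimm0_rank = 1, dimm1_rank = 0 and freq = 2400 the lookup uses
 + * mcs_mask = 0x8000 >> 2 = 0x2000, rank_mask = 0x8000 >> (1*4 + 0) = 0x0800
 + * and freq_mask = 0x20; an entry matches only when all of these bits are
 + * also set in its corresponding mask fields.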
+ * + * See the following sources in talos-hostboot: + * - src/import/chips/p9/procedures/hwp/accessors/p9_get_mem_vpd_keyword.C + * - src/import/chips/p9/procedures/hwp/accessors/p9_get_mem_vpd_keyword.H + * - src/import/chips/p9/procedures/hwp/memory/lib/dimm/eff_dimm.C + * - src/import/chips/p9/procedures/hwp/memory/lib/mss_vpd_decoder.H + * - src/usr/fapi2/test/getVpdTest.C + */ + +/* Size of this structure should be rounded to 16 bytes */ +struct memd_hdr { + char eyecatch[4]; // Magic number to determine validity "OKOK" + char header_version[4]; // Version of this header + char memd_version[4]; // Version of the MEMD payload + uint32_t section_size; // / 1000 + 1 + uint16_t section_count; // Number of MEMD instances + char reserved[8]; // Reserved bytes +} __attribute__((packed)); + +/* Combines pointer to VPD area with configuration information */ +struct vpd_info { + const uint8_t *data; // VPD area pointer + int mcs_i; // MCS position (spans CPUs) + int freq; // Frequency in MHz + int dimm0_rank; + int dimm1_rank; +}; + +/* Memory terminator data */ + +uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_NOM[4]; +uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_PARK[4]; +uint8_t ATTR_MSS_VPD_MT_DRAM_RTT_WR[4]; + +uint8_t ATTR_MSS_VPD_MT_ODT_RD[4][2][2]; +uint8_t ATTR_MSS_VPD_MT_ODT_WR[4][2][2]; + +uint8_t ATTR_MSS_VPD_MT_VREF_DRAM_WR[4]; +uint32_t ATTR_MSS_VPD_MT_VREF_MC_RD[4]; + +uint8_t ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS; +uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP; +uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN; +uint32_t ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP; +uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP; +uint64_t ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID; +uint8_t ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS; +uint8_t ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS; +uint8_t ATTR_MSS_VPD_MT_PREAMBLE; +uint16_t ATTR_MSS_VPD_MT_WINDAGE_RD_CTR; + +/* End of terminator data */ + +/* Memory rotator data */ + +uint8_t ATTR_MSS_VPD_MR_DPHY_GPO; +uint8_t ATTR_MSS_VPD_MR_DPHY_RLO; +uint8_t ATTR_MSS_VPD_MR_DPHY_WLO; +uint8_t ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[28][2]; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[28][2]; +uint8_t 
ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[28][2]; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[28][2]; + +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN[28][2]; +uint8_t ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP[28][2]; + +uint8_t ATTR_MSS_VPD_MR_TSYS_ADR[4]; +uint8_t ATTR_MSS_VPD_MR_TSYS_DATA[4]; + +/* End of rotator data */ + +/* Looks up an entry matching specified configuration in an MT or MR mapping. + Returns a character or '\0' on lookup failure. */ +static char mapping_lookup(const struct vpd_info *vpd, const uint8_t *mapping, size_t size) +{ + int i = 0; + int entry_count = 0; + int offset = 0; + uint16_t mcs_mask = 0; + uint16_t freq_mask = 0; + uint16_t rank_mask = 0; + + /* Mapping header size */ + if (size < 3) + die("Mapping is too small!\n"); + offset = 3; + + if (mapping[0] != VPD_MAPPING_VERSION) + die("Unsupported mapping version!\n"); + + /* 0x8000 is a mask for MCS #0 */ + assert(vpd->mcs_i >= 0 && vpd->mcs_i <= 15); + mcs_mask = 0x8000 >> vpd->mcs_i; + + /* (0, 0) -> 0x8000; (0, 1) -> 0x4000; ...; (1, 0) -> 0x0800; ... 
*/ + assert(vpd->dimm0_rank >= 0 && vpd->dimm0_rank <= 3); + assert(vpd->dimm1_rank >= 0 && vpd->dimm1_rank <= 3); + rank_mask = 0x8000 >> (vpd->dimm0_rank*4 + vpd->dimm1_rank); + + switch (vpd->freq) { + case 1866: + freq_mask = 0x80; + break; + case 2133: + freq_mask = 0x40; + break; + case 2400: + freq_mask = 0x20; + break; + case 2666: + freq_mask = 0x10; + break; + default: + die("Unhandled frequency value: %d\n", vpd->freq); + break; + } + + entry_count = mapping[1]; + for (i = 0; i < entry_count; ++i, offset += VPD_MAPPING_ENTRY_SIZE) { + const uint16_t mcs_mask_value = + (mapping[offset + 0] << 8) | mapping[offset + 1]; + const uint16_t rank_mask_value = + (mapping[offset + 2] << 8) | mapping[offset + 3]; + + /* A kw of 0x00 marks the row after the last one, no entry here */ + if (mapping[offset + 5] == 0x00) + continue; + + if ((mcs_mask_value & mcs_mask) != mcs_mask) + continue; + if ((rank_mask_value & rank_mask) != rank_mask) + continue; + if ((mapping[offset + 4] & freq_mask) != freq_mask) + continue; + + return mapping[offset + 5]; + } + + return '\0'; +} + +const uint8_t *vpd_find_kwd(const uint8_t *record, const char *record_name, + const char *kwd_name, size_t *size) +{ + size_t offset = 0; + uint16_t record_size = 0; + + if (strlen(kwd_name) != VPD_KWD_NAME_LEN) + die("Keyword name has wrong length: %s!\n", kwd_name); + + memcpy(&record_size, &record[offset], sizeof(record_size)); + offset += VPD_RECORD_SIZE_LEN; + record_size = le16toh(record_size); + + /* Skip the mandatory "RT" keyword name and one byte of its data size + (always 4) */ + offset += VPD_KWD_NAME_LEN + 1; + + if (memcmp(&record[offset], record_name, VPD_RECORD_NAME_LEN)) + die("Expected to be working with %s record!\n", record_name); + offset += VPD_RECORD_NAME_LEN; + + while (offset < record_size) { + uint16_t kwd_size = 0; + bool match = false; + const int two_byte_size = (record[offset] == '#'); + + /* This is always the last keyword */ + if (!memcmp(&record[offset], "PF", VPD_KWD_NAME_LEN)) + break; + + match = !memcmp(&record[offset], kwd_name, VPD_KWD_NAME_LEN); + + offset += VPD_KWD_NAME_LEN; + + if (two_byte_size) { + memcpy(&kwd_size, &record[offset], sizeof(kwd_size)); + kwd_size = le16toh(kwd_size); + offset += 2; + } else { + kwd_size = record[offset]; + offset += 1; + } + + if (match) { + *size = kwd_size; + return &record[offset]; + } + + offset += kwd_size; + } + + return NULL; +} + +/* Looks up configuration in the specified mapping and loads it or dies */ +static const uint8_t *find_vpd_conf(const struct vpd_info *vpd, const char *mapping_name, + size_t *size) +{ + const uint8_t *mapping = NULL; + const uint8_t *conf = NULL; + size_t kwd_size = 0; + + char conf_name[3] = {}; + + if (!strcmp(mapping_name, "MR")) + conf_name[0] = 'J'; + else if (!strcmp(mapping_name, "MT")) + conf_name[0] = 'X'; + else + die("Unsupported mapping type: %s\n", mapping_name); + + mapping = vpd_find_kwd(vpd->data, "MEMD", mapping_name, &kwd_size); + if (!mapping) + die("VPD is missing %s keyword!\n", mapping_name); + + conf_name[1] = mapping_lookup(vpd, mapping, kwd_size); + if (!conf_name[1]) + die("Failed to find matching %s configuration!\n", mapping_name); + + conf = vpd_find_kwd(vpd->data, "MEMD", conf_name, &kwd_size); + if (!conf) + die("Failed to read %s configuration!\n", mapping_name); + + *size = kwd_size; + return conf; +} + +static void load_mt_attrs(const uint8_t *mt_conf, size_t size, int vpd_idx) +{ + uint8_t version_layout; + uint8_t version_data; + + if (size < 2) + die("MT configuration is way too small!\n"); + + version_layout = mt_conf[0]; +
version_data = mt_conf[1]; + + if (version_layout > 1) + die("Unsupported layout of MT configuration!\n"); + + if (size < 218) + die("MT configuration is smaller than expected!\n"); + + ATTR_MSS_VPD_MT_DRAM_RTT_NOM[vpd_idx] = mt_conf[38]; + ATTR_MSS_VPD_MT_DRAM_RTT_PARK[vpd_idx] = mt_conf[54]; + ATTR_MSS_VPD_MT_DRAM_RTT_WR[vpd_idx] = mt_conf[70]; + + switch (version_layout) { + case 0: + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0], &mt_conf[170], + 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0], &mt_conf[174], + 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0], &mt_conf[186], + 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0], &mt_conf[190], + 2); + ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] = mt_conf[204]; + memcpy(&ATTR_MSS_VPD_MT_VREF_MC_RD[vpd_idx], &mt_conf[206], 4); + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) { + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP, &mt_conf[86], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN, &mt_conf[94], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP, &mt_conf[102], 4); + + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP, &mt_conf[110], 8); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES, &mt_conf[126], 8); + + ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK = mt_conf[142]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR = mt_conf[144]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL = mt_conf[146]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID = mt_conf[148]; + + ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS = mt_conf[150]; + ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS = mt_conf[160]; + + memcpy(&ATTR_MSS_VPD_MT_PREAMBLE, &mt_conf[202], 2); + + memcpy(&ATTR_MSS_VPD_MT_WINDAGE_RD_CTR, &mt_conf[214], 2); + } + break; + case 1: + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][0][0], &mt_conf[172], 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_RD[vpd_idx][1][0], &mt_conf[176], 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][0][0], &mt_conf[188], 2); + memcpy(&ATTR_MSS_VPD_MT_ODT_WR[vpd_idx][1][0], &mt_conf[192], 2); + ATTR_MSS_VPD_MT_VREF_DRAM_WR[vpd_idx] = mt_conf[206]; + memcpy(&ATTR_MSS_VPD_MT_VREF_MC_RD[vpd_idx], &mt_conf[208], 4); + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) { + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_RD_UP, &mt_conf[88], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_DOWN, &mt_conf[96], 4); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_ACBOOST_WR_UP, &mt_conf[104], 4); + + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_CAP, &mt_conf[112], 8); + memcpy(&ATTR_MSS_VPD_MT_MC_DQ_CTLE_RES, &mt_conf[128], 8); + + ATTR_MSS_VPD_MT_MC_DRV_IMP_CLK = mt_conf[144]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CMD_ADDR = mt_conf[146]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CNTL = mt_conf[148]; + ATTR_MSS_VPD_MT_MC_DRV_IMP_CSCID = mt_conf[150]; + + ATTR_MSS_VPD_MT_MC_DRV_IMP_DQ_DQS = mt_conf[152]; + ATTR_MSS_VPD_MT_MC_RCV_IMP_DQ_DQS = mt_conf[162]; + + ATTR_MSS_VPD_MT_PREAMBLE = mt_conf[204]; + + memcpy(&ATTR_MSS_VPD_MT_WINDAGE_RD_CTR, &mt_conf[216], 2); + } + break; + } + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) + ATTR_MSS_VPD_MT_DRAM_DRV_IMP_DQ_DQS = mt_conf[22]; +} + +static void load_mt(const uint8_t *vpd_data) +{ + int vpd_idx = 0; + + /* Assuming that data differs only per DIMM pairs */ + for (vpd_idx = 0; vpd_idx < 4; ++vpd_idx) { + const int dimm0_rank = 1 + vpd_idx / 2; + const int dimm1_rank = (vpd_idx % 2 ? 
dimm0_rank : 0); + + struct vpd_info vpd = { + .data = vpd_data, + .mcs_i = 0, + .freq = 1866, + .dimm0_rank = dimm0_rank, + .dimm1_rank = dimm1_rank, + }; + + const uint8_t *mt_conf = NULL; + size_t size = 0; + + mt_conf = find_vpd_conf(&vpd, "MT", &size); + if (!mt_conf) + die("Failed to read MT configuration!\n"); + + load_mt_attrs(mt_conf, size, vpd_idx); + } +} + +static void load_mr_attrs(const uint8_t *mr_conf, size_t size, int vpd_idx) +{ + uint8_t version_layout; + uint8_t version_data; + + if (size < 2) + die("MR configuration is way too small!\n"); + + version_layout = mr_conf[0]; + version_data = mr_conf[1]; + + if (version_layout != 0) + die("Unsupported layout of MR configuration!\n"); + + if (size < 101) + die("MR configuration is smaller than expected!\n"); + + /* The following data is the same for all configurations */ + if (vpd_idx == 0) { + ATTR_MSS_VPD_MR_DPHY_GPO = mr_conf[6]; + ATTR_MSS_VPD_MR_DPHY_RLO = mr_conf[8]; + ATTR_MSS_VPD_MR_DPHY_WLO = mr_conf[10]; + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKN, &mr_conf[58], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D0_CLKP, &mr_conf[56], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKN, &mr_conf[62], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_D1_CLKP, &mr_conf[60], 2); + + ATTR_MSS_VPD_MR_MC_2N_MODE_AUTOSET = mr_conf[98]; + } + + /* The following data changes per frequency */ + if (vpd_idx % 8 == 0) { + const int freq_i = vpd_idx / 8; + ATTR_MSS_VPD_MR_TSYS_ADR[freq_i] = mr_conf[99]; + ATTR_MSS_VPD_MR_TSYS_DATA[freq_i] = mr_conf[100]; + } + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A00[vpd_idx], &mr_conf[12], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A01[vpd_idx], &mr_conf[14], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A02[vpd_idx], &mr_conf[16], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A03[vpd_idx], &mr_conf[18], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A04[vpd_idx], &mr_conf[20], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A05[vpd_idx], &mr_conf[22], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A06[vpd_idx], &mr_conf[24], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A07[vpd_idx], &mr_conf[26], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A08[vpd_idx], &mr_conf[28], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A09[vpd_idx], &mr_conf[30], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A10[vpd_idx], &mr_conf[32], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A11[vpd_idx], &mr_conf[34], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A12[vpd_idx], &mr_conf[36], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A13[vpd_idx], &mr_conf[38], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_A17[vpd_idx], &mr_conf[40], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA0[vpd_idx], &mr_conf[42], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BA1[vpd_idx], &mr_conf[44], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG0[vpd_idx], &mr_conf[46], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_BG1[vpd_idx], &mr_conf[48], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C0[vpd_idx], &mr_conf[50], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C1[vpd_idx], &mr_conf[52], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_ADDR_C2[vpd_idx], &mr_conf[54], 2); + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ACTN[vpd_idx], &mr_conf[64], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_CASN_A15[vpd_idx], &mr_conf[66], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_RASN_A16[vpd_idx], &mr_conf[68], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_ADDR_WEN_A14[vpd_idx], &mr_conf[70], 2); + 
memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CMD_PAR[vpd_idx], &mr_conf[72], 2); + + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE0[vpd_idx], &mr_conf[74], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CKE1[vpd_idx], &mr_conf[76], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN0[vpd_idx], &mr_conf[82], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_CSN1[vpd_idx], &mr_conf[84], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT0[vpd_idx], &mr_conf[90], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D0_ODT1[vpd_idx], &mr_conf[92], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE0[vpd_idx], &mr_conf[78], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CKE1[vpd_idx], &mr_conf[80], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN0[vpd_idx], &mr_conf[86], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_CSN1[vpd_idx], &mr_conf[88], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT0[vpd_idx], &mr_conf[94], 2); + memcpy(ATTR_MSS_VPD_MR_MC_PHASE_ROT_CNTL_D1_ODT1[vpd_idx], &mr_conf[96], 2); +} + +static void load_mr(const uint8_t *vpd_data) +{ + const int freqs[] = { 1866, 2133, 2400, 2666 }; + int vpd_idx = 0; + + /* Index matches indexing of ATTR_MSS_VPD_MR_MC_PHASE_ROT_* data */ + for (vpd_idx = 0; vpd_idx < 28; ++vpd_idx) { + const int freq = freqs[vpd_idx / 8]; + const int mcs_i = vpd_idx % 4; + const int dimm1_rank = (vpd_idx % 8 >= 4 ? 1 : 0); + + struct vpd_info v = { + .data = vpd_data, + .mcs_i = mcs_i, + .freq = freq, + /* Configurations differ only per DIMM presence */ + .dimm0_rank = 1, + .dimm1_rank = dimm1_rank, + }; + + const uint8_t *mr_conf = NULL; + size_t size = 0; + + mr_conf = find_vpd_conf(&v, "MR", &size); + if (!mr_conf) + die("Failed to read MR configuration!\n"); + + load_mr_attrs(mr_conf, size, vpd_idx); + } +} + +static void load_vpd_attrs(const uint8_t *vpd_data) +{ + load_mr(vpd_data); + load_mt(vpd_data); +} + +void vpd_pnor_main(void) +{ + const struct region_device *memd_device = NULL; + + uint8_t buf[MEMD_HDR_SIZE]; + struct memd_hdr *hdr_memd = (struct memd_hdr *)buf; + + const uint8_t *vpd_data = NULL; + size_t vpd_size = 0; + + memd_device_init(); + memd_device = memd_device_ro(); + + /* Copy the whole header at once */ + if (rdev_readat(memd_device, buf, 0, sizeof(buf)) != sizeof(buf)) + die("Failed to read MEMD header!\n"); + + if (memcmp(hdr_memd->eyecatch, "OKOK", 4)) + die("Invalid MEMD header!\n"); + if (memcmp(hdr_memd->header_version, "01.0", 4)) + die("Unsupported MEMD header version!\n"); + if (memcmp(hdr_memd->memd_version, "01.0", 4)) + die("Unsupported MEMD version!\n"); + + /* We don't loop over sections */ + if (hdr_memd->section_count != 1) + die("Unsupported MEMD section count!\n"); + + vpd_size = hdr_memd->section_size * MEMD_SECTION_ROUNDING_DIVISOR; + vpd_data = rdev_mmap(memd_device, MEMD_HDR_SIZE, vpd_size); + if (!vpd_data) + die("Failed to map VPD data!\n"); + + load_vpd_attrs(vpd_data); + + if (rdev_munmap(memd_device, (void *)vpd_data)) + die("Failed to unmap VPD data!\n"); + + memd_device_unmount(); +} diff --git a/src/soc/ibm/power9/wof.h b/src/soc/ibm/power9/wof.h new file mode 100644 index 00000000000..9bf4f51d5f9 --- /dev/null +++ b/src/soc/ibm/power9/wof.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_WOF_H +#define __SOC_IBM_POWER9_WOF_H + +struct region_device; + +void wof_device_init(void); + +void wof_device_unmount(void); + +const struct region_device *wof_device_ro(void); + +#endif /* __SOC_IBM_POWER9_WOF_H */ diff --git a/src/soc/ibm/power9/xbus.h
b/src/soc/ibm/power9/xbus.h new file mode 100644 index 00000000000..77cf87b694d --- /dev/null +++ b/src/soc/ibm/power9/xbus.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_XBUS_H +#define __SOC_IBM_POWER9_XBUS_H + +#include +#include + +#define XBUS_LANE_COUNT 17 + +#define XBUS_LINK_GROUP_OFFSET 0x2000000000 + +/* Updates address that targets XBus chiplet to use a specific XBus link number. + * Does nothing to non-XBus addresses. */ +static inline uint64_t xbus_addr(uint64_t addr) +{ + enum { + XBUS_COUNT = 0x3, // number of XBus links + XBUS_LINK = 0x1, // hard-coded link number + XB_IOX_0_RING_ID = 0x3, // IOX_0 + XB_PBIOX_0_RING_ID = 0x6, // PBIOX_0 + }; + + uint8_t ring = (addr >> 10) & 0xF; + uint8_t chiplet = (addr >> 24) & 0x3F; + + if (chiplet != XB_CHIPLET_ID) + return addr; + + if (ring >= XB_IOX_0_RING_ID && ring < XB_IOX_0_RING_ID + XBUS_COUNT) + PPC_INSERT(addr, XB_IOX_0_RING_ID + XBUS_LINK, 50, 4); + else if (ring >= XB_PBIOX_0_RING_ID && ring < XB_PBIOX_0_RING_ID + XBUS_COUNT) + PPC_INSERT(addr, XB_PBIOX_0_RING_ID + XBUS_LINK, 50, 4); + + return addr; +} + +#endif /* __SOC_IBM_POWER9_XBUS_H */ diff --git a/src/soc/ibm/power9/xip.h b/src/soc/ibm/power9/xip.h new file mode 100644 index 00000000000..fb386c17daa --- /dev/null +++ b/src/soc/ibm/power9/xip.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __SOC_IBM_POWER9_XIP_H +#define __SOC_IBM_POWER9_XIP_H + +#define XIP_MAGIC_HW (0x5849502020204857) // "XIP HW" +#define XIP_MAGIC_SGPE (0x5849502053475045) // "XIP SGPE" +#define XIP_MAGIC_RESTORE (0x5849502052455354) // "XIP REST" + +/* All fields are big-endian */ + +struct xip_section { + uint32_t offset; + uint32_t size; + uint8_t alignment; + uint8_t dd_support; + uint8_t reserved8[2]; +}; + +/* Each XIP header holds 15 XIP sections, some of them are sometimes unused. */ +#define XIP_HEADER_COMMON_FIELDS_TOP \ + uint64_t magic; \ + uint64_t l1_addr; \ + uint64_t l2_addr; \ + uint64_t kernel_addr; \ + uint64_t link_address; \ + uint64_t reserved64[3]; \ + struct xip_section sections[5]; + +#define XIP_HEADER_COMMON_FIELDS_BOTTOM \ + uint32_t image_size; \ + /* In yyyymmdd format, e.g. 20110630, when read as decimal, not hex */ \ + uint32_t build_date; \ + /* In hhmm format, e.g. 0756 */ \ + uint32_t build_time; \ + char build_tag[20]; \ + uint8_t header_version; \ + uint8_t normalized; \ + uint8_t toc_sorted; \ + uint8_t reserved8[5]; \ + char build_user[16]; \ + char build_host[40]; \ + char reserved_char[8]; \ + +struct xip_hw_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section sgpe; + struct xip_section restore; + struct xip_section cme; + struct xip_section pgpe; + struct xip_section ioppe; + struct xip_section fppe; + struct xip_section rings; + struct xip_section overlays; + struct xip_section unused[2]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +struct xip_sgpe_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section qpmr; + struct xip_section l1_bootloader; + struct xip_section l2_bootloader; + struct xip_section hcode; + struct xip_section unused[6]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +struct xip_restore_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section cpmr; + struct xip_section self; + struct xip_section unused[8]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +struct xip_cme_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section hcode; + struct xip_section unused[9]; /* Pad to 15 sections. 
*/ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +struct xip_pgpe_header { + XIP_HEADER_COMMON_FIELDS_TOP + struct xip_section ppmr; + struct xip_section l1_bootloader; + struct xip_section l2_bootloader; + struct xip_section hcode; + struct xip_section aux_task; + struct xip_section unused[5]; /* Pad to 15 sections. */ + XIP_HEADER_COMMON_FIELDS_BOTTOM +}; + +#define DD_CONTAINER_MAGIC 0x4444434F // "DDCO" + +struct dd_block { + uint32_t offset; + uint32_t size; + uint8_t dd; + uint8_t reserved[3]; +}; + +struct dd_container { + uint32_t magic; + uint8_t num; + uint8_t reserved[3]; + struct dd_block blocks[0]; +}; + + +#endif /* __SOC_IBM_POWER9_XIP_H */ diff --git a/src/soc/ibm/power9/xive.c b/src/soc/ibm/power9/xive.c new file mode 100644 index 00000000000..8d6ac06e52a --- /dev/null +++ b/src/soc/ibm/power9/xive.c @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include +#include +#include +#include + +#include "homer.h" + +#define CODE_SIZE(x) ((x ## _end) - (x)) + +extern uint8_t sys_reset_int[]; +extern uint8_t sys_reset_int_end[]; +extern uint8_t ext_int[]; +extern uint8_t ext_int_end[]; +extern uint8_t hyp_virt_int[]; +extern uint8_t hyp_virt_int_end[]; + +#define IVPE_BAR 0x0006020000000000 +#define FSP_BAR 0x0006030100000000 +#define PSI_HB_BAR 0x0006030203000000 +#define PSI_HB_ESB_BAR 0x00060302031C0000 +#define XIVE_IC_BAR 0x0006030203100000 + +/* + * XIVE is not officially documented anywhere. There are bits and pieces that + * can be put together in [KVM] and [QEMU], but those are mostly about using + * XIVE for virtualization, not bare metal. Code below was ported from Hostboot + * and probably this is the best that we can hope for without documentation. + * + * [KVM] https://www.kernel.org/doc/html/latest/virt/kvm/devices/xive.html + * [QEMU] https://qemu.readthedocs.io/en/latest/specs/ppc-xive.html + */ +void configure_xive(int core) +{ + uint64_t tmp; + + /* Install handlers */ + memcpy((void *)0x100, sys_reset_int, CODE_SIZE(sys_reset_int)); + memcpy((void *)0x500, ext_int, CODE_SIZE(ext_int)); + memcpy((void *)0xEA0, hyp_virt_int, CODE_SIZE(hyp_virt_int)); + + /* IVPE BAR + enable bit */ + write_scom(0, 0x05013012, IVPE_BAR | PPC_BIT(0)); + + /* FSP BAR */ + write_scom(0, 0x0501290B, FSP_BAR); + + /* PSI HB BAR + enable bit */ + /* TODO: check if 2 separate writes are required */ + write_scom(0, 0x0501290A, PSI_HB_BAR); + write_scom(0, 0x0501290A, PSI_HB_BAR | PPC_BIT(63)); + + /* Disable VPC Pull error */ + scom_and(0, 0x05013179, ~PPC_BIT(30)); + + /* PSI HB ESB BAR + enable bit */ + /* TODO: check if 2 separate writes are required */ + write_scom(0, 0x05012916, PSI_HB_ESB_BAR); + write_scom(0, 0x05012916, PSI_HB_ESB_BAR | PPC_BIT(63)); + + /* XIVE IC BAR + enable bit */ + write_scom(0, 0x05013010, XIVE_IC_BAR | PPC_BIT(0)); + + /* Set HB mode on P3PC register */ + scom_or(0, 0x05013110, PPC_BIT(33)); + + /* Disable PSI interrupts */ + write_scom(0, 0x05012913, PPC_BIT(3)); + + void *esb_bar = (void *)PSI_HB_ESB_BAR; + /* Mask all interrupt sources */ + for (int i = 0; i < 14; i++) { + tmp = read64(esb_bar + i*0x1000 + 0xD00); + eieio(); + tmp = read64(esb_bar + i*0x1000 + 0x800); + assert(tmp == 1); + } + + /* Route interrupts to CEC (whatever that is) instead of FSP */ + void *hb_bar = (void *)PSI_HB_BAR; + write64(hb_bar + 0x20, read64(hb_bar + 0x20) | PPC_BIT(3)); + + /* Enable PSIHB interrupts */ + write64(hb_bar + 0x58, read64(hb_bar + 0x58) | PPC_BIT(0)); + + /* Route interrupts to first thread of active core */ + int offset = (core < 16) ? 
0x48 : 0x68; + void *xive_ic_bar = (void *)XIVE_IC_BAR; + write64(xive_ic_bar + 0x400 + offset, PPC_BIT(4 * (core % 16))); + eieio(); + + /* Configure LSI mode for HB CEC interrupts */ + void *ivpe_bar = (void *)IVPE_BAR; + write8(ivpe_bar + 0x38, 0x81); + eieio(); + + /* Route LSI to master processor */ + /* TODO: check if 2 separate writes are required */ + write64(hb_bar + 0x68, 0x0006030203102000); + write64(hb_bar + 0x68, 0x0006030203102001); + write64(hb_bar + 0x58, 0); + + /* Enable LSI interrupts */ + tmp = read64(xive_ic_bar + 0x3000 + 0xC00); + + /* Unmask PSU interrupts */ + tmp = read64(esb_bar + 0xD*0x1000 + 0xC00); + eieio(); + tmp = read64(esb_bar + 0xD*0x1000 + 0x800); + assert(tmp == 0); +} diff --git a/src/soc/ibm/power9/xscom.c b/src/soc/ibm/power9/xscom.c new file mode 100644 index 00000000000..0358abc487d --- /dev/null +++ b/src/soc/ibm/power9/xscom.c @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include +#include // HMER +#include + +#define MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR 0x800603FC00000000 +#define MMIO_GROUP_SIZE 0x0000200000000000 + +#define XSCOM_ADDR_IND_FLAG PPC_BIT(0) +#define XSCOM_DATA_IND_FORM1 PPC_BIT(3) +#define XSCOM_ADDR_IND_ADDR PPC_BITMASK(11, 31) +#define XSCOM_ADDR_IND_DATA PPC_BITMASK(48, 63) + +#define XSCOM_DATA_IND_READ PPC_BIT(0) +#define XSCOM_DATA_IND_COMPLETE PPC_BIT(32) +#define XSCOM_DATA_IND_ERR PPC_BITMASK(33, 35) +#define XSCOM_DATA_IND_DATA PPC_BITMASK(48, 63) +#define XSCOM_DATA_IND_FORM1_DATA PPC_BITMASK(12, 63) +#define XSCOM_IND_MAX_RETRIES 10 + +#define XSCOM_IND_FORM1_ADDR PPC_BITMASK(32, 63) +#define XSCOM_IND_FORM1_DATA_FROM_ADDR PPC_BITMASK(0, 11) +#define XSCOM_IND_FORM1_DATA_IN_ADDR PPC_BITMASK(20, 31) + +#define XSCOM_RCVED_STAT_REG 0x00090018 +#define XSCOM_LOG_REG 0x00090012 +#define XSCOM_ERR_REG 0x00090013 + +static void write_xscom_direct(uint8_t chip, uint64_t reg_address, uint64_t data); + +static void reset_xscom_engine(uint8_t chip) +{ + /* + * With cross-CPU SCOM accesses, first register should be cleared on the + * executing CPU, the other two on target CPU. In that case it may be + * necessary to do the remote writes in assembly directly to skip checking + * HMER and possibly end in a loop. + */ + write_xscom_direct(0, XSCOM_RCVED_STAT_REG, 0); + write_xscom_direct(chip, XSCOM_LOG_REG, 0); + write_xscom_direct(chip, XSCOM_ERR_REG, 0); + clear_hmer(); + eieio(); +} + +static uint64_t read_xscom_direct(uint8_t chip, uint64_t reg_address) +{ + uint64_t val; + uint64_t hmer = 0; + do { + /* + * Clearing HMER on every SCOM access seems to slow down CCS up + * to a point where it starts hitting timeout on "less ideal" + * DIMMs for write centering. Clear it only if this do...while + * executes more than once. + */ + if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) + clear_hmer(); + + eieio(); + asm volatile( + "ldcix %0, %1, %2" : + "=r"(val) : + "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE), + "r"(reg_address << 3)); + eieio(); + hmer = read_hmer(); + } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); + + if (hmer & SPR_HMER_XSCOM_STATUS) { + reset_xscom_engine(chip); + /* + * All F's are returned in case of error, but code polls for a set bit + * after changes that can make such error appear (e.g. clock settings). + * Return 0 so caller won't have to test for all F's in that case. 
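 + *
 + * For reference, the MMIO mapping used by the inline assembly resolves a
 + * SCOM register to
 + * MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE + (reg_address << 3),
 + * so e.g. XSCOM_RCVED_STAT_REG (0x00090018) on chip 0 is accessed at
 + * 0x800603FC00000000 + (0x00090018 << 3).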
+ */ + return 0; + } + return val; +} + +static void write_xscom_direct(uint8_t chip, uint64_t reg_address, uint64_t data) +{ + uint64_t hmer = 0; + do { + /* See comment in read_xscom_direct() */ + if ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED) + clear_hmer(); + + eieio(); + asm volatile( + "stdcix %0, %1, %2":: + "r"(data), + "b"(MMIO_GROUP0_CHIP0_SCOM_BASE_ADDR + chip * MMIO_GROUP_SIZE), + "r"(reg_address << 3)); + eieio(); + hmer = read_hmer(); + } while ((hmer & SPR_HMER_XSCOM_STATUS) == SPR_HMER_XSCOM_OCCUPIED); + + if (hmer & SPR_HMER_XSCOM_STATUS) + reset_xscom_engine(chip); +} + +static void write_xscom_indirect_form0(uint8_t chip, uint64_t reg_address, uint64_t value) +{ + uint64_t addr; + uint64_t data; + addr = reg_address & 0x7FFFFFFF; + data = reg_address & XSCOM_ADDR_IND_ADDR; + data |= value & XSCOM_ADDR_IND_DATA; + + write_xscom_direct(chip, addr, data); + + for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { + data = read_xscom_direct(chip, addr); + if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { + return; + } else if (data & XSCOM_DATA_IND_COMPLETE) { + printk(BIOS_EMERG, "SCOM WR error %16.16llx = %16.16llx : %16.16llx\n", + reg_address, value, data); + } + // TODO: delay? + } +} + +static void write_xscom_indirect_form1(uint8_t chip, uint64_t reg_address, uint64_t value) +{ + uint64_t addr; + uint64_t data; + + if (value & XSCOM_IND_FORM1_DATA_FROM_ADDR) + die("Value for form 1 indirect SCOM must have bits 0-11 zeroed!"); + + data = value | ((reg_address & XSCOM_IND_FORM1_DATA_IN_ADDR) << 20); + addr = reg_address & XSCOM_IND_FORM1_ADDR; + + write_xscom_direct(chip, addr, data); +} + +static uint64_t read_xscom_indirect_form0(uint8_t chip, uint64_t reg_address) +{ + uint64_t addr; + uint64_t data; + addr = reg_address & 0x7FFFFFFF; + data = XSCOM_DATA_IND_READ | (reg_address & XSCOM_ADDR_IND_ADDR); + + write_xscom_direct(chip, addr, data); + + for (int retries = 0; retries < XSCOM_IND_MAX_RETRIES; ++retries) { + data = read_xscom_direct(chip, addr); + if ((data & XSCOM_DATA_IND_COMPLETE) && ((data & XSCOM_DATA_IND_ERR) == 0)) { + break; + } else if (data & XSCOM_DATA_IND_COMPLETE) { + printk(BIOS_EMERG, "SCOM RD error %16.16llx : %16.16llx\n", + reg_address, data); + } + // TODO: delay?
+ } + + return data & XSCOM_DATA_IND_DATA; +} + +/* Private API used only by SCOM dispatcher, no need to expose it */ +void write_xscom(uint8_t chip, uint64_t addr, uint64_t data); +uint64_t read_xscom(uint8_t chip, uint64_t addr); + +void write_xscom(uint8_t chip, uint64_t addr, uint64_t data) +{ + if (!(addr & XSCOM_ADDR_IND_FLAG)) + write_xscom_direct(chip, addr, data); + else if (!(addr & XSCOM_DATA_IND_FORM1)) + write_xscom_indirect_form0(chip, addr, data); + else + write_xscom_indirect_form1(chip, addr, data); +} + +uint64_t read_xscom(uint8_t chip, uint64_t addr) +{ + if (!(addr & XSCOM_ADDR_IND_FLAG)) + return read_xscom_direct(chip, addr); + else if (!(addr & XSCOM_DATA_IND_FORM1)) + return read_xscom_indirect_form0(chip, addr); + else + die("Form 1 indirect SCOM does not have a read operation!"); +} diff --git a/src/soc/mediatek/mt8173/memlayout.ld b/src/soc/mediatek/mt8173/memlayout.ld index 092cfdf2bf4..8dce4284de9 100644 --- a/src/soc/mediatek/mt8173/memlayout.ld +++ b/src/soc/mediatek/mt8173/memlayout.ld @@ -26,7 +26,7 @@ SECTIONS SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) PRERAM_CBMEM_CONSOLE(0x00104000, 12K) WATCHDOG_TOMBSTONE(0x00107000, 4) diff --git a/src/soc/mediatek/mt8183/memlayout.ld b/src/soc/mediatek/mt8183/memlayout.ld index 0acd174c84f..390842693f5 100644 --- a/src/soc/mediatek/mt8183/memlayout.ld +++ b/src/soc/mediatek/mt8183/memlayout.ld @@ -23,7 +23,7 @@ SECTIONS { SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) WATCHDOG_TOMBSTONE(0x00104000, 4) PRERAM_CBMEM_CONSOLE(0x00104004, 63K - 4) diff --git a/src/soc/mediatek/mt8186/include/soc/memlayout.ld b/src/soc/mediatek/mt8186/include/soc/memlayout.ld index 1764632f291..f8bb0fa898e 100644 --- a/src/soc/mediatek/mt8186/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8186/include/soc/memlayout.ld @@ -31,7 +31,7 @@ SECTIONS /* EMPTY(0x0010a804, 1K - 4) */ /* Regions that can also be moved to SRAM_L2C. */ TIMESTAMP(0x0010ac00, 1K) - TPM_TCPA_LOG(0x0010b000, 2K) + TPM_LOG(0x0010b000, 2K) FMAP_CACHE(0x0010b800, 2K) CBFS_MCACHE(0x0010c000, 16K) SRAM_END(0x00110000) diff --git a/src/soc/mediatek/mt8188/include/soc/memlayout.ld b/src/soc/mediatek/mt8188/include/soc/memlayout.ld index dc4090d74ba..8d1f2bde650 100644 --- a/src/soc/mediatek/mt8188/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8188/include/soc/memlayout.ld @@ -34,7 +34,7 @@ SECTIONS CBFS_MCACHE(0x00120000, 16k) VBOOT2_WORK(0x00124000, 12K) FMAP_CACHE(0x00127000, 2k) - TPM_TCPA_LOG(0x00127800, 2k) + TPM_LOG(0x00127800, 2k) TIMESTAMP(0x00128000, 1k) /* End of regions that can also be moved to SRAM_L2C. 
*/ /* EMPTY(0x00128400, 31K) */ diff --git a/src/soc/mediatek/mt8192/include/soc/memlayout.ld b/src/soc/mediatek/mt8192/include/soc/memlayout.ld index 150bfdde788..6c238c7d8fc 100644 --- a/src/soc/mediatek/mt8192/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8192/include/soc/memlayout.ld @@ -23,7 +23,7 @@ SECTIONS { SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) WATCHDOG_TOMBSTONE(0x00104000, 4) CBFS_MCACHE(0x00107c00, 8K) diff --git a/src/soc/mediatek/mt8195/include/soc/memlayout.ld b/src/soc/mediatek/mt8195/include/soc/memlayout.ld index e8b51d24047..8b8463716fc 100644 --- a/src/soc/mediatek/mt8195/include/soc/memlayout.ld +++ b/src/soc/mediatek/mt8195/include/soc/memlayout.ld @@ -26,7 +26,7 @@ SECTIONS { SRAM_START(0x00100000) VBOOT2_WORK(0x00100000, 12K) - TPM_TCPA_LOG(0x00103000, 2K) + TPM_LOG(0x00103000, 2K) FMAP_CACHE(0x00103800, 2K) WATCHDOG_TOMBSTONE(0x00104000, 4) EARLY_INIT(0x00104010, 128) diff --git a/src/soc/nvidia/tegra124/memlayout.ld b/src/soc/nvidia/tegra124/memlayout.ld index 68c70c10545..ed386f1fdc7 100644 --- a/src/soc/nvidia/tegra124/memlayout.ld +++ b/src/soc/nvidia/tegra124/memlayout.ld @@ -19,7 +19,7 @@ SECTIONS CBFS_MCACHE(0x40006000, 8K) PRERAM_CBFS_CACHE(0x40008000, 6K) VBOOT2_WORK(0x40009800, 12K) - TPM_TCPA_LOG(0x4000D800, 2K) + TPM_LOG(0x4000D800, 2K) STACK(0x4000E000, 8K) BOOTBLOCK(0x40010000, 32K) VERSTAGE(0x40018000, 70K) diff --git a/src/soc/nvidia/tegra210/memlayout.ld b/src/soc/nvidia/tegra210/memlayout.ld index 42f21646447..4898fc14696 100644 --- a/src/soc/nvidia/tegra210/memlayout.ld +++ b/src/soc/nvidia/tegra210/memlayout.ld @@ -20,7 +20,7 @@ SECTIONS PRERAM_CBFS_CACHE(0x40001000, 20K) CBFS_MCACHE(0x40006000, 8K) VBOOT2_WORK(0x40008000, 12K) - TPM_TCPA_LOG(0x4000B000, 2K) + TPM_LOG(0x4000B000, 2K) #if ENV_ARM64 STACK(0x4000B800, 3K) #else /* AVP gets a separate stack to avoid any chance of handoff races. 
*/ diff --git a/src/soc/qualcomm/sc7180/memlayout.ld b/src/soc/qualcomm/sc7180/memlayout.ld index 938f3e1e422..e956c647ff5 100644 --- a/src/soc/qualcomm/sc7180/memlayout.ld +++ b/src/soc/qualcomm/sc7180/memlayout.ld @@ -32,7 +32,7 @@ SECTIONS REGION(pbl_timestamps, 0x14800000, 83K, 4K) WATCHDOG_TOMBSTONE(0x14814FFC, 4) BOOTBLOCK(0x14815000, 48K) - TPM_TCPA_LOG(0x14821000, 2K) + TPM_LOG(0x14821000, 2K) PRERAM_CBFS_CACHE(0x14821800, 60K) PRERAM_CBMEM_CONSOLE(0x14830800, 32K) TIMESTAMP(0x14838800, 1K) diff --git a/src/soc/samsung/exynos5250/memlayout.ld b/src/soc/samsung/exynos5250/memlayout.ld index eec9f60a91f..142a8924b06 100644 --- a/src/soc/samsung/exynos5250/memlayout.ld +++ b/src/soc/samsung/exynos5250/memlayout.ld @@ -21,7 +21,7 @@ SECTIONS PRERAM_CBFS_CACHE(0x205C000, 68K) CBFS_MCACHE(0x206D000, 8K) FMAP_CACHE(0x206F000, 2K) - TPM_TCPA_LOG(0x206F800, 2K) + TPM_LOG(0x206F800, 2K) VBOOT2_WORK(0x2070000, 12K) STACK(0x2074000, 16K) SRAM_END(0x2078000) diff --git a/src/vendorcode/Makefile.inc b/src/vendorcode/Makefile.inc index 36a13bb6f67..9cc4f2903a0 100644 --- a/src/vendorcode/Makefile.inc +++ b/src/vendorcode/Makefile.inc @@ -5,3 +5,4 @@ subdirs-y += siemens subdirs-y += cavium subdirs-y += eltan subdirs-y += mediatek +subdirs-y += ibm diff --git a/src/vendorcode/eltan/security/mboot/mboot.c b/src/vendorcode/eltan/security/mboot/mboot.c index 575c5fc0220..50ca0256f39 100644 --- a/src/vendorcode/eltan/security/mboot/mboot.c +++ b/src/vendorcode/eltan/security/mboot/mboot.c @@ -136,7 +136,8 @@ int mboot_hash_extend_log(uint64_t flags, uint8_t *hashData, uint32_t hashDataLe printk(BIOS_DEBUG, "%s: SHA256 Hash Digest:\n", __func__); mboot_print_buffer(digest->digest.sha256, VB2_SHA256_DIGEST_SIZE); - return (tlcl_extend(newEventHdr->pcrIndex, (uint8_t *)&(newEventHdr->digest), NULL)); + return (tlcl_extend(newEventHdr->pcrIndex, (uint8_t *)&(newEventHdr->digest), + VB2_HASH_SHA256)); } /* diff --git a/src/vendorcode/ibm/Makefile.inc b/src/vendorcode/ibm/Makefile.inc new file mode 100644 index 00000000000..ecf3cedab77 --- /dev/null +++ b/src/vendorcode/ibm/Makefile.inc @@ -0,0 +1,3 @@ +## SPDX-License-Identifier: GPL-2.0-only + +subdirs-$(CONFIG_CPU_IBM_POWER9) += power9 diff --git a/src/vendorcode/ibm/power9/Makefile.inc b/src/vendorcode/ibm/power9/Makefile.inc new file mode 100644 index 00000000000..50ff5a4fa53 --- /dev/null +++ b/src/vendorcode/ibm/power9/Makefile.inc @@ -0,0 +1,3 @@ +## SPDX-License-Identifier: GPL-2.0-only + +ramstage-y += rs4.c diff --git a/src/vendorcode/ibm/power9/README.md b/src/vendorcode/ibm/power9/README.md new file mode 100644 index 00000000000..6dda50360d9 --- /dev/null +++ b/src/vendorcode/ibm/power9/README.md @@ -0,0 +1,10 @@ +RS4 implementation is copied from HostBoot mostly as is: + * some renaming + * removal of unused functions + * updates to make things work outside of HostBoot + +URLs for files that served as prototypes: + * `rs4.c`: + https://git.raptorcs.com/git/talos-hostboot/tree/src/import/chips/p9/utils/imageProcs/p9_scan_compression.C + * `rs4.h`: + https://git.raptorcs.com/git/talos-hostboot/tree/src/import/chips/p9/utils/imageProcs/p9_scan_compression.H diff --git a/src/vendorcode/ibm/power9/pstates/README.md b/src/vendorcode/ibm/power9/pstates/README.md new file mode 100644 index 00000000000..01992f68871 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/README.md @@ -0,0 +1,28 @@ +Files in this directory come from talos-hostboot repo, commit a2ddbf3 [1]. 
No
+changes were made, other than:
+ * converting `#include <...>` to `#include "..."`
+ * commenting out some `#include` directives to avoid pulling in those headers
+ * using a differently named constant for the maximum number of quads
+
+In some cases, the units mentioned in comments on an enclosing structure are
+different than in the comments above the inner structure definition and in its
+field names. An example of such a difference is VpdOperatingPoint, defined in
+p9_pstates_common.h, where "voltages are specified in units of 1mV, and
+characterization currents are specified in units of 100mA", which is consistent
+with its field names. When this structure is embedded in other structures
+(every other file uses it), the unit for voltage becomes 5mV, and for currents
+500mA.
+
+Another issue is poundW_data - it doesn't have the 'packed' attribute, but it
+is packed in MVPD. Additional fields were added at some point [2], with a
+comment saying that an additional reserved field was added to keep the size the
+same. The problem is that the new field (the important one, not the reserved
+one) was added in the middle of the structure, before a uint64 that by default
+is naturally aligned to 8B - the biggest alignment used in that structure, so
+modifying anything after that field cannot keep the size the same as before.
+Luckily, the offsets of all non-reserved fields are correct; only sizeof() and
+arrays of this type cannot be used.
+
+There may be other inconsistencies, be advised.
+
+[1]: https://git.raptorcs.com/git/talos-hostboot/tree/src/import/chips/p9/procedures/hwp/lib?id=a2ddbf3150e2c02ccc904b25d6650c9932a8a841
+[2]: https://git.raptorcs.com/git/talos-hostboot/commit/src/import/chips/p9/procedures/hwp/lib/p9_pstates_cmeqm.h?id=2ab88987e5fed942b71b757e0c2972adee5b8e1b
diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h
new file mode 100644
index 00000000000..0f8deee8fb8
--- /dev/null
+++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_cmeqm.h
@@ -0,0 +1,357 @@
+/* IBM_PROLOG_BEGIN_TAG                                                   */
+/* This is an automatically generated prolog.                             */
+/*                                                                        */
+/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_cmeqm.h $   */
+/*                                                                        */
+/* OpenPOWER HostBoot Project                                             */
+/*                                                                        */
+/* Contributors Listed Below - COPYRIGHT 2015,2019                        */
+/* [+] International Business Machines Corp.                              */
+/*                                                                        */
+/*                                                                        */
+/* Licensed under the Apache License, Version 2.0 (the "License");        */
+/* you may not use this file except in compliance with the License.       */
+/* You may obtain a copy of the License at                                */
+/*                                                                        */
+/*     http://www.apache.org/licenses/LICENSE-2.0                         */
+/*                                                                        */
+/* Unless required by applicable law or agreed to in writing, software    */
+/* distributed under the License is distributed on an "AS IS" BASIS,      */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
+/* implied. See the License for the specific language governing           */
+/* permissions and limitations under the License.
*/ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates_cmeqm.h +/// @brief Pstate structures and support routines for CME Hcode +/// +// *HWP HW Owner : Rahul Batra +// *HWP HW Owner : Michael Floyd +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : CME:PGPE + +#ifndef __P9_PSTATES_CME_H__ +#define __P9_PSTATES_CME_H__ + +#include "p9_pstates_common.h" +//#include + + +/// @} + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/// LocalParmsBlock Magic Number +/// +/// This magic number identifies a particular version of the +/// PstateParmsBlock and its substructures. The version number should be +/// kept up to date as changes are made to the layout or contents of the +/// structure. + +#define LOCAL_PARMSBLOCK_MAGIC 0x434d455050423030ull /* CMEPPB00 */ + +/// Quad Manager Flags +/// + +typedef union +{ + uint16_t value; + struct + { +#ifdef _BIG_ENDIAN + uint16_t resclk_enable : 1; + uint16_t ivrm_enable : 1; + uint16_t vdm_enable : 1; + uint16_t wof_enable : 1; + uint16_t dpll_dynamic_fmax_enable : 1; + uint16_t dpll_dynamic_fmin_enable : 1; + uint16_t dpll_droop_protect_enable : 1; + uint16_t reserved : 9; +#else + uint16_t reserved : 9; + uint16_t dpll_droop_protect_enable : 1; + uint16_t dpll_dynamic_fmin_enable : 1; + uint16_t dpll_dynamic_fmax_enable : 1; + uint16_t wof_enable : 1; + uint16_t vdm_enable : 1; + uint16_t ivrm_enable : 1; + uint16_t resclk_enable : 1; +#endif // _BIG_ENDIAN + } fields; + +} QuadManagerFlags; + +/// Resonant Clock Stepping Entry +/// +typedef union +{ + uint16_t value; + struct + { +#ifdef _BIG_ENDIAN + uint16_t sector_buffer : 4; + uint16_t spare1 : 1; + uint16_t pulse_enable : 1; + uint16_t pulse_mode : 2; + uint16_t resonant_switch : 4; + uint16_t spare4 : 4; +#else + uint16_t spare4 : 4; + uint16_t resonant_switch : 4; + uint16_t pulse_mode : 2; + uint16_t pulse_enable : 1; + uint16_t spare1 : 1; + uint16_t sector_buffer : 4; +#endif // _BIG_ENDIAN + } fields; + +} ResonantClockingStepEntry; + +#define RESCLK_FREQ_REGIONS 8 +#define RESCLK_STEPS 64 +#define RESCLK_L3_STEPS 4 + +typedef struct ResonantClockControl +{ + uint8_t resclk_freq[RESCLK_FREQ_REGIONS]; // Lower frequency of Resclk Regions + + uint8_t resclk_index[RESCLK_FREQ_REGIONS]; // Index into value array for the + // respective Resclk Region + + /// Array containing the transition steps + ResonantClockingStepEntry steparray[RESCLK_STEPS]; + + /// Delay between steps (in nanoseconds) + /// Maximum delay: 65.536us + uint16_t step_delay_ns; + + /// L3 Clock Stepping Array + uint8_t l3_steparray[RESCLK_L3_STEPS]; + + /// Resonant Clock Voltage Threshold (in millivolts) + /// This value is used to choose the appropriate L3 clock region setting. 
+ uint16_t l3_threshold_mv; + +} ResonantClockingSetup; + +// #W data points (version 2) +typedef struct +{ + uint16_t ivdd_tdp_ac_current_10ma; + uint16_t ivdd_tdp_dc_current_10ma; + uint8_t vdm_overvolt_small_thresholds; + uint8_t vdm_large_extreme_thresholds; + uint8_t vdm_normal_freq_drop; // N_S and N_L Drop + uint8_t vdm_normal_freq_return; // L_S and S_N Return + uint8_t vdm_vid_compare_ivid; + uint8_t vdm_spare; +} poundw_entry_t; + +typedef struct +{ + uint16_t r_package_common; + uint16_t r_quad; + uint16_t r_core; + uint16_t r_quad_header; + uint16_t r_core_header; +} resistance_entry_t; + +typedef struct __attribute__((packed)) +{ + uint16_t r_package_common; + uint16_t r_quad; + uint16_t r_core; + uint16_t r_quad_header; + uint16_t r_core_header; + uint8_t r_vdm_cal_version; + uint8_t r_avg_min_scale_fact; + uint16_t r_undervolt_vmin_floor_limit; + uint8_t r_min_bin_protect_pc_adder; + uint8_t r_min_bin_protect_bin_adder; + uint8_t r_undervolt_allowed; + uint8_t reserve[10]; +} +resistance_entry_per_quad_t; + +typedef struct +{ + poundw_entry_t poundw[NUM_OP_POINTS]; + resistance_entry_t resistance_data; + uint8_t undervolt_tested; + uint8_t reserved; + uint64_t reserved1; + uint8_t reserved2; //This field was added to keep the size of struct same when undervolt_tested field was added +} PoundW_data; + +/// VDM/Droop Parameter Block +/// +typedef struct +{ + PoundW_data vpd_w_data; +} LP_VDMParmBlock; + +typedef struct __attribute__((packed)) +{ + uint16_t ivdd_tdp_ac_current_10ma; + uint16_t ivdd_tdp_dc_current_10ma; + uint8_t vdm_overvolt_small_thresholds; + uint8_t vdm_large_extreme_thresholds; + uint8_t vdm_normal_freq_drop; // N_S and N_L Drop + uint8_t vdm_normal_freq_return; // L_S and S_N Return + uint8_t vdm_vid_compare_per_quad[MAXIMUM_QUADS]; + uint8_t vdm_cal_state_avg_min_per_quad[MAXIMUM_QUADS]; + uint16_t vdm_cal_state_vmin; + uint8_t vdm_cal_state_avg_core_dts; + uint16_t vdm_cal_state_avg_core_current; + uint16_t vdm_spare; +} +poundw_entry_per_quad_t; + +typedef struct __attribute__((packed)) +{ + poundw_entry_per_quad_t poundw[NUM_OP_POINTS]; + resistance_entry_per_quad_t resistance_data; +} +PoundW_data_per_quad; + + +typedef struct +{ + PoundW_data_per_quad vpd_w_data; +} LP_VDMParmBlock_PerQuad; + +/// The layout of the data created by the Pstate table creation firmware for +/// comsumption by the Pstate GPE. This data will reside in the Quad +/// Power Management Region (QPMR). +/// + +/// Standard options controlling Pstate setup procedures + +/// System Power Distribution Paramenters +/// +/// Parameters set by system design that influence the power distribution +/// for a rail to the processor module. This values are typically set in the +/// system machine readable workbook and are used in the generation of the +/// Global Pstate Table. This values are carried in the Pstate SuperStructure +/// for use and/or reference by OCC firmware (eg the WOF algorithm) + + +/// IVRM Parameter Block +/// +/// @todo Major work item. Largely will seed the CME Quad Manager to perform +/// iVRM voltage calculations + +#define IVRM_ARRAY_SIZE 64 +typedef struct iVRMInfo +{ + + /// Pwidth from 0.03125 to 1.96875 in 1/32 increments at Vin=Vin_Max + uint8_t strength_lookup[IVRM_ARRAY_SIZE]; // Each entry is a six bit value, right justified + + /// Scaling factor for the Vin_Adder calculation. + uint8_t vin_multiplier[IVRM_ARRAY_SIZE]; // Each entry is from 0 to 255. 
+ + /// Vin_Max used in Vin_Adder calculation (in millivolts) + uint16_t vin_max_mv; + + /// Delay between steps (in nanoseconds) + /// Maximum delay: 65.536us + uint16_t step_delay_ns; + + /// Stabilization delay once target voltage has been reached (in nanoseconds) + /// Maximum delay: 65.536us + uint16_t stablization_delay_ns; + + /// Deadzone (in millivolts) + /// Maximum: 255mV. If this value is 0, 50mV is assumed. + uint8_t deadzone_mv; + + /// Pad to 8B + uint8_t pad; + +} IvrmParmBlock; + +typedef uint8_t CompareVIDPoints; + +/// The layout of the data created by the Pstate table creation firmware for +/// comsumption by the CME Quad Manager. This data will reside in the Core +/// Power Management Region (CPMR). +/// +typedef struct +{ + + /// Magic Number + uint64_t magic; // the last byte of this number the structure's version. + + // QM Flags + QuadManagerFlags qmflags; + + /// Operating points + /// + /// VPD operating points are stored without load-line correction. Frequencies + /// are in MHz, voltages are specified in units of 5mV, and currents are + /// in units of 500mA. + VpdOperatingPoint operating_points[NUM_OP_POINTS]; + + /// Loadlines and Distribution values for the VDD rail + SysPowerDistParms vdd_sysparm; + + /// External Biases + /// + /// Biases applied to the VPD operating points prior to load-line correction + /// in setting the external voltages. This is used to recompute the Vin voltage + /// based on the Global Actual Pstate . + /// Values in 0.5% + VpdBias ext_biases[NUM_OP_POINTS]; + + /// Internal Biases + /// + /// Biases applied to the VPD operating points that are used for interpolation + /// in setting the internal voltages (eg Vout to the iVRMs) as part of the + /// Local Actual Pstate. + /// Values in 0.5% + VpdBias int_biases[NUM_OP_POINTS]; + + /// IVRM Data + IvrmParmBlock ivrm; + + /// Resonant Clock Grid Management Setup + ResonantClockingSetup resclk; + + /// VDM Data + LP_VDMParmBlock vdm; + + /// DPLL pstate 0 value + uint32_t dpll_pstate0_value; + + // Biased Compare VID operating points + CompareVIDPoints vid_point_set[NUM_OP_POINTS]; + + // Biased Threshold operation points + uint8_t threshold_set[NUM_OP_POINTS][NUM_THRESHOLD_POINTS]; + + //pstate-volt compare slopes + int16_t PsVIDCompSlopes[VPD_NUM_SLOPES_REGION]; + + //pstate-volt threshold slopes + int16_t PsVDMThreshSlopes[VPD_NUM_SLOPES_REGION][NUM_THRESHOLD_POINTS]; + + //Jump value operating points + uint8_t jump_value_set[NUM_OP_POINTS][NUM_JUMP_VALUES]; + + //Jump-value slopes + int16_t PsVDMJumpSlopes[VPD_NUM_SLOPES_REGION][NUM_JUMP_VALUES]; + +} LocalPstateParmBlock; + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_CME_H__ */ diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_common.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_common.h new file mode 100644 index 00000000000..ffee05153de --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_common.h @@ -0,0 +1,435 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_common.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates_common.h +/// @brief Common Pstate definitions +/// +// *HWP HW Owner : Rahul Batra +// *HWP HW Owner : Michael Floyd +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : PGPE:CME:HB:OCC + + +#ifndef __P9_PSTATES_COMMON_H__ +#define __P9_PSTATES_COMMON_H__ + +/// The maximum Pstate (knowing the increasing Pstates numbers represent +/// decreasing frequency) +#define PSTATE_MAX 255 + +/// The minimum Pstate (knowing the increasing Pstates numbers represent +/// decreasing frequency) +#define PSTATE_MIN 0 + + +/// Maximum number of Quads (4 cores plus associated caches) +#define MAXIMUM_QUADS 6 + +// Constants associated with VRM stepping +// @todo Determine what is needed here (eg Attribute mapping) and if any constants +// are warrented + +/// VPD #V Data from keyword (eg VPD order) + +#define NUM_JUMP_VALUES 4 +#define NUM_THRESHOLD_POINTS 4 + +// @todo RTC 181607 +// This is synchronization work-around to avoid a co-req update between CME Hcode +// and the Pstate Parameter Block. The CME uses "IDX" while these use "INDEX". +// In the future, these should be common between the two platforms. +// +// As this file is included in both platforms, the definition below can be used +// in the CME Hcode and the "IDX" versions deprecated once this file version +// is included in both platforms. 
+#ifndef __ASSEMBLER__ +typedef enum +{ + VDM_OVERVOLT_INDEX = 0, + VDM_SMALL_INDEX = 1, + VDM_LARGE_INDEX = 2, + VDM_XTREME_INDEX = 3 +} VDM_THRESHOLD_INDEX; + +typedef enum +{ + VDM_N_S_INDEX = 0, + VDM_N_L_INDEX = 1, + VDM_L_S_INDEX = 2, + VDM_S_N_INDEX = 3 +} VDM_JUMP_VALUE_INDEX; +#endif + +#define NUM_OP_POINTS 4 +#define VPD_PV_POWERSAVE 1 +#define VPD_PV_NOMINAL 0 +#define VPD_PV_TURBO 2 +#define VPD_PV_ULTRA 3 +#define VPD_PV_POWERBUS 4 + +#define VPD_PV_ORDER {VPD_PV_POWERSAVE, VPD_PV_NOMINAL, VPD_PV_TURBO, VPD_PV_ULTRA} +#define VPD_PV_ORDER_STR {"Nominal ","PowerSave ", "Turbo ", "UltraTurbo"} +#define VPD_THRESHOLD_ORDER_STR {"Overvolt", "Small", "Large", "Extreme" } + +/// VPD #V Operating Points (eg Natural order) +#define POWERSAVE 0 +#define NOMINAL 1 +#define TURBO 2 +#define ULTRA 3 +#define POWERBUS 4 +#define PV_OP_ORDER {POWERSAVE, NOMINAL, TURBO, ULTRA} +#define PV_OP_ORDER_STR {"PowerSave ", "Nominal ","Turbo ", "UltraTurbo"} + +#define VPD_PV_CORE_FREQ_MHZ 0 +#define VPD_PV_VDD_MV 1 +#define VPD_PV_IDD_100MA 2 +#define VPD_PV_VCS_MV 3 +#define VPD_PV_ICS_100MA 4 +#define VPD_PV_PB_FREQ_MHZ 0 +#define VPD_PV_VDN_MV 1 +#define VPD_PV_IDN_100MA 2 + +#define VPD_NUM_SLOPES_REGION 3 +#define REGION_POWERSAVE_NOMINAL 0 +#define REGION_NOMINAL_TURBO 1 +#define REGION_TURBO_ULTRA 2 +#define VPD_OP_SLOPES_REGION_ORDER {REGION_POWERSAVE_NOMINAL,REGION_NOMINAL_TURBO,REGION_TURBO_ULTRA} +#define VPD_OP_SLOPES_REGION_ORDER_STR {"POWERSAVE_NOMINAL", "NOMINAL_TURBO ","TURBO_ULTRA "} + +// Different points considered for calculating slopes +#define NUM_VPD_PTS_SET 4 +#define VPD_PT_SET_RAW 0 +#define VPD_PT_SET_SYSP 1 +#define VPD_PT_SET_BIASED 2 +#define VPD_PT_SET_BIASED_SYSP 3 +#define VPD_PT_SET_ORDER {VPD_PT_SET_RAW, VPD_PT_SET_SYSP, VPD_PT_SET_BIASED, VPD_PT_SET_BIASED_SYSP} +#define VPD_PT_SET_ORDER_STR {"Raw", "SysParam","Biased", "Biased/SysParam"} + +#define VID_SLOPE_FP_SHIFT 13 //TODO: Remove this. RTC 174743 +#define VID_SLOPE_FP_SHIFT_12 12 +#define THRESH_SLOPE_FP_SHIFT 12 + +// 0 = PowerSave, 1 = Nominal; 2 = Turbo; 3 = UltraTurbo; 4 = Enable +#define VDM_DROOP_OP_POINTS 5 + + +#define PSTATE_LT_PSTATE_MIN 0x00778a03 +#define PSTATE_GT_PSTATE_MAX 0x00778a04 +#define ACTIVE_QUADS 6 + +/// IDDQ readings, +#define IDDQ_MEASUREMENTS 6 +#define IDDQ_ARRAY_VOLTAGES { 0.60 , 0.70 , 0.80 , 0.90 , 1.00 , 1.10} +#define IDDQ_ARRAY_VOLTAGES_STR {"0.60", "0.70", "0.80", "0.90", "1.00", "1.10"} + +/// WOF Items +#define NUM_ACTIVE_CORES 24 +#define MAX_UT_PSTATES 64 // Oversized + + + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +/// A Pstate type +/// +/// Pstates are unsigned but, to avoid bugs, Pstate register fields should +/// always be extracted to a variable of type Pstate. If the size of Pstate +/// variables ever changes we will have to revisit this convention. +typedef uint8_t Pstate; + +/// A DPLL frequency code +/// +/// DPLL frequency codes (Fmax and Fmult) are 15 bits +typedef uint16_t DpllCode; + +/// An AVS VID code +typedef uint16_t VidAVS; + +/// A VPD operating point +/// +/// VPD operating points are stored without load-line correction. Frequencies +/// are in MHz, voltages are specified in units of 1mV, and characterization +/// currents are specified in units of 100mA. 
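+///
+/// Note: the 1mV/100mA units above describe the raw #V VPD contents and
+/// match the field names below. As pstates/README.md explains, when this
+/// structure is embedded in the other parameter blocks, the comments there
+/// use units of 5mV and 500mA instead.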
+/// +typedef struct +{ + uint32_t vdd_mv; + uint32_t vcs_mv; + uint32_t idd_100ma; + uint32_t ics_100ma; + uint32_t frequency_mhz; + uint8_t pstate; // Pstate of this VpdOperating + uint8_t pad[3]; // Alignment padding +} VpdOperatingPoint; + +//Defined same as #V vpd points used to validate +typedef struct +{ + uint32_t frequency_mhz; + uint32_t vdd_mv; + uint32_t idd_100ma; + uint32_t vcs_mv; + uint32_t ics_100ma; +} VpdPoint; +/// VPD Biases. +/// +/// Percent bias applied to VPD operating points prior to interolation +/// +/// All values on in .5 percent (half percent -> hp) +typedef struct +{ + + int8_t vdd_ext_hp; + int8_t vdd_int_hp; + int8_t vdn_ext_hp; + int8_t vcs_ext_hp; + int8_t frequency_hp; + +} VpdBias; + +/// System Power Distribution Paramenters +/// +/// Parameters set by system design that influence the power distribution +/// for a rail to the processor module. This values are typically set in the +/// system machine readable workbook and are used in the generation of the +/// Global Pstate Table. This values are carried in the Pstate SuperStructure +/// for use and/or reference by OCC firmware (eg the WOF algorithm) + +typedef struct +{ + + /// Loadline + /// Impedance (binary microOhms) of the load line from a processor VDD VRM + /// to the Processor Module pins. + uint32_t loadline_uohm; + + /// Distribution Loss + /// Impedance (binary in microOhms) of the VDD distribution loss sense point + /// to the circuit. + uint32_t distloss_uohm; + + /// Distribution Offset + /// Offset voltage (binary in microvolts) to apply to the rail VRM + /// distribution to the processor module. + uint32_t distoffset_uv; + +} SysPowerDistParms; + +/// AVSBUS Topology +/// +/// AVS Bus and Rail numbers for VDD, VDN, VCS, and VIO +/// +typedef struct +{ + uint8_t vdd_avsbus_num; + uint8_t vdd_avsbus_rail; + uint8_t vdn_avsbus_num; + uint8_t vdn_avsbus_rail; + uint8_t vcs_avsbus_num; + uint8_t vcs_avsbus_rail; + uint8_t vio_avsbus_num; + uint8_t vio_avsbus_rail; +} AvsBusTopology_t; + +// +// WOF Voltage, Frequency Ratio Tables +// +//VFRT calculation part +#define SYSTEM_VERSION_FRQUENCY(VFRT) (1000 + (16.67 * VFRT)) +#define SYSTEM_VFRT_VALUE(FREQ) ((FREQ - 1000)/16.67) + + +#define HOMER_VFRT_VALUE(FREQ,BSF) ((BSF - FREQ)/16.67) +#define HOMER_VERSION_FREQUENCY(VFRT,BSF) (BSF - (16.67 * VFRT)) + + +//VFRT Header fields +typedef struct __attribute__((packed)) VFRTHeaderLayout +{ + // VFRT Magic code "VT" + uint16_t magic_number; + + uint16_t reserved; + // 0:System type, 1:Homer type (0:3) + // if version 1: VFRT size is 12 row(voltage) X 11 column(freq) of size uint8_t + // (4:7) + // if version 2: VFRT size is 24 row(Voltage) X 5 column (Freq) of size uint8_t + uint8_t type_version; + //Identifies the Vdn assumptions tht went in this VFRT (0:7) + uint8_t res_vdnId; + //Identifies the Vdd assumptions tht went in this VFRT (0:7) + uint8_t VddId_QAId; + //Identifies the Quad Active assumptions tht went in this VFRT (5:7) + uint8_t rsvd_QAId; +} VFRTHeaderLayout_t;// WOF Tables Header + +typedef enum +{ + WOF_MODE_UNKNOWN = 0, + WOF_MODE_NOMINAL = 1, + WOF_MODE_TURBO = 2 +} WOF_MODE; + +typedef struct __attribute__((packed, aligned(128))) WofTablesHeader +{ + + /// Magic Number + /// Set to ASCII "WFTH___x" where x is the version of the VFRT structure + uint32_t magic_number; + + /// Reserved version + /// version 1 - mode is reserved (0) + /// version 2 - mode is SET to 1 or 2 + union + { + uint32_t reserved_version; + struct + { + unsigned reserved_bits: 20; + unsigned mode: 4; /// new 
to version 2 (1 = Nominal, 2 = Turbo) + uint8_t version; + } PACKED; + }; + + /// VFRT Block Size + /// Length, in bytes, of a VFRT + uint16_t vfrt_block_size; + + /// VFRT block header size + uint16_t vfrt_block_header_size; + + /// VFRT Data Size + /// Length, in bytes, of the data field. + uint16_t vfrt_data_size; + + /// Quad Active Size + /// Total number of Active Quads + uint8_t quads_active_size; + + /// Core count + uint8_t core_count; + + /// Ceff Vdn Start + /// CeffVdn value represented by index 0 (in 0.01%) + uint16_t vdn_start; + + /// Ceff Vdn Step + /// CeffVdn step value for each CeffVdn index (in 0.01%) + uint16_t vdn_step; + + /// Ceff Vdn Size + /// Number of CeffVdn indexes + uint16_t vdn_size; + + /// Ceff Vdd Start + /// CeffVdd value represented by index 0 (in 0.01%) + uint16_t vdd_start; + + /// Ceff Vdd Step + /// CeffVdd step value for each CeffVdd index (in 0.01%) + uint16_t vdd_step; + + /// Ceff Vdd Size + /// Number of CeffVdd indexes + uint16_t vdd_size; + + /// Vratio Start + /// Vratio value represented by index 0 (in 0.01%) + uint16_t vratio_start; + + /// Vratio Step + /// Vratio step value for each CeffVdd index (in 0.01%) + uint16_t vratio_step; + + /// Vratio Size + /// Number of Vratio indexes + uint16_t vratio_size; + + /// Fratio Start + /// Fratio value represented by index 0 (in 0.01%) + uint16_t fratio_start; + + /// Fratio Step + /// Fratio step value for each CeffVdd index (in 0.01%) + uint16_t fratio_step; + + /// Fratio Size + /// Number of Fratio indexes + uint16_t fratio_size; + + /// Future usage + uint16_t Vdn_percent[8]; + + /// Socket Power (in Watts) for the WOF Tables + uint16_t socket_power_w; + + /// Nest Frequency (in MHz) used in building the WOF Tables + uint16_t nest_frequency_mhz; + + /// Core Sort Power Target Frequency (in MHz) - The #V frequency associated + /// with the sort power target for this table set. This will be either the + /// Nominal or Turbo #V frequency + uint16_t sort_power_freq_mhz; + + /// Regulator Design Point Capacity (in Amps) + uint16_t rdp_capacity; + + /// Up to 8 ASCII characters to be defined by the Table generation team to + /// back reference table sources + char wof_table_source_tag[8]; + + /// Up to 16 ASCII characters as a Package designator + char package_name[16]; + + // Padding to 128B is left to the compiler via the following attribute. + +} WofTablesHeader_t; + + +// Data is provided in 1/24ths granularity with adjustments for integer +// representation +#define VFRT_VRATIO_SIZE 24 + +// 5 steps down from 100% is Fratio_step sizes +#define VFRT_FRATIO_SIZE 5 + + +// HOMER VFRT Layout +typedef struct __attribute__((packed, aligned(256))) HomerVFRTLayout +{ + VFRTHeaderLayout_t vfrtHeader; + uint8_t vfrt_data[VFRT_FRATIO_SIZE][VFRT_VRATIO_SIZE]; + uint8_t padding[128]; +} HomerVFRTLayout_t; + + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_COMMON_H__ */ diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h new file mode 100644 index 00000000000..a523bf7dc68 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_occ.h @@ -0,0 +1,216 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_occ.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. 
*/ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates.h +/// @brief Pstate structures and support routines for OCC product firmware +/// +// *HWP HW Owner : Greg Still +// *HWP HW Owner : Michael Floyd +// *HWP FW Owner : Martha Broyles +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : HB:OCC + +#ifndef __P9_PSTATES_OCC_H__ +#define __P9_PSTATES_OCC_H__ + +#include "p9_pstates_common.h" +#include "p9_pstates_pgpe.h" + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +/// PstateParmsBlock Magic Number +/// +/// This magic number identifies a particular version of the +/// PstateParmsBlock and its substructures. The version number should be +/// kept up to date as changes are made to the layout or contents of the +/// structure. + +#define OCC_PARMSBLOCK_MAGIC 0x4f43435050423030ull /* OCCPPB00 */ + +/// IDDQ Reading Type +/// Each entry is 2 bytes. The values are in 6.25mA units; this allow for a +/// maximum value of 409.6A to be represented. +/// +typedef uint16_t iddq_entry_t; + +/// AvgTemp Reading Type +/// Each entry is 1 byte. The values are in 0.5degC units; this allow for a +/// maximum value of 127degC to be represented. +/// +typedef uint8_t avgtemp_entry_t; + +/// Iddq Table +/// +/// A set of arrays of leakage values (Iddq) collected at various voltage +/// conditions during manufacturing test that will feed into the Workload +/// Optimized Frequency algorithms on the OCC. These values are not installed +/// in any hardware facilities. 
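+///
+/// (Note: iddq_entry_t above is described as using 6.25mA units, while the
+/// per-field comments below say 5mA units - one of the inconsistencies that
+/// pstates/README.md warns about.)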
+/// +typedef struct +{ + + /// IDDQ version + uint8_t iddq_version; + + /// Good Quads per Sort + uint8_t good_quads_per_sort; + + /// Good Normal Cores per Sort + uint8_t good_normal_cores_per_sort; + + /// Good Caches per Sort + uint8_t good_caches_per_sort; + + /// Good Normal Cores + uint8_t good_normal_cores[MAXIMUM_QUADS]; + + /// Good Caches + uint8_t good_caches[MAXIMUM_QUADS]; + + /// RDP to TDP Scaling Factor in 0.01% units + uint16_t rdp_to_tdp_scale_factor; + + /// WOF Iddq Margin (aging factor) in 0.01% units + uint16_t wof_iddq_margin_factor; + + /// VDD Temperature Scale Factor per 10C in 0.01% units + uint16_t vdd_temperature_scale_factor; + + /// VDN Temperature Scale Factor per 10C in 0.01% units + uint16_t vdn_temperature_scale_factor; + + /// Spare + uint8_t spare[8]; + + /// IVDD ALL Good Cores ON; 5mA units + iddq_entry_t ivdd_all_good_cores_on_caches_on[IDDQ_MEASUREMENTS]; + + /// IVDD ALL Cores OFF; 5mA units + iddq_entry_t ivdd_all_cores_off_caches_off[IDDQ_MEASUREMENTS]; + + /// IVDD ALL Good Cores OFF; 5mA units + iddq_entry_t ivdd_all_good_cores_off_good_caches_on[IDDQ_MEASUREMENTS]; + + /// IVDD Quad 0 Good Cores ON, Caches ON; 5mA units + iddq_entry_t ivdd_quad_good_cores_on_good_caches_on[MAXIMUM_QUADS][IDDQ_MEASUREMENTS]; + + /// IVDDN; 5mA units + iddq_entry_t ivdn[IDDQ_MEASUREMENTS]; + + /// IVDD ALL Good Cores ON, Caches ON; 0.5C units + avgtemp_entry_t avgtemp_all_good_cores_on[IDDQ_MEASUREMENTS]; + + /// avgtemp ALL Cores OFF, Caches OFF; 0.5C units + avgtemp_entry_t avgtemp_all_cores_off_caches_off[IDDQ_MEASUREMENTS]; + + /// avgtemp ALL Good Cores OFF, Caches ON; 0.5C units + avgtemp_entry_t avgtemp_all_good_cores_off[IDDQ_MEASUREMENTS]; + + /// avgtemp Quad 0 Good Cores ON, Caches ON; 0.5C units + avgtemp_entry_t avgtemp_quad_good_cores_on[MAXIMUM_QUADS][IDDQ_MEASUREMENTS]; + + /// avgtempN; 0.5C units + avgtemp_entry_t avgtemp_vdn[IDDQ_MEASUREMENTS]; + + /// spare (per MVPD documentation + /// + /// NOTE: The MVPD documentation defines 43 spare bytes to lead to a 255B structure. However, + /// some consuming code already assumed a 250B structure and the correction of this size was disruptive. + /// This is not a problem until the IQ keyword actually defines these bytes at which time a keyword + /// version update will be need. Thus, this structure will remain at 250B. + uint8_t spare_1[38]; +} IddqTable; + + + +/// The layout of the data created by the Pstate table creation firmware for +/// comsumption by the OCC firmware. This data will reside in the Quad +/// Power Management Region (QPMR). +/// +/// This structure is aligned to 128B to allow for easy downloading using the +/// OCC block copy engine +/// +typedef struct +{ + + /// Magic Number + uint64_t magic; // the last byte of this number the structure's version. + + /// Operating points + /// + /// VPD operating points are stored without load-line correction. Frequencies + /// are in MHz, voltages are specified in units of 5mV, and currents are + /// in units of 500mA. 
+ VpdOperatingPoint operating_points[NUM_OP_POINTS]; + + /// Loadlines and Distribution values for the VDD rail + SysPowerDistParms vdd_sysparm; + + /// Loadlines and Distribution values for the VDN rail + SysPowerDistParms vdn_sysparm; + + /// Loadlines and Distribution values for the VCS rail + SysPowerDistParms vcs_sysparm; + + /// Iddq Table + IddqTable iddq; + + /// WOF Controls + WOFElements wof; + + // Frequency Limits + uint32_t frequency_min_khz; // Comes from Safe Mode computation + uint32_t frequency_max_khz; // Comes from UltraTurbo #V point after biases + uint32_t frequency_step_khz; // Comes from refclk/dpll_divider attributes. + + // Minimum Pstate; Maximum is always 0. + uint32_t pstate_min; // Pstate reflecting frequency_min_khz + + /// Nest frequency in Mhz. This is used by FIT interrupt + uint32_t nest_frequency_mhz; + + //Nest leakage percentage used to calculate the Core leakage + uint16_t nest_leakage_percent; + + uint16_t ceff_tdp_vdn; + + // AC tdp vdd turbo + uint16_t lac_tdp_vdd_turbo_10ma; + + // AC tdp vdd nominal + uint16_t lac_tdp_vdd_nominal_10ma; + + AvsBusTopology_t avs_bus_topology; + +} __attribute__((aligned(128))) OCCPstateParmBlock; + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_OCC_H__ */ diff --git a/src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h b/src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h new file mode 100644 index 00000000000..1f22bf6b6a8 --- /dev/null +++ b/src/vendorcode/ibm/power9/pstates/p9_pstates_pgpe.h @@ -0,0 +1,367 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/procedures/hwp/lib/p9_pstates_pgpe.h $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2015,2019 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. */ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +/// @file p9_pstates_pgpe.h +/// @brief Pstate structures and support routines for PGPE Hcode +/// +// *HWP HW Owner : Rahul Batra +// *HWP HW Owner : Michael Floyd +// *HWP Team : PM +// *HWP Level : 1 +// *HWP Consumed by : PGPE:HS + + +#ifndef __P9_PSTATES_PGPE_H__ +#define __P9_PSTATES_PGPE_H__ + +#include "p9_pstates_common.h" +#include "p9_pstates_cmeqm.h" + +/// PstateParmsBlock Magic Number +/// +/// This magic number identifies a particular version of the +/// PstateParmsBlock and its substructures. The version number should be +/// kept up to date as changes are made to the layout or contents of the +/// structure. + +#define PSTATE_PARMSBLOCK_MAGIC 0x5053544154453030ull /* PSTATE00 */ + +#ifndef __ASSEMBLER__ +#ifdef __cplusplus +extern "C" { +#endif + +#include + + +/// Pad repurpose structure +typedef union +{ + uint32_t value; + struct + { + // Reserve 3 bytes + uint16_t reserved16; + uint8_t reserved8; + + // The following is used by PGPE for the WOF algorithm that computes + // vratio. 
The placement here is, frankly, a bit of a hack but is + // done to allievate cross-platform dependencies by not changing the + // overall size of the Global Paramter Block structure. In the future, + // this field should be moved into the base Global Paramter Block + // structure. + uint8_t good_cores_in_sort; + } fields; +} GPPBOptionsPadUse; + +/// Standard options controlling Pstate setup and installation procedures +typedef struct +{ + + /// Option flags; See \ref pstate_options + uint32_t options; + + /// Pad structure to 8 bytes. Could also be used for other options later. + uint32_t pad; + +} PGPEOptions; + +/// UltraTurbo Segment VIDs by Core Count +typedef struct +{ + + /// Number of Segment Pstates + uint8_t ut_segment_pstates; + + /// Maximum number of core possibly active + uint8_t ut_max_cores; + + /// VDD VID modification + /// 1 core active = offset 0 + /// 2 cores active = offset 1 + /// ... + /// 12 cores active = offset 11 + uint8_t ut_segment_vdd_vid[MAX_UT_PSTATES][NUM_ACTIVE_CORES]; + + /// VCS VID modification + /// 1 core active = offset 0 + /// 2 cores active = offset 1 + /// ... + /// 12 cores active = offset 11 + uint8_t ut_segment_vcs_vid[MAX_UT_PSTATES][NUM_ACTIVE_CORES]; + +} VIDModificationTable; + +/// Workload Optimized Frequency (WOF) Elements +/// +/// Structure defining various control elements needed by the WOF algorithm +/// firmware running on the OCC. +/// +typedef struct +{ + + /// WOF Enablement + uint8_t wof_enabled; + + /// TDP<>RDP Current Factor + /// Value read from ??? VPD + /// Defines the scaling factor that converts current (amperage) value from + /// the Thermal Design Point to the Regulator Design Point (RDP) as input + /// to the Workload Optimization Frequency (WOF) OCC algorithm. + /// + /// This is a ratio value and has a granularity of 0.01 decimal. Data + /// is held in hexidecimal (eg 1.22 is represented as 122 and then converted + /// to hex 0x7A). + uint32_t tdp_rdp_factor; + + /// UltraTurbo Segment VIDs by Core Count + VIDModificationTable ut_vid_mod; + +} WOFElements; + +/// VDM/Droop Parameter Block +/// +typedef struct +{ + uint8_t vid_compare_override_mv[VDM_DROOP_OP_POINTS]; + uint8_t vdm_response; + + // For the following *_enable fields, bits are defined to indicate + // which of the respective *override* array entries are valid. + // bit 0: UltraTurbo; bit 1: Turbo; bit 2: Nominal; bit 3: PowSave + + // The respecitve *_enable above indicate which index values are valid + uint8_t droop_small_override[VDM_DROOP_OP_POINTS]; + uint8_t droop_large_override[VDM_DROOP_OP_POINTS]; + uint8_t droop_extreme_override[VDM_DROOP_OP_POINTS]; + uint8_t overvolt_override[VDM_DROOP_OP_POINTS]; + uint16_t fmin_override_khz[VDM_DROOP_OP_POINTS]; + uint16_t fmax_override_khz[VDM_DROOP_OP_POINTS]; + + /// Pad structure to 8-byte alignment + /// @todo pad once fully structure is complete. + // uint8_t pad[1]; + +} GP_VDMParmBlock; + +/// Global Pstate Parameter Block +/// +/// The GlobalPstateParameterBlock is an abstraction of a set of voltage/frequency +/// operating points along with hardware limits. Besides the hardware global +/// Pstate table, the abstract table contains enough extra information to make +/// it the self-contained source for setting up and managing voltage and +/// frequency in either Hardware or Firmware Pstate mode. +/// +/// When installed in PMC, Global Pstate table indices are adjusted such that +/// the defined Pstates begin with table entry 0. 
The table need not be full - +/// the \a pmin and \a entries fields define the minimum and maximum Pstates +/// represented in the table. However at least 1 entry must be defined to +/// create a legal table. +/// +/// Note that Global Pstate table structures to be mapped into PMC hardware +/// must be 1KB-aligned. This requirement is fullfilled by ensuring that +/// instances of this structure are 1KB-aligned. +typedef struct +{ + + + /// Magic Number + uint64_t magic; // the last byte of this number the structure's version. + + /// Pstate options + /// + /// The options are included as part of the GlobalPstateTable so that they + /// are available to upon PGPE initialization. + PGPEOptions options; + + /// The frequency associated with Pstate[0] in KHz + uint32_t reference_frequency_khz; + + /// The frequency step in KHz + uint32_t frequency_step_khz; + + /// Operating points + /// + /// VPD operating points are stored without load-line correction. Frequencies + /// are in MHz, voltages are specified in units of 5mV, and currents are + /// in units of 500mA. + /// \todo Remove this. RTC: 174743 + VpdOperatingPoint operating_points[NUM_OP_POINTS]; + + /// Biases + /// + /// Biases applied to the VPD operating points prior to load-line correction + /// in setting the external voltages. + /// Values in 0.5% + VpdBias ext_biases[NUM_OP_POINTS]; + + /// Loadlines and Distribution values for the VDD rail + SysPowerDistParms vdd_sysparm; + + /// Loadlines and Distribution values for the VCS rail + SysPowerDistParms vcs_sysparm; + + /// Loadlines and Distribution values for the VDN rail + SysPowerDistParms vdn_sysparm; + + /// The "Safe" Voltage + /// + /// A voltage to be used when safe-mode is activated + uint32_t safe_voltage_mv; + + /// The "Safe" Frequency + /// + /// A voltage to be used when safe-mode is activated + uint32_t safe_frequency_khz; + + /// The exponent of the exponential encoding of Pstate stepping delay + uint8_t vrm_stepdelay_range; + + /// The significand of the exponential encoding of Pstate stepping delay + uint8_t vrm_stepdelay_value; + + /// VDM Data + GP_VDMParmBlock vdm; + + /// The following are needed to generated the Pstate Table to HOMER. + + /// Internal Biases + /// + /// Biases applied to the VPD operating points that are used for interpolation + /// in setting the internal voltages (eg Vout to the iVRMs) as part of the + /// Local Actual Pstate. + /// Values in 0.5% + VpdBias int_biases[NUM_OP_POINTS]; + + /// IVRM Data + IvrmParmBlock ivrm; + + /// Resonant Clock Grid Management Setup + ResonantClockingSetup resclk; + + /// Time b/w ext VRM detects write voltage cmd and when voltage begins to move + uint32_t ext_vrm_transition_start_ns; + + /// Transition rate for an increasing VDD voltage excursion + uint32_t ext_vrm_transition_rate_inc_uv_per_us; + + /// Transition rate for an decreasing VDD voltage excursion + uint32_t ext_vrm_transition_rate_dec_uv_per_us; + + /// Delay to account for VDD rail setting + uint32_t ext_vrm_stabilization_time_us; + + /// External VRM transition step size + uint32_t ext_vrm_step_size_mv; + + /// Nest frequency in Mhz. 
This is used by FIT interrupt + uint32_t nest_frequency_mhz; + + //Maximum performance loss threshold when undervolting(in 0.1%, tenths of percent) + uint8_t wov_underv_perf_loss_thresh_pct; + + //WOV undervolting increment percentage(in 0.1%, tenths of percent) + uint8_t wov_underv_step_incr_pct; + + //WOV undervolting decrement percentage(in 0.1%, tenths of percent) + uint8_t wov_underv_step_decr_pct; + + //WOV undervolting max percentage(in 0.1%, tenths of percent) + uint8_t wov_underv_max_pct; + + //When undervolting, if this value is non-zero, then voltage will never be set + //below this value. If it is zero, then the minimum voltage is only bounded by + //wov_underv_max_pct. + uint16_t wov_underv_vmin_mv; + + //When overvolting, then voltage will never be set above this value + uint16_t wov_overv_vmax_mv; + + //WOV overvolting increment percentage(in 0.1%, tenths of percent) + uint8_t wov_overv_step_incr_pct; + + //WOV overvolting decrement percentage(in 0.1%, tenths of percent) + uint8_t wov_overv_step_decr_pct; + + //WOV overvolting max percentage(in 0.1%, tenths of percent) + uint8_t wov_overv_max_pct; + + uint8_t pad; + + //Determine how often to call the wov algorithm with respect + //to PGPE FIT ticks + uint32_t wov_sample_125us; + + //Maximum performance loss(in 0.1%, tenths of percent). We should never be at + //this level, but we check using this value inside PGPE to make sure that this + //is reported if it ever happens + uint32_t wov_max_droop_pct; + + uint32_t pad1; + + /// All operating points + VpdOperatingPoint operating_points_set[NUM_VPD_PTS_SET][NUM_OP_POINTS]; + + //DPLL pstate 0 value + uint32_t dpll_pstate0_value; + + /// Precalculated Pstate-Voltage Slopes + uint16_t PStateVSlopes[NUM_VPD_PTS_SET][VPD_NUM_SLOPES_REGION]; + + /// Precalculated Voltage-Pstates Slopes + uint16_t VPStateSlopes[NUM_VPD_PTS_SET][VPD_NUM_SLOPES_REGION]; + + // Biased Compare VID operating points + CompareVIDPoints vid_point_set[NUM_OP_POINTS]; + + // Biased Threshold operation points + uint8_t threshold_set[NUM_OP_POINTS][NUM_THRESHOLD_POINTS]; + + //pstate-volt compare slopes + int16_t PsVIDCompSlopes[VPD_NUM_SLOPES_REGION]; + + //pstate-volt threshold slopes + int16_t PsVDMThreshSlopes[VPD_NUM_SLOPES_REGION][NUM_THRESHOLD_POINTS]; + + //Jump value operating points + uint8_t jump_value_set[NUM_OP_POINTS][NUM_JUMP_VALUES]; + + //Jump-value slopes + int16_t PsVDMJumpSlopes[VPD_NUM_SLOPES_REGION][NUM_JUMP_VALUES]; + + uint8_t pad2[2]; + + //AvsBusTopology + AvsBusTopology_t avs_bus_topology; + + // @todo DPLL Droop Settings. These need communication to SGPE for STOP + +} __attribute__((packed, aligned(1024))) GlobalPstateParmBlock; + + +#ifdef __cplusplus +} // end extern C +#endif +#endif /* __ASSEMBLER__ */ +#endif /* __P9_PSTATES_PGPE_H__ */ diff --git a/src/vendorcode/ibm/power9/rs4.c b/src/vendorcode/ibm/power9/rs4.c new file mode 100644 index 00000000000..cef97d48e0e --- /dev/null +++ b/src/vendorcode/ibm/power9/rs4.c @@ -0,0 +1,826 @@ +/* IBM_PROLOG_BEGIN_TAG */ +/* This is an automatically generated prolog. */ +/* */ +/* $Source: src/import/chips/p9/utils/imageProcs/p9_scan_compression.C $ */ +/* */ +/* OpenPOWER HostBoot Project */ +/* */ +/* Contributors Listed Below - COPYRIGHT 2016,2017 */ +/* [+] International Business Machines Corp. */ +/* */ +/* */ +/* Licensed under the Apache License, Version 2.0 (the "License"); */ +/* you may not use this file except in compliance with the License. 
*/ +/* You may obtain a copy of the License at */ +/* */ +/* http://www.apache.org/licenses/LICENSE-2.0 */ +/* */ +/* Unless required by applicable law or agreed to in writing, software */ +/* distributed under the License is distributed on an "AS IS" BASIS, */ +/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or */ +/* implied. See the License for the specific language governing */ +/* permissions and limitations under the License. */ +/* */ +/* IBM_PROLOG_END_TAG */ +// Note: This file was originally named p8_scan_compression.c; See CVS archive +// for revision history of p8_scan_compression.c. + +#include "rs4.h" + +/// RS4 Compression Format (version 2) +/// ================================== +/// +/// Scan strings are compressed using a simple run-length encoding called +/// RS4. The string to be decompressed and scanned is the difference between +/// the current state of the ring and the desired final state of the ring. +/// +/// Both the data to be compressed and the final compressed data are treated +/// as strings of 4-bit nibbles. In the scan data structure the compressed +/// strings are padded with 0x0 nibbles to the next even multiple of 4. +/// The compressed string consists of control nibbles and data +/// nibbles. The string format includes a special control/data sequence that +/// marks the end of the string and the final bits of scan data. +/// +/// Special control/data sequences have been been added for RS4v2 to +/// store pairs of care mask nibble and data nibble. This enhancement +/// is needed to allow the scanning of significant zeros. +/// The RS4v1 format assumed that all zeros have no meaning other than +/// the positioning of 1 bits. +/// +/// Runs of 0x0 nibbles as determined by the care mask (rotates) are encoded +/// using a simple variable-length integer encoding known as a "stop code". +/// This code treats each nibble in a variable-length integer encoding as an +/// octal digit (the low-order 3 bits) plus a stop bit (the high-order bit). +/// The examples below illustrate the encoding. +/// +/// 1xxx - Rotate 0bxxx nibbles (0 - 7) +/// 0xxx 1yyy - Rotate 0bxxxyyy nibbles (8 - 63) +/// 0xxx 0yyy 1zzz - Rotate 0bxxxyyyzzz nibbles (64 - 511) +/// etc. +/// +/// A 0-length rotate (code 0b1000) is needed to resynchronize the state +/// machine in the event of long scans (see below), or a string that begins +/// with a non-0x0 nibble. +/// +/// Runs of non-0x0 nibbles (scans) are inserted verbatim into the compressed +/// string after a control nibble indicating the number of nibbles of +/// uncompressed data. If a run is longer than 14 nibbles, the compression +/// algorithm must insert a 0-length rotate and a new scan-length control +/// before continuing with the non-0 data nibbles. +/// +/// xxxx - Scan 0bxxxx nibbles which follow, 0bxxxx != 0 and 0bxxxx != 15 +/// +/// The special case of a 0b0000 code where a scan count is expected marks the +/// end of the string. The end of string marker is always followed by a +/// nibble that contains the terminal bit count in the range 0-3. If the +/// length of the original binary string was not an even multiple of 4, then a +/// final nibble contains the final scan data left justified. +/// +/// 0000 00nn [ttt0] - Terminate 0bnn bits, data 0bttt0 if 0bnn != 0 +/// +/// The special case of a 0b1111 code where a scan count is expected announces +/// a pair of care mask nibble and data nibble containing significant zeros. 
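+/// For example, scanning a care mask of 0b1010 with data 0b0010 is encoded
+/// as the control nibble 0xf followed by the care nibble 0xa and the data
+/// nibble 0x2.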
+/// Only a single pair can be stored this way, and longer sequences of such
+/// pairs require resynchronization using zero rotates and special scan count
+/// 0b1111 to be inserted.
+///
+/// Termination with care mask and data is accomplished by a special
+/// terminal data count:
+///
+/// 0000 10nn [ccc0] [ttt0] - Terminate
+///                           0bnn bits care mask and 0bnn bits data,
+///                           care mask 0bccc0 and data 0bttt0 if 0bnn != 0
+///
+/// BNF Grammar
+/// ===========
+///
+/// Following is a BNF grammar for the strings accepted by the RS4
+/// decompression and scan algorithm. At a high level, the state machine
+/// recognizes a series of 1 or more sequences of a rotate (R) followed by a
+/// scan (S) or end-of-string marker (E), followed by the terminal count (T)
+/// and optional terminal data (D).
+///
+/// (R S)* (R E) T D?
+///
+/// \code
+///
+///   <string> ::= <rotate> <scan> <string> |
+///                <rotate> <end>
+///
+///   <scan> ::= <scan_count(N)> <data(N)> |
+///              <masked_scan_count> <masked_pair(4)>
+///
+///   <rotate_nonstop_nibble> ::= '0x0' | ... | '0x7'
+///
+///   <rotate_stop_nibble> ::= '0x8' | ... | '0xf'
+///
+///   <rotate> ::= <rotate_stop_nibble> |
+///                <rotate_nonstop_nibble> <rotate>
+///
+///   <scan_count(N)> ::= * 0bnnnn, for N = 0bnnnn, N != 0 & N != 15 *
+///
+///   <masked_scan_count> ::= '0xf'
+///
+///   <data(N)> ::= * N nibbles of uncompressed data, 0 < N < 15 *
+///
+///   <end> ::=
+///       '0x0' <terminal_count (T = 0)> |
+///       '0x0' <terminal_count (T > 0)> <terminal_data(T)> |
+///       '0x0' <masked_terminal_count (T > 0)> <masked_pair(T)>
+///
+///   <terminal_count(T)> ::= * 0b00nn, for T = 0bnn *
+///
+///   <masked_terminal_count(T)> ::= * 0b10nn, for T = 0bnn & T != 0 *
+///
+///   <terminal_data(1)> ::= '0x0' | '0x8'
+///
+///   <terminal_data(2)> ::= '0x0' | '0x4' | '0x8' | '0xc'
+///
+///   <terminal_data(3)> ::= '0x0' | '0x2' | '0x4' | ... | '0xe'
+///
+///   <masked_pair(1)> ::= * 0b1000 0b0000 *
+///
+///   <masked_pair(2)> ::= * 0bij00 0bwx00, for
+///                          i >= w & j >= x &
+///                          ij > wx *
+///
+///   <masked_pair(3)> ::= * 0bijk0 0bwxy0, for
+///                          i >= w & j >= x & k >= y &
+///                          ijk > wxy *
+///
+///   <masked_pair(4)> ::= * 0bijkl 0bwxyz, for
+///                          i >= w & j >= x & k >= y & l >= z &
+///                          ijkl > wxyz *
+///
+/// \endcode
+#include <endian.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+
+#define MY_DBG(...) while (false)
+
+// Diagnostic aids for debugging
+#ifdef DEBUG_P9_SCAN_COMPRESSION
+
+#include <stdio.h>
+
+
+#define BUG(rc)                                          \
+    ({                                                   \
+        fprintf(stderr, "%s:%d : Trapped rc = %d\n",     \
+                __FILE__, __LINE__, (rc));               \
+        (rc);                                            \
+    })
+
+#define BUGX(rc, ...)                                    \
+    ({                                                   \
+        BUG(rc);                                         \
+        fprintf(stderr, ##__VA_ARGS__);                  \
+        (rc);                                            \
+    })
+
+#else // DEBUG_P9_SCAN_COMPRESSION
+
+#define BUG(rc) (rc)
+#define BUGX(rc, ...) (rc)
+
+#endif // DEBUG_P9_SCAN_COMPRESSION
+
+#define RS4_MAGIC (uint16_t)0x5253 // "RS"
+
+/// Scan data types
+#define RS4_SCAN_DATA_TYPE_CMSK 1
+#define RS4_SCAN_DATA_TYPE_NON_CMSK 0
+
+#define MAX_RING_BUF_SIZE_TOOL 200000
+
+#define RS4_VERSION 3
+
+typedef uint16_t RingId_t;
+
+typedef struct ring_hdr CompressedScanData;
+
+// Return a big-endian-indexed nibble from a byte string
+
+static int
+rs4_get_nibble(const uint8_t* i_string, const uint32_t i_i)
+{
+    uint8_t byte;
+    int nibble;
+
+    byte = i_string[i_i / 2];
+
+    if (i_i % 2)
+    {
+        nibble = byte & 0xf;
+    }
+    else
+    {
+        nibble = byte >> 4;
+    }
+
+    return nibble;
+}
+
+
+// Set a big-endian-indexed nibble in a byte string
+
+static int
+rs4_set_nibble(uint8_t* io_string, const uint32_t i_i, const int i_nibble)
+{
+    uint8_t* byte;
+
+    byte = &(io_string[i_i / 2]);
+
+    if (i_i % 2)
+    {
+        *byte = (*byte & 0xf0) | i_nibble;
+    }
+    else
+    {
+        *byte = (*byte & 0x0f) | (i_nibble << 4);
+    }
+
+    return i_nibble;
+}
+
+
+// Encode an unsigned integer into a 4-bit octal stop code directly into a
+// nibble stream at io_string, returning the number of nibbles in the
+// resulting code.
+
+static int
+rs4_stop_encode(const uint32_t i_count, uint8_t* io_string, const uint32_t i_i)
+{
+    uint32_t count;
+    int digits, offset;
+
+    // Determine the number of octal digits. There is always at least 1.
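+    // For example, i_count = 75 (octal 113) encodes as the nibble sequence
+    // 0x1 0x1 0xb: two non-stop octal digits followed by the final digit
+    // with the stop bit (0x8) set.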
+
+    count = i_count >> 3;
+    digits = 1;
+
+    while (count)
+    {
+        count >>= 3;
+        digits++;
+    }
+
+    // First insert the stop (low-order) digit
+
+    offset = digits - 1;
+    rs4_set_nibble(io_string, i_i + offset, (i_count & 0x7) | 0x8);
+
+    // Now insert the high-order digits
+
+    count = i_count >> 3;
+    offset--;
+
+    while (count)
+    {
+        rs4_set_nibble(io_string, i_i + offset, count & 0x7);
+        offset--;
+        count >>= 3;
+    }
+
+    return digits;
+}
+
+
+// Decode an unsigned integer from a 4-bit octal stop code appearing in a byte
+// string at i_string, returning the number of nibbles decoded.
+
+static int
+stop_decode(uint32_t* o_count, const uint8_t* i_string, const uint32_t i_i)
+{
+    int digits, nibble;
+    uint32_t i, count;
+
+    digits = 0;
+    count = 0;
+    i = i_i;
+
+    do
+    {
+        nibble = rs4_get_nibble(i_string, i);
+        count = (count * 8) + (nibble & 0x7);
+        i++;
+        digits++;
+    }
+    while ((nibble & 0x8) == 0);
+
+    *o_count = count;
+    return digits;
+}
+
+
+// RS4 compression algorithm notes:
+//
+// RS4 compression processes i_data_str/i_care_str as strings of nibbles.
+// Final special-case code handles the 0-3 remaining terminal bits.
+//
+// There is a special case for 0x0 nibbles embedded in a string of non-0x0
+// nibbles. It is more efficient to encode a single 0x0 nibble as part of a
+// longer string of non-0x0 nibbles. However it is break-even (actually a
+// slight statistical advantage) to break a scan sequence for 2 0x0 nibbles.
+//
+// If a run of 14 scan nibbles is found the scan is terminated and we return
+// to the rotate state. Runs of more than 14 scans will always include a
+// 0-length rotate between the scan sequences.
+//
+// The ability to store a 15th consecutive scan nibble was given up for an
+// enhancement of the compression algorithm:
+// The scan count 15 has a special meaning and is reserved for handling
+// single nibbles that come with a care mask, that is, an extra nibble that
+// determines the significance of scan bits, including both 1 and 0 bits.
+//
+// Returns a scan compression return code.
+
+static int
+__rs4_compress(uint8_t* o_rs4_str,
+               uint32_t* o_nibbles,
+               const uint8_t* i_data_str,
+               const uint8_t* i_care_str,
+               const uint32_t i_length)
+{
+    int state; /* 0 : Rotate, 1 : Scan, 2 : Masked care/data pair */
+    uint32_t n; /* Number of whole nibbles in i_data */
+    uint32_t r; /* Number of remaining bits in i_data */
+    uint32_t i; /* Nibble index in i_data_str/i_care_str */
+    uint32_t j; /* Nibble index in o_rs4_str */
+    uint32_t k; /* Location to place the scan count */
+    uint32_t count; /* Counts rotate/scan nibbles */
+    int care_nibble;
+    int data_nibble;
+
+    n = i_length / 4;
+    r = i_length % 4;
+    i = 0;
+    j = 0;
+    k = 0; /* Makes GCC happy */
+    care_nibble = 0;
+    data_nibble = 0;
+    count = 0;
+    state = 0;
+
+    // Process the bulk of the string. Note that state changes do not
+    // increment 'i' - the nibble at i_data is always scanned again.
+
+    while (i < n)
+    {
+        care_nibble = rs4_get_nibble(i_care_str, i);
+        data_nibble = rs4_get_nibble(i_data_str, i);
+
+        if (~care_nibble & data_nibble)
+        {
+            return BUGX(SCAN_COMPRESSION_INPUT_ERROR,
+                        "Conflicting data and mask bits in nibble %d\n", i);
+        }
+
+        if (state == 0)
+            //----------------//
+            // Rotate section //
+            //----------------//
+        {
+            if (care_nibble == 0)
+            {
+                count++;
+                i++;
+            }
+            else
+            {
+                j += rs4_stop_encode(count, o_rs4_str, j);
+                count = 0;
+                k = j;
+                j++;
+
+                if ((care_nibble ^ data_nibble) == 0)
+                {
+                    // Only one-data in nibble.
+                    state = 1;
+                }
+                else
+                {
+                    // There is zero-data in nibble.
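+                    // State 2 will emit the reserved control nibble at
+                    // position k as 0xf, then the care-mask/data pair.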
+// RS4 compression algorithm notes:
+//
+// RS4 compression processes i_data_str/i_care_str as strings of nibbles.
+// Final special-case code handles the 0-3 remaining terminal bits.
+//
+// There is a special case for 0x0 nibbles embedded in a string of non-0x0
+// nibbles. It is more efficient to encode a single 0x0 nibble as part of a
+// longer string of non-0x0 nibbles. However it is break-even (actually a
+// slight statistical advantage) to break a scan sequence for 2 0x0 nibbles.
+//
+// If a run of 14 scan nibbles is found the scan is terminated and we return
+// to the rotate state. Runs of more than 14 scans will always include a
+// 0-length rotate between the scan sequences.
+//
+// The ability to store a 15th consecutive scan nibble was given up for an
+// enhancement of the compression algorithm:
+// The scan count 15 has a special meaning and is reserved for handling
+// single nibbles that come with a care mask, that is, an extra nibble that
+// determines the significance of scan bits, including both 1 and 0 bits.
+//
+// Returns a scan compression return code.
+
+static int
+__rs4_compress(uint8_t* o_rs4_str,
+               uint32_t* o_nibbles,
+               const uint8_t* i_data_str,
+               const uint8_t* i_care_str,
+               const uint32_t i_length)
+{
+    int state;        /* 0 : Rotate, 1 : One-data scan, 2 : Zero-data scan */
+    uint32_t n;       /* Number of whole nibbles in i_data */
+    uint32_t r;       /* Number of remaining bits in i_data */
+    uint32_t i;       /* Nibble index in i_data_str/i_care_str */
+    uint32_t j;       /* Nibble index in o_rs4_str */
+    uint32_t k;       /* Location to place the scan count nibble */
+    uint32_t count;   /* Counts rotate/scan nibbles */
+    int care_nibble;
+    int data_nibble;
+
+    n = i_length / 4;
+    r = i_length % 4;
+    i = 0;
+    j = 0;
+    k = 0; /* Makes GCC happy */
+    care_nibble = 0;
+    data_nibble = 0;
+    count = 0;
+    state = 0;
+
+    // Process the bulk of the string. Note that state changes do not
+    // increment 'i' - the nibble at i_data is always scanned again.
+
+    while (i < n)
+    {
+        care_nibble = rs4_get_nibble(i_care_str, i);
+        data_nibble = rs4_get_nibble(i_data_str, i);
+
+        if (~care_nibble & data_nibble)
+        {
+            return BUGX(SCAN_COMPRESSION_INPUT_ERROR,
+                        "Conflicting data and mask bits in nibble %d\n", i);
+        }
+
+        if (state == 0)
+        //----------------//
+        // Rotate section //
+        //----------------//
+        {
+            if (care_nibble == 0)
+            {
+                count++;
+                i++;
+            }
+            else
+            {
+                j += rs4_stop_encode(count, o_rs4_str, j);
+                count = 0;
+                k = j;
+                j++;
+
+                if ((care_nibble ^ data_nibble) == 0)
+                {
+                    // Only one-data in nibble.
+                    state = 1;
+                }
+                else
+                {
+                    // There is zero-data in nibble.
+                    state = 2;
+                }
+            }
+        }
+        else if (state == 1)
+        //------------------//
+        // One-data section //
+        //------------------//
+        {
+            if (care_nibble == 0)
+            {
+                if (((i + 1) < n) && (rs4_get_nibble(i_care_str, i + 1) == 0))
+                {
+                    // Set the scan count in nibble k since there is no more
+                    // data in the current AND next nibble (or the next
+                    // nibble might be the last).
+                    rs4_set_nibble(o_rs4_str, k, count);
+                    count = 0;
+                    state = 0;
+                }
+                else
+                {
+                    // Whether the next nibble is the last nibble or contains
+                    // data, let's include the current empty nibble in the
+                    // scan_data(N) count because it's more efficient than
+                    // inserting rotate go+stop nibbles.
+                    rs4_set_nibble(o_rs4_str, j, 0);
+                    count++;
+                    i++;
+                    j++;
+                }
+            }
+            else if ((care_nibble ^ data_nibble) == 0)
+            {
+                // Only one-data in nibble. Continue piling on one-data
+                // nibbles.
+                rs4_set_nibble(o_rs4_str, j, data_nibble);
+                count++;
+                i++;
+                j++;
+            }
+            else
+            {
+                // There is zero-data in nibble.
+                // First set the scan count in nibble k to end the current
+                // sequence of one-data nibbles.
+                rs4_set_nibble(o_rs4_str, k, count);
+                count = 0;
+                state = 0;
+            }
+
+            if ((state == 1) && (count == 14))
+            {
+                rs4_set_nibble(o_rs4_str, k, 14);
+                count = 0;
+                state = 0;
+            }
+        }
+        else // state==2
+        //-------------------//
+        // Zero-data section //
+        //-------------------//
+        {
+            rs4_set_nibble(o_rs4_str, k, 15);
+            rs4_set_nibble(o_rs4_str, j, care_nibble);
+            j++;
+            rs4_set_nibble(o_rs4_str, j, data_nibble);
+            i++;
+            j++;
+            count = 0;
+            state = 0;
+        }
+    } // End of while (i < n)
+
+    // Close the final rotate or scan section and insert the end-of-string
+    // marker (a scan count of 0).
+
+    if (state == 0)
+    {
+        j += rs4_stop_encode(count, o_rs4_str, j);
+    }
+    else
+    {
+        rs4_set_nibble(o_rs4_str, k, count);
+        j += rs4_stop_encode(0, o_rs4_str, j);
+    }
+
+    rs4_set_nibble(o_rs4_str, j, 0);
+    j++;
+
+    // Insert the terminal count nibble, and if r > 0, the remainder data
+    // nibble. Note that here we indicate the number of bits (0 <= r < 4).
+    if (r == 0)
+    {
+        rs4_set_nibble(o_rs4_str, j, r);
+        j++;
+    }
+    else
+    {
+        care_nibble = rs4_get_nibble(i_care_str, n) & ((0xf >> (4 - r)) << (4 - r)); // Make excess bits zero
+        data_nibble = rs4_get_nibble(i_data_str, n) & ((0xf >> (4 - r)) << (4 - r)); // Make excess bits zero
+
+        if (~care_nibble & data_nibble)
+        {
+            return BUGX(SCAN_COMPRESSION_INPUT_ERROR,
+                        "Conflicting data and mask bits in nibble %d\n", i);
+        }
+
+        if ((care_nibble ^ data_nibble) == 0)
+        {
+            // Only one-data in rem nibble.
+            rs4_set_nibble(o_rs4_str, j, r);
+            j++;
+            rs4_set_nibble(o_rs4_str, j, data_nibble);
+            j++;
+        }
+        else
+        {
+            // Zero-data in rem nibble.
+            rs4_set_nibble(o_rs4_str, j, r + 8);
+            j++;
+            rs4_set_nibble(o_rs4_str, j, care_nibble);
+            j++;
+            rs4_set_nibble(o_rs4_str, j, data_nibble);
+            j++;
+        }
+    }
+
+    *o_nibbles = j;
+
+    return SCAN_COMPRESSION_OK;
+}
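To make the encoding concrete, here is a hand-worked example derived by stepping through the state machine above (treat the exact stream as illustrative rather than authoritative): a 16-bit ring whose care and data strings are both 0x00f0.

```
input nibbles (care = data):  0 0 f 0

rotate(2)                     0xa       octal stop code for count 2
scan count = 2                0x2       written into the reserved nibble k
data                          0xf 0x0   the trailing empty nibble is cheaper
                                        inside the scan than as a new rotate
rotate(0) + end marker        0x8 0x0
terminal count (r = 0)        0x0

compressed nibble stream:     a 2 f 0 8 0 0
```

This is exactly the `(R S)* (R E) T` shape from the BNF grammar: one rotate/scan pair, a zero-length rotate with the end-of-string marker, and a terminal count of 0 because the ring length is a whole number of nibbles.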
+// The worst-case compression for RS4 v3 occurs if all data nibbles
+// contain significant zeros as specified by corresponding care nibbles,
+// and if the raw ring length is a whole multiple of four.
+//
+// In general, each data and care nibble pair, which are one nibble
+// in terms of input string length, are compressed into 4 nibbles:
+//
+// 1. a special data count nibble that indicates special case with care mask
+// 2. a care mask nibble
+// 3. a data nibble
+// 4. a rotate nibble
+//
+// Then, if the raw ring length is a whole multiple of four (worst case),
+// the last raw nibble also requires those RS4 four nibbles, and it is
+// followed by 2 additional nibbles that terminate the compressed data.
+// So a total of six nibbles to account for the last input nibble:
+//
+// 5. a '0x0' terminate nibble
+// 6. a terminal count(0) nibble
+//
+// If on the other hand the last input nibble is partial, then that requires
+// only four output nibbles because the terminate tag and data are combined
+// in the encoding of the masked terminal count:
+//
+// 1. a '0x0' terminate nibble
+// 2. a terminal count nibble for masked data
+// 3. a care mask nibble
+// 4. a data nibble
+//
+// Besides, there is always a rotate nibble at the beginning of the
+// compressed data:
+//
+// 0. rotate
+
+static inline uint32_t
+rs4_max_compressed_nibbles(const uint32_t i_length)
+{
+    uint32_t nibbles_raw, nibbles_rs4;
+
+    nibbles_raw = (i_length + 3) / 4;  // bits rounded up to full nibbles
+    nibbles_rs4 = 1                    // initial rotate nibble
+                  + nibbles_raw * 4    // worst case whole nibble encoding
+                  + 1                  // terminate nibble
+                  + 1;                 // zero terminal count nibble
+
+    return nibbles_rs4;
+}
+
+static inline uint32_t
+rs4_max_compressed_bytes(uint32_t nibbles)
+{
+    uint32_t bytes;
+
+    bytes = ((nibbles + 1) / 2);          // nibbles rounded up to full bytes
+    bytes += sizeof(CompressedScanData);  // plus rs4 header
+    bytes = ((bytes + 3) / 4) * 4;        // rounded up to multiple of 4 bytes
+
+    return bytes;
+}
+
+
+// We always require the worst-case amount of memory including the header and
+// any rounding required to guarantee that the data size is a multiple of 4
+// bytes. The final image size is also rounded up to a multiple of 4 bytes.
+//
+// Returns a scan compression return code.
+
+int
+rs4_compress(CompressedScanData* io_rs4,
+             const uint32_t i_size,
+             const uint8_t* i_data_str,
+             const uint8_t* i_care_str,
+             const uint32_t i_length,
+             const uint32_t i_scanAddr,
+             const RingId_t i_ringId)
+{
+    int rc;
+    uint32_t nibbles = rs4_max_compressed_nibbles(i_length);
+    uint32_t bytes = rs4_max_compressed_bytes(nibbles);
+    uint8_t* rs4_str = (uint8_t*)io_rs4 + sizeof(CompressedScanData);
+
+    if (bytes > i_size)
+    {
+        return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW);
+    }
+
+    memset(io_rs4, 0, i_size);
+
+    rc = __rs4_compress(rs4_str, &nibbles, i_data_str, i_care_str, i_length);
+
+    if (rc == SCAN_COMPRESSION_OK)
+    {
+        bytes = rs4_max_compressed_bytes(nibbles);
+
+        io_rs4->magic = htobe16(RS4_MAGIC);
+        io_rs4->version = RS4_VERSION;
+        // For now this assumes non-CMSK scan data.
+        // For CMSK support, we would need to:
+        // - either add a CMSK function parameter and set type here,
+        // - or rely on caller to set type later.
+        io_rs4->type = RS4_SCAN_DATA_TYPE_NON_CMSK;
+        io_rs4->size = htobe16(bytes);
+        io_rs4->ring_id = htobe16(i_ringId);
+        io_rs4->scan_addr = htobe32(i_scanAddr);
+    }
+
+    return rc;
+}
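A sketch of how a caller might size the destination buffer and invoke `rs4_compress()`. The ring length, scan address and ring id below are made-up values, and since the sizing helpers are file-local, the sketch replicates their arithmetic (for 2640 bits: 1 + 2640 + 2 = 2643 nibbles, 1322 data bytes + 12 header bytes, rounded up to 1336):

```c
#include <stdint.h>
#include <stdlib.h>
#include "rs4.h"

// Hypothetical example values, not taken from any real ring.
#define EXAMPLE_RING_BITS 2640
#define EXAMPLE_SCAN_ADDR 0x00038000
#define EXAMPLE_RING_ID   0x00d5

static struct ring_hdr *compress_example(const uint8_t *data, const uint8_t *care)
{
	/* Same worst-case arithmetic as rs4_max_compressed_nibbles()/_bytes():
	   1 rotate + 4 nibbles per raw nibble + 2 terminating nibbles, then
	   the header and rounding to a multiple of 4 bytes. */
	uint32_t nibbles = 1 + ((EXAMPLE_RING_BITS + 3) / 4) * 4 + 2;
	uint32_t bytes = (((nibbles + 1) / 2 + sizeof(struct ring_hdr) + 3) / 4) * 4;
	struct ring_hdr *rs4 = malloc(bytes);

	if (rs4 == NULL)
		return NULL;

	if (rs4_compress(rs4, bytes, data, care, EXAMPLE_RING_BITS,
			 EXAMPLE_SCAN_ADDR, EXAMPLE_RING_ID) != SCAN_COMPRESSION_OK) {
		free(rs4);
		return NULL;
	}

	return rs4; /* header fields (size, ring_id, ...) are big-endian */
}
```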
+// Decompress an RS4-encoded string into raw data and care strings, returning
+// the number of decoded bits in *o_length. The caller-supplied output
+// buffers hold i_size bytes each.
+//
+// Returns a scan compression return code.
+
+static int
+__rs4_decompress(uint8_t* o_data_str,
+                 uint8_t* o_care_str,
+                 uint32_t i_size,
+                 uint32_t* o_length,
+                 const uint8_t* i_rs4_str)
+{
+    int state;        /* 0 : Rotate, 1 : Scan */
+    uint32_t i;       /* Nibble index in i_rs4_str */
+    uint32_t j;       /* Nibble index in o_data_str/o_care_str */
+    uint32_t k;       /* Loop index */
+    uint32_t bits;    /* Number of output bits decoded so far */
+    uint32_t count;   /* Count of rotate nibbles */
+    uint32_t nibbles; /* Rotate encoding or scan nibbles to process */
+    int r;            /* Remainder bits */
+    int masked;       /* if a care mask is available */
+
+    i = 0;
+    j = 0;
+    bits = 0;
+    state = 0;
+
+    // Decompress the bulk of the string
+    do
+    {
+        if (state == 0)
+        {
+            nibbles = stop_decode(&count, i_rs4_str, i);
+            i += nibbles;
+
+            bits += 4 * count;
+
+            if (bits > i_size * 8)
+            {
+                return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW);
+            }
+
+            // keep 'count' zero care and data nibbles
+            // as initialised by memset in calling function
+            j += count;
+
+            state = 1;
+        }
+        else
+        {
+            nibbles = rs4_get_nibble(i_rs4_str, i);
+            i++;
+
+            if (nibbles == 0)
+            {
+                break;
+            }
+
+            masked = (nibbles == 15 ? 1 : 0);
+            nibbles = (masked ? 1 : nibbles);
+            bits += 4 * nibbles;
+
+            if (bits > i_size * 8)
+            {
+                return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW);
+            }
+
+            for (k = 0; k < nibbles; k++)
+            {
+                rs4_set_nibble(o_care_str, j, rs4_get_nibble(i_rs4_str, i));
+                i = (masked ? i + 1 : i);
+                rs4_set_nibble(o_data_str, j, rs4_get_nibble(i_rs4_str, i));
+                i++;
+                j++;
+            }
+
+            state = 0;
+        }
+    }
+    while (1);
+
+    // Now handle string termination
+
+    nibbles = rs4_get_nibble(i_rs4_str, i);
+    i++;
+
+    masked = nibbles & 0x8;
+    r = nibbles & 0x3;
+    bits += r;
+
+    if (bits > i_size * 8)
+    {
+        return BUG(SCAN_COMPRESSION_BUFFER_OVERFLOW);
+    }
+
+    if (r != 0)
+    {
+        rs4_set_nibble(o_care_str, j, rs4_get_nibble(i_rs4_str, i));
+        i = (masked ? i + 1 : i);
+        rs4_set_nibble(o_data_str, j, rs4_get_nibble(i_rs4_str, i));
+    }
+
+    *o_length = bits;
+    return SCAN_COMPRESSION_OK;
+}
+
+int
+rs4_decompress(uint8_t* o_data_str,
+               uint8_t* o_care_str,
+               uint32_t i_size,
+               uint32_t* o_length,
+               const CompressedScanData* i_rs4)
+{
+    uint8_t* rs4_str = (uint8_t*)i_rs4 + sizeof(CompressedScanData);
+
+    if (be16toh(i_rs4->magic) != RS4_MAGIC)
+    {
+        return BUG(SCAN_DECOMPRESSION_MAGIC_ERROR);
+    }
+
+    if (i_rs4->version != RS4_VERSION)
+    {
+        return BUG(SCAN_COMPRESSION_VERSION_ERROR);
+    }
+
+    memset(o_data_str, 0, i_size);
+    memset(o_care_str, 0, i_size);
+
+    return __rs4_decompress(o_data_str, o_care_str, i_size,
+                            o_length, rs4_str);
+}
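A round-trip sketch using the public API from rs4.h. The assumption here is that the caller knows the expected raw ring length from its ring metadata; the buffer sizing and the length check are illustrative:

```c
#include <stdint.h>
#include <stdlib.h>
#include "rs4.h"

// Sketch: expand an RS4 ring back into raw data/care strings and verify
// that the decoded bit count matches the expected ring length.
static int decompress_example(const struct ring_hdr *rs4, uint32_t ring_bits)
{
	uint32_t size = (ring_bits + 7) / 8; /* bits rounded up to bytes */
	uint8_t *data = calloc(1, size);
	uint8_t *care = calloc(1, size);
	uint32_t bits = 0;
	int rc = SCAN_COMPRESSION_NO_MEMORY;

	if (data && care) {
		rc = rs4_decompress(data, care, size, &bits, rs4);
		if (rc == SCAN_COMPRESSION_OK && bits != ring_bits)
			rc = SCAN_DECOMPRESSION_SIZE_ERROR;
	}

	free(data);
	free(care);
	return rc;
}
```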
+int
+rs4_redundant(const CompressedScanData* i_data, int* o_redundant)
+{
+    uint8_t* data;
+    uint32_t length, pos;
+
+    *o_redundant = 0;
+
+    if (be16toh(i_data->magic) != RS4_MAGIC)
+    {
+        return BUG(SCAN_DECOMPRESSION_MAGIC_ERROR);
+    }
+
+    data = (uint8_t*)i_data + sizeof(CompressedScanData);
+
+    // A compressed scan string is redundant if the initial rotate is
+    // followed by the end-of-string marker, and any remaining mod-4 bits
+    // are also 0.
+
+    pos = stop_decode(&length, data, 0);
+    length *= 4;
+
+    if (rs4_get_nibble(data, pos) == 0)
+    {
+        if (rs4_get_nibble(data, pos + 1) == 0)
+        {
+            *o_redundant = 1;
+        }
+        else
+        {
+            length += rs4_get_nibble(data, pos + 1);
+
+            if (rs4_get_nibble(data, pos + 2) == 0)
+            {
+                *o_redundant = 1;
+            }
+        }
+    }
+
+    return SCAN_COMPRESSION_OK;
+}
diff --git a/src/vendorcode/ibm/power9/rs4.h b/src/vendorcode/ibm/power9/rs4.h
new file mode 100644
index 00000000000..ba8f836c3dc
--- /dev/null
+++ b/src/vendorcode/ibm/power9/rs4.h
@@ -0,0 +1,93 @@
+/* IBM_PROLOG_BEGIN_TAG                                                   */
+/* This is an automatically generated prolog.                             */
+/*                                                                        */
+/* $Source: src/import/chips/p9/utils/imageProcs/p9_scan_compression.H $  */
+/*                                                                        */
+/* OpenPOWER HostBoot Project                                             */
+/*                                                                        */
+/* Contributors Listed Below - COPYRIGHT 2016,2017                        */
+/* [+] International Business Machines Corp.                              */
+/*                                                                        */
+/*                                                                        */
+/* Licensed under the Apache License, Version 2.0 (the "License");        */
+/* you may not use this file except in compliance with the License.       */
+/* You may obtain a copy of the License at                                */
+/*                                                                        */
+/*     http://www.apache.org/licenses/LICENSE-2.0                         */
+/*                                                                        */
+/* Unless required by applicable law or agreed to in writing, software    */
+/* distributed under the License is distributed on an "AS IS" BASIS,      */
+/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        */
+/* implied. See the License for the specific language governing           */
+/* permissions and limitations under the License.                         */
+/*                                                                        */
+/* IBM_PROLOG_END_TAG                                                     */
+
+#ifndef __SOC_IBM_POWER9_RS4_H
+#define __SOC_IBM_POWER9_RS4_H
+
+#include <stdint.h>
+
+#define RS4_MAGIC (uint16_t)0x5253 // "RS"
+
+/// Normal return code
+#define SCAN_COMPRESSION_OK (uint8_t)0
+
+/// The (de)compression algorithm could not allocate enough memory for the
+/// (de)compression.
+#define SCAN_COMPRESSION_NO_MEMORY (uint8_t)1
+
+/// Magic number mismatch on scan decompression
+#define SCAN_DECOMPRESSION_MAGIC_ERROR (uint8_t)2
+
+/// Decompression size error
+///
+/// Decompression produced a string of a size different than indicated in the
+/// header, indicating either a bug or data corruption. Note that the entire
+/// application should be considered corrupted if this error occurs since it
+/// may not be discovered until after the decompression buffer is
+/// overrun. This error may also be returned by rs4_redundant() in the event
+/// of inconsistencies in the compressed string.
+#define SCAN_DECOMPRESSION_SIZE_ERROR (uint8_t)3
+
+/// A buffer would overflow
+///
+/// Either the caller-supplied memory buffer to rs4_decompress() was too
+/// small to contain the decompressed string, or a caller-supplied buffer to
+/// rs4_compress() was not large enough to hold the worst-case compressed
+/// string.
+#define SCAN_COMPRESSION_BUFFER_OVERFLOW (uint8_t)4
+
+/// Inconsistent input data
+///
+/// 1 in data is masked by 0 in care mask
+#define SCAN_COMPRESSION_INPUT_ERROR (uint8_t)5
+
+/// Invalid transition in state machine
+#define SCAN_COMPRESSION_STATE_ERROR (uint8_t)6
+
+/// Wrong compression version
+#define SCAN_COMPRESSION_VERSION_ERROR (uint8_t)7
+
+/* Header of an RS4 compressed ring */
+struct ring_hdr {
+	uint16_t magic;     // Always "RS"
+	uint8_t  version;
+	uint8_t  type;
+	uint16_t size;      // Header + data size in BE
+	uint16_t ring_id;
+	uint32_t scan_addr;
+	uint8_t  data[];
+} __attribute__((packed));
+
+int rs4_compress(struct ring_hdr *io_rs4, const uint32_t i_size,
+		 const uint8_t *i_data_str, const uint8_t *i_care_str,
+		 const uint32_t i_length, const uint32_t i_scanAddr,
+		 const uint16_t ring_id);
+
+int rs4_decompress(uint8_t *o_data_str, uint8_t *o_care_str, uint32_t i_size,
+		   uint32_t *o_length, const struct ring_hdr *i_rs4);
+
+int rs4_redundant(const struct ring_hdr *i_data, int *o_redundant);
+
+#endif // __SOC_IBM_POWER9_RS4_H
diff --git a/util/cbmem/cbmem.c b/util/cbmem/cbmem.c
index 15431f1d6e2..37c5245176a 100644
--- a/util/cbmem/cbmem.c
+++ b/util/cbmem/cbmem.c
@@ -19,9 +19,10 @@
 #include <getopt.h>
 #include <stdint.h>
 #include <stdio.h>
+#include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
-#include <commonlib/tcpa_log_serialized.h>
+#include <commonlib/tpm_log_serialized.h>
 #include <commonlib/coreboot_tables.h>
 
 #ifdef __OpenBSD__
@@ -58,12 +59,21 @@ static int verbose = 0;
 #define debug(x...) if(verbose) printf(x)
 
 /* File handle used to access /dev/mem */
-static int mem_fd;
+static int mem_fd = -1;
 static struct mapping lbtable_mapping;
 
+/* File handle used to parse CBMEM from file instead of RAM */
+static int file_fd = -1;
+
 /* TSC frequency from the LB_TAG_TSC_INFO record. 0 if not present. */
 static uint32_t tsc_freq_khz = 0;
 
+static struct lb_cbmem_ref timestamps;
+static struct lb_cbmem_ref console;
+static struct lb_cbmem_ref tpm_cb_log;
+static struct lb_range tpm_std_log;
+static struct lb_memory_range cbmem;
+
 static void die(const char *msg)
 {
 	if (msg)
@@ -101,40 +111,74 @@ static void *mapping_virt(const struct mapping *mapping)
 static void *map_memory_with_prot(struct mapping *mapping,
 				  unsigned long long phys, size_t sz, int prot)
 {
-	void *v;
-	unsigned long long page_size;
+	if (file_fd < 0) {
+		void *v;
+		unsigned long long page_size;
 
-	page_size = system_page_size();
+		page_size = system_page_size();
 
-	mapping->virt = NULL;
-	mapping->offset = phys % page_size;
-	mapping->virt_size = sz + mapping->offset;
-	mapping->size = sz;
-	mapping->phys = phys;
-
-	if (size_to_mib(mapping->virt_size) == 0) {
-		debug("Mapping %zuB of physical memory at 0x%llx (requested 0x%llx).\n",
-		      mapping->virt_size, phys - mapping->offset, phys);
-	} else {
-		debug("Mapping %zuMB of physical memory at 0x%llx (requested 0x%llx).\n",
-		      size_to_mib(mapping->virt_size), phys - mapping->offset,
-		      phys);
-	}
+		mapping->virt = NULL;
+		mapping->offset = phys % page_size;
+		mapping->virt_size = sz + mapping->offset;
+		mapping->size = sz;
+		mapping->phys = phys;
 
-	v = mmap(NULL, mapping->virt_size, prot, MAP_SHARED, mem_fd,
-		 phys - mapping->offset);
+		if (size_to_mib(mapping->virt_size) == 0) {
+			debug("Mapping %zuB of physical memory at 0x%llx (requested 0x%llx).\n",
+			      mapping->virt_size, phys - mapping->offset, phys);
+		} else {
+			debug("Mapping %zuMB of physical memory at 0x%llx (requested 0x%llx).\n",
+			      size_to_mib(mapping->virt_size), phys - mapping->offset,
+			      phys);
+		}
 
-	if (v == MAP_FAILED) {
-		debug("Mapping failed %zuB of physical memory at 0x%llx.\n",
-		      mapping->virt_size, phys - mapping->offset);
-		return NULL;
-	}
+		v = mmap(NULL, mapping->virt_size, prot, MAP_SHARED, mem_fd,
+			 phys - mapping->offset);
 
-	mapping->virt = v;
+		if (v == MAP_FAILED) {
+			debug("Mapping failed %zuB of physical memory at 0x%llx.\n",
+			      mapping->virt_size, phys - mapping->offset);
+			return NULL;
+		}
 
-	if (mapping->offset != 0)
-		debug(" ... padding virtual address with 0x%zx bytes.\n",
-		      mapping->offset);
+		mapping->virt = v;
+
+		if (mapping->offset != 0)
+			debug(" ... padding virtual address with 0x%zx bytes.\n",
+			      mapping->offset);
+	} else {
+		ssize_t ret;
+		mapping->virt = malloc(sz);
+		mapping->offset = 0;
+		mapping->virt_size = sz;
+		mapping->size = sz;
+		mapping->phys = phys;
+
+		debug("map_memory phys = %llx, size = %zuB, cbmem.start = %llx\n",
+		      phys, sz, (unsigned long long)cbmem.start);
+
+		if (mapping->virt == NULL) {
+			debug("Couldn't allocate %zuB of memory.\n", sz);
+			return NULL;
+		}
+
+		if (lseek(file_fd, mapping->phys - cbmem.start, SEEK_SET) < 0) {
+			debug("Couldn't seek to file offset %llx.\n",
+			      mapping->phys - cbmem.start);
+			free(mapping->virt);
+			return NULL;
+		}
+
+		ret = read(file_fd, mapping->virt, sz);
+		if (ret < 0) {
+			debug("Error reading file: %s\n", strerror(errno));
+			free(mapping->virt);
+			return NULL;
+		}
+		if ((size_t)ret != sz) {
+			debug("Truncated read from offset %llx, requested %zuB, got %zuB.\n",
+			      mapping->phys - cbmem.start, sz, (size_t)ret);
+		}
+	}
 
 	return mapping_virt(mapping);
 }
@@ -153,7 +197,11 @@ static int unmap_memory(struct mapping *mapping)
 	if (mapping->virt == NULL)
 		return -1;
 
-	munmap(mapping->virt, mapping->virt_size);
+	if (file_fd < 0)
+		munmap(mapping->virt, mapping->virt_size);
+	else
+		free(mapping->virt);
+
 	mapping->virt = NULL;
 	mapping->offset = 0;
 	mapping->virt_size = 0;
@@ -265,11 +313,6 @@ static int find_cbmem_entry(uint32_t id, uint64_t *addr, size_t *size)
  * none found. */
 
-static struct lb_cbmem_ref timestamps;
-static struct lb_cbmem_ref console;
-static struct lb_cbmem_ref tcpa_log;
-static struct lb_memory_range cbmem;
-
 /* This is a work-around for a nasty problem introduced by initially having
  * pointer sized entries in the lb_cbmem_ref structures. This caused problems
  * on 64bit x86 systems because coreboot is 32bit on those systems.
@@ -291,6 +334,13 @@ static struct lb_cbmem_ref parse_cbmem_ref(const struct lb_cbmem_ref *cbmem_ref)
 	return ret;
 }
 
+static struct lb_range parse_range(const struct lb_range *range)
+{
+	struct lb_range ret;
+	aligned_memcpy(&ret, range, sizeof(ret));
+	return ret;
+}
+
 static void parse_memory_tags(const struct lb_memory *mem)
 {
 	int num_entries;
@@ -336,9 +386,9 @@ static int parse_cbtable_entries(const struct mapping *table_mapping)
 			console = parse_cbmem_ref((struct lb_cbmem_ref *)lbr_p);
 			continue;
 		}
-		case LB_TAG_TCPA_LOG: {
-			debug("  Found tcpa log table.\n");
-			tcpa_log =
+		case LB_TAG_TPM_CB_LOG: {
+			debug("  Found TPM CB log table.\n");
+			tpm_cb_log =
 				parse_cbmem_ref((struct lb_cbmem_ref *)lbr_p);
 			continue;
 		}
@@ -346,6 +396,11 @@ static int parse_cbtable_entries(const struct mapping *table_mapping)
 			debug("  Found TSC info.\n");
 			tsc_freq_khz = ((struct lb_tsc_info *)lbr_p)->freq_khz;
 			continue;
+		case LB_TAG_TPM_STD_LOG: {
+			debug("  Found TPM standard log table.\n");
+			tpm_std_log = parse_range((struct lb_range *)lbr_p);
+			continue;
+		}
 		case LB_TAG_FORWARD: {
 			int ret;
 			/*
@@ -843,35 +898,254 @@ static void timestamp_add_now(uint32_t timestamp_id)
 	unmap_memory(&timestamp_mapping);
 }
 
-/* dump the tcpa log table */
-static void dump_tcpa_log(void)
+static bool can_print(const uint8_t *data, size_t len)
+{
+	unsigned int i;
+	for (i = 0; i < len; i++) {
+		if (!isprint(data[i]) && !isspace(data[i])) {
+			/* If printable prefix is followed by zeroes, this is a valid string */
+			for (; i < len; i++) {
+				if (data[i] != 0)
+					return false;
+			}
+			return true;
+		}
+	}
+	return true;
+}
+
+static void print_hex(uint8_t *hex, size_t len)
+{
+	unsigned int i;
+	for (i = 0; i < len; i++)
+		printf("%02x", *(hex + i));
+	printf("\n");
+}
+
+static void parse_tpm12_log(const struct tcpa_spec_entry *spec_log)
+{
+	static uint8_t zero_block[sizeof(struct tcpa_spec_entry)];
+
+	uintptr_t current;
+	uint32_t counter = 0;
+
+	printf("TCPA log:\n");
+	printf("\tSpecification: %d.%d%d\n",
+	       spec_log->spec_version_major,
+	       spec_log->spec_version_minor,
+	       spec_log->spec_errata);
+	printf("\tPlatform class: %s\n",
+	       le32toh(spec_log->platform_class) == 0 ? "PC Client" :
+	       le32toh(spec_log->platform_class) == 1 ? "Server" : "Unknown");
"Server" : "Unknown"); + + current = (uintptr_t)&spec_log->vendor_info[spec_log->vendor_info_size]; + while (memcmp((const void *)current, (const void *)zero_block, sizeof(zero_block))) { + uint32_t len; + struct tcpa_log_entry *log_entry = (void *)current; + uint32_t event_type = le32toh(log_entry->event_type); + + printf("TCPA log entry %u:\n", ++counter); + printf("\tPCR: %d\n", le32toh(log_entry->pcr)); + if (event_type >= ARRAY_SIZE(tpm_event_types)) + printf("\tEvent type: Unknown (0x%x)\n", event_type); + else + printf("\tEvent type: %s\n", tpm_event_types[event_type]); + printf("\tDigest: "); + print_hex(log_entry->digest, SHA1_DIGEST_SIZE); + current += sizeof(struct tcpa_log_entry); + len = le32toh(log_entry->event_data_size); + if (len != 0) { + current += len; + printf("\tEvent data: "); + if (can_print(log_entry->event, len)) + printf("%.*s\n", len, log_entry->event); + else + print_hex(log_entry->event, len); + } else { + printf("\tEvent data not provided\n"); + } + } +} + +static uint32_t print_tpm2_digests(struct tcg_pcr_event2_header *log_entry) { - const struct tcpa_table *tclt_p; + unsigned int i; + uintptr_t current = (uintptr_t)log_entry->digests; + + for (i = 0; i < le32toh(log_entry->digest_count); i++) { + struct tpm_hash_algorithm *hash = (struct tpm_hash_algorithm *)current; + switch (le16toh(hash->hashAlg)) { + case TPM2_ALG_SHA1: + printf("\t\t SHA1: "); + print_hex(hash->digest.sha1, SHA1_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA1_DIGEST_SIZE; + break; + case TPM2_ALG_SHA256: + printf("\t\t SHA256: "); + print_hex(hash->digest.sha256, SHA256_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA256_DIGEST_SIZE; + break; + case TPM2_ALG_SHA384: + printf("\t\t SHA384: "); + print_hex(hash->digest.sha384, SHA384_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA384_DIGEST_SIZE; + break; + case TPM2_ALG_SHA512: + printf("\t\t SHA512: "); + print_hex(hash->digest.sha512, SHA512_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SHA512_DIGEST_SIZE; + break; + case TPM2_ALG_SM3_256: + printf("\t\t SM3: "); + print_hex(hash->digest.sm3_256, SM3_256_DIGEST_SIZE); + current += sizeof(hash->hashAlg) + SM3_256_DIGEST_SIZE; + break; + default: + die("Unknown hash algorithm\n"); + } + } + + return current - (uintptr_t)&log_entry->digest_count; +} + +static void parse_tpm2_log(const struct tcg_efi_spec_id_event *tpm2_log) +{ + static uint8_t zero_block[12]; /* Only PCR index, event type and digest count */ + + uintptr_t current; + uint32_t counter = 0; + + printf("TPM2 log:\n"); + printf("\tSpecification: %d.%d%d\n", + tpm2_log->spec_version_major, + tpm2_log->spec_version_minor, + tpm2_log->spec_errata); + printf("\tPlatform class: %s\n", + le32toh(tpm2_log->platform_class) == 0 ? "PC Client" : + le32toh(tpm2_log->platform_class) == 1 ? 
"Server" : "Unknown"); + + /* Start after the first variable-sized part of the header */ + current = (uintptr_t)&tpm2_log->digest_sizes[le32toh(tpm2_log->num_of_algorithms)]; + /* current is at `uint8_t vendor_info_size` here */ + current += 1 + *(uint8_t *)current; + + while (memcmp((const void *)current, (const void *)zero_block, sizeof(zero_block))) { + uint32_t len; + struct tcg_pcr_event2_header *log_entry = (void *)current; + uint32_t event_type = le32toh(log_entry->event_type); + + printf("TPM2 log entry %u:\n", ++counter); + printf("\tPCR: %d\n", le32toh(log_entry->pcr_index)); + if (event_type >= ARRAY_SIZE(tpm_event_types)) + printf("\tEvent type: Unknown (0x%x)\n", event_type); + else + printf("\tEvent type: %s\n", tpm_event_types[event_type]); + + current = (uintptr_t)&log_entry->digest_count; + if (le32toh(log_entry->digest_count) > 0) { + printf("\tDigests:\n"); + current += print_tpm2_digests(log_entry); + } else { + printf("\tNo digests in this log entry\n"); + current += sizeof(log_entry->digest_count); + } + /* Now event size and event are left to be parsed */ + len = le32toh(*(uint32_t *)current); + current += sizeof(uint32_t); + if (len != 0) { + printf("\tEvent data: %d %.*s\n", len, len, (const char *)current); + current += len; + } else { + printf("\tEvent data not provided\n"); + } + } +} + +/* Dump the TPM log table in format defined by specifications */ +static void dump_tpm_log(void) +{ + const void *event_log; + const struct tcpa_spec_entry *tspec_entry; + const struct tcg_efi_spec_id_event *tcg_spec_entry; + uint64_t addr; + size_t size; + struct mapping log_mapping; + + if (tpm_std_log.tag != LB_TAG_TPM_STD_LOG) { + fprintf(stderr, "No TPM log found in coreboot table.\n"); + return; + } + + addr = tpm_std_log.range_start; + size = tpm_std_log.range_size; + + event_log = map_memory(&log_mapping, addr, size); + if (!event_log) + die("Unable to map TPM eventlog\n"); + + tspec_entry = event_log; + if (!strcmp((const char *)tspec_entry->signature, TCPA_SPEC_ID_EVENT_SIGNATURE)) { + if (tspec_entry->spec_version_major == 1 && + tspec_entry->spec_version_minor == 2 && + tspec_entry->spec_errata >= 1 && + le32toh(tspec_entry->entry.event_type) == EV_NO_ACTION) { + parse_tpm12_log(tspec_entry); + } else { + fprintf(stderr, "Unknown TPM1.2 log specification\n"); + } + unmap_memory(&log_mapping); + return; + } + + tcg_spec_entry = event_log; + if (!strcmp((const char *)tcg_spec_entry->signature, TCG_EFI_SPEC_ID_EVENT_SIGNATURE)) { + if (tcg_spec_entry->spec_version_major == 2 && + tcg_spec_entry->spec_version_minor == 0 && + le32toh(tcg_spec_entry->event_type) == EV_NO_ACTION) { + parse_tpm2_log(tcg_spec_entry); + } else { + fprintf(stderr, "Unknown TPM2 log specification.\n"); + } + unmap_memory(&log_mapping); + return; + } + + fprintf(stderr, "Unknown TPM log specification: %.*s\n", + (int)sizeof(tcg_spec_entry->signature), + (const char *)tcg_spec_entry->signature); + + unmap_memory(&log_mapping); +} + +/* dump the TPM CB log table */ +static void dump_tpm_cb_log(void) +{ + const struct tpm_cb_log_table *tclt_p; size_t size; - struct mapping tcpa_mapping; + struct mapping log_mapping; - if (tcpa_log.tag != LB_TAG_TCPA_LOG) { - fprintf(stderr, "No tcpa log found in coreboot table.\n"); + if (tpm_cb_log.tag != LB_TAG_TPM_CB_LOG) { + fprintf(stderr, "No TPM log found in coreboot table.\n"); return; } size = sizeof(*tclt_p); - tclt_p = map_memory(&tcpa_mapping, tcpa_log.cbmem_addr, size); + tclt_p = map_memory(&log_mapping, tpm_cb_log.cbmem_addr, size); if (!tclt_p) - 
die("Unable to map tcpa log header\n"); + die("Unable to map TPM log header\n"); size += tclt_p->num_entries * sizeof(tclt_p->entries[0]); - unmap_memory(&tcpa_mapping); + unmap_memory(&log_mapping); - tclt_p = map_memory(&tcpa_mapping, tcpa_log.cbmem_addr, size); + tclt_p = map_memory(&log_mapping, tpm_cb_log.cbmem_addr, size); if (!tclt_p) - die("Unable to map full tcpa log table\n"); + die("Unable to map full TPM log table\n"); - printf("coreboot TCPA log:\n\n"); + printf("coreboot TPM log:\n\n"); for (uint16_t i = 0; i < tclt_p->num_entries; i++) { - const struct tcpa_entry *tce = &tclt_p->entries[i]; + const struct tpm_cb_log_entry *tce = &tclt_p->entries[i]; printf(" PCR-%u ", tce->pcr); @@ -881,7 +1155,7 @@ static void dump_tcpa_log(void) printf(" %s [%s]\n", tce->digest_type, tce->name); } - unmap_memory(&tcpa_mapping); + unmap_memory(&log_mapping); } struct cbmem_console { @@ -1339,7 +1613,8 @@ static void print_usage(const char *name, int exit_code) " -T | --parseable-timestamps: print parseable timestamps\n" " -S | --stacked-timestamps: print stacked timestamps (e.g. for flame graph tools)\n" " -a | --add-timestamp ID: append timestamp with ID\n" - " -L | --tcpa-log print TCPA log\n" + " -L | --tcpa-log print TPM log\n" + " -f | --file FILE: read CBMEM from FILE instead of memory\n" " -V | --verbose: verbose (debugging) output\n" " -v | --version: print the version\n" " -h | --help: print this help\n" @@ -1493,12 +1768,13 @@ int main(int argc, char** argv) {"add-timestamp", required_argument, 0, 'a'}, {"hexdump", 0, 0, 'x'}, {"rawdump", required_argument, 0, 'r'}, + {"file", required_argument, 0, 'f'}, {"verbose", 0, 0, 'V'}, {"version", 0, 0, 'v'}, {"help", 0, 0, 'h'}, {0, 0, 0, 0} }; - while ((opt = getopt_long(argc, argv, "c12B:CltTSa:LxVvh?r:", + while ((opt = getopt_long(argc, argv, "c12B:CltTSa:LxVvh?r:f:", long_options, &option_index)) != EOF) { switch (opt) { case 'c': @@ -1539,6 +1815,14 @@ int main(int argc, char** argv) print_defaults = 0; rawdump_id = strtoul(optarg, NULL, 16); break; + case 'f': + file_fd = open(optarg, O_RDONLY, 0); + if (file_fd < 0) { + fprintf(stderr, "Failed to open file '%s': %s\n", + optarg, strerror(errno)); + return 1; + } + break; case 't': timestamp_type = TIMESTAMPS_PRINT_NORMAL; print_defaults = 0; @@ -1580,11 +1864,18 @@ int main(int argc, char** argv) print_usage(argv[0], 1); } - mem_fd = open("/dev/mem", timestamp_id ? O_RDWR : O_RDONLY, 0); - if (mem_fd < 0) { - fprintf(stderr, "Failed to gain memory access: %s\n", - strerror(errno)); - return 1; + /* Check if Linux driver exposes CBMEM in sysfs. */ + if (file_fd < 0) { + file_fd = open("/sys/firmware/cbmem", O_RDONLY, 0); + } + + if (file_fd < 0) { + mem_fd = open("/dev/mem", timestamp_id ? O_RDWR : O_RDONLY, 0); + if (mem_fd < 0) { + fprintf(stderr, "Failed to gain memory access: %s\n", + strerror(errno)); + return 1; + } } #if defined(__arm__) || defined(__aarch64__) @@ -1676,11 +1967,19 @@ int main(int argc, char** argv) if (timestamp_type != TIMESTAMPS_PRINT_NONE) dump_timestamps(timestamp_type); - if (print_tcpa_log) - dump_tcpa_log(); + if (print_tcpa_log) { + if (tpm_std_log.tag != LB_TAG_UNUSED) + dump_tpm_log(); + else + dump_tpm_cb_log(); + } unmap_memory(&lbtable_mapping); - close(mem_fd); + if (file_fd >= 0) + close(file_fd); + if (mem_fd >= 0) + close(mem_fd); + return 0; }