diff --git a/Makefile b/Makefile index 0662f56..becef8c 100644 --- a/Makefile +++ b/Makefile @@ -2,21 +2,26 @@ GAS = as GAS_FLAGS = --32 LD = ld -LD_FLAGS = --oformat binary -m elf_i386 -s -nostdlib +LD_FLAGS = -m i386pe -s -nostdlib --image-base 0 + +OBJCOPY = objcopy CC = gcc CC_FLAGS = -m32 -march=i386 -fno-pic -static -fno-asynchronous-unwind-tables \ -fno-stack-protector -ffreestanding -nostdlib -O2 -DD = dd +PS = powershell -NoProfile -Command +QEMU ?= C:/Program Files/qemu/qemu-system-i386.exe +STAGE2_SECTORS = 127 +IMAGE_SIZE_MB = 64 BUILD_DIR = ./build SRC_DIR = ./src # not files -.PHONY: all clean run +.PHONY: all clean run install check-stage2-size # default option: make .img file @@ -25,22 +30,22 @@ all: $(BUILD_DIR)/bootloader.img # get .img from stages $(BUILD_DIR)/bootloader.img: $(BUILD_DIR)/fstage.bin $(BUILD_DIR)/sstage.bin - # create disk img - $(DD) if=/dev/zero of=$@ bs=1M count=64 - # write Stage 1 - $(DD) if=$(BUILD_DIR)/fstage.bin of=$@ bs=1 count=446 conv=notrunc - # write Stage 2 + 2.5 - $(DD) if=$(BUILD_DIR)/sstage.bin of=$@ bs=512 seek=1 conv=notrunc + $(PS) "$$fs=[IO.File]::Open('$@',[IO.FileMode]::Create,[IO.FileAccess]::ReadWrite); $$fs.SetLength($(IMAGE_SIZE_MB)MB); $$fs.Close()" + $(PS) "$$b=[IO.File]::ReadAllBytes('$(BUILD_DIR)/fstage.bin'); $$fs=[IO.File]::Open('$@',[IO.FileMode]::Open,[IO.FileAccess]::ReadWrite); $$fs.Write($$b,0,446); $$fs.Seek(510,[IO.SeekOrigin]::Begin) > $$null; $$fs.Write($$b,510,2); $$fs.Close()" + $(PS) "$$b=[IO.File]::ReadAllBytes('$(BUILD_DIR)/sstage.bin'); $$fs=[IO.File]::Open('$@',[IO.FileMode]::Open,[IO.FileAccess]::ReadWrite); $$fs.Seek(512,[IO.SeekOrigin]::Begin) > $$null; $$fs.Write($$b,0,$$b.Length); $$fs.Close()" # link asm code to .bin $(BUILD_DIR)/fstage.bin: $(BUILD_DIR)/fstage.o - $(LD) $(LD_FLAGS) -Ttext 0x7c00 -o $@ $< + $(LD) $(LD_FLAGS) -Ttext 0x7c00 -e _start -o $(BUILD_DIR)/fstage.pe $< + $(OBJCOPY) -O binary -j .text $(BUILD_DIR)/fstage.pe $@ # link stage 2 asm and C code to .bin 
$(BUILD_DIR)/sstage.bin: $(BUILD_DIR)/sstage.o $(BUILD_DIR)/sstagec.o - $(LD) $(LD_FLAGS) -Ttext 0x8000 -o $@ $^ + $(LD) $(LD_FLAGS) -Ttext 0x7e00 -e stage2_start -o $(BUILD_DIR)/sstage.pe $^ + $(OBJCOPY) -O binary $(BUILD_DIR)/sstage.pe $@ + $(PS) "if ((Get-Item '$@').Length -gt ($(STAGE2_SECTORS) * 512)) { Remove-Item '$@'; throw 'Stage 2 is larger than $(STAGE2_SECTORS) sectors' }" # compile C code to .o @@ -55,14 +60,22 @@ $(BUILD_DIR)/%.o: $(SRC_DIR)/*/%.asm | $(BUILD_DIR) # create ./build folder if not found $(BUILD_DIR): - mkdir -p $(BUILD_DIR) + if not exist "$(BUILD_DIR)" mkdir "$(BUILD_DIR)" + + +# install loader into an existing raw disk image with an ext2 Linux partition. +# Usage: make install DISK=debian-ext2.img +install: $(BUILD_DIR)/fstage.bin $(BUILD_DIR)/sstage.bin + @if "$(DISK)" == "" (echo DISK is not set. Usage: make install DISK=debian-ext2.img & exit /b 1) + $(PS) "$$b=[IO.File]::ReadAllBytes('$(BUILD_DIR)/fstage.bin'); $$fs=[IO.File]::Open('$(DISK)',[IO.FileMode]::Open,[IO.FileAccess]::ReadWrite); $$fs.Write($$b,0,446); $$fs.Seek(510,[IO.SeekOrigin]::Begin) > $$null; $$fs.Write($$b,510,2); $$fs.Close()" + $(PS) "$$b=[IO.File]::ReadAllBytes('$(BUILD_DIR)/sstage.bin'); $$fs=[IO.File]::Open('$(DISK)',[IO.FileMode]::Open,[IO.FileAccess]::ReadWrite); $$fs.Seek(512,[IO.SeekOrigin]::Begin) > $$null; $$fs.Write($$b,0,$$b.Length); $$fs.Close()" # run bootloader.img via qemu run: $(BUILD_DIR)/bootloader.img - qemu-system-i386 -drive format=raw,file=$< + "$(QEMU)" -m 256M -drive format=raw,file=$< -serial stdio # cleanup build artifacts clean: - rm -rf $(BUILD_DIR) + if exist "$(BUILD_DIR)" rmdir /s /q "$(BUILD_DIR)" diff --git a/README.md b/README.md index 9ff3715..844008c 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,47 @@ # PSLBoot Polytech Simple Linux Bootloader **(WIP)** ___ -# Notes -- **TODO**: Update Makefile when sources are complete. 
+# What works + +PSLBoot can load a Linux bzImage and initrd from the first Linux/ext2 +partition of a raw QEMU IDE disk. Stage 2 is stored in the post-MBR gap +starting at LBA 1, so create the first partition at the usual 1 MiB offset +or later. + +# Build + +```sh +make +``` + +This creates `build/bootloader.img`, mostly useful as a build artifact sanity +check. For a real Debian disk image, install the loader into an existing raw +image: + +```sh +make install DISK=debian-ext2.img +``` + +The install target preserves the partition table, writes MBR boot code, writes +the boot signature, and writes Stage 2 at LBA 1. + +# Debian/ext2 disk expectations + +- QEMU disk must be attached as the primary IDE disk. +- The first Linux partition must contain an ext2 filesystem, not ext4-with-extents. +- Kernel/initrd are searched as `/boot/vmlinuz`, `/boot/initrd.img`, then by + `vmlinuz*` and `initrd.img*` inside `/boot`. +- Kernel command line is currently built in as: + `root=/dev/sda1 ro mem=512M console=tty0 console=ttyS0,115200n8`. + +`make run` boots the freshly built `build/bootloader.img` in QEMU: + +```sh +make run +``` + +To boot a prepared disk after `make install`, start QEMU on that image directly: + +```sh +"C:/Program Files/qemu/qemu-system-i386.exe" -m 256M -drive format=raw,file=debian-ext2.img -serial stdio +``` diff --git a/src/stage-1/fstage.asm b/src/stage-1/fstage.asm index cd099f6..f92a409 100644 --- a/src/stage-1/fstage.asm +++ b/src/stage-1/fstage.asm @@ -1,82 +1,129 @@ -################################################################################ -# -# fstage.asm -# -# OS CW26-1. Polytech Simple Linux Bootloader. Stage 1. 
-# -# -# Recommended build command: -# $ as --32 -o fstage.o fstage.asm -# $ ld -Ttext 0x7c00 --oformat binary -m elf_i386 -o fstage.bin fstage.o -# -################################################################################ - - -.code16 -.att_syntax - -.section .text -.globl _start - - -_start: - # ==== init - cli - xor %ax, %ax - mov %ax, %ds - mov %ax, %es - mov %ax, %ss - mov $0x7C00, %sp - sti - - mov %dl, boot_drive # saving boot drive number - - # ==== check extensions present - mov $0x41, %ah - mov boot_drive, %dl - mov $0x55AA, %bx - int $0x13 - jc disk_error - - # ==== loading Stage 2 from boot drive - lea dap, %si - mov $0x42, %ah - mov boot_drive, %dl - int $0x13 - jc disk_error - - ljmp $0x0000, $0x7E00 # passing control to Stage 2 - - -disk_error: - mov $err_msg, %si -.loop: - lodsb - or %al, %al - jz hang - xor %bh, %bh - mov $0x0E, %ah - int $0x10 - jmp .loop -hang: - hlt - jmp hang - - -dap: # Disk Address Packet for int 0x13 (ah = 0x42) - .byte 0x10 # struct size - .byte 0x00 # reserved - .word 16 # sector amt - .word 0x7E00 # offset - .word 0x0000 # segment - .quad 1 # LBA-address - -boot_drive: .byte 0 -err_msg: .asciz "Disk error!" - - -.zero (446 - (. - _start)) # padding before partition table - -.zero 64 # partition table (manual filling) - -.word 0xAA55 # bootsect signature +################################################################################ +# +# fstage.asm +# +# OS CW26-1. Polytech Simple Linux Bootloader. Stage 1. 
+# +# +# Recommended build command: +# $ as --32 -o fstage.o fstage.asm +# $ ld -Ttext 0x7c00 --oformat binary -m elf_i386 -o fstage.bin fstage.o +# +################################################################################ + + +.code16 +.att_syntax + +.section .text +.globl _start + +# Where we stash the BIOS E820 memory map for Stage 2: +# 0x4000 : dword, number of entries +# 0x4004 + n*20 : entries (each: u64 addr, u64 size, u32 type) +E820_COUNT = 0x4000 +E820_ENTRIES = 0x4004 + + +_start: + # ==== init + cli + xor %ax, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %ss + mov $0x7C00, %sp + sti + + mov %dl, boot_drive # saving boot drive number + + # ==== collect BIOS E820 memory map (int 0x15, eax=0xE820) + call do_e820 + + # ==== check extensions present + mov $0x41, %ah + mov boot_drive, %dl + mov $0x55AA, %bx + int $0x13 + jc disk_error + + # ==== loading Stage 2 from boot drive + lea dap, %si + mov $0x42, %ah + mov boot_drive, %dl + int $0x13 + jc disk_error + + ljmp $0x0000, $0x7E00 # passing control to Stage 2 + + +# ==== Build the E820 map at E820_ENTRIES, count at E820_COUNT. 
+do_e820: + push %es + pusha + xor %ax, %ax + mov %ax, %es + mov $E820_ENTRIES, %di # es:di -> destination buffer + xor %ebx, %ebx # continuation = 0 + xor %bp, %bp # entry counter + mov $0x534D4150, %edx # 'SMAP' + mov $0xE820, %eax + mov $24, %ecx + int $0x15 + jc .e820_done # carry on first call => unsupported + cmp $0x534D4150, %eax # BIOS must return 'SMAP' in eax + jne .e820_done + jmp .e820_check +.e820_loop: + mov $0xE820, %eax + mov $24, %ecx + int $0x15 + jc .e820_done # carry => end of list +.e820_check: + jcxz .e820_skip # length-0 entry => skip + inc %bp + add $20, %di # advance to next 20-byte slot +.e820_skip: + test %ebx, %ebx # ebx == 0 => that was the last entry + jz .e820_done + jmp .e820_loop +.e820_done: + movzwl %bp, %eax + mov %eax, (E820_COUNT) + popa + pop %es + ret + + +disk_error: + mov $err_msg, %si +.loop: + lodsb + or %al, %al + jz hang + xor %bh, %bh + mov $0x0E, %ah + int $0x10 + jmp .loop +hang: + hlt + jmp hang + + +dap: # Disk Address Packet for int 0x13 (ah = 0x42) + .byte 0x10 # struct size + .byte 0x00 # reserved + .word 127 # sector amt + .word 0x7E00 # offset + .word 0x0000 # segment + .quad 1 # LBA-address + +boot_drive: .byte 0 +err_msg: .asciz "Disk error!" + + +.zero (446 - (. - _start)) # padding before partition table + +.zero 64 # partition table (manual filling) + +.word 0xAA55 # bootsect signature \ No newline at end of file diff --git a/src/stage-2/sstage.asm b/src/stage-2/sstage.asm index 53f54be..2f5348b 100644 --- a/src/stage-2/sstage.asm +++ b/src/stage-2/sstage.asm @@ -1,70 +1,90 @@ -#=============================================================================== -# -# sstage.asm -# -# OS CW26-1. Polytech Simple Linux Bootloader. Stage 2. -# -# ENSURE THAT CODE STARTS AT 0x7E00 !!! 
-# -#=============================================================================== - - -.code16 -.att_syntax - -.text -.globl stage2_start -.extern stage2_main - -stage2_start: - cli # interrupts atp will cause triple fault - - # ==== Enable A20 - in $0x92, %al - or $0x02, %al - and $0xFE, %al # protect System Reset bit - out %al, $0x92 - - # check if A20 is actually enabled?? - - lgdt gdt_descriptor # load gdt - - # ==== Switch to protected mode - mov %cr0, %eax - or $1, %eax - mov %eax, %cr0 - ljmp $CODE_SEG, $protected_mode - - -.code32 -protected_mode: - mov $DATA_SEG, %ax - mov %ax, %ds - mov %ax, %ss - mov %ax, %es - mov %ax, %fs - mov %ax, %gs - mov $0x90000, %esp - - call stage2_main - hlt - - -# ==== GDT -gdt_start: - .quad 0 # null dsc -gdt_code: # Code Segment - .word 0xFFFF, 0x0000 - .byte 0x00, 0b10011010, 0b11001111, 0x00 -gdt_data: # Data Segment - .word 0xFFFF, 0x0000 - .byte 0x00, 0b10010010, 0b11001111, 0x00 -gdt_end: - -gdt_descriptor: - .word gdt_end - gdt_start - 1 - .long gdt_start - - -CODE_SEG = gdt_code - gdt_start -DATA_SEG = gdt_data - gdt_start +#=============================================================================== +# +# sstage.asm +# +# OS CW26-1. Polytech Simple Linux Bootloader. Stage 2. +# +# ENSURE THAT CODE STARTS AT 0x7E00 !!! +# +#=============================================================================== + + +.code16 +.att_syntax + +.text +.globl stage2_start +.globl _jump_linux +.extern _stage2_main + +stage2_start: + cli # interrupts atp will cause triple fault + + # ==== Enable A20 + in $0x92, %al + or $0x02, %al + and $0xFE, %al # protect System Reset bit + out %al, $0x92 + + # check if A20 is actually enabled?? 
+ + lgdt gdt_descriptor # load gdt + + # ==== Switch to protected mode + mov %cr0, %eax + or $1, %eax + mov %eax, %cr0 + ljmp $CODE_SEG, $protected_mode + + +.code32 +protected_mode: + mov $DATA_SEG, %ax + mov %ax, %ds + mov %ax, %ss + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + mov $0x70000, %esp + + call _stage2_main + hlt + +_jump_linux: + mov 4(%esp), %eax + mov 8(%esp), %esi + mov $DATA_SEG, %dx + mov %dx, %ds + mov %dx, %es + mov %dx, %fs + mov %dx, %gs + mov %dx, %ss + mov $0x80000, %esp + xor %ebp, %ebp + xor %edi, %edi + xor %ebx, %ebx + cld + jmp *%eax + + +# ==== GDT +# Linux 32-bit boot protocol expects __BOOT_CS = 0x10 and __BOOT_DS = 0x18. +# So we add a dummy entry to push code to offset 0x10 and data to 0x18. +gdt_start: + .quad 0 # null dsc (0x00) + .quad 0 # dummy (0x08) +gdt_code: # Code Segment (0x10) + .word 0xFFFF, 0x0000 + .byte 0x00, 0b10011010, 0b11001111, 0x00 +gdt_data: # Data Segment (0x18) + .word 0xFFFF, 0x0000 + .byte 0x00, 0b10010010, 0b11001111, 0x00 +gdt_end: + +gdt_descriptor: + .word gdt_end - gdt_start - 1 + .long gdt_start + + +CODE_SEG = gdt_code - gdt_start +DATA_SEG = gdt_data - gdt_start \ No newline at end of file diff --git a/src/stage-2/sstagec.c b/src/stage-2/sstagec.c index 8c645a9..ef791c3 100644 --- a/src/stage-2/sstagec.c +++ b/src/stage-2/sstagec.c @@ -1,33 +1,60 @@ /* + * PSLBoot. Stage 2.5. * - * PSLBoot. Stage 2.5. - * - * WIP - * + * Minimal ext2 + Linux bzImage loader for a QEMU IDE disk. 
*/ +#include + +#define SECTOR_SIZE 512u +#define MAX_BLOCK_SIZE 4096u +#define EXT2_SUPER_LBA 2u +#define EXT2_MAGIC 0xEF53u +#define EXT2_ROOT_INO 2u +#define EXT2_NDIR_BLOCKS 12u +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1u) +#define EXT2_N_BLOCKS 15u +#define EXT2_S_IFMT 0xF000u +#define EXT2_S_IFLNK 0xA000u +#define EXT2_S_IFREG 0x8000u +#define EXT2_S_IFDIR 0x4000u +#define EXT2_FEATURE_INCOMPAT_EXTENTS 0x0040u + +#define MBR_PART_TABLE 446u +#define MBR_PART_SIZE 16u +#define MBR_PART_TYPE_LINUX 0x83u + +#define KERNEL_LOAD_ADDR 0x100000u +#define INITRD_LOAD_ADDR 0x4000000u +#define BOOT_PARAMS_ADDR 0x90000u +#define CMDLINE_ADDR 0x91000u + +#define ZP_SETUP_SECTS 0x1F1u +#define ZP_HEADER 0x202u +#define ZP_VERSION 0x206u +#define ZP_TYPE_OF_LOADER 0x210u +#define ZP_LOADFLAGS 0x211u +#define ZP_CODE32_START 0x214u +#define ZP_RAMDISK_IMAGE 0x218u +#define ZP_RAMDISK_SIZE 0x21Cu +#define ZP_HEAP_END_PTR 0x224u +#define ZP_CMD_LINE_PTR 0x228u +#define ZP_INITRD_ADDR_MAX 0x22Cu + +/* E820 memory map in the zeropage. */ +#define ZP_E820_ENTRIES 0x1E8u /* one byte: number of entries */ +#define ZP_E820_TABLE 0x2D0u /* array of 20-byte e820 entries */ +#define E820_MAX_ENTRIES 128u + +/* Where Stage 1 stashed the BIOS map (must match fstage.asm). 
*/ +#define E820_SRC_COUNT 0x4000u +#define E820_SRC_ENTRIES 0x4004u + +#define LOADED_HIGH 0x01u +#define CAN_USE_HEAP 0x80u -#include // freestanding - - -// constants used in structs -#define EXT2_NDIR_BLOCKS 12 -#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS -#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) -#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) -#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) - -#define KERNEL_LOAD_ADDR 0x100000 // 1 MB -#define INITRD_LOAD_ADDR 0x1000000 // 16 MB -#define SECTOR_SIZE 512 -#define EXT2_SUPER_OFFSET 1024 - - -// refer to https://wiki.osdev.org/Ext2 -// https://github.com/torvalds/linux/blob/master/fs/ext2/ext2.h#L410 typedef struct { - // BASE FIELDS - uint32_t s_inodes_count; uint32_t s_blocks_count; uint32_t s_r_blocks_count; @@ -41,243 +68,694 @@ typedef struct { uint32_t s_inodes_per_group; uint32_t s_mtime; uint32_t s_wtime; - uint16_t s_mnt_count; uint16_t s_max_mnt_count; uint16_t s_magic; uint16_t s_state; uint16_t s_errors; uint16_t s_minor_rev_level; - uint32_t s_lastcheck; uint32_t s_checkinterval; uint32_t s_creator_os; uint32_t s_rev_level; - uint16_t s_def_resuid; uint16_t s_def_resgid; - - // EXTENDED FIELDS - uint32_t s_first_ino; - uint16_t s_inode_size; uint16_t s_block_group_nr; - uint32_t s_feature_compat; uint32_t s_feature_incompat; uint32_t s_feature_ro_compat; - uint8_t s_uuid[16]; - char s_volume_name[16]; char s_last_mounted[64]; - uint32_t s_algorithm_usage_bitmap; - uint8_t s_prealloc_blocks; uint8_t s_prealloc_dir_blocks; - uint16_t s_padding1; - uint8_t s_journal_uuid[16]; - uint32_t s_journal_inum; uint32_t s_journal_dev; uint32_t s_last_orphan; - uint32_t s_hash_seed[4]; - uint8_t s_def_hash_version; - - uint8_t s_reserved_char_pad; // 3 bytes total + uint8_t s_reserved_char_pad; uint16_t s_reserved_word_pad; - uint32_t s_default_mount_opts; uint32_t s_first_meta_bg; + uint32_t s_reserved[190]; +} __attribute__((packed)) ext2_super_block_t; - uint32_t s_reserved[190]; // 760 bytes total -} 
__attribute__((packed)) ext2_super_block_t; // ext2_super_block in kernel - -// https://github.com/torvalds/linux/blob/master/fs/ext2/ext2.h#L191 typedef struct { uint32_t bg_block_bitmap; uint32_t bg_inode_bitmap; uint32_t bg_inode_table; - uint16_t bg_free_blocks_count; uint16_t bg_free_inodes_count; uint16_t bg_used_dirs_count; uint16_t bg_pad; - - uint32_t bg_reserved[3]; // 12 bytes total -} __attribute__((packed)) ext2_bgd_t; // ext2_group_desc in kernel + uint32_t bg_reserved[3]; +} __attribute__((packed)) ext2_bgd_t; -// https://github.com/torvalds/linux/blob/master/fs/ext2/ext2.h#L290 typedef struct { uint16_t i_mode; uint16_t i_uid; - uint32_t i_size; uint32_t i_atime; uint32_t i_ctime; uint32_t i_mtime; uint32_t i_dtime; - uint16_t i_gid; uint16_t i_links_count; - uint32_t i_blocks; uint32_t i_flags; - - uint32_t l_i_reserved1; // OS dependent; Linux assumed - + uint32_t l_i_reserved1; uint32_t i_block[EXT2_N_BLOCKS]; - uint32_t i_generation; uint32_t i_file_acl; uint32_t i_dir_acl; uint32_t i_faddr; - - // OS DEPENDENT 2 (LINUX) - uint8_t l_i_frag; uint8_t l_i_fsize; - uint16_t i_pad1; uint16_t l_i_uid_high; uint16_t l_i_gid_high; - uint32_t l_i_reserved2; -} __attribute__((packed)) ext2_inode_t; // ext2_inode in kernel +} __attribute__((packed)) ext2_inode_t; +typedef struct { + uint32_t inode; + uint16_t rec_len; + uint8_t name_len; + uint8_t file_type; + char name[]; +} __attribute__((packed)) ext2_dirent_t; -// should be implemented in asm -// TODO: check asm implementations -extern void disk_read(uint32_t lba, uint32_t count, void* buf); -extern void* memcpy(void* dst, const void* src, uint32_t n); - +/* + * Transfers control to the 32-bit Linux kernel entry point. + * Args: entry is the kernel entry address, boot_params is the boot params address. + * Returns: never returns. 
+ */ +extern void jump_linux(uint32_t entry, uint32_t boot_params); static ext2_super_block_t super; +static uint32_t partition_lba; static uint32_t block_size; +static uint32_t inode_size; +static uint8_t block_buf[MAX_BLOCK_SIZE]; +static uint8_t block_buf2[MAX_BLOCK_SIZE]; +static uint8_t sector_buf[SECTOR_SIZE]; +/* + * Writes one byte to an I/O port. + * Args: port is the I/O port number, value is the byte to write. + * Returns: nothing. + */ +static inline void outb(uint16_t port, uint8_t value) { + __asm__ volatile ("outb %0, %1" : : "a"(value), "Nd"(port)); +} -void read_block(uint32_t block_num, void* buf) { - uint32_t lba = (block_num * block_size) / SECTOR_SIZE; - uint32_t sectors = block_size / SECTOR_SIZE; +/* + * Reads one byte from an I/O port. + * Args: port is the I/O port number. + * Returns: the byte read from the port. + */ +static inline uint8_t inb(uint16_t port) { + uint8_t value; + __asm__ volatile ("inb %1, %0" : "=a"(value) : "Nd"(port)); + return value; +} - disk_read(lba, sectors, buf); +/* + * Reads words from an I/O port into memory. + * Args: port is the I/O port number, addr is the destination buffer, count is the word count. + * Returns: nothing. + */ +static inline void insw(uint16_t port, void *addr, uint32_t count) { + __asm__ volatile ("cld; rep insw" + : "+D"(addr), "+c"(count) + : "d"(port) + : "memory"); } +/* + * Copies bytes from one memory area to another. + * Args: dst is the destination, src is the source, n is the byte count. + * Returns: dst. + */ +static void *memcpy(void *dst, const void *src, uint32_t n) { + uint8_t *d = (uint8_t*)dst; + const uint8_t *s = (const uint8_t*)src; + while (n--) *d++ = *s++; + return dst; +} -ext2_inode_t get_inode(uint32_t inode_num) { - uint32_t inodes_per_group = super.s_inodes_count; - uint32_t group = (inode_num - 1) / inodes_per_group; +/* + * Fills a memory area with a byte value. + * Args: dst is the destination, value is the byte value, n is the byte count. + * Returns: dst. 
+ */ +static void *memset(void *dst, int value, uint32_t n) { + uint8_t *d = (uint8_t*)dst; + while (n--) *d++ = (uint8_t)value; + return dst; +} - uint8_t bgd_buf[block_size]; - read_block(super.s_first_data_block + 1, bgd_buf); - ext2_bgd_t* bgd = (ext2_bgd_t*)bgd_buf + group; +/* + * Computes the length of a null-terminated string. + * Args: s is the string to measure. + * Returns: the number of bytes before the null terminator. + */ +static uint32_t strlen(const char *s) { + uint32_t n = 0; + while (s[n]) n++; + return n; +} + +/* + * Compares a fixed-length name with a null-terminated string. + * Args: a is the fixed-length name, b is the string, n is the length of a. + * Returns: nonzero if a matches b exactly, otherwise zero. + */ +static int streqn(const char *a, const char *b, uint32_t n) { + uint32_t i; + for (i = 0; i < n; i++) { + if (a[i] != b[i] || b[i] == 0) return 0; + } + return b[n] == 0; +} - // reading inode table - uint8_t inode_buf[block_size]; - uint32_t inode_offset = ((inode_num - 1) % inodes_per_group) * 128; - read_block(bgd->bg_inode_table + inode_offset / block_size, inode_buf); +/* + * Checks whether a fixed-length name starts with a prefix. + * Args: name is the fixed-length name, name_len is its length, prefix is the string prefix. + * Returns: nonzero if name starts with prefix, otherwise zero. + */ +static int startswith(const char *name, uint32_t name_len, const char *prefix) { + uint32_t i = 0; + while (prefix[i]) { + if (i >= name_len || name[i] != prefix[i]) return 0; + i++; + } + return 1; +} - ext2_inode_t inode; - memcpy(&inode, inode_buf + (inode_offset % block_size), sizeof(inode)); - - return inode; +/* + * Prints a string to VGA text memory and COM1. + * Args: s is the null-terminated string to print. + * Returns: nothing. 
+ */ +static void puts(const char *s) { + volatile uint16_t *vga = (volatile uint16_t*)0xB8000; + static uint32_t pos = 0; + while (*s) { + while ((inb(0x3F8 + 5) & 0x20u) == 0) {} + outb(0x3F8, (uint8_t)*s); + if (*s == '\n') { + pos = (pos / 80u + 1u) * 80u; + } else { + vga[pos++] = (uint16_t)(0x0700u | (uint8_t)*s); + } + if (pos >= 80u * 25u) pos = 0; + s++; + } } +/* + * Prints a fatal error message and halts the CPU. + * Args: msg is the error message to print. + * Returns: never returns. + */ +static void die(const char *msg) { + puts("PSLBoot: "); + puts(msg); + puts("\n"); + for (;;) __asm__ volatile ("hlt"); +} -// boot parameters are passed via Linux Boot Protocol -// https://github.com/torvalds/linux/blob/master/arch/x86/include/uapi/asm/bootparam.h -typedef struct { - uint8_t setup_sects; - - uint16_t root_flags; - - uint32_t syssize; - - uint16_t ram_size; - uint16_t vid_mode; - uint16_t root_dev; - uint16_t boot_flag; - uint16_t jump; - - uint32_t header; - - uint16_t version; - - uint32_t realmode_swtch; - - uint16_t start_sys_seg; - uint16_t kernel_version; - - uint8_t type_of_loader; - uint8_t loadflags; - - uint16_t setup_move_size; - - uint32_t code32_start; - uint32_t ramdisk_image; - uint32_t ramdisk_size; - uint32_t bootsect_kludge; - - uint16_t heap_end_ptr; - - uint8_t ext_loader_ver; - uint8_t ext_loader_type; - - uint32_t cmd_line_ptr; - uint32_t initrd_addr_max; - uint32_t kernel_alignment; - - uint8_t relocatable_kernel; - uint8_t min_alignment; - - uint16_t xloadflags; - - uint32_t cmdline_size; - uint32_t hardware_subarch; - - uint64_t hardware_subarch_data; - - uint32_t payload_offset; - uint32_t payload_length; - - uint64_t setup_data; - uint64_t pref_address; - - uint32_t init_size; - uint32_t handover_offset; - uint32_t kernel_info_offset; -} __attribute__((packed)) boot_params_t; +/* + * Waits until the primary ATA device is no longer busy. + * Args: none. + * Returns: nothing. 
+ */ +static void ata_wait_ready(void) { + uint8_t status; + do { + status = inb(0x1F7); + } while (status & 0x80u); +} +/* + * Waits until the primary ATA device is ready to transfer data. + * Args: none. + * Returns: nothing, or halts on an ATA error. + */ +static void ata_wait_drq(void) { + uint8_t status; + do { + status = inb(0x1F7); + if (status & 0x01u) die("ATA read error"); + } while ((status & 0x08u) == 0); +} -void stage2_main(void) { - // ==== reading ext2 superblock - uint8_t buf[1024]; - disk_read(2, 2, buf); - memcpy(&super, buf, sizeof(super)); +/* + * Reads sectors from the primary ATA disk using PIO. + * Args: lba is the first sector, count is the sector count, buf is the destination buffer. + * Returns: nothing. + */ +static void disk_read(uint32_t lba, uint32_t count, void *buf) { + uint8_t *dst = (uint8_t*)buf; + + while (count) { + uint8_t chunk = count > 255u ? 255u : (uint8_t)count; + uint32_t i; + + /* ATA PIO LBA28 read: select drive/head, sector count, LBA bytes, then command. */ + ata_wait_ready(); + outb(0x1F6, (uint8_t)(0xE0u | ((lba >> 24) & 0x0Fu))); + outb(0x1F2, chunk); + outb(0x1F3, (uint8_t)lba); + outb(0x1F4, (uint8_t)(lba >> 8)); + outb(0x1F5, (uint8_t)(lba >> 16)); + outb(0x1F7, 0x20); + + for (i = 0; i < chunk; i++) { + ata_wait_drq(); + insw(0x1F0, dst, SECTOR_SIZE / 2u); + dst += SECTOR_SIZE; + } + + lba += chunk; + count -= chunk; + } +} + +/* + * Reads a little-endian 16-bit integer from memory. + * Args: p points to the first byte of the integer. + * Returns: the decoded 16-bit value. + */ +static uint16_t rd16(const void *p) { + const uint8_t *b = (const uint8_t*)p; + return (uint16_t)b[0] | ((uint16_t)b[1] << 8); +} + +/* + * Reads a little-endian 32-bit integer from memory. + * Args: p points to the first byte of the integer. + * Returns: the decoded 32-bit value. 
+ */ +static uint32_t rd32(const void *p) { + const uint8_t *b = (const uint8_t*)p; + return (uint32_t)b[0] | ((uint32_t)b[1] << 8) | + ((uint32_t)b[2] << 16) | ((uint32_t)b[3] << 24); +} - // infinite loop if magic is invalid - if (super.s_magic != 0xEF53) while (1); +/* + * Writes a little-endian 16-bit integer to memory. + * Args: p points to the destination bytes, v is the value to write. + * Returns: nothing. + */ +static void wr16(void *p, uint16_t v) { + uint8_t *b = (uint8_t*)p; + b[0] = (uint8_t)v; + b[1] = (uint8_t)(v >> 8); +} - block_size = 1024 << super.s_log_block_size; +/* + * Writes a little-endian 32-bit integer to memory. + * Args: p points to the destination bytes, v is the value to write. + * Returns: nothing. + */ +static void wr32(void *p, uint32_t v) { + uint8_t *b = (uint8_t*)p; + b[0] = (uint8_t)v; + b[1] = (uint8_t)(v >> 8); + b[2] = (uint8_t)(v >> 16); + b[3] = (uint8_t)(v >> 24); +} - // TODO: look for vmlinuz - // ... traverse ext2 dirs ... +/* + * Detects the first usable Linux partition in the MBR. + * Args: none. + * Returns: nothing. + */ +static void detect_partition(void) { + uint32_t i; + disk_read(0, 1, sector_buf); + partition_lba = 0; + + /* Keep stage 2 in the post-MBR gap; only use the table to find filesystem start. */ + for (i = 0; i < 4; i++) { + uint8_t *p = sector_buf + MBR_PART_TABLE + i * MBR_PART_SIZE; + uint8_t type = p[4]; + if (type == MBR_PART_TYPE_LINUX || type == 0x00u) { + uint32_t start = rd32(p + 8); + if (start != 0) { + partition_lba = start; + return; + } + } + } +} - // TODO: Load kernel from KERNEL_LOAD_ADDR - // ... read inode blocks ... +/* + * Reads one ext2 filesystem block from disk. + * Args: block_num is the ext2 block number, buf is the destination buffer. + * Returns: nothing. 
+ */ +static void read_block(uint32_t block_num, void *buf) { + uint32_t sectors = block_size / SECTOR_SIZE; /* sectors per block; scaling block_num by this avoids the 32-bit wrap of block_num * block_size at 4 GiB */ + disk_read(partition_lba + block_num * sectors, sectors, buf); +} - // ==== filling boot_params and jumping to kernel - boot_params_t *bp = (boot_params_t*)0x90000; - bp->cmd_line_ptr = 0x91000; - // write "root=/dev/sda1 ro quiet" to 0x91000 +/* + * Reads an ext2 inode by number. + * Args: inode_num is the ext2 inode number, inode is the destination structure. + * Returns: nothing. + */ +static void read_inode(uint32_t inode_num, ext2_inode_t *inode) { + uint32_t group = (inode_num - 1u) / super.s_inodes_per_group; + uint32_t index = (inode_num - 1u) % super.s_inodes_per_group; + uint32_t desc_block = super.s_first_data_block + 1u; + uint32_t desc_offset = group * sizeof(ext2_bgd_t); + ext2_bgd_t *bgd; + uint32_t inode_offset; + + read_block(desc_block + desc_offset / block_size, block_buf); + bgd = (ext2_bgd_t*)(block_buf + (desc_offset % block_size)); + + /* Inode tables are per block group; inode numbers are globally 1-based. */ + inode_offset = index * inode_size; + read_block(bgd->bg_inode_table + inode_offset / block_size, block_buf); + memcpy(inode, block_buf + (inode_offset % block_size), sizeof(ext2_inode_t)); +} - // ==== jump to kernel entry point (usually KERNEL_LOAD_ADDR + 0x200) - void (*kernel_entry)(void) = (void*)(KERNEL_LOAD_ADDR + 0x200); - kernel_entry(); +/* + * Resolves a file-relative block index to a physical ext2 block. + * Args: inode describes the file, file_block is the zero-based file block index. + * Returns: the physical block number, or zero for a sparse block. + */ +static uint32_t inode_block_at(const ext2_inode_t *inode, uint32_t file_block) { + uint32_t ptrs = block_size / 4u; + uint32_t *table; + + if (file_block < EXT2_NDIR_BLOCKS) return inode->i_block[file_block]; + file_block -= EXT2_NDIR_BLOCKS; + + /* Debian kernels and initrds commonly need single or double indirect blocks. 
*/ + if (file_block < ptrs) { + if (!inode->i_block[EXT2_IND_BLOCK]) return 0; + read_block(inode->i_block[EXT2_IND_BLOCK], block_buf2); + table = (uint32_t*)block_buf2; + return table[file_block]; + } + file_block -= ptrs; + + if (file_block < ptrs * ptrs) { + uint32_t first = file_block / ptrs; + uint32_t second = file_block % ptrs; + if (!inode->i_block[EXT2_DIND_BLOCK]) return 0; + read_block(inode->i_block[EXT2_DIND_BLOCK], block_buf2); + table = (uint32_t*)block_buf2; + if (!table[first]) return 0; + read_block(table[first], block_buf2); + table = (uint32_t*)block_buf2; + return table[second]; + } + + die("file uses triple-indirect blocks"); + return 0; +} + +/* + * Reads a byte range from an ext2 file into memory. + * Args: inode describes the file, offset and size select the range, dst is the destination. + * Returns: nothing. + */ +static void read_file_range(const ext2_inode_t *inode, uint32_t offset, + uint32_t size, void *dst) { + uint8_t *out = (uint8_t*)dst; + uint32_t left = size; + + while (left) { + uint32_t file_block = offset / block_size; + uint32_t block_off = offset % block_size; + uint32_t chunk = block_size - block_off; + uint32_t phys = inode_block_at(inode, file_block); + + if (chunk > left) chunk = left; + if (phys == 0) { + memset(out, 0, chunk); + } else if (block_off == 0 && chunk == block_size) { + read_block(phys, out); + } else { + read_block(phys, block_buf); + memcpy(out, block_buf + block_off, chunk); + } + + offset += chunk; + out += chunk; + left -= chunk; + } +} + +/* + * Finds an exact file name inside an ext2 directory. + * Args: dir is the directory inode, name is the null-terminated name to find. + * Returns: the matching inode number, or zero if not found. 
+ */ +static uint32_t find_in_dir(const ext2_inode_t *dir, const char *name) { + uint32_t off = 0; + uint32_t name_len = strlen(name); + + while (off < dir->i_size) { + uint32_t chunk = dir->i_size - off; + uint32_t pos = 0; + if (chunk > block_size) chunk = block_size; + read_file_range(dir, off, chunk, block_buf); + + /* ext2 directory entries are variable-length records packed into file blocks. */ + while (pos + 8u <= chunk) { + ext2_dirent_t *de = (ext2_dirent_t*)(block_buf + pos); + if (de->rec_len == 0) die("bad ext2 dirent"); + if (de->inode && de->name_len == name_len && + streqn(de->name, name, name_len)) { + return de->inode; + } + pos += de->rec_len; + } + off += chunk; + } + + return 0; +} + +/* + * Finds the first file name with a prefix inside an ext2 directory. + * Args: dir is the directory inode, prefix is the null-terminated prefix to find. + * Returns: the matching inode number, or zero if not found. + */ +static uint32_t find_prefixed_in_dir(const ext2_inode_t *dir, const char *prefix) { + uint32_t off = 0; + + while (off < dir->i_size) { + uint32_t chunk = dir->i_size - off; + uint32_t pos = 0; + if (chunk > block_size) chunk = block_size; + read_file_range(dir, off, chunk, block_buf); + + while (pos + 8u <= chunk) { + ext2_dirent_t *de = (ext2_dirent_t*)(block_buf + pos); + if (de->rec_len == 0) die("bad ext2 dirent"); + if (de->inode && startswith(de->name, de->name_len, prefix)) { + return de->inode; + } + pos += de->rec_len; + } + off += chunk; + } + + return 0; +} + +/* + * Resolves an absolute ext2 path to an inode number. + * Args: path is the null-terminated absolute path. + * Returns: the inode number, or zero if not found. + */ +static uint32_t path_lookup(const char *path) { + ext2_inode_t dir; + uint32_t ino = EXT2_ROOT_INO; + const char *p = path; + + if (*p == '/') p++; + read_inode(ino, &dir); + + while (*p) { + char component[64]; + uint32_t n = 0; + /* Walk one path component at a time because ext2 has no pathname index here. 
*/ + while (p[n] && p[n] != '/') { + if (n + 1u >= sizeof(component)) die("path component too long"); + component[n] = p[n]; + n++; + } + component[n] = 0; + + ino = find_in_dir(&dir, component); + if (!ino) return 0; + read_inode(ino, &dir); + + p += n; + if (*p == '/') p++; + } + + return ino; +} + +/* + * Finds a boot file by exact path or by prefix in /boot. + * Args: exact is the preferred path, prefix is the fallback name prefix. + * Returns: the matching inode number, or zero if not found. + */ +static uint32_t find_boot_file(const char *exact, const char *prefix) { + uint32_t ino = path_lookup(exact); + uint32_t boot_ino; + ext2_inode_t boot_dir; + ext2_inode_t exact_inode; + + if (ino) { + read_inode(ino, &exact_inode); + if ((exact_inode.i_mode & EXT2_S_IFMT) == EXT2_S_IFREG) return ino; + } + boot_ino = path_lookup("/boot"); + if (!boot_ino) return 0; + read_inode(boot_ino, &boot_dir); + if ((boot_dir.i_mode & EXT2_S_IFMT) != EXT2_S_IFDIR) return 0; + return find_prefixed_in_dir(&boot_dir, prefix); +} + +/* + * Copies the built-in Linux command line to the boot parameter area. + * Args: none. + * Returns: nothing. + */ +static void copy_cmdline(void) { + static const char cmdline[] = + "root=/dev/sda1 ro mem=512M console=tty0 console=ttyS0,115200n8"; + memcpy((void*)CMDLINE_ADDR, cmdline, sizeof(cmdline)); +} + +/* + * Copies the BIOS E820 memory map collected by Stage 1 into the zeropage. + * Args: bp is the boot params base address. + * Returns: nothing. + */ +static void copy_e820(uint8_t *bp) { + uint32_t count = *(volatile uint32_t*)E820_SRC_COUNT; + + if (count == 0 || count > E820_MAX_ENTRIES) { + /* Fallback: a single conservative RAM region for QEMU (~512 MiB). 
*/ + uint8_t *e = bp + ZP_E820_TABLE; + memset(e, 0, 20); + wr32(e + 0, 0x00000000u); /* base low */ + wr32(e + 8, 0x20000000u); /* size low = 512 MiB */ + wr32(e + 16, 1u); /* type = usable RAM */ + bp[ZP_E820_ENTRIES] = 1u; + return; + } + + /* Each entry is 20 bytes: u64 addr, u64 size, u32 type. */ + memcpy(bp + ZP_E820_TABLE, (const void*)E820_SRC_ENTRIES, count * 20u); + bp[ZP_E820_ENTRIES] = (uint8_t)count; +} + +/* + * Loads Linux from ext2 and transfers execution to the kernel. + * Args: none. + * Returns: never returns on success. + */ +void stage2_main(void) { + ext2_inode_t kernel; + ext2_inode_t initrd; + uint32_t kernel_ino; + uint32_t initrd_ino; + uint8_t *bp = (uint8_t*)BOOT_PARAMS_ADDR; + uint8_t setup_sects; + uint32_t kernel_offset; + uint32_t entry; + + outb(0x3F8 + 1, 0x00); + outb(0x3F8 + 3, 0x80); + outb(0x3F8 + 0, 0x01); + outb(0x3F8 + 1, 0x00); + outb(0x3F8 + 3, 0x03); + outb(0x3F8 + 2, 0xC7); + outb(0x3F8 + 4, 0x0B); + + puts("PSLBoot\n"); + + detect_partition(); + disk_read(partition_lba + EXT2_SUPER_LBA, 2, block_buf); + memcpy(&super, block_buf, sizeof(super)); + + if (super.s_magic != EXT2_MAGIC) die("ext2 superblock not found"); + if (super.s_feature_incompat & EXT2_FEATURE_INCOMPAT_EXTENTS) { + die("extents are not supported; create a real ext2 filesystem"); + } + + block_size = 1024u << super.s_log_block_size; + if (block_size > MAX_BLOCK_SIZE) die("ext2 block size > 4096"); + + inode_size = super.s_inode_size ? super.s_inode_size : 128u; + if (inode_size < sizeof(ext2_inode_t)) die("unsupported inode size"); + + kernel_ino = find_boot_file("/boot/vmlinuz", "vmlinuz"); + if (!kernel_ino) die("kernel not found"); + read_inode(kernel_ino, &kernel); + if ((kernel.i_mode & EXT2_S_IFMT) != EXT2_S_IFREG) die("kernel is not regular"); + + memset(bp, 0, 4096); + read_file_range(&kernel, 0, 4096, bp); + + /* The first 4 KiB of a bzImage contains setup code plus the boot protocol header. 
*/ + if (rd16(bp + 0x1FE) != 0xAA55u || rd32(bp + ZP_HEADER) != 0x53726448u) { + die("bad Linux kernel header"); + } + if (rd16(bp + ZP_VERSION) < 0x0200u) die("Linux boot protocol too old"); + + setup_sects = bp[ZP_SETUP_SECTS] ? bp[ZP_SETUP_SECTS] : 4u; + kernel_offset = ((uint32_t)setup_sects + 1u) * SECTOR_SIZE; + if (kernel_offset >= kernel.i_size) die("bad kernel size"); + + /* Load only the protected-mode payload; the setup sectors stay in boot_params. */ + puts("Loading kernel\n"); + read_file_range(&kernel, kernel_offset, kernel.i_size - kernel_offset, + (void*)KERNEL_LOAD_ADDR); + + initrd_ino = find_boot_file("/boot/initrd.img", "initrd.img"); + if (initrd_ino) { + read_inode(initrd_ino, &initrd); + if ((initrd.i_mode & EXT2_S_IFMT) == EXT2_S_IFREG) { + puts("Loading initrd\n"); + read_file_range(&initrd, 0, initrd.i_size, (void*)INITRD_LOAD_ADDR); + wr32(bp + ZP_RAMDISK_IMAGE, INITRD_LOAD_ADDR); + wr32(bp + ZP_RAMDISK_SIZE, initrd.i_size); + } + } + + copy_cmdline(); + copy_e820(bp); + bp[ZP_TYPE_OF_LOADER] = 0xFFu; + bp[ZP_LOADFLAGS] |= LOADED_HIGH | CAN_USE_HEAP; + /* These zeropage fields are the minimum Linux needs for cmdline and initrd. */ + wr16(bp + ZP_HEAP_END_PTR, 0xFE00u); + wr32(bp + ZP_CMD_LINE_PTR, CMDLINE_ADDR); + wr32(bp + ZP_INITRD_ADDR_MAX, 0x7FFFFFFFu); + + entry = rd32(bp + ZP_CODE32_START); + if (!entry) entry = KERNEL_LOAD_ADDR; + + puts("Booting Linux\n"); + jump_linux(entry, BOOT_PARAMS_ADDR); }