You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

14 KiB

Tutorial 07 - UART Chainloader

tl;dr

Running from an SD card was a nice experience, but it would be extremely tedious to do it for every new binary. Let's write a chainloader using position independent code. This will be the last binary you need to put on the SD card for quite some time. Each following tutorial will provide a chainboot target in the Makefile that lets you conveniently load the kernel over UART.

Our chainloader is called MiniLoad and is inspired by raspbootin.

You can try it with this tutorial already:

  1. Copy kernel8.img to the SD card.
  2. Execute make chainboot.
  3. Now plug in the USB Serial.
  4. Let the magic happen.

In this tutorial, a version of the kernel from the previous tutorial is loaded for demo purposes. In subsequent tutorials, it will be the working directory's kernel.

Observing the jump

The Makefile in this tutorial has an additional target, qemuasm, that lets you nicely observe the jump from the loaded address (0x80_XXX) to the relocated code at (0x3EFF_0XXX):

make qemuasm
[...]
IN:
0x000809fc:  d0000008  adrp     x8, #0x82000
0x00080a00:  52800020  movz     w0, #0x1
0x00080a04:  f9408908  ldr      x8, [x8, #0x110]
0x00080a08:  d63f0100  blr      x8

----------------
IN:
0x3eff0528:  d0000008  adrp     x8, #0x3eff2000
0x3eff052c:  d0000009  adrp     x9, #0x3eff2000
0x3eff0530:  f9411508  ldr      x8, [x8, #0x228]
0x3eff0534:  f9411929  ldr      x9, [x9, #0x230]
0x3eff0538:  eb08013f  cmp      x9, x8
0x3eff053c:  540000c2  b.hs     #0x3eff0554
[...]

Diff to previous

Binary files 06_drivers_gpio_uart/demo_payload.img and 07_uart_chainloader/demo_payload.img differ

diff -uNr 06_drivers_gpio_uart/Makefile 07_uart_chainloader/Makefile
--- 06_drivers_gpio_uart/Makefile
+++ 07_uart_chainloader/Makefile
@@ -15,7 +15,7 @@
 	QEMU_MACHINE_TYPE = raspi3
 	QEMU_MISC_ARGS = -serial null -serial stdio
 	LINKER_FILE = src/bsp/rpi3/link.ld
-	RUSTC_MISC_ARGS = -C target-cpu=cortex-a53
+	RUSTC_MISC_ARGS = -C target-cpu=cortex-a53 -C relocation-model=pic
 endif

 SOURCES = $(wildcard **/*.rs) $(wildcard **/*.S) $(wildcard **/*.ld)
@@ -39,9 +39,14 @@

 DOCKER_CMD        = docker run -it --rm
 DOCKER_ARG_CURDIR = -v $(shell pwd):/work -w /work
-DOCKER_EXEC_QEMU  = $(QEMU_BINARY) -M $(QEMU_MACHINE_TYPE) -kernel $(OUTPUT)
+DOCKER_ARG_TTY    = --privileged -v /dev:/dev

-.PHONY: all doc qemu clippy clean readelf objdump nm
+DOCKER_EXEC_QEMU         = $(QEMU_BINARY) -M $(QEMU_MACHINE_TYPE) -kernel $(OUTPUT)
+DOCKER_EXEC_RASPBOOT     = raspbootcom
+DOCKER_EXEC_RASPBOOT_DEV = /dev/ttyUSB0
+# DOCKER_EXEC_RASPBOOT_DEV = /dev/ttyACM0
+
+.PHONY: all doc qemu qemuasm chainboot clippy clean readelf objdump nm

 all: clean $(OUTPUT)

@@ -60,6 +65,15 @@
 	$(DOCKER_CMD) $(DOCKER_ARG_CURDIR) $(CONTAINER_UTILS) \
 	$(DOCKER_EXEC_QEMU) $(QEMU_MISC_ARGS)

+qemuasm: all
+	$(DOCKER_CMD) $(DOCKER_ARG_CURDIR) $(CONTAINER_UTILS) \
+	$(DOCKER_EXEC_QEMU) -d in_asm
+
+chainboot:
+	$(DOCKER_CMD) $(DOCKER_ARG_CURDIR) $(DOCKER_ARG_TTY) \
+	$(CONTAINER_UTILS) $(DOCKER_EXEC_RASPBOOT) $(DOCKER_EXEC_RASPBOOT_DEV) \
+	demo_payload.img
+
 clippy:
 	cargo xclippy --target=$(TARGET) --features $(BSP)


diff -uNr 06_drivers_gpio_uart/src/arch/aarch64.rs 07_uart_chainloader/src/arch/aarch64.rs
--- 06_drivers_gpio_uart/src/arch/aarch64.rs
+++ 07_uart_chainloader/src/arch/aarch64.rs
@@ -22,7 +22,7 @@

     if bsp::BOOT_CORE_ID == MPIDR_EL1.get() & CORE_MASK {
         SP.set(bsp::BOOT_CORE_STACK_START);
-        crate::runtime_init::init()
+        crate::relocate::relocate_self::<u64>()
     } else {
         // if not core0, infinitely wait for events
         wait_forever()

diff -uNr 06_drivers_gpio_uart/src/bsp/driver/bcm/bcm2xxx_mini_uart.rs 07_uart_chainloader/src/bsp/driver/bcm/bcm2xxx_mini_uart.rs
--- 06_drivers_gpio_uart/src/bsp/driver/bcm/bcm2xxx_mini_uart.rs
+++ 07_uart_chainloader/src/bsp/driver/bcm/bcm2xxx_mini_uart.rs
@@ -50,10 +50,12 @@
         /// shifting out the last bit).
         TX_IDLE    OFFSET(6) NUMBITS(1) [],

-        /// This bit is set if the transmit FIFO can accept at least one byte.
+        /// This bit is set if the transmit FIFO can accept at least
+        /// one byte.
         TX_EMPTY   OFFSET(5) NUMBITS(1) [],

-        /// This bit is set if the receive FIFO holds at least 1 symbol.
+        /// This bit is set if the receive FIFO holds at least 1
+        /// symbol.
         DATA_READY OFFSET(0) NUMBITS(1) []
     ],

@@ -247,6 +249,15 @@
         let mut r = &self.inner;
         r.lock(|inner| fmt::Write::write_fmt(inner, args))
     }
+
+    fn flush(&self) {
+        let mut r = &self.inner;
+        r.lock(|inner| loop {
+            if inner.AUX_MU_LSR.is_set(AUX_MU_LSR::TX_IDLE) {
+                break;
+            }
+        });
+    }
 }

 impl interface::console::Read for MiniUart {
@@ -263,14 +274,14 @@
             }

             // Read one character.
-            let mut ret = inner.AUX_MU_IO.get() as u8 as char;
-
-            // Convert carrige return to newline.
-            if ret == '\r' {
-                ret = '\n'
-            }
+            inner.AUX_MU_IO.get() as u8 as char
+        })
+    }

-            ret
+    fn clear(&self) {
+        let mut r = &self.inner;
+        r.lock(|inner| {
+            inner.AUX_MU_IIR.write(AUX_MU_IIR::FIFO_CLEAR::All);
         })
     }
 }

diff -uNr 06_drivers_gpio_uart/src/bsp/rpi3/link.ld 07_uart_chainloader/src/bsp/rpi3/link.ld
--- 06_drivers_gpio_uart/src/bsp/rpi3/link.ld
+++ 07_uart_chainloader/src/bsp/rpi3/link.ld
@@ -5,9 +5,10 @@

 SECTIONS
 {
-    /* Set current address to the value from which the RPi3 starts execution */
-    . = 0x80000;
+    /* Set the link address to the top-most 64 KiB of DRAM */
+    . = 0x3F000000 - 0x10000;

+    __binary_start = .;
     .text :
     {
         *(.text._start) *(.text*)
@@ -31,5 +32,14 @@
         __bss_end = .;
     }

+    .got :
+    {
+        *(.got*)
+    }
+
+    /* Fill up to 8 byte, b/c relocating the binary is done in u64 chunks */
+    . = ALIGN(8);
+    __binary_end = .;
+
     /DISCARD/ : { *(.comment*) }
 }

diff -uNr 06_drivers_gpio_uart/src/bsp/rpi3.rs 07_uart_chainloader/src/bsp/rpi3.rs
--- 06_drivers_gpio_uart/src/bsp/rpi3.rs
+++ 07_uart_chainloader/src/bsp/rpi3.rs
@@ -12,6 +12,9 @@
 pub const BOOT_CORE_ID: u64 = 0;
 pub const BOOT_CORE_STACK_START: u64 = 0x80_000;

+/// The address on which the RPi3 firmware loads every binary by default.
+pub const BOARD_DEFAULT_LOAD_ADDRESS: usize = 0x80_000;
+
 //--------------------------------------------------------------------------------------------------
 // Global BSP driver instances
 //--------------------------------------------------------------------------------------------------
@@ -34,8 +37,7 @@
     &MINI_UART
 }

-/// Return an array of references to all `DeviceDriver` compatible `BSP`
-/// drivers.
+/// Return an array of references to all `DeviceDriver` compatible `BSP` drivers.
 ///
 /// # Safety
 ///

diff -uNr 06_drivers_gpio_uart/src/interface.rs 07_uart_chainloader/src/interface.rs
--- 06_drivers_gpio_uart/src/interface.rs
+++ 07_uart_chainloader/src/interface.rs
@@ -26,6 +26,10 @@
     pub trait Write {
         fn write_char(&self, c: char);
         fn write_fmt(&self, args: fmt::Arguments) -> fmt::Result;
+
+        /// Block execution until the last character has been physically put on the TX wire
+        /// (draining TX buffers/FIFOs, if any).
+        fn flush(&self);
     }

     /// Console read functions.
@@ -33,6 +37,9 @@
         fn read_char(&self) -> char {
             ' '
         }
+
+        /// Clear RX buffers, if any.
+        fn clear(&self);
     }

     /// Console statistics.

diff -uNr 06_drivers_gpio_uart/src/main.rs 07_uart_chainloader/src/main.rs
--- 06_drivers_gpio_uart/src/main.rs
+++ 07_uart_chainloader/src/main.rs
@@ -29,7 +29,11 @@
 // the first function to run.
 mod arch;

-// `_start()` then calls `runtime_init::init()`, which on completion, jumps to `kernel_entry()`.
+// `_start()` then calls `relocate::relocate_self()`.
+mod relocate;
+
+// `relocate::relocate_self()` calls `runtime_init::init()`, which on completion, jumps to
+// `kernel_entry()`.
 mod runtime_init;

 // Conditionally includes the selected `BSP` code.
@@ -46,18 +50,48 @@
     // Run the BSP's initialization code.
     bsp::init();

-    println!("[0] Booting on: {}", bsp::board_name());
+    println!(" __  __ _      _ _                 _ ");
+    println!("|  \\/  (_)_ _ (_) |   ___  __ _ __| |");
+    println!("| |\\/| | | ' \\| | |__/ _ \\/ _` / _` |");
+    println!("|_|  |_|_|_||_|_|____\\___/\\__,_\\__,_|");
+    println!();
+    println!("{:^37}", bsp::board_name());
+    println!();
+    println!("[ML] Requesting binary");
+    bsp::console().flush();
+
+    // Clear the RX FIFOs, if any, of spurious received characters before starting with the loader
+    // protocol.
+    bsp::console().clear();
+
+    // Notify raspbootcom to send the binary.
+    for _ in 0..3 {
+        bsp::console().write_char(3 as char);
+    }

-    println!("[1] Drivers loaded:");
-    for (i, driver) in bsp::device_drivers().iter().enumerate() {
-        println!("      {}. {}", i + 1, driver.compatible());
+    // Read the binary's size.
+    let mut size: u32 = u32::from(bsp::console().read_char() as u8);
+    size |= u32::from(bsp::console().read_char() as u8) << 8;
+    size |= u32::from(bsp::console().read_char() as u8) << 16;
+    size |= u32::from(bsp::console().read_char() as u8) << 24;
+
+    // Trust it's not too big.
+    print!("OK");
+
+    let kernel_addr: *mut u8 = bsp::BOARD_DEFAULT_LOAD_ADDRESS as *mut u8;
+    unsafe {
+        // Read the kernel byte by byte.
+        for i in 0..size {
+            *kernel_addr.offset(i as isize) = bsp::console().read_char() as u8;
+        }
     }

-    println!("[2] Chars written: {}", bsp::console().chars_written());
-    println!("[3] Echoing input now");
+    println!("[ML] Loaded! Executing the payload now\n");
+    bsp::console().flush();

-    loop {
-        let c = bsp::console().read_char();
-        bsp::console().write_char(c);
-    }
+    // Use black magic to get a function pointer.
+    let kernel: extern "C" fn() -> ! = unsafe { core::mem::transmute(kernel_addr as *const ()) };
+
+    // Jump to loaded kernel!
+    kernel()
 }

diff -uNr 06_drivers_gpio_uart/src/relocate.rs 07_uart_chainloader/src/relocate.rs
--- 06_drivers_gpio_uart/src/relocate.rs
+++ 07_uart_chainloader/src/relocate.rs
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: MIT
+//
+// Copyright (c) 2018-2019 Andre Richter <andre.o.richter@gmail.com>
+
+//! Relocation code.
+
+/// Relocates the own binary from `bsp::BOARD_DEFAULT_LOAD_ADDRESS` to the `__binary_start` address
+/// from the linker script.
+///
+/// # Safety
+///
+/// - Only a single core must be active and running this function.
+/// - Function must not use the `bss` section.
+pub unsafe fn relocate_self<T>() -> ! {
+    extern "C" {
+        static __binary_start: usize;
+        static __binary_end: usize;
+    }
+
+    let binary_start_addr: usize = &__binary_start as *const _ as _;
+    let binary_end_addr: usize = &__binary_end as *const _ as _;
+    let binary_size_in_byte: usize = binary_end_addr - binary_start_addr;
+
+    // Get the relocation destination address from the linker symbol.
+    let mut reloc_dst_addr: *mut T = binary_start_addr as *mut T;
+
+    // The address of where the previous firmware loaded us.
+    let mut src_addr: *const T = crate::bsp::BOARD_DEFAULT_LOAD_ADDRESS as *const _;
+
+    // Copy the whole binary.
+    //
+    // This is essentially a `memcpy()` optimized for throughput by transferring in chunks of T.
+    let n = binary_size_in_byte / core::mem::size_of::<T>();
+    for _ in 0..n {
+        use core::ptr;
+
+        ptr::write_volatile::<T>(reloc_dst_addr, ptr::read_volatile::<T>(src_addr));
+        reloc_dst_addr = reloc_dst_addr.offset(1);
+        src_addr = src_addr.offset(1);
+    }
+
+    // Call `init()` through a trait object, causing the jump to use an absolute address to reach
+    // the relocated binary. An elaborate explanation can be found in the runtime_init.rs source
+    // comments.
+    crate::runtime_init::get().init()
+}

diff -uNr 06_drivers_gpio_uart/src/runtime_init.rs 07_uart_chainloader/src/runtime_init.rs
--- 06_drivers_gpio_uart/src/runtime_init.rs
+++ 07_uart_chainloader/src/runtime_init.rs
@@ -4,23 +4,41 @@

 //! Rust runtime initialization code.

-/// Equivalent to `crt0` or `c0` code in C/C++ world. Clears the `bss` section, then calls the
-/// kernel entry.
+/// We are outsmarting the compiler here by using a trait as a layer of indirection. Because we are
+/// generating PIC code, a static dispatch to `init()` would generate a relative jump from the
+/// caller to `init()`. However, when calling `init()`, code just finished copying the binary to the
+/// actual link-time address, and hence is still running at whatever location the previous loader
+/// has put it. So we do not want a relative jump, because it would not jump to the relocated code.
 ///
-/// Called from `BSP` code.
-///
-/// # Safety
-///
-/// - Only a single core must be active and running this function.
-pub unsafe fn init() -> ! {
-    extern "C" {
-        // Boundaries of the .bss section, provided by the linker script.
-        static mut __bss_start: u64;
-        static mut __bss_end: u64;
+/// By indirecting through a trait object, we can make use of the property that vtables store
+/// absolute addresses. So calling `init()` this way will kick execution to the relocated binary.
+pub trait RunTimeInit {
+    /// Equivalent to `crt0` or `c0` code in C/C++ world. Clears the `bss` section, then calls the
+    /// kernel entry.
+    ///
+    /// Called from `BSP` code.
+    ///
+    /// # Safety
+    ///
+    /// - Only a single core must be active and running this function.
+    unsafe fn init(&self) -> ! {
+        extern "C" {
+            // Boundaries of the .bss section, provided by the linker script.
+            static mut __bss_start: u64;
+            static mut __bss_end: u64;
+        }
+
+        // Zero out the .bss section.
+        r0::zero_bss(&mut __bss_start, &mut __bss_end);
+
+        crate::kernel_entry()
     }
+}

-    // Zero out the .bss section.
-    r0::zero_bss(&mut __bss_start, &mut __bss_end);
+struct Traitor;
+impl RunTimeInit for Traitor {}

-    crate::kernel_entry()
+/// Give the caller a `RunTimeInit` trait object.
+pub fn get() -> &'static dyn RunTimeInit {
+    &Traitor {}
 }