Add tutorial 0D_cache_performance

pull/9/head
Andre Richter 6 years ago
parent bf2a1fff7e
commit c65e2e56cd
No known key found for this signature in database
GPG Key ID: 2116C1AB102F615E

Binary file not shown.

Binary file not shown.

@ -219,8 +219,8 @@ pub unsafe fn init() {
// First, force all previous changes to be seen before the MMU is enabled.
barrier::isb(barrier::SY);
// Enable the MMU
SCTLR_EL1.modify(SCTLR_EL1::M::Enable);
// Enable the MMU and turn on caching
SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable);
// Force MMU init to complete before next instruction
barrier::isb(barrier::SY);

@ -0,0 +1,6 @@
[target.aarch64-unknown-none]
rustflags = [
"-C", "link-arg=-Tlink.ld",
"-C", "target-feature=-fp-armv8",
"-C", "target-cpu=cortex-a53",
]

@ -0,0 +1,55 @@
[[package]]
name = "cortex-a"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "kernel8"
version = "0.1.0"
dependencies = [
"cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"raspi3_boot 0.1.0",
"register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "panic-abort"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "r0"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "raspi3_boot"
version = "0.1.0"
dependencies = [
"cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "register"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "tock-registers"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe913628f34718bc9e7d0d07e13ca1374638f64f0edc6eb063ec8abe581d395d"
"checksum panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6bc796c620f27056d4ffe7c558533fd67ae5af0fd8e919fbe38de803368af73e"
"checksum r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2a38df5b15c8d5c7e8654189744d8e396bddc18ad48041a500ce52d6948941f"
"checksum register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "157a11ac0b1882ff4a527a92f911dd288df17367faaaa0c36f188cd61ec36fc1"
"checksum tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3a385d94f3f62e60445a0adb9ff8d9621faa272234530d4c0f848ec98f88e316"

@ -0,0 +1,12 @@
[package]
name = "kernel8"
version = "0.1.0"
authors = ["Andre Richter <andre.o.richter@gmail.com>"]
[dependencies]
raspi3_boot = { path = "raspi3_boot" }
cortex-a = "2.2.1"
register = "0.2.0"
[package.metadata.cargo-xbuild]
sysroot_path = "../xbuild_sysroot"

@ -0,0 +1,66 @@
#
# MIT License
#
# Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
TARGET = aarch64-unknown-none
OBJCOPY = cargo objcopy --
OBJCOPY_PARAMS = --strip-all -O binary
UTILS_CONTAINER = andrerichter/raspi3-utils
DOCKER_CMD = docker run -it --rm -v $(shell pwd):/work -w /work
DOCKER_TTY = --privileged -v /dev:/dev
QEMU_CMD = qemu-system-aarch64 -M raspi3 -kernel kernel8.img
RASPBOOT_CMD = raspbootcom /dev/ttyUSB0 kernel8.img
all: clean kernel8.img
target/$(TARGET)/debug/kernel8: src/main.rs
cargo xbuild --target=$(TARGET)
cp $@ .
target/$(TARGET)/release/kernel8: src/main.rs
cargo xbuild --target=$(TARGET) --release
cp $@ .
ifeq ($(DEBUG),1)
kernel8: target/$(TARGET)/debug/kernel8
else
kernel8: target/$(TARGET)/release/kernel8
endif
kernel8.img: kernel8
$(OBJCOPY) $(OBJCOPY_PARAMS) $< kernel8.img
qemu: all
$(DOCKER_CMD) $(UTILS_CONTAINER) $(QEMU_CMD) -serial stdio
raspboot: all
$(DOCKER_CMD) $(DOCKER_TTY) $(UTILS_CONTAINER) $(RASPBOOT_CMD)
clippy:
cargo xclippy --target=$(TARGET)
clean:
cargo clean
rm -f kernel8

@ -0,0 +1,57 @@
# Tutorial 0D - Cache Performance
Now that we finally have virtual memory capabilities available, we also have
fine grained control over `cacheability`. You've caught a glimpse already in the
last tutorial, where we used page table entries to reference the `MAIR_EL1`
register to indicate the cacheability of a page or block.
Unfortunately, for the user it is often hard to grasp the advantage of caching
in early stages of OS or bare-metal software development. This tutorial is a
short interlude that tries to give you a feeling of what caching can do for
performance.
## Benchmark
Let's write a tiny, arbitrary micro-benchmark to showcase the performance of
operating on the same DRAM with caching enabled and disabled.
### mmu.rs
Therefore, we will map the same physical memory via two different virtual
addresses. We set up our pagetables such that the virtual address `0x200000`
points to the physical DRAM at `0x400000`, and we configure it as
`non-cacheable` in the page tables.
We are still using a `2 MiB` granule, and set up the next block, which starts at
virtual `0x400000`, to point at physical `0x400000` (this is an identity mapped
block). This time, the block is configured as cacheable.
### benchmark.rs
We write a little function that iteratively reads memory of five times the size
of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
read the value, add 1, and write it back. This whole process is repeated
`100_000` times.
### main.rs
The benchmark function is called twice. Once for the cacheable and once for the
non-cacheable virtual addresses. Remember that both virtual addresses point to
the _same_ physical DRAM, so the difference in time that we will see will
showcase how much faster it is to operate on DRAM with caching enabled.
## Results
On my Raspberry, I get the following results:
```text
Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000:
664 miliseconds.
Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000:
148 miliseconds.
With caching, the function is 348% faster!
```
Impressive, isn't it?

Binary file not shown.

Binary file not shown.

@ -0,0 +1,57 @@
/*
* MIT License
*
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
ENTRY(_boot_cores);
SECTIONS
{
. = 0x80000; /* This is already 4KiB aligned */
__ro_start = .;
.text :
{
KEEP(*(.text.boot)) *(.text .text.*)
}
.rodata :
{
*(.rodata .rodata.*)
}
. = ALIGN(4096); /* Fill up to 4KiB */
__ro_end = .;
.data :
{
*(.data .data.*)
}
.bss ALIGN(8):
{
__bss_start = .;
*(.bss .bss.*)
*(COMMON)
__bss_end = .;
}
/DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) }
}

@ -0,0 +1,9 @@
[package]
name = "raspi3_boot"
version = "0.1.0"
authors = ["Andre Richter <andre.o.richter@gmail.com>"]
[dependencies]
cortex-a = "2.2.1"
panic-abort = "0.2.0"
r0 = "0.2.2"

@ -0,0 +1,131 @@
/*
* MIT License
*
* Copyright (c) 2018 Jorge Aparicio
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#![deny(missing_docs)]
#![deny(warnings)]
#![no_std]
//! Low-level boot of the Raspberry's processor
extern crate cortex_a;
extern crate panic_abort;
extern crate r0;
#[macro_export]
macro_rules! entry {
($path:path) => {
#[export_name = "main"]
pub unsafe fn __main() -> ! {
// type check the given path
let f: fn() -> ! = $path;
f()
}
};
}
/// Reset function.
///
/// Initializes the bss section before calling into the user's `main()`.
unsafe fn reset() -> ! {
extern "C" {
// Boundaries of the .bss section, provided by the linker script
static mut __bss_start: u64;
static mut __bss_end: u64;
}
// Zeroes the .bss section
r0::zero_bss(&mut __bss_start, &mut __bss_end);
extern "Rust" {
fn main() -> !;
}
main()
}
/// Prepare and execute transition from EL2 to EL1.
#[inline]
fn setup_and_enter_el1_from_el2() -> ! {
use cortex_a::{asm, regs::*};
// Enable timer counter registers for EL1
CNTHCTL_EL2.write(CNTHCTL_EL2::EL1PCEN::SET + CNTHCTL_EL2::EL1PCTEN::SET);
// No offset for reading the counters
CNTVOFF_EL2.set(0);
// Set EL1 execution state to AArch64
// TODO: Explain the SWIO bit
HCR_EL2.write(HCR_EL2::RW::EL1IsAarch64 + HCR_EL2::SWIO::SET);
// Set up a simulated exception return.
//
// First, fake a saved program status, where all interrupts were
// masked and SP_EL0 was used as a stack pointer.
SPSR_EL2.write(
SPSR_EL2::D::Masked
+ SPSR_EL2::A::Masked
+ SPSR_EL2::I::Masked
+ SPSR_EL2::F::Masked
+ SPSR_EL2::M::EL1t,
);
// Second, let the link register point to reset().
ELR_EL2.set(reset as *const () as u64);
// Set up SP_EL0 (stack pointer), which will be used by EL1 once
// we "return" to it.
SP_EL0.set(0x80_000);
// Use `eret` to "return" to EL1. This will result in execution of
// `reset()` in EL1.
asm::eret()
}
/// Entrypoint of the processor.
///
/// Parks all cores except core0 and checks if we started in EL2. If
/// so, proceeds with setting up EL1.
#[link_section = ".text.boot"]
#[no_mangle]
pub unsafe extern "C" fn _boot_cores() -> ! {
use cortex_a::{asm, regs::*};
const CORE_0: u64 = 0;
const CORE_MASK: u64 = 0x3;
const EL2: u32 = CurrentEL::EL::EL2.value;
if let CORE_0 = MPIDR_EL1.get() & CORE_MASK {
if let EL2 = CurrentEL.get() {
setup_and_enter_el1_from_el2()
}
}
// if not core0 or EL != 2, infinitely wait for events
loop {
asm::wfe();
}
}

@ -0,0 +1,40 @@
use core::sync::atomic::{compiler_fence, Ordering};
use cortex_a::{barrier, regs::*};
/// We assume that addr is cacheline aligned
pub fn batch_modify(addr: u64) -> u32 {
const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register
const NUM_CACHELINES_TOUCHED: u64 = 5;
const BYTES_PER_U64_REG: usize = 8;
const NUM_BENCH_ITERATIONS: u64 = 100_000;
const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED;
let t1 = CNTPCT_EL0.get();
compiler_fence(Ordering::SeqCst);
let mut data_ptr: *mut u64;
let mut temp: u64;
for _ in 0..NUM_BENCH_ITERATIONS {
for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) {
data_ptr = i as *mut u64;
unsafe {
temp = core::ptr::read_volatile(data_ptr);
core::ptr::write_volatile(data_ptr, temp + 1);
}
}
}
// Insert a barrier to ensure that the last memory operation has finished
// before we retrieve the elapsed time with the subsequent counter read. Not
// needed at all given the sample size, but let's be a bit pedantic here for
// education purposes. For measuring single-instructions, this would be
// needed.
unsafe { barrier::dsb(barrier::SY) };
let t2 = CNTPCT_EL0.get();
((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32
}

@ -0,0 +1,75 @@
/*
* MIT License
*
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
use super::MMIO_BASE;
use register::mmio::ReadWrite;
// Descriptions taken from
// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf
register_bitfields! {
u32,
/// GPIO Function Select 1
GPFSEL1 [
/// Pin 15
FSEL15 OFFSET(15) NUMBITS(3) [
Input = 0b000,
Output = 0b001,
RXD0 = 0b100, // UART0 - Alternate function 0
RXD1 = 0b010 // Mini UART - Alternate function 5
],
/// Pin 14
FSEL14 OFFSET(12) NUMBITS(3) [
Input = 0b000,
Output = 0b001,
TXD0 = 0b100, // UART0 - Alternate function 0
TXD1 = 0b010 // Mini UART - Alternate function 5
]
],
/// GPIO Pull-up/down Clock Register 0
GPPUDCLK0 [
/// Pin 15
PUDCLK15 OFFSET(15) NUMBITS(1) [
NoEffect = 0,
AssertClock = 1
],
/// Pin 14
PUDCLK14 OFFSET(14) NUMBITS(1) [
NoEffect = 0,
AssertClock = 1
]
]
}
pub const GPFSEL1: *const ReadWrite<u32, GPFSEL1::Register> =
(MMIO_BASE + 0x0020_0004) as *const ReadWrite<u32, GPFSEL1::Register>;
pub const GPPUD: *const ReadWrite<u32> = (MMIO_BASE + 0x0020_0094) as *const ReadWrite<u32>;
pub const GPPUDCLK0: *const ReadWrite<u32, GPPUDCLK0::Register> =
(MMIO_BASE + 0x0020_0098) as *const ReadWrite<u32, GPPUDCLK0::Register>;

@ -0,0 +1,111 @@
/*
* MIT License
*
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#![no_std]
#![no_main]
#![feature(asm)]
#![feature(const_fn)]
extern crate cortex_a;
#[macro_use]
extern crate raspi3_boot;
#[macro_use]
extern crate register;
const MMIO_BASE: u32 = 0x3F00_0000;
mod gpio;
mod mbox;
mod mmu;
mod uart;
mod benchmark;
fn do_benchmarks(uart: &uart::Uart) {
const SIZE_2MIB: u64 = 2 * 1024 * 1024;
// Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
// NON-cacheable DRAM memory.
let non_cacheable_addr: u64 = SIZE_2MIB;
// Start of the __THIRD__ virtual 2 MiB block.
// Cacheable DRAM memory
let cacheable_addr: u64 = 2 * SIZE_2MIB;
uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
uart.hex(non_cacheable_addr as u32);
uart.puts(", physical 0x");
uart.hex(2 * SIZE_2MIB as u32);
uart.puts(":\n");
let result_nc = benchmark::batch_modify(non_cacheable_addr);
uart.dec(result_nc);
uart.puts(" miliseconds.\n\n");
uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
uart.hex(cacheable_addr as u32);
uart.puts(", physical 0x");
uart.hex(2 * SIZE_2MIB as u32);
uart.puts(":\n");
let result_c = benchmark::batch_modify(cacheable_addr);
uart.dec(result_c);
uart.puts(" miliseconds.\n\n");
let percent_diff = (result_nc - result_c) * 100 / result_c;
uart.puts("With caching, the function is ");
uart.dec(percent_diff);
uart.puts("% faster!\n");
}
entry!(kernel_entry);
fn kernel_entry() -> ! {
let mut mbox = mbox::Mbox::new();
let uart = uart::Uart::new(uart::UART_PHYS_BASE);
// set up serial console
if uart.init(&mut mbox).is_err() {
loop {
cortex_a::asm::wfe() // If UART fails, abort early
}
}
uart.getc(); // Press a key first before being greeted
uart.puts("Hello Rustacean!\n\n");
uart.puts("\nSwitching MMU on now...");
unsafe { mmu::init() };
uart.puts("MMU is live \\o/\n\n");
do_benchmarks(&uart);
// echo everything back
loop {
uart.send(uart.getc());
}
}

@ -0,0 +1,159 @@
/*
* MIT License
*
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
use super::MMIO_BASE;
use core::ops;
use cortex_a::asm;
use register::mmio::{ReadOnly, WriteOnly};
register_bitfields! {
u32,
STATUS [
FULL OFFSET(31) NUMBITS(1) [],
EMPTY OFFSET(30) NUMBITS(1) []
]
}
const VIDEOCORE_MBOX: u32 = MMIO_BASE + 0xB880;
#[allow(non_snake_case)]
#[repr(C)]
pub struct RegisterBlock {
READ: ReadOnly<u32>, // 0x00
__reserved_0: [u32; 5], // 0x04
STATUS: ReadOnly<u32, STATUS::Register>, // 0x18
__reserved_1: u32, // 0x1C
WRITE: WriteOnly<u32>, // 0x20
}
// Custom errors
pub enum MboxError {
ResponseError,
UnknownError,
}
pub type Result<T> = ::core::result::Result<T, MboxError>;
// Channels
pub mod channel {
pub const PROP: u32 = 8;
}
// Tags
pub mod tag {
pub const SETCLKRATE: u32 = 0x38002;
pub const LAST: u32 = 0;
}
// Clocks
pub mod clock {
pub const UART: u32 = 0x0_0000_0002;
}
// Responses
mod response {
pub const SUCCESS: u32 = 0x8000_0000;
pub const ERROR: u32 = 0x8000_0001; // error parsing request buffer (partial response)
}
pub const REQUEST: u32 = 0;
// Public interface to the mailbox
#[repr(C)]
#[repr(align(16))]
pub struct Mbox {
// The address for buffer needs to be 16-byte aligned so that the
// Videcore can handle it properly.
pub buffer: [u32; 36],
}
/// Deref to RegisterBlock
///
/// Allows writing
/// ```
/// self.STATUS.read()
/// ```
/// instead of something along the lines of
/// ```
/// unsafe { (*Mbox::ptr()).STATUS.read() }
/// ```
impl ops::Deref for Mbox {
type Target = RegisterBlock;
fn deref(&self) -> &Self::Target {
unsafe { &*Self::ptr() }
}
}
impl Mbox {
pub fn new() -> Mbox {
Mbox { buffer: [0; 36] }
}
/// Returns a pointer to the register block
fn ptr() -> *const RegisterBlock {
VIDEOCORE_MBOX as *const _
}
/// Make a mailbox call. Returns Err(MboxError) on failure, Ok(()) success
pub fn call(&self, channel: u32) -> Result<()> {
// wait until we can write to the mailbox
loop {
if !self.STATUS.is_set(STATUS::FULL) {
break;
}
asm::nop();
}
let buf_ptr = self.buffer.as_ptr() as u32;
// write the address of our message to the mailbox with channel identifier
self.WRITE.set((buf_ptr & !0xF) | (channel & 0xF));
// now wait for the response
loop {
// is there a response?
loop {
if !self.STATUS.is_set(STATUS::EMPTY) {
break;
}
asm::nop();
}
let resp: u32 = self.READ.get();
// is it a response to our message?
if ((resp & 0xF) == channel) && ((resp & !0xF) == buf_ptr) {
// is it a valid successful response?
return match self.buffer[1] {
response::SUCCESS => Ok(()),
response::ERROR => Err(MboxError::ResponseError),
_ => Err(MboxError::UnknownError),
};
}
}
}
}

@ -0,0 +1,219 @@
/*
* MIT License
*
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
use cortex_a::{barrier, regs::*};
register_bitfields! {u64,
// AArch64 Reference Manual page 2150
STAGE1_DESCRIPTOR [
/// Execute-never
XN OFFSET(54) NUMBITS(1) [
False = 0,
True = 1
],
/// Various address fields, depending on use case
LVL2_OUTPUT_ADDR_4KiB OFFSET(21) NUMBITS(27) [], // [47:21]
NEXT_LVL_TABLE_ADDR_4KiB OFFSET(12) NUMBITS(36) [], // [47:12]
/// Access flag
AF OFFSET(10) NUMBITS(1) [
False = 0,
True = 1
],
/// Shareability field
SH OFFSET(8) NUMBITS(2) [
OuterShareable = 0b10,
InnerShareable = 0b11
],
/// Access Permissions
AP OFFSET(6) NUMBITS(2) [
RW_EL1 = 0b00,
RW_EL1_EL0 = 0b01,
RO_EL1 = 0b10,
RO_EL1_EL0 = 0b11
],
/// Memory attributes index into the MAIR_EL1 register
AttrIndx OFFSET(2) NUMBITS(3) [],
TYPE OFFSET(1) NUMBITS(1) [
Block = 0,
Table = 1
],
VALID OFFSET(0) NUMBITS(1) [
False = 0,
True = 1
]
]
}
trait BaseAddr {
fn base_addr(&self) -> u64;
}
impl BaseAddr for [u64; 512] {
fn base_addr(&self) -> u64 {
self as *const u64 as u64
}
}
const NUM_ENTRIES_4KIB: usize = 512;
static mut LVL2_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB];
static mut SINGLE_LVL3_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB];
/// Set up identity mapped page tables for the first 1 gigabyte of address
/// space.
pub unsafe fn init() {
// First, define the three memory types that we will map. Cacheable and
// non-cacheable normal DRAM, and device.
MAIR_EL1.write(
// Attribute 2
MAIR_EL1::Attr2_HIGH::Memory_OuterNonCacheable
+ MAIR_EL1::Attr2_LOW_MEMORY::InnerNonCacheable
// Attribute 1
+ MAIR_EL1::Attr1_HIGH::Memory_OuterWriteBack_NonTransient_ReadAlloc_WriteAlloc
+ MAIR_EL1::Attr1_LOW_MEMORY::InnerWriteBack_NonTransient_ReadAlloc_WriteAlloc
// Attribute 0
+ MAIR_EL1::Attr0_HIGH::Device
+ MAIR_EL1::Attr0_LOW_DEVICE::Device_nGnRE,
);
// Descriptive consts for indexing into the correct MAIR_EL1 attributes.
mod mair {
pub const DEVICE: u64 = 0;
pub const NORMAL: u64 = 1;
pub const NORMAL_NON_CACHEABLE: u64 = 2;
}
// Set up the first LVL2 entry, pointing to a 4KiB table base address.
let lvl3_base: u64 = SINGLE_LVL3_TABLE.base_addr() >> 12;
LVL2_TABLE[0] = (STAGE1_DESCRIPTOR::VALID::True
+ STAGE1_DESCRIPTOR::TYPE::Table
+ STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(lvl3_base))
.value;
// The second 2 MiB block.
LVL2_TABLE[1] = (STAGE1_DESCRIPTOR::VALID::True
+ STAGE1_DESCRIPTOR::TYPE::Block
+ STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL_NON_CACHEABLE)
+ STAGE1_DESCRIPTOR::AP::RW_EL1
+ STAGE1_DESCRIPTOR::SH::OuterShareable
+ STAGE1_DESCRIPTOR::AF::True
// This translation is accessed for virtual 0x200000. Point to physical
// 0x400000, aka the third phyiscal 2 MiB DRAM block (third block == 2,
// because we start counting at 0).
//
// Here, we configure it non-cacheable.
+ STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(2)
+ STAGE1_DESCRIPTOR::XN::True)
.value;
// Fill the rest of the LVL2 (2MiB) entries as block
// descriptors. Differentiate between normal and device mem.
let mmio_base: u64 = (super::MMIO_BASE >> 21).into();
let common = STAGE1_DESCRIPTOR::VALID::True
+ STAGE1_DESCRIPTOR::TYPE::Block
+ STAGE1_DESCRIPTOR::AP::RW_EL1
+ STAGE1_DESCRIPTOR::AF::True
+ STAGE1_DESCRIPTOR::XN::True;
// Notice the skip(2). Start at the third 2 MiB DRAM block, which will point
// virtual 0x400000 to physical 0x400000, configured as cacheable memory.
for (i, entry) in LVL2_TABLE.iter_mut().enumerate().skip(2) {
let j: u64 = i as u64;
let mem_attr = if j >= mmio_base {
STAGE1_DESCRIPTOR::SH::OuterShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::DEVICE)
} else {
STAGE1_DESCRIPTOR::SH::InnerShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL)
};
*entry = (common + mem_attr + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(j)).value;
}
// Finally, fill the single LVL3 table (4KiB granule). Differentiate between
// code/RO and RW sections.
//
// Using the linker script, we ensure that the RO sections are 4KiB aligned,
// and we export their boundaries via symbols.
extern "C" {
static mut __ro_start: u64;
static mut __ro_end: u64;
}
const PAGESIZE: u64 = 4096;
let ro_start: u64 = &__ro_start as *const _ as u64 / PAGESIZE;
let ro_end: u64 = &__ro_end as *const _ as u64 / PAGESIZE;
let common = STAGE1_DESCRIPTOR::VALID::True
+ STAGE1_DESCRIPTOR::TYPE::Table
+ STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL)
+ STAGE1_DESCRIPTOR::SH::InnerShareable
+ STAGE1_DESCRIPTOR::AF::True;
for (i, entry) in SINGLE_LVL3_TABLE.iter_mut().enumerate() {
let j: u64 = i as u64;
let mem_attr = if j < ro_start || j > ro_end {
STAGE1_DESCRIPTOR::AP::RW_EL1 + STAGE1_DESCRIPTOR::XN::True
} else {
STAGE1_DESCRIPTOR::AP::RO_EL1 + STAGE1_DESCRIPTOR::XN::False
};
*entry = (common + mem_attr + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(j)).value;
}
// Point to the LVL2 table base address in TTBR0.
TTBR0_EL1.set_baddr(LVL2_TABLE.base_addr());
// Configure various settings of stage 1 of the EL1 translation regime.
let ips = ID_AA64MMFR0_EL1.read(ID_AA64MMFR0_EL1::PARange);
TCR_EL1.write(
TCR_EL1::TBI0::Ignored
+ TCR_EL1::IPS.val(ips)
+ TCR_EL1::TG0::KiB_4 // 4 KiB granule
+ TCR_EL1::SH0::Inner
+ TCR_EL1::ORGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable
+ TCR_EL1::IRGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable
+ TCR_EL1::EPD0::EnableTTBR0Walks
+ TCR_EL1::T0SZ.val(34), // Start walks at level 2
);
// Switch the MMU on.
//
// First, force all previous changes to be seen before the MMU is enabled.
barrier::isb(barrier::SY);
// Enable the MMU and turn on caching
SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable);
// Force MMU init to complete before next instruction
barrier::isb(barrier::SY);
}

@ -0,0 +1,310 @@
/*
* MIT License
*
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
use super::MMIO_BASE;
use core::{
ops,
sync::atomic::{compiler_fence, Ordering},
};
use cortex_a::asm;
use gpio;
use mbox;
use register::mmio::*;
// PL011 UART registers.
//
// Descriptions taken from
// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf
register_bitfields! {
u32,
/// Flag Register
FR [
/// Transmit FIFO full. The meaning of this bit depends on the
/// state of the FEN bit in the UARTLCR_ LCRH Register. If the
/// FIFO is disabled, this bit is set when the transmit
/// holding register is full. If the FIFO is enabled, the TXFF
/// bit is set when the transmit FIFO is full.
TXFF OFFSET(5) NUMBITS(1) [],
/// Receive FIFO empty. The meaning of this bit depends on the
/// state of the FEN bit in the UARTLCR_H Register. If the
/// FIFO is disabled, this bit is set when the receive holding
/// register is empty. If the FIFO is enabled, the RXFE bit is
/// set when the receive FIFO is empty.
RXFE OFFSET(4) NUMBITS(1) []
],
/// Integer Baud rate divisor
IBRD [
/// Integer Baud rate divisor
IBRD OFFSET(0) NUMBITS(16) []
],
/// Fractional Baud rate divisor
FBRD [
/// Fractional Baud rate divisor
FBRD OFFSET(0) NUMBITS(6) []
],
/// Line Control register
LCRH [
/// Word length. These bits indicate the number of data bits
/// transmitted or received in a frame.
WLEN OFFSET(5) NUMBITS(2) [
FiveBit = 0b00,
SixBit = 0b01,
SevenBit = 0b10,
EightBit = 0b11
]
],
/// Control Register
CR [
/// Receive enable. If this bit is set to 1, the receive
/// section of the UART is enabled. Data reception occurs for
/// UART signals. When the UART is disabled in the middle of
/// reception, it completes the current character before
/// stopping.
RXE OFFSET(9) NUMBITS(1) [
Disabled = 0,
Enabled = 1
],
/// Transmit enable. If this bit is set to 1, the transmit
/// section of the UART is enabled. Data transmission occurs
/// for UART signals. When the UART is disabled in the middle
/// of transmission, it completes the current character before
/// stopping.
TXE OFFSET(8) NUMBITS(1) [
Disabled = 0,
Enabled = 1
],
/// UART enable
UARTEN OFFSET(0) NUMBITS(1) [
/// If the UART is disabled in the middle of transmission
/// or reception, it completes the current character
/// before stopping.
Disabled = 0,
Enabled = 1
]
],
/// Interupt Clear Register
ICR [
/// Meta field for all pending interrupts
ALL OFFSET(0) NUMBITS(11) []
]
}
pub const UART_PHYS_BASE: u32 = MMIO_BASE + 0x20_1000;
#[allow(non_snake_case)]
#[repr(C)]
pub struct RegisterBlock {
DR: ReadWrite<u32>, // 0x00
__reserved_0: [u32; 5], // 0x04
FR: ReadOnly<u32, FR::Register>, // 0x18
__reserved_1: [u32; 2], // 0x1c
IBRD: WriteOnly<u32, IBRD::Register>, // 0x24
FBRD: WriteOnly<u32, FBRD::Register>, // 0x28
LCRH: WriteOnly<u32, LCRH::Register>, // 0x2C
CR: WriteOnly<u32, CR::Register>, // 0x30
__reserved_2: [u32; 4], // 0x34
ICR: WriteOnly<u32, ICR::Register>, // 0x44
}
pub enum UartError {
MailboxError,
}
pub type Result<T> = ::core::result::Result<T, UartError>;
pub struct Uart {
uart_base: u32,
}
impl ops::Deref for Uart {
type Target = RegisterBlock;
fn deref(&self) -> &Self::Target {
unsafe { &*self.ptr() }
}
}
impl Uart {
pub fn new(uart_base: u32) -> Uart {
Uart { uart_base }
}
/// Returns a pointer to the register block
fn ptr(&self) -> *const RegisterBlock {
self.uart_base as *const _
}
///Set baud rate and characteristics (115200 8N1) and map to GPIO
pub fn init(&self, mbox: &mut mbox::Mbox) -> Result<()> {
// turn off UART0
self.CR.set(0);
// set up clock for consistent divisor values
mbox.buffer[0] = 9 * 4;
mbox.buffer[1] = mbox::REQUEST;
mbox.buffer[2] = mbox::tag::SETCLKRATE;
mbox.buffer[3] = 12;
mbox.buffer[4] = 8;
mbox.buffer[5] = mbox::clock::UART; // UART clock
mbox.buffer[6] = 4_000_000; // 4Mhz
mbox.buffer[7] = 0; // skip turbo setting
mbox.buffer[8] = mbox::tag::LAST;
// Insert a compiler fence that ensures that all stores to the
// mbox buffer are finished before the GPU is signaled (which
// is done by a store operation as well).
compiler_fence(Ordering::Release);
if mbox.call(mbox::channel::PROP).is_err() {
return Err(UartError::MailboxError); // Abort if UART clocks couldn't be set
};
// map UART0 to GPIO pins
unsafe {
(*gpio::GPFSEL1).modify(gpio::GPFSEL1::FSEL14::TXD0 + gpio::GPFSEL1::FSEL15::RXD0);
(*gpio::GPPUD).set(0); // enable pins 14 and 15
for _ in 0..150 {
asm::nop();
}
(*gpio::GPPUDCLK0).modify(
gpio::GPPUDCLK0::PUDCLK14::AssertClock + gpio::GPPUDCLK0::PUDCLK15::AssertClock,
);
for _ in 0..150 {
asm::nop();
}
(*gpio::GPPUDCLK0).set(0);
}
self.ICR.write(ICR::ALL::CLEAR);
self.IBRD.write(IBRD::IBRD.val(2)); // Results in 115200 baud
self.FBRD.write(FBRD::FBRD.val(0xB));
self.LCRH.write(LCRH::WLEN::EightBit); // 8N1
self.CR
.write(CR::UARTEN::Enabled + CR::TXE::Enabled + CR::RXE::Enabled);
Ok(())
}
/// Send a character
pub fn send(&self, c: char) {
// wait until we can send
loop {
if !self.FR.is_set(FR::TXFF) {
break;
}
asm::nop();
}
// write the character to the buffer
self.DR.set(c as u32);
}
/// Receive a character
pub fn getc(&self) -> char {
// wait until something is in the buffer
loop {
if !self.FR.is_set(FR::RXFE) {
break;
}
asm::nop();
}
// read it and return
let mut ret = self.DR.get() as u8 as char;
// convert carrige return to newline
if ret == '\r' {
ret = '\n'
}
ret
}
/// Display a string
pub fn puts(&self, string: &str) {
for c in string.chars() {
// convert newline to carrige return + newline
if c == '\n' {
self.send('\r')
}
self.send(c);
}
}
/// Display a binary value in hexadecimal
pub fn hex(&self, d: u32) {
let mut n;
for i in 0..8 {
// get highest tetrad
n = d.wrapping_shr(28 - i * 4) & 0xF;
// 0-9 => '0'-'9', 10-15 => 'A'-'F'
// Add proper offset for ASCII table
if n > 9 {
n += 0x37;
} else {
n += 0x30;
}
self.send(n as u8 as char);
}
}
/// Display a binary value in decimal
pub fn dec(&self, d: u32) {
let mut digits: [char; 10] = ['\0'; 10];
let mut d = d;
let mut i: usize = 0;
loop {
digits[i] = ((d % 10) + 0x30) as u8 as char;
i += 1;
d /= 10;
if d == 0 {
break;
}
}
for c in digits.iter().rev() {
self.send(*c);
}
}
}
Loading…
Cancel
Save