Add tutorial 0D_cache_performance
parent
bf2a1fff7e
commit
c65e2e56cd
Binary file not shown.
Binary file not shown.
@ -0,0 +1,6 @@
|
||||
[target.aarch64-unknown-none]
|
||||
rustflags = [
|
||||
"-C", "link-arg=-Tlink.ld",
|
||||
"-C", "target-feature=-fp-armv8",
|
||||
"-C", "target-cpu=cortex-a53",
|
||||
]
|
@ -0,0 +1,55 @@
|
||||
[[package]]
|
||||
name = "cortex-a"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kernel8"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"raspi3_boot 0.1.0",
|
||||
"register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "panic-abort"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "r0"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "raspi3_boot"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "register"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tock-registers"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe913628f34718bc9e7d0d07e13ca1374638f64f0edc6eb063ec8abe581d395d"
|
||||
"checksum panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6bc796c620f27056d4ffe7c558533fd67ae5af0fd8e919fbe38de803368af73e"
|
||||
"checksum r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2a38df5b15c8d5c7e8654189744d8e396bddc18ad48041a500ce52d6948941f"
|
||||
"checksum register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "157a11ac0b1882ff4a527a92f911dd288df17367faaaa0c36f188cd61ec36fc1"
|
||||
"checksum tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3a385d94f3f62e60445a0adb9ff8d9621faa272234530d4c0f848ec98f88e316"
|
@ -0,0 +1,12 @@
|
||||
[package]
|
||||
name = "kernel8"
|
||||
version = "0.1.0"
|
||||
authors = ["Andre Richter <andre.o.richter@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
raspi3_boot = { path = "raspi3_boot" }
|
||||
cortex-a = "2.2.1"
|
||||
register = "0.2.0"
|
||||
|
||||
[package.metadata.cargo-xbuild]
|
||||
sysroot_path = "../xbuild_sysroot"
|
@ -0,0 +1,66 @@
|
||||
#
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
#
|
||||
|
||||
TARGET = aarch64-unknown-none
|
||||
|
||||
OBJCOPY = cargo objcopy --
|
||||
OBJCOPY_PARAMS = --strip-all -O binary
|
||||
|
||||
UTILS_CONTAINER = andrerichter/raspi3-utils
|
||||
DOCKER_CMD = docker run -it --rm -v $(shell pwd):/work -w /work
|
||||
DOCKER_TTY = --privileged -v /dev:/dev
|
||||
QEMU_CMD = qemu-system-aarch64 -M raspi3 -kernel kernel8.img
|
||||
RASPBOOT_CMD = raspbootcom /dev/ttyUSB0 kernel8.img
|
||||
|
||||
all: clean kernel8.img
|
||||
|
||||
target/$(TARGET)/debug/kernel8: src/main.rs
|
||||
cargo xbuild --target=$(TARGET)
|
||||
cp $@ .
|
||||
|
||||
target/$(TARGET)/release/kernel8: src/main.rs
|
||||
cargo xbuild --target=$(TARGET) --release
|
||||
cp $@ .
|
||||
|
||||
ifeq ($(DEBUG),1)
|
||||
kernel8: target/$(TARGET)/debug/kernel8
|
||||
else
|
||||
kernel8: target/$(TARGET)/release/kernel8
|
||||
endif
|
||||
|
||||
kernel8.img: kernel8
|
||||
$(OBJCOPY) $(OBJCOPY_PARAMS) $< kernel8.img
|
||||
|
||||
qemu: all
|
||||
$(DOCKER_CMD) $(UTILS_CONTAINER) $(QEMU_CMD) -serial stdio
|
||||
|
||||
raspboot: all
|
||||
$(DOCKER_CMD) $(DOCKER_TTY) $(UTILS_CONTAINER) $(RASPBOOT_CMD)
|
||||
|
||||
clippy:
|
||||
cargo xclippy --target=$(TARGET)
|
||||
|
||||
clean:
|
||||
cargo clean
|
||||
rm -f kernel8
|
@ -0,0 +1,57 @@
|
||||
# Tutorial 0D - Cache Performance
|
||||
|
||||
Now that we finally have virtual memory capabilities available, we also have
|
||||
fine-grained control over `cacheability`. You've caught a glimpse already in the
|
||||
last tutorial, where we used page table entries to reference the `MAIR_EL1`
|
||||
register to indicate the cacheability of a page or block.
|
||||
|
||||
Unfortunately, for the user it is often hard to grasp the advantage of caching
|
||||
in early stages of OS or bare-metal software development. This tutorial is a
|
||||
short interlude that tries to give you a feeling of what caching can do for
|
||||
performance.
|
||||
|
||||
## Benchmark
|
||||
|
||||
Let's write a tiny, arbitrary micro-benchmark to showcase the performance of
|
||||
operating on the same DRAM with caching enabled and disabled.
|
||||
|
||||
### mmu.rs
|
||||
|
||||
To do so, we will map the same physical memory via two different virtual
|
||||
addresses. We set up our pagetables such that the virtual address `0x200000`
|
||||
points to the physical DRAM at `0x400000`, and we configure it as
|
||||
`non-cacheable` in the page tables.
|
||||
|
||||
We are still using a `2 MiB` granule, and set up the next block, which starts at
|
||||
virtual `0x400000`, to point at physical `0x400000` (this is an identity mapped
|
||||
block). This time, the block is configured as cacheable.
|
||||
|
||||
### benchmark.rs
|
||||
|
||||
We write a little function that iteratively reads memory of five times the size
|
||||
of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
|
||||
read the value, add 1, and write it back. This whole process is repeated
|
||||
`100_000` times.
|
||||
|
||||
### main.rs
|
||||
|
||||
The benchmark function is called twice. Once for the cacheable and once for the
|
||||
non-cacheable virtual addresses. Remember that both virtual addresses point to
|
||||
the _same_ physical DRAM, so the difference in time that we will see will
|
||||
showcase how much faster it is to operate on DRAM with caching enabled.
|
||||
|
||||
## Results
|
||||
|
||||
On my Raspberry, I get the following results:
|
||||
|
||||
```text
|
||||
Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000:
|
||||
664 milliseconds.
|
||||
|
||||
Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000:
|
||||
148 milliseconds.
|
||||
|
||||
With caching, the function is 348% faster!
|
||||
```
|
||||
|
||||
Impressive, isn't it?
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
ENTRY(_boot_cores);
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
. = 0x80000; /* This is already 4KiB aligned */
|
||||
__ro_start = .;
|
||||
.text :
|
||||
{
|
||||
KEEP(*(.text.boot)) *(.text .text.*)
|
||||
}
|
||||
|
||||
.rodata :
|
||||
{
|
||||
*(.rodata .rodata.*)
|
||||
}
|
||||
. = ALIGN(4096); /* Fill up to 4KiB */
|
||||
__ro_end = .;
|
||||
|
||||
.data :
|
||||
{
|
||||
*(.data .data.*)
|
||||
}
|
||||
|
||||
.bss ALIGN(8):
|
||||
{
|
||||
__bss_start = .;
|
||||
*(.bss .bss.*)
|
||||
*(COMMON)
|
||||
__bss_end = .;
|
||||
}
|
||||
|
||||
/DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) }
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
[package]
|
||||
name = "raspi3_boot"
|
||||
version = "0.1.0"
|
||||
authors = ["Andre Richter <andre.o.richter@gmail.com>"]
|
||||
|
||||
[dependencies]
|
||||
cortex-a = "2.2.1"
|
||||
panic-abort = "0.2.0"
|
||||
r0 = "0.2.2"
|
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Jorge Aparicio
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
#![deny(warnings)]
|
||||
#![no_std]
|
||||
|
||||
//! Low-level boot of the Raspberry's processor
|
||||
|
||||
extern crate cortex_a;
|
||||
extern crate panic_abort;
|
||||
extern crate r0;
|
||||
|
||||
/// Declares the kernel's entry function.
///
/// Expands to an `unsafe fn __main() -> !` that is exported under the symbol
/// name `main` and simply forwards to the function named by `$path`. The
/// path must refer to a `fn() -> !`; anything else fails to compile.
#[macro_export]
macro_rules! entry {
    ($path:path) => {
        #[export_name = "main"]
        pub unsafe fn __main() -> ! {
            // Binding to an explicitly typed local enforces the signature.
            let checked_entry: fn() -> ! = $path;

            checked_entry()
        }
    };
}
|
||||
|
||||
/// Reset function.
|
||||
///
|
||||
/// Initializes the bss section before calling into the user's `main()`.
|
||||
unsafe fn reset() -> ! {
|
||||
extern "C" {
|
||||
// Boundaries of the .bss section, provided by the linker script
|
||||
static mut __bss_start: u64;
|
||||
static mut __bss_end: u64;
|
||||
}
|
||||
|
||||
// Zeroes the .bss section
|
||||
r0::zero_bss(&mut __bss_start, &mut __bss_end);
|
||||
|
||||
extern "Rust" {
|
||||
fn main() -> !;
|
||||
}
|
||||
|
||||
main()
|
||||
}
|
||||
|
||||
/// Prepare and execute transition from EL2 to EL1.
|
||||
#[inline]
|
||||
fn setup_and_enter_el1_from_el2() -> ! {
|
||||
use cortex_a::{asm, regs::*};
|
||||
|
||||
// Enable timer counter registers for EL1
|
||||
CNTHCTL_EL2.write(CNTHCTL_EL2::EL1PCEN::SET + CNTHCTL_EL2::EL1PCTEN::SET);
|
||||
|
||||
// No offset for reading the counters
|
||||
CNTVOFF_EL2.set(0);
|
||||
|
||||
// Set EL1 execution state to AArch64
|
||||
// TODO: Explain the SWIO bit
|
||||
HCR_EL2.write(HCR_EL2::RW::EL1IsAarch64 + HCR_EL2::SWIO::SET);
|
||||
|
||||
// Set up a simulated exception return.
|
||||
//
|
||||
// First, fake a saved program status, where all interrupts were
|
||||
// masked and SP_EL0 was used as a stack pointer.
|
||||
SPSR_EL2.write(
|
||||
SPSR_EL2::D::Masked
|
||||
+ SPSR_EL2::A::Masked
|
||||
+ SPSR_EL2::I::Masked
|
||||
+ SPSR_EL2::F::Masked
|
||||
+ SPSR_EL2::M::EL1t,
|
||||
);
|
||||
|
||||
// Second, let the link register point to reset().
|
||||
ELR_EL2.set(reset as *const () as u64);
|
||||
|
||||
// Set up SP_EL0 (stack pointer), which will be used by EL1 once
|
||||
// we "return" to it.
|
||||
SP_EL0.set(0x80_000);
|
||||
|
||||
// Use `eret` to "return" to EL1. This will result in execution of
|
||||
// `reset()` in EL1.
|
||||
asm::eret()
|
||||
}
|
||||
|
||||
/// Entrypoint of the processor.
|
||||
///
|
||||
/// Parks all cores except core0 and checks if we started in EL2. If
|
||||
/// so, proceeds with setting up EL1.
|
||||
#[link_section = ".text.boot"]
|
||||
#[no_mangle]
|
||||
pub unsafe extern "C" fn _boot_cores() -> ! {
|
||||
use cortex_a::{asm, regs::*};
|
||||
|
||||
const CORE_0: u64 = 0;
|
||||
const CORE_MASK: u64 = 0x3;
|
||||
const EL2: u32 = CurrentEL::EL::EL2.value;
|
||||
|
||||
if let CORE_0 = MPIDR_EL1.get() & CORE_MASK {
|
||||
if let EL2 = CurrentEL.get() {
|
||||
setup_and_enter_el1_from_el2()
|
||||
}
|
||||
}
|
||||
|
||||
// if not core0 or EL != 2, infinitely wait for events
|
||||
loop {
|
||||
asm::wfe();
|
||||
}
|
||||
}
|
@ -0,0 +1,40 @@
|
||||
use core::sync::atomic::{compiler_fence, Ordering};
|
||||
use cortex_a::{barrier, regs::*};
|
||||
|
||||
/// We assume that addr is cacheline aligned
|
||||
pub fn batch_modify(addr: u64) -> u32 {
|
||||
const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register
|
||||
const NUM_CACHELINES_TOUCHED: u64 = 5;
|
||||
const BYTES_PER_U64_REG: usize = 8;
|
||||
const NUM_BENCH_ITERATIONS: u64 = 100_000;
|
||||
|
||||
const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED;
|
||||
|
||||
let t1 = CNTPCT_EL0.get();
|
||||
|
||||
compiler_fence(Ordering::SeqCst);
|
||||
|
||||
let mut data_ptr: *mut u64;
|
||||
let mut temp: u64;
|
||||
for _ in 0..NUM_BENCH_ITERATIONS {
|
||||
for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) {
|
||||
data_ptr = i as *mut u64;
|
||||
|
||||
unsafe {
|
||||
temp = core::ptr::read_volatile(data_ptr);
|
||||
core::ptr::write_volatile(data_ptr, temp + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Insert a barrier to ensure that the last memory operation has finished
|
||||
// before we retrieve the elapsed time with the subsequent counter read. Not
|
||||
// needed at all given the sample size, but let's be a bit pedantic here for
|
||||
// education purposes. For measuring single-instructions, this would be
|
||||
// needed.
|
||||
unsafe { barrier::dsb(barrier::SY) };
|
||||
|
||||
let t2 = CNTPCT_EL0.get();
|
||||
|
||||
((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32
|
||||
}
|
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
use super::MMIO_BASE;
|
||||
use register::mmio::ReadWrite;
|
||||
|
||||
// Descriptions taken from
|
||||
// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf
|
||||
register_bitfields! {
|
||||
u32,
|
||||
|
||||
/// GPIO Function Select 1
|
||||
GPFSEL1 [
|
||||
/// Pin 15
|
||||
FSEL15 OFFSET(15) NUMBITS(3) [
|
||||
Input = 0b000,
|
||||
Output = 0b001,
|
||||
RXD0 = 0b100, // UART0 - Alternate function 0
|
||||
RXD1 = 0b010 // Mini UART - Alternate function 5
|
||||
|
||||
],
|
||||
|
||||
/// Pin 14
|
||||
FSEL14 OFFSET(12) NUMBITS(3) [
|
||||
Input = 0b000,
|
||||
Output = 0b001,
|
||||
TXD0 = 0b100, // UART0 - Alternate function 0
|
||||
TXD1 = 0b010 // Mini UART - Alternate function 5
|
||||
]
|
||||
],
|
||||
|
||||
/// GPIO Pull-up/down Clock Register 0
|
||||
GPPUDCLK0 [
|
||||
/// Pin 15
|
||||
PUDCLK15 OFFSET(15) NUMBITS(1) [
|
||||
NoEffect = 0,
|
||||
AssertClock = 1
|
||||
],
|
||||
|
||||
/// Pin 14
|
||||
PUDCLK14 OFFSET(14) NUMBITS(1) [
|
||||
NoEffect = 0,
|
||||
AssertClock = 1
|
||||
]
|
||||
]
|
||||
}
|
||||
|
||||
pub const GPFSEL1: *const ReadWrite<u32, GPFSEL1::Register> =
|
||||
(MMIO_BASE + 0x0020_0004) as *const ReadWrite<u32, GPFSEL1::Register>;
|
||||
|
||||
pub const GPPUD: *const ReadWrite<u32> = (MMIO_BASE + 0x0020_0094) as *const ReadWrite<u32>;
|
||||
|
||||
pub const GPPUDCLK0: *const ReadWrite<u32, GPPUDCLK0::Register> =
|
||||
(MMIO_BASE + 0x0020_0098) as *const ReadWrite<u32, GPPUDCLK0::Register>;
|
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#![no_std]
|
||||
#![no_main]
|
||||
#![feature(asm)]
|
||||
#![feature(const_fn)]
|
||||
|
||||
extern crate cortex_a;
|
||||
|
||||
#[macro_use]
|
||||
extern crate raspi3_boot;
|
||||
|
||||
#[macro_use]
|
||||
extern crate register;
|
||||
|
||||
const MMIO_BASE: u32 = 0x3F00_0000;
|
||||
|
||||
mod gpio;
|
||||
mod mbox;
|
||||
mod mmu;
|
||||
mod uart;
|
||||
mod benchmark;
|
||||
|
||||
fn do_benchmarks(uart: &uart::Uart) {
|
||||
const SIZE_2MIB: u64 = 2 * 1024 * 1024;
|
||||
|
||||
// Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
|
||||
// NON-cacheable DRAM memory.
|
||||
let non_cacheable_addr: u64 = SIZE_2MIB;
|
||||
|
||||
// Start of the __THIRD__ virtual 2 MiB block.
|
||||
// Cacheable DRAM memory
|
||||
let cacheable_addr: u64 = 2 * SIZE_2MIB;
|
||||
|
||||
uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
|
||||
uart.hex(non_cacheable_addr as u32);
|
||||
uart.puts(", physical 0x");
|
||||
uart.hex(2 * SIZE_2MIB as u32);
|
||||
uart.puts(":\n");
|
||||
|
||||
let result_nc = benchmark::batch_modify(non_cacheable_addr);
|
||||
uart.dec(result_nc);
|
||||
uart.puts(" miliseconds.\n\n");
|
||||
|
||||
uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
|
||||
uart.hex(cacheable_addr as u32);
|
||||
uart.puts(", physical 0x");
|
||||
uart.hex(2 * SIZE_2MIB as u32);
|
||||
uart.puts(":\n");
|
||||
let result_c = benchmark::batch_modify(cacheable_addr);
|
||||
uart.dec(result_c);
|
||||
uart.puts(" miliseconds.\n\n");
|
||||
|
||||
let percent_diff = (result_nc - result_c) * 100 / result_c;
|
||||
|
||||
uart.puts("With caching, the function is ");
|
||||
uart.dec(percent_diff);
|
||||
uart.puts("% faster!\n");
|
||||
}
|
||||
|
||||
entry!(kernel_entry);
|
||||
|
||||
fn kernel_entry() -> ! {
|
||||
let mut mbox = mbox::Mbox::new();
|
||||
let uart = uart::Uart::new(uart::UART_PHYS_BASE);
|
||||
|
||||
// set up serial console
|
||||
if uart.init(&mut mbox).is_err() {
|
||||
loop {
|
||||
cortex_a::asm::wfe() // If UART fails, abort early
|
||||
}
|
||||
}
|
||||
|
||||
uart.getc(); // Press a key first before being greeted
|
||||
uart.puts("Hello Rustacean!\n\n");
|
||||
|
||||
uart.puts("\nSwitching MMU on now...");
|
||||
|
||||
unsafe { mmu::init() };
|
||||
|
||||
uart.puts("MMU is live \\o/\n\n");
|
||||
|
||||
do_benchmarks(&uart);
|
||||
|
||||
// echo everything back
|
||||
loop {
|
||||
uart.send(uart.getc());
|
||||
}
|
||||
}
|
@ -0,0 +1,159 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
use super::MMIO_BASE;
|
||||
use core::ops;
|
||||
use cortex_a::asm;
|
||||
use register::mmio::{ReadOnly, WriteOnly};
|
||||
|
||||
// Bit layout of the mailbox STATUS register.
register_bitfields! {
|
||||
u32,
|
||||
|
||||
STATUS [
|
||||
// Bit 31; polled before writing to the mailbox.
FULL OFFSET(31) NUMBITS(1) [],
|
||||
// Bit 30; polled before reading from the mailbox.
EMPTY OFFSET(30) NUMBITS(1) []
|
||||
]
|
||||
}
|
||||
|
||||
// Address of the mailbox register block, at offset 0xB880 from MMIO_BASE.
const VIDEOCORE_MBOX: u32 = MMIO_BASE + 0xB880;
|
||||
|
||||
/// Memory layout of the mailbox registers.
///
/// `#[repr(C)]` keeps the fields in declaration order, so the reserved
/// fields place READ, STATUS and WRITE at the offsets noted on each line.
/// Field names mirror the register names, hence `non_snake_case`.
#[allow(non_snake_case)]
|
||||
#[repr(C)]
|
||||
pub struct RegisterBlock {
|
||||
READ: ReadOnly<u32>, // 0x00
|
||||
__reserved_0: [u32; 5], // 0x04
|
||||
STATUS: ReadOnly<u32, STATUS::Register>, // 0x18
|
||||
__reserved_1: u32, // 0x1C
|
||||
WRITE: WriteOnly<u32>, // 0x20
|
||||
}
|
||||
|
||||
// Custom errors
|
||||
/// Reasons a mailbox call can fail.
pub enum MboxError {
|
||||
/// The response code in the buffer equals `response::ERROR`.
ResponseError,
|
||||
/// The response code is neither `response::SUCCESS` nor `response::ERROR`.
UnknownError,
|
||||
}
|
||||
/// Result type with the error fixed to `MboxError`.
pub type Result<T> = ::core::result::Result<T, MboxError>;
|
||||
|
||||
/// Mailbox channel numbers.
pub mod channel {
    pub const PROP: u32 = 0x8;
}

/// Tag identifiers used in mailbox messages.
pub mod tag {
    pub const SETCLKRATE: u32 = 0x0003_8002;
    pub const LAST: u32 = 0x0;
}

/// Clock identifiers.
pub mod clock {
    pub const UART: u32 = 0x2;
}

/// Response codes checked after a mailbox call.
mod response {
    pub const SUCCESS: u32 = 1 << 31;
    // error parsing request buffer (partial response)
    pub const ERROR: u32 = (1 << 31) | 1;
}

/// Code marking a buffer as a request.
pub const REQUEST: u32 = 0x0;
|
||||
|
||||
// Public interface to the mailbox
|
||||
/// Holds the 36-word message buffer used for mailbox calls.
#[repr(C)]
|
||||
#[repr(align(16))]
|
||||
pub struct Mbox {
|
||||
// The address for buffer needs to be 16-byte aligned so that the
|
||||
// VideoCore can handle it properly.
|
||||
pub buffer: [u32; 36],
|
||||
}
|
||||
|
||||
/// Deref to RegisterBlock
|
||||
///
|
||||
/// Allows writing
|
||||
/// ```
|
||||
/// self.STATUS.read()
|
||||
/// ```
|
||||
/// instead of something along the lines of
|
||||
/// ```
|
||||
/// unsafe { (*Mbox::ptr()).STATUS.read() }
|
||||
/// ```
|
||||
impl ops::Deref for Mbox {
|
||||
type Target = RegisterBlock;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
unsafe { &*Self::ptr() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Mbox {
|
||||
pub fn new() -> Mbox {
|
||||
Mbox { buffer: [0; 36] }
|
||||
}
|
||||
|
||||
/// Returns a pointer to the register block
|
||||
fn ptr() -> *const RegisterBlock {
|
||||
VIDEOCORE_MBOX as *const _
|
||||
}
|
||||
|
||||
/// Make a mailbox call. Returns Err(MboxError) on failure, Ok(()) success
|
||||
pub fn call(&self, channel: u32) -> Result<()> {
|
||||
// wait until we can write to the mailbox
|
||||
loop {
|
||||
if !self.STATUS.is_set(STATUS::FULL) {
|
||||
break;
|
||||
}
|
||||
|
||||
asm::nop();
|
||||
}
|
||||
|
||||
let buf_ptr = self.buffer.as_ptr() as u32;
|
||||
|
||||
// write the address of our message to the mailbox with channel identifier
|
||||
self.WRITE.set((buf_ptr & !0xF) | (channel & 0xF));
|
||||
|
||||
// now wait for the response
|
||||
loop {
|
||||
// is there a response?
|
||||
loop {
|
||||
if !self.STATUS.is_set(STATUS::EMPTY) {
|
||||
break;
|
||||
}
|
||||
|
||||
asm::nop();
|
||||
}
|
||||
|
||||
let resp: u32 = self.READ.get();
|
||||
|
||||
// is it a response to our message?
|
||||
if ((resp & 0xF) == channel) && ((resp & !0xF) == buf_ptr) {
|
||||
// is it a valid successful response?
|
||||
return match self.buffer[1] {
|
||||
response::SUCCESS => Ok(()),
|
||||
response::ERROR => Err(MboxError::ResponseError),
|
||||
_ => Err(MboxError::UnknownError),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,219 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
use cortex_a::{barrier, regs::*};
|
||||
|
||||
register_bitfields! {
    u64,

    // AArch64 Reference Manual page 2150
    //
    // Fields are listed from the least significant bit upwards.
    STAGE1_DESCRIPTOR [
        VALID OFFSET(0) NUMBITS(1) [
            False = 0,
            True = 1
        ],

        TYPE OFFSET(1) NUMBITS(1) [
            Block = 0,
            Table = 1
        ],

        /// Memory attributes index into the MAIR_EL1 register
        AttrIndx OFFSET(2) NUMBITS(3) [],

        /// Access Permissions
        AP OFFSET(6) NUMBITS(2) [
            RW_EL1 = 0b00,
            RW_EL1_EL0 = 0b01,
            RO_EL1 = 0b10,
            RO_EL1_EL0 = 0b11
        ],

        /// Shareability field
        SH OFFSET(8) NUMBITS(2) [
            OuterShareable = 0b10,
            InnerShareable = 0b11
        ],

        /// Access flag
        AF OFFSET(10) NUMBITS(1) [
            False = 0,
            True = 1
        ],

        /// Various address fields, depending on use case
        NEXT_LVL_TABLE_ADDR_4KiB OFFSET(12) NUMBITS(36) [], // [47:12]
        LVL2_OUTPUT_ADDR_4KiB OFFSET(21) NUMBITS(27) [], // [47:21]

        /// Execute-never
        XN OFFSET(54) NUMBITS(1) [
            False = 0,
            True = 1
        ]
    ]
}
|
||||
|
||||
/// Types that can report the address they are located at.
trait BaseAddr {
    /// Returns the address of the implementor as a `u64`.
    fn base_addr(&self) -> u64;
}

impl BaseAddr for [u64; 512] {
    /// Address of the first array element.
    fn base_addr(&self) -> u64 {
        let first_entry: *const u64 = self.as_ptr();

        first_entry as u64
    }
}
|
||||
|
||||
// Number of entries per table: 512 entries of 8 bytes each add up to 4 KiB.
const NUM_ENTRIES_4KIB: usize = 512;
|
||||
|
||||
// Translation tables filled in by init(), zero-initialized.
// NOTE(review): as plain `[u64; 512]` statics these only get the 8-byte
// alignment of `u64`. Translation tables presumably have to be aligned to
// their 4 KiB size -- confirm that the final link places them suitably.
static mut LVL2_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB];
|
||||
static mut SINGLE_LVL3_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB];
|
||||
|
||||
/// Set up identity mapped page tables for the first 1 gigabyte of address
|
||||
/// space.
|
||||
pub unsafe fn init() {
|
||||
// First, define the three memory types that we will map. Cacheable and
|
||||
// non-cacheable normal DRAM, and device.
|
||||
MAIR_EL1.write(
|
||||
// Attribute 2
|
||||
MAIR_EL1::Attr2_HIGH::Memory_OuterNonCacheable
|
||||
+ MAIR_EL1::Attr2_LOW_MEMORY::InnerNonCacheable
|
||||
|
||||
// Attribute 1
|
||||
+ MAIR_EL1::Attr1_HIGH::Memory_OuterWriteBack_NonTransient_ReadAlloc_WriteAlloc
|
||||
+ MAIR_EL1::Attr1_LOW_MEMORY::InnerWriteBack_NonTransient_ReadAlloc_WriteAlloc
|
||||
|
||||
// Attribute 0
|
||||
+ MAIR_EL1::Attr0_HIGH::Device
|
||||
+ MAIR_EL1::Attr0_LOW_DEVICE::Device_nGnRE,
|
||||
);
|
||||
|
||||
// Descriptive consts for indexing into the correct MAIR_EL1 attributes.
|
||||
mod mair {
|
||||
pub const DEVICE: u64 = 0;
|
||||
pub const NORMAL: u64 = 1;
|
||||
pub const NORMAL_NON_CACHEABLE: u64 = 2;
|
||||
}
|
||||
|
||||
// Set up the first LVL2 entry, pointing to a 4KiB table base address.
|
||||
let lvl3_base: u64 = SINGLE_LVL3_TABLE.base_addr() >> 12;
|
||||
LVL2_TABLE[0] = (STAGE1_DESCRIPTOR::VALID::True
|
||||
+ STAGE1_DESCRIPTOR::TYPE::Table
|
||||
+ STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(lvl3_base))
|
||||
.value;
|
||||
|
||||
// The second 2 MiB block.
|
||||
LVL2_TABLE[1] = (STAGE1_DESCRIPTOR::VALID::True
|
||||
+ STAGE1_DESCRIPTOR::TYPE::Block
|
||||
+ STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL_NON_CACHEABLE)
|
||||
+ STAGE1_DESCRIPTOR::AP::RW_EL1
|
||||
+ STAGE1_DESCRIPTOR::SH::OuterShareable
|
||||
+ STAGE1_DESCRIPTOR::AF::True
|
||||
// This translation is accessed for virtual 0x200000. Point to physical
|
||||
// 0x400000, aka the third phyiscal 2 MiB DRAM block (third block == 2,
|
||||
// because we start counting at 0).
|
||||
//
|
||||
// Here, we configure it non-cacheable.
|
||||
+ STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(2)
|
||||
+ STAGE1_DESCRIPTOR::XN::True)
|
||||
.value;
|
||||
|
||||
// Fill the rest of the LVL2 (2MiB) entries as block
|
||||
// descriptors. Differentiate between normal and device mem.
|
||||
let mmio_base: u64 = (super::MMIO_BASE >> 21).into();
|
||||
let common = STAGE1_DESCRIPTOR::VALID::True
|
||||
+ STAGE1_DESCRIPTOR::TYPE::Block
|
||||
+ STAGE1_DESCRIPTOR::AP::RW_EL1
|
||||
+ STAGE1_DESCRIPTOR::AF::True
|
||||
+ STAGE1_DESCRIPTOR::XN::True;
|
||||
|
||||
// Notice the skip(2). Start at the third 2 MiB DRAM block, which will point
|
||||
// virtual 0x400000 to physical 0x400000, configured as cacheable memory.
|
||||
for (i, entry) in LVL2_TABLE.iter_mut().enumerate().skip(2) {
|
||||
let j: u64 = i as u64;
|
||||
|
||||
let mem_attr = if j >= mmio_base {
|
||||
STAGE1_DESCRIPTOR::SH::OuterShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::DEVICE)
|
||||
} else {
|
||||
STAGE1_DESCRIPTOR::SH::InnerShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL)
|
||||
};
|
||||
|
||||
*entry = (common + mem_attr + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(j)).value;
|
||||
}
|
||||
|
||||
// Finally, fill the single LVL3 table (4KiB granule). Differentiate between
|
||||
// code/RO and RW sections.
|
||||
//
|
||||
// Using the linker script, we ensure that the RO sections are 4KiB aligned,
|
||||
// and we export their boundaries via symbols.
|
||||
extern "C" {
|
||||
static mut __ro_start: u64;
|
||||
static mut __ro_end: u64;
|
||||
}
|
||||
|
||||
const PAGESIZE: u64 = 4096;
|
||||
let ro_start: u64 = &__ro_start as *const _ as u64 / PAGESIZE;
|
||||
let ro_end: u64 = &__ro_end as *const _ as u64 / PAGESIZE;
|
||||
let common = STAGE1_DESCRIPTOR::VALID::True
|
||||
+ STAGE1_DESCRIPTOR::TYPE::Table
|
||||
+ STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL)
|
||||
+ STAGE1_DESCRIPTOR::SH::InnerShareable
|
||||
+ STAGE1_DESCRIPTOR::AF::True;
|
||||
|
||||
for (i, entry) in SINGLE_LVL3_TABLE.iter_mut().enumerate() {
|
||||
let j: u64 = i as u64;
|
||||
|
||||
let mem_attr = if j < ro_start || j > ro_end {
|
||||
STAGE1_DESCRIPTOR::AP::RW_EL1 + STAGE1_DESCRIPTOR::XN::True
|
||||
} else {
|
||||
STAGE1_DESCRIPTOR::AP::RO_EL1 + STAGE1_DESCRIPTOR::XN::False
|
||||
};
|
||||
|
||||
*entry = (common + mem_attr + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(j)).value;
|
||||
}
|
||||
|
||||
// Point to the LVL2 table base address in TTBR0.
|
||||
TTBR0_EL1.set_baddr(LVL2_TABLE.base_addr());
|
||||
|
||||
// Configure various settings of stage 1 of the EL1 translation regime.
|
||||
let ips = ID_AA64MMFR0_EL1.read(ID_AA64MMFR0_EL1::PARange);
|
||||
TCR_EL1.write(
|
||||
TCR_EL1::TBI0::Ignored
|
||||
+ TCR_EL1::IPS.val(ips)
|
||||
+ TCR_EL1::TG0::KiB_4 // 4 KiB granule
|
||||
+ TCR_EL1::SH0::Inner
|
||||
+ TCR_EL1::ORGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable
|
||||
+ TCR_EL1::IRGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable
|
||||
+ TCR_EL1::EPD0::EnableTTBR0Walks
|
||||
+ TCR_EL1::T0SZ.val(34), // Start walks at level 2
|
||||
);
|
||||
|
||||
// Switch the MMU on.
|
||||
//
|
||||
// First, force all previous changes to be seen before the MMU is enabled.
|
||||
barrier::isb(barrier::SY);
|
||||
|
||||
// Enable the MMU and turn on caching
|
||||
SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable);
|
||||
|
||||
// Force MMU init to complete before next instruction
|
||||
barrier::isb(barrier::SY);
|
||||
}
|
@ -0,0 +1,310 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
use super::MMIO_BASE;
|
||||
use core::{
|
||||
ops,
|
||||
sync::atomic::{compiler_fence, Ordering},
|
||||
};
|
||||
use cortex_a::asm;
|
||||
use gpio;
|
||||
use mbox;
|
||||
use register::mmio::*;
|
||||
|
||||
// PL011 UART registers.
|
||||
//
|
||||
// Descriptions taken from
|
||||
// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf
|
||||
register_bitfields! {
    u32,

    /// Flag Register
    FR [
        /// Transmit FIFO full. How to read this bit depends on the FEN
        /// bit of the UARTLCR_H register: with the FIFO disabled it is
        /// set while the transmit holding register is full, with the
        /// FIFO enabled it is set while the transmit FIFO is full.
        TXFF OFFSET(5) NUMBITS(1) [],

        /// Receive FIFO empty. How to read this bit depends on the FEN
        /// bit of the UARTLCR_H register: with the FIFO disabled it is
        /// set while the receive holding register is empty, with the
        /// FIFO enabled it is set while the receive FIFO is empty.
        RXFE OFFSET(4) NUMBITS(1) []
    ],

    /// Integer baud rate divisor
    IBRD [
        /// Integer part of the baud rate divisor
        IBRD OFFSET(0) NUMBITS(16) []
    ],

    /// Fractional baud rate divisor
    FBRD [
        /// Fractional part of the baud rate divisor
        FBRD OFFSET(0) NUMBITS(6) []
    ],

    /// Line Control Register
    LCRH [
        /// Word length: the number of data bits transmitted or
        /// received in a frame.
        WLEN OFFSET(5) NUMBITS(2) [
            FiveBit = 0b00,
            SixBit = 0b01,
            SevenBit = 0b10,
            EightBit = 0b11
        ]
    ],

    /// Control Register
    CR [
        /// Receive enable. Setting this bit to 1 enables the receive
        /// section of the UART, so data reception occurs for UART
        /// signals. If the UART is disabled in the middle of
        /// reception, the current character is completed before it
        /// stops.
        RXE OFFSET(9) NUMBITS(1) [
            Disabled = 0,
            Enabled = 1
        ],

        /// Transmit enable. Setting this bit to 1 enables the transmit
        /// section of the UART, so data transmission occurs for UART
        /// signals. If the UART is disabled in the middle of
        /// transmission, the current character is completed before it
        /// stops.
        TXE OFFSET(8) NUMBITS(1) [
            Disabled = 0,
            Enabled = 1
        ],

        /// UART enable
        UARTEN OFFSET(0) NUMBITS(1) [
            /// A UART that is disabled in the middle of transmission
            /// or reception completes the current character before
            /// stopping.
            Disabled = 0,
            Enabled = 1
        ]
    ],

    /// Interrupt Clear Register
    ICR [
        /// Meta field covering all pending interrupts
        ALL OFFSET(0) NUMBITS(11) []
    ]
}
|
||||
|
||||
/// Address of the UART register window, located at offset `0x20_1000` from
/// `MMIO_BASE`. Presumably the value handed to `Uart::new` (confirm at the
/// call site).
pub const UART_PHYS_BASE: u32 = MMIO_BASE + 0x20_1000;
|
||||
|
||||
#[allow(non_snake_case)]
|
||||
#[repr(C)]
|
||||
pub struct RegisterBlock {
|
||||
DR: ReadWrite<u32>, // 0x00
|
||||
__reserved_0: [u32; 5], // 0x04
|
||||
FR: ReadOnly<u32, FR::Register>, // 0x18
|
||||
__reserved_1: [u32; 2], // 0x1c
|
||||
IBRD: WriteOnly<u32, IBRD::Register>, // 0x24
|
||||
FBRD: WriteOnly<u32, FBRD::Register>, // 0x28
|
||||
LCRH: WriteOnly<u32, LCRH::Register>, // 0x2C
|
||||
CR: WriteOnly<u32, CR::Register>, // 0x30
|
||||
__reserved_2: [u32; 4], // 0x34
|
||||
ICR: WriteOnly<u32, ICR::Register>, // 0x44
|
||||
}
|
||||
|
||||
/// Errors that can be reported while setting up the UART.
///
/// The derives make the type usable with `unwrap`/`expect`, `{:?}`
/// formatting and equality checks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UartError {
    /// The mailbox call that sets the UART clock rate failed.
    MailboxError,
}
|
||||
/// Shorthand for a `Result` whose error type is `UartError`.
pub type Result<T> = ::core::result::Result<T, UartError>;
|
||||
|
||||
/// Handle to a UART whose registers are memory mapped.
pub struct Uart {
    /// Address at which the register block starts.
    uart_base: u32,
}
|
||||
|
||||
impl ops::Deref for Uart {
|
||||
type Target = RegisterBlock;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
unsafe { &*self.ptr() }
|
||||
}
|
||||
}
|
||||
|
||||
impl Uart {
|
||||
pub fn new(uart_base: u32) -> Uart {
|
||||
Uart { uart_base }
|
||||
}
|
||||
|
||||
/// Returns a pointer to the register block
|
||||
fn ptr(&self) -> *const RegisterBlock {
|
||||
self.uart_base as *const _
|
||||
}
|
||||
|
||||
///Set baud rate and characteristics (115200 8N1) and map to GPIO
|
||||
pub fn init(&self, mbox: &mut mbox::Mbox) -> Result<()> {
|
||||
// turn off UART0
|
||||
self.CR.set(0);
|
||||
|
||||
// set up clock for consistent divisor values
|
||||
mbox.buffer[0] = 9 * 4;
|
||||
mbox.buffer[1] = mbox::REQUEST;
|
||||
mbox.buffer[2] = mbox::tag::SETCLKRATE;
|
||||
mbox.buffer[3] = 12;
|
||||
mbox.buffer[4] = 8;
|
||||
mbox.buffer[5] = mbox::clock::UART; // UART clock
|
||||
mbox.buffer[6] = 4_000_000; // 4Mhz
|
||||
mbox.buffer[7] = 0; // skip turbo setting
|
||||
mbox.buffer[8] = mbox::tag::LAST;
|
||||
|
||||
// Insert a compiler fence that ensures that all stores to the
|
||||
// mbox buffer are finished before the GPU is signaled (which
|
||||
// is done by a store operation as well).
|
||||
compiler_fence(Ordering::Release);
|
||||
|
||||
if mbox.call(mbox::channel::PROP).is_err() {
|
||||
return Err(UartError::MailboxError); // Abort if UART clocks couldn't be set
|
||||
};
|
||||
|
||||
// map UART0 to GPIO pins
|
||||
unsafe {
|
||||
(*gpio::GPFSEL1).modify(gpio::GPFSEL1::FSEL14::TXD0 + gpio::GPFSEL1::FSEL15::RXD0);
|
||||
|
||||
(*gpio::GPPUD).set(0); // enable pins 14 and 15
|
||||
for _ in 0..150 {
|
||||
asm::nop();
|
||||
}
|
||||
|
||||
(*gpio::GPPUDCLK0).modify(
|
||||
gpio::GPPUDCLK0::PUDCLK14::AssertClock + gpio::GPPUDCLK0::PUDCLK15::AssertClock,
|
||||
);
|
||||
for _ in 0..150 {
|
||||
asm::nop();
|
||||
}
|
||||
|
||||
(*gpio::GPPUDCLK0).set(0);
|
||||
}
|
||||
|
||||
self.ICR.write(ICR::ALL::CLEAR);
|
||||
self.IBRD.write(IBRD::IBRD.val(2)); // Results in 115200 baud
|
||||
self.FBRD.write(FBRD::FBRD.val(0xB));
|
||||
self.LCRH.write(LCRH::WLEN::EightBit); // 8N1
|
||||
self.CR
|
||||
.write(CR::UARTEN::Enabled + CR::TXE::Enabled + CR::RXE::Enabled);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Send a character
|
||||
pub fn send(&self, c: char) {
|
||||
// wait until we can send
|
||||
loop {
|
||||
if !self.FR.is_set(FR::TXFF) {
|
||||
break;
|
||||
}
|
||||
|
||||
asm::nop();
|
||||
}
|
||||
|
||||
// write the character to the buffer
|
||||
self.DR.set(c as u32);
|
||||
}
|
||||
|
||||
/// Receive a character
|
||||
pub fn getc(&self) -> char {
|
||||
// wait until something is in the buffer
|
||||
loop {
|
||||
if !self.FR.is_set(FR::RXFE) {
|
||||
break;
|
||||
}
|
||||
|
||||
asm::nop();
|
||||
}
|
||||
|
||||
// read it and return
|
||||
let mut ret = self.DR.get() as u8 as char;
|
||||
|
||||
// convert carrige return to newline
|
||||
if ret == '\r' {
|
||||
ret = '\n'
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
/// Display a string
|
||||
pub fn puts(&self, string: &str) {
|
||||
for c in string.chars() {
|
||||
// convert newline to carrige return + newline
|
||||
if c == '\n' {
|
||||
self.send('\r')
|
||||
}
|
||||
|
||||
self.send(c);
|
||||
}
|
||||
}
|
||||
|
||||
/// Display a binary value in hexadecimal
|
||||
pub fn hex(&self, d: u32) {
|
||||
let mut n;
|
||||
|
||||
for i in 0..8 {
|
||||
// get highest tetrad
|
||||
n = d.wrapping_shr(28 - i * 4) & 0xF;
|
||||
|
||||
// 0-9 => '0'-'9', 10-15 => 'A'-'F'
|
||||
// Add proper offset for ASCII table
|
||||
if n > 9 {
|
||||
n += 0x37;
|
||||
} else {
|
||||
n += 0x30;
|
||||
}
|
||||
|
||||
self.send(n as u8 as char);
|
||||
}
|
||||
}
|
||||
|
||||
/// Display a binary value in decimal
|
||||
pub fn dec(&self, d: u32) {
|
||||
let mut digits: [char; 10] = ['\0'; 10];
|
||||
let mut d = d;
|
||||
let mut i: usize = 0;
|
||||
|
||||
loop {
|
||||
digits[i] = ((d % 10) + 0x30) as u8 as char;
|
||||
|
||||
i += 1;
|
||||
d /= 10;
|
||||
|
||||
if d == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for c in digits.iter().rev() {
|
||||
self.send(*c);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue