#include #include #include #include #define assert(expression) \ do { if(!(expression)) { \ console_printf("Assertion failed: " _PDCLIB_symbol2string(expression)\ ", function ", __func__, \ ", file " __FILE__ \ ", line " _PDCLIB_symbol2string( __LINE__ ) \ "." _PDCLIB_endl ); \ while(1); \ } \ } while(0) #include "serial.h" #include "console.h" #include "atags.h" #include #include #define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) #define ARM_SCTLR_M 0x1 /* MMU enable */ #define ARM_SCTLR_V 0x2000 /* vectors base (high vs VBAR) */ #define ARM_SCTLR_VE 0x1000000 /* interrupt vectors enable */ #define ARM_SCTLR_AFE (1UL << 29) /* access flag enable -- simplified PTEs */ #define ARM_SCR_NS 0x01 // non-secure bit // defined in linker script extern char monitor_image_start, monitor_image_data, monitor_image_end; // defined in monitor image extern char monitor_stack_base, _monitor_vectors, _secure_vectors, g_secure_physbase; void park_secondary_cores(void); void leave_secure_world(void); static inline uint8_t mycoreid(void) { uint32_t val; __asm("mrc p15, 0, %0, c0, c0, 5" : "=r" (val)); return val & 0xff; } extern void print_hex(uint32_t val); static void secure_world_init(uintptr_t ptbase, uintptr_t vbar, uintptr_t mvbar, uintptr_t mon_sp) { uint32_t reg; /* setup secure-world page tables */ /* load the same page table base into both TTBR0 and TTBR1 * TTBR0 will change in the monitor's context switching code */ assert((ptbase & 0x3fff) == 0); uintptr_t ttbr = ptbase | 0x6a; // XXX: cache pt walks, seems a good idea! __asm volatile("mcr p15, 0, %0, c2, c0, 0" :: "r" (ttbr)); __asm volatile("mcr p15, 0, %0, c2, c0, 1" :: "r" (ttbr)); /* setup TTBCR for a 1G/3G address split, and enable both TTBR0 and TTBR1 * ref: B3.5.4 Selecting between TTBR0 and TTBR1, Short-descriptor * translation table format and B4.1.153 TTBCR, Translation Table * Base Control Register, VMSA */ __asm volatile("mcr p15, 0, %0, c2, c0, 2" :: "r" (2)); /* set domain 0 to manager access (??) */ __asm volatile("mcr p15, 0, %0, c3, c0, 0" :: "r" (3)); /* ensure that CONTEXTIDR (containing the current ASID) is set to 0 */ __asm volatile("mcr p15, 0, %0, c13, c0, 1" :: "r" (0)); /* flush stuff */ __asm volatile("dsb"); __asm volatile("isb"); __asm volatile("mcr p15, 0, r0, c8, c7, 0"); // TLBIALL /* enable the MMU in the system control register * (this should be ok, since we have a 1:1 map for low RAM) */ __asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r" (reg)); reg |= ARM_SCTLR_M | ARM_SCTLR_AFE; // while we're here, ensure that there's no funny business with the VBAR reg &= ~(ARM_SCTLR_V | ARM_SCTLR_VE); console_printf("updating SCR to %lx\n", reg); __asm volatile("mcr p15, 0, %0, c1, c0, 0" : : "r" (reg)); /* setup secure VBAR and MVBAR */ __asm volatile("mcr p15, 0, %0, c12, c0, 0" :: "r" (vbar)); __asm volatile("mcr p15, 0, %0, c12, c0, 1" :: "r" (mvbar)); /* update monitor-mode's banked SP value for use in later SMCs */ /* FIXME: this causes an undefined instruction exception on qemu, which doesn't seem to enable the virtualization extension on its cortex-a15 model (as it should!) */ __asm volatile("msr sp_mon, %0" :: "r" (mon_sp)); /* flush again */ __asm volatile("isb"); } static volatile bool global_barrier; static uintptr_t g_ptbase, g_vbar, g_mvbar; static void __attribute__((noreturn)) secondary_main(uint8_t coreid) { while (!global_barrier) __asm volatile("yield"); // TODO: compute per-core monitor stack pointer secure_world_init(g_ptbase, g_vbar, g_mvbar, 0xffffffff /* TODO */); leave_secure_world(); park_secondary_cores(); /* TODO */ while (1) {} } static void map_section(armpte_short_l1 *l1pt, uintptr_t vaddr, uintptr_t paddr) { uintptr_t idx = vaddr >> ARM_L1_SECTION_BITS; l1pt[idx].raw = (armpte_short_l1) { .section = { .type = 1, .b = 1, // write-back, write-allocate .c = 0, // write-back, write-allocate .xn = 0, .domain = 0, .ap0 = 1, // access flag = 1 (already accessed) .ap1 = 0, // system .tex = 5, // 0b101: cacheable, write-back, write-allocate .ap2 = 0, .s = 1, // shareable .ng = 0, // global (ASID doesn't apply) .ns = 0, // secure-world PA, not that it makes a difference on Pi .secbase = paddr >> ARM_L1_SECTION_BITS, } }.raw; } static void map_l2_pages(armpte_short_l2 *l2pt, uintptr_t vaddr, uintptr_t paddr, size_t bytes, bool exec) { assert((bytes & 0xfff) == 0); for (uintptr_t idx = (vaddr >> 12) & 0xff; bytes > 0; idx++) { //console_printf("map PA %lx at index %lx\n", paddr, idx); l2pt[idx].raw = (armpte_short_l2) { .smallpage = { .xn = exec ? 0 : 1, .type = 1, .b = 1, // write-back, write-allocate .c = 0, // write-back, write-allocate .ap0 = 1, // access flag = 1 (already accessed) .ap1 = 0, // system .tex = 5, // 0b101: cacheable, write-back, write-allocate .ap2 = exec ? 1 : 0, .s = 1, // shareable .ng = 0, // global (ASID doesn't apply) .base = paddr >> 12 } }.raw; bytes -= 0x1000; paddr += 0x1000; } } #if 0 static uintptr_t smc(uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3) { register uintptr_t r0 __asm("r0") = arg0; register uintptr_t r1 __asm("r1") = arg1; register uintptr_t r2 __asm("r2") = arg2; register uintptr_t r3 __asm("r3") = arg3; __asm("smc #0" : "+r" (r0), "+r" (r1), "+r" (r2), "+r" (r3) : "0" (r0), "1" (r1), "2" (r2), "3" (r3) ); return r0; } #endif void __attribute__((noreturn)) main(void) { uint8_t coreid = mycoreid(); if (coreid != 0) { secondary_main(coreid); } serial_init(); serial_putc('H'); console_puts("ello world\n"); uintptr_t reg; __asm("mrs %0, cpsr" : "=r" (reg)); console_printf("Initial CPSR: %lx\n", reg); __asm("mrc p15, 0, %0, c1, c1, 0" : "=r" (reg)); console_printf("Initial SCR: %lx\n", reg); /* dump ATAGS, and reserve some high RAM for monitor etc. */ atags_init((void *)0x100); atags_dump(); /* we need to reserve enough memory for the monitor image, plus * alignment up to a 16kB boundary, plus the L1 page table (16kB * in size), plus the L2 table (only 1kB in size, but we make it * 4kB for page-alignment of the overall allocation) */ uintptr_t monitor_physbase, ptbase; size_t monitor_image_bytes = &monitor_image_end - &monitor_image_start; size_t monitor_image_reserve = ROUND_UP(monitor_image_bytes, ARM_L1_PTABLE_BYTES); monitor_physbase = atags_reserve_physmem(monitor_image_reserve + ARM_L1_PTABLE_BYTES + KOM_PAGE_SIZE + KOM_SECURE_RESERVE); /* copy the monitor image into place */ console_printf("Copying monitor to %lx\n", monitor_physbase); memcpy((void *)monitor_physbase, &monitor_image_start, monitor_image_bytes); console_puts("Constructing page tables\n"); /* L1 page table must be 16kB-aligned */ ptbase = ROUND_UP(monitor_physbase + monitor_image_bytes, ARM_L1_PTABLE_BYTES); armpte_short_l1 *l1pt = (void *)ptbase; armpte_short_l2 *l2pt = (void *)(ptbase + ARM_L1_PTABLE_BYTES); uintptr_t secure_physbase = (uintptr_t)l2pt + KOM_PAGE_SIZE; console_printf("L1 %p L2 %p\n", l1pt, l2pt); /* direct-map first 1MB of RAM and UART registers using section mappings */ map_section(l1pt, 0, 0); map_section(l1pt, 0x3f200000, 0x3f200000); // TODO: not-cacheable!? /* direct-map phys memory in the 2-4G region for the monitor to use */ for (uintptr_t off = 0; off < KOM_DIRECTMAP_SIZE; off += ARM_L1_SECTION_SIZE) { map_section(l1pt, KOM_DIRECTMAP_VBASE + off, off); } /* install a second-level page table for the monitor image */ l1pt[KOM_MON_VBASE >> 20].raw = (armpte_short_l1){ .pagetable = { .type = 1, .pxn = 0, .ns = 0, // secure world PA, not that it matters on Pi? .ptbase = ((uintptr_t)l2pt) >> 10, } }.raw; // text and rodata size_t monitor_executable_size = &monitor_image_data - &monitor_image_start; console_printf("mapping monitor executable at %lx-%lx\n", KOM_MON_VBASE, KOM_MON_VBASE + monitor_executable_size); map_l2_pages(l2pt, KOM_MON_VBASE, monitor_physbase, monitor_executable_size, true); // data and bss console_printf("mapping monitor RW at %lx-%lx\n", KOM_MON_VBASE + monitor_executable_size, KOM_MON_VBASE + ROUND_UP(monitor_image_bytes, 0x1000)); map_l2_pages(l2pt, KOM_MON_VBASE + monitor_executable_size, monitor_physbase + monitor_executable_size, ROUND_UP(monitor_image_bytes, 0x1000) - monitor_executable_size, false); uintptr_t monitor_stack = &monitor_stack_base - &monitor_image_start + KOM_MON_VBASE; g_ptbase = ptbase; assert(&_monitor_vectors == &monitor_image_start); g_mvbar = &_monitor_vectors - &monitor_image_start + KOM_MON_VBASE; g_vbar = &_secure_vectors - &monitor_image_start + KOM_MON_VBASE; //print_hex(0x4237); //console_printf(" <-- Print_hex test\n"), secure_world_init(g_ptbase, g_vbar, g_mvbar, monitor_stack); /* init the monitor by setting up the secure physbase */ console_printf("passing secure_physbase %lx to monitor\n", secure_physbase); uintptr_t *monitor_secure_physbase = (uintptr_t *)(&g_secure_physbase - &monitor_image_start + KOM_MON_VBASE); *monitor_secure_physbase = secure_physbase; // this call will return in non-secure world (where MMUs are still off) leave_secure_world(); global_barrier = true; console_printf("entering kernel...\n"); typedef void kernel_entry(uintptr_t zero, uintptr_t boardid, void *atags); ((kernel_entry *)0x8000)(0, 0xc43, (void *)0x100); while (1) {} } void data_abort_handler(void) { uintptr_t dfar, dfsr; serial_putc('&'); console_puts("\nData abort!\n"); __asm("mrc p15, 0, %0, c5, c0, 0" : "=r" (dfsr)); __asm("mrc p15, 0, %0, c6, c0, 0" : "=r" (dfar)); console_printf ("DFAR %lx DFSR %lx\n", dfar, dfsr); while (1) {} }