diff --git a/JOURNAL.md b/JOURNAL.md index c72e264..377cd98 100644 --- a/JOURNAL.md +++ b/JOURNAL.md @@ -2093,3 +2093,114 @@ It continues to hang, but at least now we don't have overlap of memory regions: Let's remove the clint from the device tree for now, just to see if it is affecting. + +Disabling the clint doesn't seem to cause any effect. In fact, the kernel is +still able to get a clock as shown in the kernel messages. + +I enabled the ftrace for initcall and I can see that is getting stuck in the +unaligned check: + + Domain0 Name : root + Domain0 Boot HART : 0 + Domain0 HARTs : 0* + Domain0 Region00 : 0x0000000040001000-0x0000000040001fff M: (I,R,W) S/U: (R,W) + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + Domain0 Region03 : 0x0000000040800000-0x0000000040bfffff M: (I,R,W) S/U: (R,W) + Domain0 Region04 : 0x0000000000000000-0xffffffffffffffff M: () S/U: (R,W,X) + Domain0 Next Address : 0x0000000080200000 + Domain0 Next Arg1 : 0x0000000080100000 + Domain0 Next Mode : S-mode + Domain0 SysReset : yes + Domain0 SysSuspend : yes + ... + => setenv bootargs "root=/dev/ram0 loglevel=7 debug rw earlycon=sbi console=hvc0 trace_event=initcall:* trace_options=sym-addr tp_printk trace_buf_size=1M" + => setenv ramdisk_size 12614846 + => booti ${kernel_addr_r} ${ramdisk_addr_r}:${ramdisk_size} ${fdtcontroladdr} + ... + [ 0.000000] printk: legacy bootconsole [sbi0] enabled + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000060000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@60000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000060000000..0x000000006fffffff (262144 KiB) map non-reusable dma_pool@60000000 + [ 0.000000] Reserved memory: created DMA memory pool at 0x0000000070000000, size 256 MiB + [ 0.000000] OF: reserved mem: initialized node dma_pool@70000000, compatible id shared-dma-pool + [ 0.000000] OF: reserved mem: 0x0000000070000000..0x000000007fffffff (262144 KiB) map non-reusable dma_pool@70000000 + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008003ffff (256 KiB) nomap non-reusable mmode_resv1@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080040000..0x000000008005ffff (128 KiB) nomap non-reusable mmode_resv0@80040000 + [ 0.000000] cma: Reserved 16 MiB at 0x00000000af000000 on node -1 + [ 0.000000] Zone ranges: + [ 0.000000] DMA32 [mem 0x0000000080000000-0x00000000afffffff] + [ 0.000000] Normal empty + [ 0.000000] Movable zone start for each node + [ 0.000000] Early memory node ranges + [ 0.000000] node 0: [mem 0x0000000080000000-0x000000008005ffff] + [ 0.000000] node 0: [mem 0x0000000080060000-0x00000000afffffff] + [ 0.000000] Initmem setup node 0 [mem 0x0000000080000000-0x00000000afffffff] + ... + [ 2.736300] initcall_finish: func=init_ladder+0x0/0x40 ret=0 + [ 2.742640] initcall_start: func=init_menu+0x0/0x38 + [ 2.748200] initcall_finish: func=init_menu+0x0/0x38 ret=0 + [ 2.754400] initcall_start: func=rpmsg_init+0x0/0xc0 + [ 2.763400] initcall_finish: func=rpmsg_init+0x0/0xc0 ret=0 + [ 2.769840] initcall_start: func=rpmsg_chrdev_init+0x0/0xc0 + [ 2.777660] initcall_finish: func=rpmsg_chrdev_init+0x0/0xc0 ret=0 + [ 2.784540] initcall_start: func=rpmsg_ctrldev_init+0x0/0xbc + [ 2.792460] initcall_finish: func=rpmsg_ctrldev_init+0x0/0xbc ret=0 + [ 2.799420] initcall_start: func=rpmsg_ns_init+0x0/0x6c + [ 2.807460] initcall_finish: func=rpmsg_ns_init+0x0/0x6c ret=0 + [ 2.814120] initcall_start: func=kobject_uevent_init+0x0/0x30 + [ 2.821120] initcall_finish: func=kobject_uevent_init+0x0/0x30 ret=0 + [ 2.830980] initcall_level: level=arch + [ 2.835460] initcall_start: func=riscv_cpuinfo_init+0x0/0x78 + [ 2.841960] initcall_finish: func=riscv_cpuinfo_init+0x0/0x78 ret=0 + [ 2.848920] initcall_start: func=vdso_init+0x0/0x4c + [ 2.855560] initcall_finish: func=vdso_init+0x0/0x4c ret=0 + [ 2.861900] initcall_start: func=check_unaligned_access_all_cpus+0x0/0x1d4 + +This makes me wonder if what has just happened is that somehow we have +overwritten the OpenSBI trap for unaligned access and we are now executing some +garbage code in machine mode. + +Based on the OpenSBI regions, I would assume here is where the traps should be: + + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + +It should be feasible to read the memory from the host where those traps are +placed. + +Here are the FDT reserved areas that U-Boot sees before switching to the kernel: + + => fdt print /reserved-memory + reserved-memory { + #address-cells = <0x00000002>; + #size-cells = <0x00000002>; + ranges; + mmode_resv1@80000000 { + reg = <0x00000000 0x80000000 0x00000000 0x00040000>; + no-map; + }; + mmode_resv0@80040000 { + reg = <0x00000000 0x80040000 0x00000000 0x00020000>; + no-map; + }; + dma_pool@60000000 { + reg = <0x00000000 0x60000000 0x00000000 0x10000000>; + compatible = "shared-dma-pool"; + }; + dma_pool@70000000 { + reg = <0x00000000 0x70000000 0x00000000 0x10000000>; + compatible = "shared-dma-pool"; + }; + }; + +Which match with the ones the kernel parse: + + [ 0.000000] OF: reserved mem: 0x0000000080000000..0x000000008003ffff (256 KiB) nomap non-reusable mmode_resv1@80000000 + [ 0.000000] OF: reserved mem: 0x0000000080040000..0x000000008005ffff (128 KiB) nomap non-reusable mmode_resv0@80040000 + +And with the OpenSBI regions (in reverse order): + + Domain0 Region01 : 0x0000000080040000-0x000000008005ffff M: (R,W) S/U: () + Domain0 Region02 : 0x0000000080000000-0x000000008003ffff M: (R,X) S/U: () + +Let's add some instrumentation in the code that handles the traps in OpenSBI. diff --git a/opensbi-timer-debug.patch b/opensbi-timer-debug.patch index 292e46c..8e3fc97 100644 --- a/opensbi-timer-debug.patch +++ b/opensbi-timer-debug.patch @@ -33,6 +33,38 @@ index 7b618de..65e42b0 100644 } void sbi_timer_exit(struct sbi_scratch *scratch) +diff --git a/lib/sbi/sbi_trap.c b/lib/sbi/sbi_trap.c +index b4f3a17..3c05a5b 100644 +--- a/lib/sbi/sbi_trap.c ++++ b/lib/sbi/sbi_trap.c +@@ -306,15 +306,18 @@ struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx) + + switch (mcause) { + case CAUSE_ILLEGAL_INSTRUCTION: ++ sbi_printf("OpenSBI: illegall instruction\n"); + rc = sbi_illegal_insn_handler(tcntx); + msg = "illegal instruction handler failed"; + break; + case CAUSE_MISALIGNED_LOAD: ++ sbi_printf("OpenSBI: misaligned load\n"); + sbi_pmu_ctr_incr_fw(SBI_PMU_FW_MISALIGNED_LOAD); + rc = sbi_misaligned_load_handler(tcntx); + msg = "misaligned load handler failed"; + break; + case CAUSE_MISALIGNED_STORE: ++ sbi_printf("OpenSBI: misaligned store\n"); + sbi_pmu_ctr_incr_fw(SBI_PMU_FW_MISALIGNED_STORE); + rc = sbi_misaligned_store_handler(tcntx); + msg = "misaligned store handler failed"; +@@ -344,6 +347,8 @@ struct sbi_trap_context *sbi_trap_handler(struct sbi_trap_context *tcntx) + trap_done: + if (rc) + sbi_trap_error(msg, rc, tcntx); ++ else ++ sbi_printf("OpenSBI: good trap\n"); + + if (((regs->mstatus & MSTATUS_MPP) >> MSTATUS_MPP_SHIFT) != PRV_M) + sbi_sse_process_pending_events(regs); diff --git a/lib/utils/timer/fdt_timer.c b/lib/utils/timer/fdt_timer.c index f468730..db20526 100644 --- a/lib/utils/timer/fdt_timer.c