Increase RAM and improve DT

This commit is contained in:
Rodrigo Arias 2024-10-10 15:34:06 +02:00
parent c4da8156dc
commit 99c8d5e3de
5 changed files with 142 additions and 96 deletions

View File

@ -5278,4 +5278,16 @@ There are some operations we need to do on the FS before running the tests:
bash-5.2# export TMPDIR=/tmp
bash-5.2# speclaunch
So, let's prepare a script that performs the mini-init.
So, let's prepare a script that runs the SPEC mini.
The first benchmark to run is `600.perlbench_s` which seems to take 5338 seconds
(1.5 h) to run. I configured the pipeline to stop as soon as we have 2 h of
silence, but after 150 minutes (2.5 h) of execution time it has not finished yet.
I am not sure whether something is wrong. Maybe I can run vmstat a few times and
inspect the mount points to check that everything is correct.
I may want to also increase the RAM available, so we can potentially run other
benchmarks too.
At some point we may want to be able to specify the bootcmd from fpgactl
directly.

View File

@ -10,6 +10,7 @@ clean:
# Build the intermediate %.pp.dts from %.dts; it is rebuilt whenever the source
# or any header in this directory changes. Presumably $(CC) $(CPPFLAGS) only
# runs the preprocessor here -- CPPFLAGS is defined outside this hunk, TODO
# confirm.
# The sed step then rewrites the first "@0x" followed by any zeros on each line
# to a bare "@", so a node name expanded from a macro such as 0x40001000 ends
# up as "@40001000" (no "0x" prefix, no leading zeros).
%.pp.dts: %.dts *.h
$(CC) $(CPPFLAGS) $< -o $@
sed -i 's/@0x0*/@/' $@
# Compile the preprocessed source into a binary device tree blob with dtc.
%.dtb: %.pp.dts
dtc -O dtb -o $@ $^

View File

@ -7,21 +7,13 @@
compatible = "riscv,rv64i";
model = "Barcelona Supercomputing Center - Lagarto Ox (NixOS)";
aliases {
serial0 = &uart_console; // ttyS0
// serial1 = &uart_testing; // ttyS1
};
// chosen {
// bootargs = "earlyprintk ignore_loglevel earlycon=sbi console=hvc0 root=/dev/pmem0p1 ro init=/bin/bash";
// };
cpus {
#address-cells = <1>;
#size-cells = <0>;
timebase-frequency = <RTC_CLOCK_FREQUENCY>;
timebase-frequency = <RTC_FREQ>;
CPU0: cpu@0 {
clock-frequency = <CPU_CLOCK_FREQUENCY>;
clock-frequency = <CPU_FREQ>;
device_type = "cpu";
reg = <0>;
status = "okay";
@ -30,13 +22,6 @@
mmu-type = "riscv,sv39";
tlb-split;
// OpenPiton+Ariane Platform
// L1I Size / Assoc: 16 kB / 4
// L1D Size / Assoc: 32 kB / 4
// L15 Size / Assoc: 128 kB / 8
// L2 Size / Assoc: 256 kB / 4
// L15/L1D Cacheline size 64
i-cache-block-size = <64>; // Guess
i-cache-sets = <4>;
i-cache-size = <16384>;
@ -49,7 +34,6 @@
d-tlb-sets = <1>; // Guess
d-tlb-size = <32>; // Guess
phandle = <0x00000004>;
/* Hart-Level Interrupt Controller: Every interrupt is
* ultimately routed through a hart's HLIC before it
* interrupts that hart. */
@ -57,7 +41,6 @@
#interrupt-cells = <1>;
interrupt-controller; /* Receives interrupts */
compatible = "riscv,cpu-intc";
phandle = <0x5>;
};
};
cpu-map {
@ -68,109 +51,111 @@
};
};
};
/* Memory layout:
*
* [0x0_6000_0000, 0x0_7000_0000) -> DMA pool (256 MiB)
* [0x0_7000_0000, 0x0_8000_0000) -> DMA pool (256 MiB)
* [0x0_8000_0000, 0x0_b000_0000) -> RAM memory (768 MiB)
* [0x0_b000_0000, 0x0_c000_0000) -> Broken? (256 MiB)
* [0x0_c000_0000, 0x1_0000_0000) -> Empty (1024 MiB)
* [0x1_0000_0000, 0x1_c000_0000) -> PMEM (3072 MiB)
* [0x1_c000_0000, 0x2_8000_0000) -> Empty (3072 MiB)
*/
memory@80000000 {
memory@MEM_ADDR {
device_type = "memory";
reg = <0x0 0x80000000 0x0 0x30000000>;
reg = /bits/ 64 <MEM_ADDR MEM_SIZE>;
};
reserved-memory {
#address-cells = <2>; /* Starting address and size */
#size-cells = <2>; /* 64 bits memory addresses */
ranges;
eth_pool: dma_pool@60000000 {
reg = <0x0 0x60000000 0x0 0x10000000>;
eth_pool: dma_pool@ETHPOOL_ADDR {
reg = /bits/ 64 <ETHPOOL_ADDR ETHPOOL_SIZE>;
compatible = "shared-dma-pool";
};
onic_pool: dma_pool@70000000 {
reg = <0x0 0x70000000 0x0 0x10000000>;
onic_pool: dma_pool@ONICPOOL_ADDR {
reg = /bits/ 64 <ONICPOOL_ADDR ONICPOOL_SIZE>;
compatible = "shared-dma-pool";
};
};
dma_clk: dma_clk {
compatible = "fixed-clock";
#clock-cells = <0x00000000>;
clock-frequency = <0x09502f90>;
phandle = <0x00000002>;
};
pmem@100000000 {
pmem@PMEM_ADDR {
/* volatile; This property indicates that this region is
* actually backed by non-persistent memory. This lets the OS
* know that it may skip the cache flushes required to ensure
* data is made persistent after a write. */
volatile;
compatible = "pmem-region";
reg = <0x1 0x00000000 0x0 0xc0000000>;
reg = /bits/ 64 <PMEM_ADDR PMEM_SIZE>;
};
soc {
#address-cells = <0x00000002>;
#size-cells = <0x00000002>;
#address-cells = <2>;
#size-cells = <2>;
compatible = "BSC,Lagarto-ox-soc", "simple-bus";
ranges;
/* For bitstream e97dd7b2-397f-11ef-abe0-bbd201a5a630 with two
* consoles */
#ifdef ENABLE_UART0
/* The serial for the kernel console */
uart_console: serial@UART0_ADDR_HEX {
uart_console: serial@UART0_ADDR {
compatible = "ns16550";
reg = <0x0 UART0_ADDR 0x0 0x1000>;
reg = /bits/ 64 <UART0_ADDR UART0_SIZE>;
reg-shift = <2>;
/* No interrupts for this UART, use console=hvc0 */
/* This clock is the SERIAL_CLK */
clock-frequency = <CPU_CLOCK_FREQUENCY>;
current-speed = <UART_SPEED>;
clock-frequency = <CPU_FREQ>;
current-speed = <UART0_SPEED>;
status = "okay";
};
#endif /* ENABLE_UART0 */
#ifdef ENABLE_UART1
/* The serial for interrupt tests */
uart_testing: serial@40003000 {
uart_testing: serial@UART1_ADDR {
compatible = "ns16550";
reg = <0x0 0x40003000 0x0 0x1000>;
reg = /bits/ 64 <UART1_ADDR UART1_SIZE>;
reg-shift = <2>;
/* Output interrupt 1 (the first one) */
interrupts = <1>;
interrupt-parent = <&PLIC>;
clock-frequency = <CPU_CLOCK_FREQUENCY>;
current-speed = <UART_SPEED>;
clock-frequency = <CPU_FREQ>;
current-speed = <UART1_SPEED>;
status = "okay";
};
#endif /* ENABLE_UART1 */
#ifdef ENABLE_ETHERNET
ethernet0 {
xlnx,rxmem = <0x000005f2>;
carv,mtu = <0x000005dc>;
carv,no-mac;
device_type = "network";
// 02:$node:00:01:00:$fpga -> 02:05:00:01:00:02
// 10.5.1.$N/16 -> 10.5.1.184/16
// N = 150 + ($node - 1) * 8 + $fpga
local-mac-address = [00 00 00 00 00 00];
axistream-connected = <&axi_dma>;
compatible = "xlnx,xxv-ethernet-1.0-carv";
memory-region = <&eth_pool>;
xlnx,rxmem = <1522>;
carv,mtu = <1500>;
carv,no-mac;
device_type = "network";
// 02:$node:00:01:00:$fpga -> 02:05:00:01:00:02
// 10.5.1.$N/16 -> 10.5.1.184/16
// N = 150 + ($node - 1) * 8 + $fpga
local-mac-address = [00 00 00 00 00 00];
axistream-connected = <&axi_dma>;
compatible = "xlnx,xxv-ethernet-1.0-carv";
memory-region = <&eth_pool>;
};
#endif /* ENABLE_ETHERNET */
axi_dma: dma@40400000 {
#ifdef ENABLE_AXIDMA
dma_clk: dma_clk {
compatible = "fixed-clock";
#clock-cells = <0x0>;
clock-frequency = <AXIDMA_FREQ>;
};
axi_dma: dma@AXIDMA_ADDR {
reg = /bits/ 64 <AXIDMA_ADDR AXIDMA_SIZE>;
reg-shift = <2>;
#address-cells = <2>;
#size-cells = <2>;
xlnx,include-dre;
#dma-cells = <0x00000001>;
#dma-cells = <0x1>;
compatible = "xlnx,axi-dma-1.00.a";
clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk", "m_axi_s2mm_aclk", "m_axi_sg_aclk";
clock-names = "s_axi_lite_aclk", "m_axi_mm2s_aclk",
"m_axi_s2mm_aclk", "m_axi_sg_aclk";
clocks = <&dma_clk>, <&dma_clk>, <&dma_clk>, <&dma_clk>;
reg = <0x00000000 0x40400000 0x00000000 0x00400000>;
interrupt-names = "mm2s_introut", "s2mm_introut";
interrupt-parent = <&PLIC>;
interrupts = <2 3>;
xlnx,addrwidth = <0x28>;
xlnx,include-sg;
xlnx,sg-length-width = <0x17>;
dma-channel@40400000 {
dma-channel@AXIDMA_CH0 {
reg = /bits/ 64 <AXIDMA_CH0 0x30>;
compatible = "xlnx,axi-dma-mm2s-channel";
dma-channels = <0>;
interrupts = <2>;
@ -178,7 +163,8 @@
xlnx,device-id = <0x0>;
xlnx,include-dre;
};
dma-channel@40400030 {
dma-channel@AXIDMA_CH1 {
reg = /bits/ 64 <AXIDMA_CH1 0x30>;
compatible = "xlnx,axi-dma-s2mm-channel";
dma-channels = <1>;
interrupts = <3>;
@ -187,10 +173,13 @@
xlnx,include-dre;
};
};
#endif /* ENABLE_AXIDMA */
#ifdef ENABLE_PLIC
/* Platform-Level Interrupt Controller: Delivers interrupts to
* HARTs. */
PLIC: plic@40800000 {
PLIC: plic@PLIC_ADDR {
reg = /bits/ 64 <PLIC_ADDR PLIC_SIZE>;
compatible = "riscv,plic0";
interrupt-controller; /* Receives interrupts */
#address-cells = <0>;
@ -201,11 +190,12 @@
* - context 1: supervisor mode external interrupt (9)
*/
interrupts-extended = <&HLIC0 11>, <&HLIC0 9>;
reg = < 0x0 0x40800000 0x0 0x00400000>;
riscv,ndev = <4>;
riscv,ndev = <PLIC_NDEV>;
//riscv,max-priority = <0x7>;
phandle = <0x3>;
};
#endif /* ENABLE_PLIC */
#ifdef ENABLE_CLINT
/* Core Local Interruptor: It directly connects to the timer and
* inter-processor interrupt lines of various HARTs (or CPUs) so
* RISC-V per-HART (or per-CPU) local interrupt controller is
@ -216,32 +206,33 @@
* Documentation/devicetree/bindings/riscv/cpus.yaml
*/
clint: clint@40100000 {
reg = <0x0 0x40100000 0x0 0x00010000>;
reg = /bits/ 64 <CLINT_ADDR CLINT_SIZE>;
reg-names = "control";
interrupts-extended = <&HLIC0 3>, <&HLIC0 7>;
compatible = "riscv,clint0";
};
#endif /* ENABLE_CLINT */
#if 0
/* There is another auxiliary clint (timer) at 40010000 for
* tests, but we don't tell the kernel so we can use it for
* testing interrupts manually. */
#if 0
aux_timer: clint@40010000 {
reg = <0x0 0x40010000 0x0 0x00010000>;
reg = /bits/ 64 <0x0 0x40010000 0x0 0x00010000>;
reg-names = "control";
interrupts = <4>; /* PLIC input source 4 */
interrupt-parent = <&PLIC>;
compatible = "riscv,clint0";
};
#endif
#endif
#ifdef ENABLE_SPI
#ifdef ENABLE_SPI
uart16750: serial@40005000 {
compatible = "ns16750";
reg = <0x00000000 0x40005000 0x00000000 0x00001000>;
interrupt-parent = <&PLIC>;
interrupts = <5>;
clock-frequency = <CPU_CLOCK_FREQUENCY>;
clock-frequency = <CPU_FREQ>;
current-speed = <0x0001c200>;
status = "okay";
};
@ -258,6 +249,7 @@
loopback-mode = <1>;
status = "okay";
};
#endif
#endif /* ENABLE_SPI */
};
};

View File

@ -1,21 +1,62 @@
/* CPU is at 50 MHz */
#define CPU_CLOCK_FREQUENCY 50000000
/* The RTC timer is clocked at the CPU frequency / 1525, so
* around 32786.88 Hz */
//#define RTC_CLOCK_FREQUENCY 32786
#define CPU_FREQ 50000000 /* 50 MHz */
/* FIXME: The real RTC frequency is around half that, as the divider was wrongly
* configured. So for now let's use the real frequency:
* 50e6 / (1525*2) = 16393.44262295082 -> 16393 Hz */
#define RTC_CLOCK_FREQUENCY 16393
#define RTC_FREQ 16393
#define UART_SPEED 115200
#define UART0_ADDR 0x40001000
#define UART0_ADDR_HEX 40001000
/* Memory layout:
*
* [0x0_4000_0000, 0x0_6000_0000) -> IO (512 MiB)
* [0x0_6000_0000, 0x0_7000_0000) -> DMA pool (256 MiB)
* [0x0_7000_0000, 0x0_8000_0000) -> DMA pool (256 MiB)
* [0x0_8000_0000, 0x1_c000_0000) -> RAM memory (5 GiB)
* [0x1_c000_0000, 0x2_8000_0000) -> PMEM (3 GiB)
*/
#define PLIC_ADDR 0x40800000
#define UART_ADDR 0x40001000
#define UART0_SPEED 115200
#define UART0_ADDR 0x40001000
#define UART0_SIZE 0x00001000
#define UART1_SPEED UART0_SPEED
#define UART1_ADDR 0x40003000
#define UART1_SIZE 0x00001000
#define AUXTIMER_ADDR 0x40010000
#define AUXTIMER_SIZE 0x00010000
#define CLINT_ADDR 0x40100000
#define CLINT_SIZE 0x00010000
#define AXIDMA_ADDR 0x40400000
#define AXIDMA_SIZE 0x00400000
#define AXIDMA_CH0 0x40400000
#define AXIDMA_CH1 0x40400030
#define AXIDMA_FREQ 156250000
#define PLIC_ADDR 0x40800000
#define PLIC_SIZE 0x00400000
#define PLIC_NDEV 4
#define ETHPOOL_ADDR 0x60000000
#define ETHPOOL_SIZE 0x10000000
#define ONICPOOL_ADDR 0x70000000
#define ONICPOOL_SIZE 0x10000000
/* Notice addresses > 32 bits from here */
#define MEM_ADDR 0x080000000
#define MEM_SIZE 0x140000000
#define PMEM_ADDR 0x1c0000000
#define PMEM_SIZE 0x0c0000000
/* Toggles */
#define ENABLE_UART0
#define ENABLE_UART1
#define ENABLE_ETHERNET
#define ENABLE_AXIDMA
#define ENABLE_PLIC
#define ENABLE_CLINT
//#define ENABLE_SPI

View File

@ -43,7 +43,7 @@ function setup_meep()
export FPGACTL_BOOTLOADER_ADDR=$((0x80000000+$delta_addr))
export FPGACTL_KERNEL_ADDR=$((0x84000000+$delta_addr))
export FPGACTL_INITRD_ADDR=$((0x8c300000+$delta_addr))
export FPGACTL_ROOTFS_ADDR=$((0x100000000+$delta_addr))
export FPGACTL_ROOTFS_ADDR=$((0x1c0000000+$delta_addr))
export FPGACTL_BOOTROM_ADDR=$((0x00000100))
}