diff --git a/CHANGELOG.md b/CHANGELOG.md index b3b6c63..b17a773 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add nOS-V API subsystem events for `nosv_create()` and `nosv_destroy()`. - Add TAMPI model with `T` code. - Add subsytem events and cfgs for TAMPI model. +- Add MPI model with `M` code. +- Add interface events and cfgs for MPI model. ## [1.2.2] - 2022-07-26 diff --git a/cfg/cpu/mpi/function.cfg b/cfg/cpu/mpi/function.cfg new file mode 100644 index 0000000..a25b0ee --- /dev/null +++ b/cfg/cpu/mpi/function.cfg @@ -0,0 +1,42 @@ +#ParaverCFG +ConfigFile.Version: 3.4 +ConfigFile.NumWindows: 1 + + +################################################################################ +< NEW DISPLAYING WINDOW CPU: MPI function of the RUNNING thread > +################################################################################ +window_name CPU: MPI function of the RUNNING thread +window_type single +window_id 1 +window_position_x 0 +window_position_y 0 +window_width 600 +window_height 150 +window_comm_lines_enabled true +window_flags_enabled false +window_noncolor_mode true +window_logical_filtered true +window_physical_filtered false +window_comm_fromto true +window_comm_tagsize true +window_comm_typeval true +window_units Microseconds +window_maximum_y 1000.0 +window_minimum_y 1.0 +window_compute_y_max true +window_level thread +window_scale_relative 1.000000000000 +window_end_time_relative 1.000000000000 +window_object appl { 1, { All } } +window_begin_time_relative 0.000000000000 +window_open true +window_drawmode draw_randnotzero +window_drawmode_rows draw_randnotzero +window_pixel_size 1 +window_labels_to_draw 1 +window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } +window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } +window_filter_module evt_type 1 25 +window_filter_module evt_type_label 1 "CPU: MPI function of the RUNNING thread" + diff --git a/cfg/thread/mpi/function.cfg b/cfg/thread/mpi/function.cfg new file mode 100644 index 0000000..3f87ff6 --- /dev/null +++ b/cfg/thread/mpi/function.cfg @@ -0,0 +1,42 @@ +#ParaverCFG +ConfigFile.Version: 3.4 +ConfigFile.NumWindows: 1 + + +################################################################################ +< NEW DISPLAYING WINDOW Thread: MPI function of the RUNNING thread > +################################################################################ +window_name Thread: MPI function of the RUNNING thread +window_type single +window_id 1 +window_position_x 0 +window_position_y 0 +window_width 600 +window_height 150 +window_comm_lines_enabled true +window_flags_enabled false +window_noncolor_mode true +window_logical_filtered true +window_physical_filtered false +window_comm_fromto true +window_comm_tagsize true +window_comm_typeval true +window_units Microseconds +window_maximum_y 1000.0 +window_minimum_y 1.0 +window_compute_y_max true +window_level thread +window_scale_relative 1.000000000000 +window_end_time_relative 1.000000000000 +window_object appl { 1, { All } } +window_begin_time_relative 0.000000000000 
+window_open true +window_drawmode draw_randnotzero +window_drawmode_rows draw_randnotzero +window_pixel_size 1 +window_labels_to_draw 1 +window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } +window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } +window_filter_module evt_type 1 25 +window_filter_module evt_type_label 1 "Thread: MPI function of the RUNNING thread" + diff --git a/doc/user/emulation/events.md b/doc/user/emulation/events.md index 1f7e955..27c8cc8 100644 --- a/doc/user/emulation/events.md +++ b/doc/user/emulation/events.md @@ -226,3 +226,110 @@ TTC Ends creating a ticket linked to a set of requests and a task TTw Begins to wait a ticket completion TTW Ends waiting a ticket completion ``` + +-------------------- MPI (model=M) ------------------------- + +MUi Enters MPI_Init +MUI Exits MPI_Init +MUt Enters MPI_Init_thread +MUT Exits MPI_Init_thread +MUf Enters MPI_Finalize +MUF Exits MPI_Finalize + +MW[ Enters MPI_Wait +MW] Exits MPI_Wait +MWa Enters MPI_Waitall +MWA Exits MPI_Waitall +MWy Enters MPI_Waitany +MWY Exits MPI_Waitany +MWs Enters MPI_Waitsome +MWS Exits MPI_Waitsome + +MT[ Enters MPI_Test +MT] Exits MPI_Test +MTa Enters MPI_Testall +MTA Exits MPI_Testall +MTy Enters MPI_Testany +MTY Exits MPI_Testany +MTs Enters MPI_Testsome +MTS Exits MPI_Testsome + +MS[ Enters MPI_Send +MS] Exits MPI_Send +MSb Enters MPI_Bsend +MSB Exits MPI_Bsend +MSr Enters MPI_Rsend +MSR Exits MPI_Rsend +MSs Enters MPI_Ssend +MSS Exits MPI_Ssend +MR[ Enters MPI_Recv +MR] Exits MPI_Recv +MRs Enters MPI_Sendrecv +MRS Exits MPI_Sendrecv +MRo Enters MPI_Sendrecv_replace +MRO Exits MPI_Sendrecv_replace + +MAg Enters MPI_Allgather +MAG Exits MPI_Allgather +MAr Enters MPI_Allreduce +MAR Exits MPI_Allreduce +MAa Enters MPI_Alltoall +MAA Exits MPI_Alltoall +MCb Enters MPI_Barrier +MCB Exits MPI_Barrier +MCe Enters MPI_Exscan +MCE Exits MPI_Exscan +MCs Enters MPI_Scan +MCS Exits MPI_Scan +MDb Enters MPI_Bcast +MDB Exits MPI_Bcast +MDg Enters MPI_Gather +MDG Exits MPI_Gather +MDs Enters MPI_Scatter +MDS Exits MPI_Scatter +ME[ Enters MPI_Reduce +ME] Exits MPI_Reduce +MEs Enters MPI_Reduce_scatter +MES Exits MPI_Reduce_scatter +MEb Enters MPI_Reduce_scatter_block +MEB Exits MPI_Reduce_scatter_block + +Ms[ Enters MPI_Isend +Ms] Exits MPI_Isend +Msb Enters MPI_Ibsend +MsB Exits MPI_Ibsend +Msr Enters MPI_Irsend +MsR Exits MPI_Irsend +Mss Enters MPI_Issend +MsS Exits MPI_Issend +Mr[ Enters MPI_Irecv +Mr] Exits MPI_Irecv +Mrs Enters MPI_Isendrecv +MrS Exits MPI_Isendrecv +Mro Enters MPI_Isendrecv_replace +MrO Exits MPI_Isendrecv_replace + +Mag Enters MPI_Iallgather +MaG Exits MPI_Iallgather +Mar Enters MPI_Iallreduce +MaR Exits MPI_Iallreduce +Maa Enters MPI_Ialltoall +MaA Exits MPI_Ialltoall +Mcb Enters MPI_Ibarrier +McB Exits MPI_Ibarrier +Mce Enters MPI_Iexscan +McE Exits MPI_Iexscan +Mcs Enters MPI_Iscan +McS Exits MPI_Iscan +Mdb Enters MPI_Ibcast +MdB Exits MPI_Ibcast +Mdg Enters MPI_Igather +MdG Exits MPI_Igather +Mds Enters MPI_Iscatter +MdS Exits MPI_Iscatter +Me[ Enters MPI_Ireduce +Me] Exits MPI_Ireduce +Mes Enters MPI_Ireduce_scatter +MeS Exits MPI_Ireduce_scatter +Meb Enters 
MPI_Ireduce_scatter_block
+MeB Exits MPI_Ireduce_scatter_block
diff --git a/doc/user/emulation/fig/mpi-function.png b/doc/user/emulation/fig/mpi-function.png
new file mode 100644
index 0000000..00da081
Binary files /dev/null and b/doc/user/emulation/fig/mpi-function.png differ
diff --git a/doc/user/emulation/mpi.md b/doc/user/emulation/mpi.md
new file mode 100644
index 0000000..86d3723
--- /dev/null
+++ b/doc/user/emulation/mpi.md
@@ -0,0 +1,242 @@
+# MPI model
+
+The [Message Passing Interface (MPI)][mpi] is a standard library interface
+specification for message-passing communication libraries targeting parallel
+computing architectures. The interface defines functions for point-to-point
+communication primitives, collectives, remote memory access (RMA), I/O and
+process management.
+
+The [Sonar][sonar] library instruments the most essential MPI functions that a
+user application or an external library may execute. Sonar tracks the calls
+made to these MPI functions at each point in time. Both users and developers
+can use this information to analyze the time spent inside MPI functions. The
+next section describes the view provided for this purpose.
+
+The Sonar library is compatible with the MPI standards 3.0, 3.1 and 4.0. See
+the [MPI documentation][mpi docs] for more information about the MPI standards
+and their functions.
+
+[mpi]: https://www.mpi-forum.org
+[mpi docs]: https://www.mpi-forum.org/docs
+[sonar]: https://pm.bsc.es/gitlab/ovni/sonar
+[sonar docs]: https://pm.bsc.es/gitlab/ovni/sonar/-/blob/main/README.md
+
+Sonar requires an installation of the ovni library and an MPI library. Use the
+option `--with-ovni=prefix` when building Sonar to specify the ovni prefix. The
+build procedure will compile and install the `libsonar-mpi.so` library. See the
+[Sonar documentation][sonar docs] for more details about the build steps.
+
+An application can instrument its MPI function calls by linking with the Sonar
+library `libsonar-mpi.so`. At run-time, Sonar does not enable the
+instrumentation by default; it instruments the MPI functions only when the
+environment variable `SONAR_MPI_INSTRUMENT` is set to `ovni`. Its default
+value is `none`.
+
+As an example, a user can generate a trace with MPI function events of an MPI
+program `app.c` in this way:
+
+```
+$ mpicc -c app.c -o app.o
+$ mpicc app.o -o app -L${SONAR_PREFIX}/lib -lsonar-mpi
+$ export SONAR_MPI_INSTRUMENT=ovni
+$ mpirun -n 2 ./app
+```
+
+This will generate an ovni trace in the `ovni` directory, which can be emulated
+using the `ovniemu` tool.
+
+!!! Note
+
+    Notice that the order of the libraries at the linking stage is important:
+    the Sonar library must always take precedence over the MPI library. That is
+    the usual behavior when using `mpicc` tools, which link the application
+    with the MPI libraries as the last entries in the list of dependencies. If
+    this order is not respected, the Sonar library is not able to intercept the
+    MPI function calls and instrument them.
+
+!!! Note
+
+    Notice that the Task-Aware MPI (TAMPI) library, as well as other external
+    libraries, also intercepts MPI functions and may issue other MPI calls
+    instead. Thus, the order in which such libraries and Sonar are linked to
+    the application also alters the resulting ovni trace. Give precedence to
+    the Sonar library to instrument the MPI function calls made by the
+    application; you can achieve this by linking with the options
+    `-lsonar-mpi -ltampi`. Otherwise, give precedence to the TAMPI library to
+    track the real MPI functions that are being executed (i.e., the ones that
+    the MPI library actually runs); in this case, use the linking options
+    `-ltampi -lsonar-mpi`.
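+
+As a rough illustration of the two orderings described in the note above
+(reusing the `app.o` object and the `mpicc` wrapper from the example above,
+and assuming a hypothetical `TAMPI_PREFIX` variable that points to the TAMPI
+installation), the link command would look similar to this sketch:
+
+```
+# Sonar first: the trace shows the MPI calls made by the application
+$ mpicc app.o -o app -L${SONAR_PREFIX}/lib -L${TAMPI_PREFIX}/lib -lsonar-mpi -ltampi
+
+# TAMPI first: the trace shows the MPI calls that actually reach the MPI library
+$ mpicc app.o -o app -L${TAMPI_PREFIX}/lib -L${SONAR_PREFIX}/lib -ltampi -lsonar-mpi
+```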
+
+## Function view
+
+The function view provides a general overview of which MPI functions are being
+executed at any point in time. The view shows the MPI functions called by each
+thread (and, for each CPU, the MPI functions executed by the running thread on
+that CPU).
+
+The function states shown in this view are listed below. Each function state
+(in bold) includes the list of MPI functions that are instrumented as that
+particular state. Notice that only the most important functions are
+instrumented, and that not all functions have their own state. For instance,
+the large count MPI functions (with `_c` suffix) introduced in MPI 4.0, the
+extended variants (with `v` or `w` suffix), and the Fortran functions (with
+lowercase name and `_` suffix) are instrumented as their plain C function
+without suffix.
+
+- *Setup functions*: The running thread is executing MPI setup functions to
+  initialize and finalize the MPI environment. The following function states
+  are shown:
+
+    - **MPI_Init**: `MPI_Init`, `mpi_init_`
+
+    - **MPI_Init_thread**: `MPI_Init_thread`, `mpi_init_thread_`
+
+    - **MPI_Finalize**: `MPI_Finalize`, `mpi_finalize_`
+
+- *Request functions*: The running thread is executing MPI functions that wait
+  for or test MPI requests generated by non-blocking MPI operations. The
+  following functions are instrumented:
+
+    - **MPI_Wait**: `MPI_Wait`, `mpi_wait_`
+
+    - **MPI_Waitall**: `MPI_Waitall`, `mpi_waitall_`
+
+    - **MPI_Waitany**: `MPI_Waitany`, `mpi_waitany_`
+
+    - **MPI_Waitsome**: `MPI_Waitsome`, `mpi_waitsome_`
+
+    - **MPI_Test**: `MPI_Test`, `mpi_test_`
+
+    - **MPI_Testall**: `MPI_Testall`, `mpi_testall_`
+
+    - **MPI_Testany**: `MPI_Testany`, `mpi_testany_`
+
+    - **MPI_Testsome**: `MPI_Testsome`, `mpi_testsome_`
+
+- *Point-to-point functions*: The running thread is communicating through MPI
+  by executing point-to-point primitives. The instrumented functions are:
+
+    - **MPI_Recv**: `MPI_Recv`, `MPI_Recv_c`, `mpi_recv_`
+
+    - **MPI_Send**: `MPI_Send`, `MPI_Send_c`, `mpi_send_`
+
+    - **MPI_Bsend**: `MPI_Bsend`, `MPI_Bsend_c`, `mpi_bsend_`
+
+    - **MPI_Rsend**: `MPI_Rsend`, `MPI_Rsend_c`, `mpi_rsend_`
+
+    - **MPI_Ssend**: `MPI_Ssend`, `MPI_Ssend_c`, `mpi_ssend_`
+
+    - **MPI_Sendrecv**: `MPI_Sendrecv`, `MPI_Sendrecv_c`, `mpi_sendrecv_`
+
+    - **MPI_Sendrecv_replace**: `MPI_Sendrecv_replace`,
+      `MPI_Sendrecv_replace_c`, `mpi_sendrecv_replace_`
+
+    - **MPI_Irecv**: `MPI_Irecv`, `MPI_Irecv_c`, `mpi_irecv_`
+
+    - **MPI_Isend**: `MPI_Isend`, `MPI_Isend_c`, `mpi_isend_`
+
+    - **MPI_Ibsend**: `MPI_Ibsend`, `MPI_Ibsend_c`, `mpi_ibsend_`
+
+    - **MPI_Irsend**: `MPI_Irsend`, `MPI_Irsend_c`, `mpi_irsend_`
+
+    - **MPI_Issend**: `MPI_Issend`, `MPI_Issend_c`, `mpi_issend_`
+
+    - **MPI_Isendrecv**: `MPI_Isendrecv`, `MPI_Isendrecv_c`, `mpi_isendrecv_`
+
+    - **MPI_Isendrecv_replace**: `MPI_Isendrecv_replace`,
+      `MPI_Isendrecv_replace_c`, `mpi_isendrecv_replace_`
+
+- *Collective functions*: The running thread is communicating through MPI by
+  executing collective functions. The instrumented functions are:
+
+    - **MPI_Gather**: `MPI_Gather`, `MPI_Gatherv`, `MPI_Gather_c`,
+      `MPI_Gatherv_c`, `mpi_gather_`, `mpi_gatherv_`
+
+    - **MPI_Allgather**: `MPI_Allgather`, `MPI_Allgatherv`, `MPI_Allgather_c`,
+      `MPI_Allgatherv_c`, `mpi_allgather_`, `mpi_allgatherv_`
+
+    - **MPI_Scatter**: `MPI_Scatter`, `MPI_Scatterv`, `MPI_Scatter_c`,
+      `MPI_Scatterv_c`, `mpi_scatter_`, `mpi_scatterv_`
+
+    - **MPI_Reduce**: `MPI_Reduce`, `MPI_Reduce_c`, `mpi_reduce_`
+
+    - **MPI_Reduce_scatter**: `MPI_Reduce_scatter`, `MPI_Reduce_scatter_c`,
+      `mpi_reduce_scatter_`
+
+    - **MPI_Reduce_scatter_block**: `MPI_Reduce_scatter_block`,
+      `MPI_Reduce_scatter_block_c`, `mpi_reduce_scatter_block_`
+
+    - **MPI_Allreduce**: `MPI_Allreduce`, `MPI_Allreduce_c`, `mpi_allreduce_`
+
+    - **MPI_Barrier**: `MPI_Barrier`, `MPI_Barrier_c`, `mpi_barrier_`
+
+    - **MPI_Bcast**: `MPI_Bcast`, `MPI_Bcast_c`, `mpi_bcast_`
+
+    - **MPI_Alltoall**: `MPI_Alltoall`, `MPI_Alltoallv`, `MPI_Alltoallw`,
+      `MPI_Alltoall_c`, `MPI_Alltoallv_c`, `MPI_Alltoallw_c`, `mpi_alltoall_`,
+      `mpi_alltoallv_`, `mpi_alltoallw_`
+
+    - **MPI_Scan**: `MPI_Scan`, `MPI_Scan_c`, `mpi_scan_`
+
+    - **MPI_Exscan**: `MPI_Exscan`, `MPI_Exscan_c`, `mpi_exscan_`
+
+    - **MPI_Igather**: `MPI_Igather`, `MPI_Igatherv`, `MPI_Igather_c`,
+      `MPI_Igatherv_c`, `mpi_igather_`, `mpi_igatherv_`
+
+    - **MPI_Iallgather**: `MPI_Iallgather`, `MPI_Iallgatherv`,
+      `MPI_Iallgather_c`, `MPI_Iallgatherv_c`, `mpi_iallgather_`,
+      `mpi_iallgatherv_`
+
+    - **MPI_Iscatter**: `MPI_Iscatter`, `MPI_Iscatterv`, `MPI_Iscatter_c`,
+      `MPI_Iscatterv_c`, `mpi_iscatter_`, `mpi_iscatterv_`
+
+    - **MPI_Ireduce**: `MPI_Ireduce`, `MPI_Ireduce_c`, `mpi_ireduce_`
+
+    - **MPI_Iallreduce**: `MPI_Iallreduce`, `MPI_Iallreduce_c`,
+      `mpi_iallreduce_`
+
+    - **MPI_Ireduce_scatter**: `MPI_Ireduce_scatter`, `MPI_Ireduce_scatter_c`,
+      `mpi_ireduce_scatter_`
+
+    - **MPI_Ireduce_scatter_block**: `MPI_Ireduce_scatter_block`,
+      `MPI_Ireduce_scatter_block_c`, `mpi_ireduce_scatter_block_`
+
+    - **MPI_Ibarrier**: `MPI_Ibarrier`, `MPI_Ibarrier_c`, `mpi_ibarrier_`
+
+    - **MPI_Ibcast**: `MPI_Ibcast`, `MPI_Ibcast_c`, `mpi_ibcast_`
+
+    - **MPI_Ialltoall**: `MPI_Ialltoall`, `MPI_Ialltoallv`, `MPI_Ialltoallw`,
+      `MPI_Ialltoall_c`, `MPI_Ialltoallv_c`, `MPI_Ialltoallw_c`,
+      `mpi_ialltoall_`, `mpi_ialltoallv_`, `mpi_ialltoallw_`
+
+    - **MPI_Iscan**: `MPI_Iscan`, `MPI_Iscan_c`, `mpi_iscan_`
+
+    - **MPI_Iexscan**: `MPI_Iexscan`, `MPI_Iexscan_c`, `mpi_iexscan_`
+
+!!! Note
+
+    The Sonar library does not support the large count MPI functions for the
+    Fortran language yet, and thus these functions are not instrumented.
+
+The figure below shows an example of the MPI function view. The program
+executes a distributed stencil algorithm with MPI and OmpSs-2. There are
+several MPI processes, each running OmpSs-2 tasks on an exclusive set of CPUs.
+Most of these are computation tasks, while the others are concurrent tasks
+performing communication using the blocking mode of the TAMPI library. The
+communication tasks use the `MPI_Send` and `MPI_Recv` functions to send and
+receive blocks of data. The program was linked with Sonar preceding the TAMPI
+library, so the trace shows the blocking MPI function calls made by the
+application.
+
+![MPI function view example](fig/mpi-function.png)
+
+The light green areas correspond to `MPI_Init_thread` calls, the grey ones are
+`MPI_Send` calls, and the dark green areas are `MPI_Recv` calls. There are
+also other, less frequent calls, such as `MPI_Bcast` (orange), `MPI_Barrier`
+(blue) and `MPI_Finalize` (red).
+ +As mentioned above, the trace shows the blocking MPI functions called by the +application because Sonar was placed before TAMPI in the linking order. However, +these blocking calls may not be actually executed by the MPI library; TAMPI will +transparently replace them with non-blocking calls (e.g., `MPI_Isend` and +`MPI_Irecv`) and a polling mechanism for the generated MPI requests. If you want +to explore the actual MPI functions being executed, you should link the Sonar +library after TAMPI. diff --git a/mkdocs.yml b/mkdocs.yml index f86796d..e09a3e6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -33,6 +33,7 @@ nav: - user/emulation/nosv.md - user/emulation/nanos6.md - user/emulation/tampi.md + - user/emulation/mpi.md - user/emulation/events.md - 'Developer guide': - dev/index.md diff --git a/src/emu/CMakeLists.txt b/src/emu/CMakeLists.txt index 1a6f72a..c252c6c 100644 --- a/src/emu/CMakeLists.txt +++ b/src/emu/CMakeLists.txt @@ -53,6 +53,8 @@ add_library(emu STATIC nosv/event.c nodes/setup.c nodes/event.c + mpi/setup.c + mpi/event.c tampi/setup.c tampi/event.c kernel/setup.c diff --git a/src/emu/emu_prv.h b/src/emu/emu_prv.h index 6e2ac01..4ae4c47 100644 --- a/src/emu/emu_prv.h +++ b/src/emu/emu_prv.h @@ -19,6 +19,7 @@ enum emu_prv_types { PRV_NOSV_SUBSYSTEM = 13, PRV_NOSV_RANK = 14, PRV_TAMPI_SUBSYSTEM = 20, + PRV_MPI_FUNCTION = 25, PRV_NODES_SUBSYSTEM = 30, PRV_NANOS6_TASKID = 35, PRV_NANOS6_TYPE = 36, diff --git a/src/emu/models.c b/src/emu/models.c index 40a35b7..c7788dc 100644 --- a/src/emu/models.c +++ b/src/emu/models.c @@ -12,6 +12,7 @@ extern struct model_spec model_nanos6; extern struct model_spec model_nosv; extern struct model_spec model_nodes; extern struct model_spec model_tampi; +extern struct model_spec model_mpi; extern struct model_spec model_kernel; static struct model_spec *models[] = { @@ -20,6 +21,7 @@ static struct model_spec *models[] = { &model_nosv, &model_nodes, &model_tampi, + &model_mpi, &model_kernel, NULL }; diff --git a/src/emu/mpi/event.c b/src/emu/mpi/event.c new file mode 100644 index 0000000..ba8a4e1 --- /dev/null +++ b/src/emu/mpi/event.c @@ -0,0 +1,201 @@ +/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "mpi_priv.h" +#include "chan.h" +#include "common.h" +#include "emu.h" +#include "emu_ev.h" +#include "extend.h" +#include "model_thread.h" +#include "thread.h" +#include "value.h" + +enum { PUSH = 1, POP = 2, IGN = 3 }; + +static const int fn_table[256][256][3] = { + ['U'] = { + ['i'] = { CH_FUNCTION, PUSH, ST_MPI_INIT }, + ['I'] = { CH_FUNCTION, POP, ST_MPI_INIT }, + ['t'] = { CH_FUNCTION, PUSH, ST_MPI_INIT_THREAD }, + ['T'] = { CH_FUNCTION, POP, ST_MPI_INIT_THREAD }, + ['f'] = { CH_FUNCTION, PUSH, ST_MPI_FINALIZE }, + ['F'] = { CH_FUNCTION, POP, ST_MPI_FINALIZE }, + }, + ['W'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_WAIT }, + [']'] = { CH_FUNCTION, POP, ST_MPI_WAIT }, + ['a'] = { CH_FUNCTION, PUSH, ST_MPI_WAITALL }, + ['A'] = { CH_FUNCTION, POP, ST_MPI_WAITALL }, + ['y'] = { CH_FUNCTION, PUSH, ST_MPI_WAITANY }, + ['Y'] = { CH_FUNCTION, POP, ST_MPI_WAITANY }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_WAITSOME }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_WAITSOME }, + }, + ['T'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_TEST }, + [']'] = { CH_FUNCTION, POP, ST_MPI_TEST }, + ['a'] = { CH_FUNCTION, PUSH, ST_MPI_TESTALL }, + ['A'] = { CH_FUNCTION, POP, ST_MPI_TESTALL }, + ['y'] = { CH_FUNCTION, PUSH, ST_MPI_TESTANY }, + ['Y'] = { CH_FUNCTION, POP, ST_MPI_TESTANY }, + ['s'] = { 
CH_FUNCTION, PUSH, ST_MPI_TESTSOME }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_TESTSOME }, + }, + ['R'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_RECV }, + [']'] = { CH_FUNCTION, POP, ST_MPI_RECV }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_SENDRECV }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_SENDRECV }, + ['o'] = { CH_FUNCTION, PUSH, ST_MPI_SENDRECV_REPLACE }, + ['O'] = { CH_FUNCTION, POP, ST_MPI_SENDRECV_REPLACE }, + }, + ['r'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_IRECV }, + [']'] = { CH_FUNCTION, POP, ST_MPI_IRECV }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_ISENDRECV }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_ISENDRECV }, + ['o'] = { CH_FUNCTION, PUSH, ST_MPI_ISENDRECV_REPLACE }, + ['O'] = { CH_FUNCTION, POP, ST_MPI_ISENDRECV_REPLACE }, + }, + ['S'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_SEND }, + [']'] = { CH_FUNCTION, POP, ST_MPI_SEND }, + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_BSEND }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_BSEND }, + ['r'] = { CH_FUNCTION, PUSH, ST_MPI_RSEND }, + ['R'] = { CH_FUNCTION, POP, ST_MPI_RSEND }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_SSEND }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_SSEND }, + }, + ['s'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_ISEND }, + [']'] = { CH_FUNCTION, POP, ST_MPI_ISEND }, + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_IBSEND }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_IBSEND }, + ['r'] = { CH_FUNCTION, PUSH, ST_MPI_IRSEND }, + ['R'] = { CH_FUNCTION, POP, ST_MPI_IRSEND }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_ISSEND }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_ISSEND }, + }, + ['A'] = { + ['g'] = { CH_FUNCTION, PUSH, ST_MPI_ALLGATHER }, + ['G'] = { CH_FUNCTION, POP, ST_MPI_ALLGATHER }, + ['r'] = { CH_FUNCTION, PUSH, ST_MPI_ALLREDUCE }, + ['R'] = { CH_FUNCTION, POP, ST_MPI_ALLREDUCE }, + ['a'] = { CH_FUNCTION, PUSH, ST_MPI_ALLTOALL }, + ['A'] = { CH_FUNCTION, POP, ST_MPI_ALLTOALL }, + }, + ['a'] = { + ['g'] = { CH_FUNCTION, PUSH, ST_MPI_IALLGATHER }, + ['G'] = { CH_FUNCTION, POP, ST_MPI_IALLGATHER }, + ['r'] = { CH_FUNCTION, PUSH, ST_MPI_IALLREDUCE }, + ['R'] = { CH_FUNCTION, POP, ST_MPI_IALLREDUCE }, + ['a'] = { CH_FUNCTION, PUSH, ST_MPI_IALLTOALL }, + ['A'] = { CH_FUNCTION, POP, ST_MPI_IALLTOALL }, + }, + ['C'] = { + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_BARRIER }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_BARRIER }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_SCAN }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_SCAN }, + ['e'] = { CH_FUNCTION, PUSH, ST_MPI_EXSCAN }, + ['E'] = { CH_FUNCTION, POP, ST_MPI_EXSCAN }, + }, + ['c'] = { + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_IBARRIER }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_IBARRIER }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_ISCAN }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_ISCAN }, + ['e'] = { CH_FUNCTION, PUSH, ST_MPI_IEXSCAN }, + ['E'] = { CH_FUNCTION, POP, ST_MPI_IEXSCAN }, + }, + ['D'] = { + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_BCAST }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_BCAST }, + ['g'] = { CH_FUNCTION, PUSH, ST_MPI_GATHER }, + ['G'] = { CH_FUNCTION, POP, ST_MPI_GATHER }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_SCATTER }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_SCATTER }, + }, + ['d'] = { + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_IBCAST }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_IBCAST }, + ['g'] = { CH_FUNCTION, PUSH, ST_MPI_IGATHER }, + ['G'] = { CH_FUNCTION, POP, ST_MPI_IGATHER }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_ISCATTER }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_ISCATTER }, + }, + ['E'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_REDUCE }, + [']'] = { CH_FUNCTION, POP, ST_MPI_REDUCE }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_REDUCE_SCATTER }, + ['S'] = 
{ CH_FUNCTION, POP, ST_MPI_REDUCE_SCATTER }, + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_REDUCE_SCATTER_BLOCK }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_REDUCE_SCATTER_BLOCK }, + }, + ['e'] = { + ['['] = { CH_FUNCTION, PUSH, ST_MPI_IREDUCE }, + [']'] = { CH_FUNCTION, POP, ST_MPI_IREDUCE }, + ['s'] = { CH_FUNCTION, PUSH, ST_MPI_IREDUCE_SCATTER }, + ['S'] = { CH_FUNCTION, POP, ST_MPI_IREDUCE_SCATTER }, + ['b'] = { CH_FUNCTION, PUSH, ST_MPI_IREDUCE_SCATTER_BLOCK }, + ['B'] = { CH_FUNCTION, POP, ST_MPI_IREDUCE_SCATTER_BLOCK }, + }, +}; + +static int +process_ev(struct emu *emu) +{ + if (!emu->thread->is_running) { + err("current thread %d not running", emu->thread->tid); + return -1; + } + + const int *entry = fn_table[emu->ev->c][emu->ev->v]; + int chind = entry[0]; + int action = entry[1]; + int st = entry[2]; + + struct mpi_thread *th = EXT(emu->thread, 'M'); + struct chan *ch = &th->m.ch[chind]; + + if (action == PUSH) { + return chan_push(ch, value_int64(st)); + } else if (action == POP) { + return chan_pop(ch, value_int64(st)); + } else if (action == IGN) { + return 0; /* do nothing */ + } + + err("unknown mpi function event"); + return -1; +} + +int +model_mpi_event(struct emu *emu) +{ + static int enabled = 0; + + if (!enabled) { + if (model_mpi_connect(emu) != 0) { + err("mpi_connect failed"); + return -1; + } + enabled = 1; + } + + dbg("in mpi_event"); + if (emu->ev->m != 'M') { + err("unexpected event model %c", emu->ev->m); + return -1; + } + + dbg("got mpi event %s", emu->ev->mcv); + if (process_ev(emu) != 0) { + err("error processing mpi event"); + return -1; + } + + return 0; +} diff --git a/src/emu/mpi/mpi_priv.h b/src/emu/mpi/mpi_priv.h new file mode 100644 index 0000000..980b7f6 --- /dev/null +++ b/src/emu/mpi/mpi_priv.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) + * SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef MPI_PRIV_H +#define MPI_PRIV_H + +#include "emu.h" +#include "model_cpu.h" +#include "model_thread.h" + +/* Private enums */ + +enum mpi_chan { + CH_FUNCTION = 0, + CH_MAX, +}; + +enum mpi_function_values { + ST_MPI_INIT = 1, + ST_MPI_INIT_THREAD, + ST_MPI_FINALIZE, + ST_MPI_WAIT, + ST_MPI_WAITALL, + ST_MPI_WAITANY, + ST_MPI_WAITSOME, + ST_MPI_TEST, + ST_MPI_TESTALL, + ST_MPI_TESTANY, + ST_MPI_TESTSOME, + ST_MPI_RECV, + ST_MPI_SEND, + ST_MPI_BSEND, + ST_MPI_RSEND, + ST_MPI_SSEND, + ST_MPI_SENDRECV, + ST_MPI_SENDRECV_REPLACE, + ST_MPI_IRECV, + ST_MPI_ISEND, + ST_MPI_IBSEND, + ST_MPI_IRSEND, + ST_MPI_ISSEND, + ST_MPI_ISENDRECV, + ST_MPI_ISENDRECV_REPLACE, + ST_MPI_ALLGATHER, + ST_MPI_ALLREDUCE, + ST_MPI_ALLTOALL, + ST_MPI_BARRIER, + ST_MPI_BCAST, + ST_MPI_GATHER, + ST_MPI_REDUCE, + ST_MPI_REDUCE_SCATTER, + ST_MPI_REDUCE_SCATTER_BLOCK, + ST_MPI_SCATTER, + ST_MPI_SCAN, + ST_MPI_EXSCAN, + ST_MPI_IALLGATHER, + ST_MPI_IALLREDUCE, + ST_MPI_IALLTOALL, + ST_MPI_IBARRIER, + ST_MPI_IBCAST, + ST_MPI_IGATHER, + ST_MPI_IREDUCE, + ST_MPI_IREDUCE_SCATTER, + ST_MPI_IREDUCE_SCATTER_BLOCK, + ST_MPI_ISCATTER, + ST_MPI_ISCAN, + ST_MPI_IEXSCAN, +}; + +struct mpi_thread { + struct model_thread m; +}; + +struct mpi_cpu { + struct model_cpu m; +}; + +int model_mpi_probe(struct emu *emu); +int model_mpi_create(struct emu *emu); +int model_mpi_connect(struct emu *emu); +int model_mpi_event(struct emu *emu); +int model_mpi_finish(struct emu *emu); + +#endif /* MPI_PRIV_H */ diff --git a/src/emu/mpi/setup.c b/src/emu/mpi/setup.c new file mode 100644 index 0000000..5aa948e --- /dev/null +++ b/src/emu/mpi/setup.c @@ -0,0 +1,252 @@ +/* Copyright (c) 2023 Barcelona 
Supercomputing Center (BSC)
+ * SPDX-License-Identifier: GPL-3.0-or-later */
+
+#include "mpi_priv.h"
+#include <stddef.h>
+#include "chan.h"
+#include "common.h"
+#include "emu.h"
+#include "emu_args.h"
+#include "emu_prv.h"
+#include "extend.h"
+#include "model.h"
+#include "model_chan.h"
+#include "model_cpu.h"
+#include "model_pvt.h"
+#include "model_thread.h"
+#include "pv/pcf.h"
+#include "pv/prv.h"
+#include "system.h"
+#include "thread.h"
+#include "track.h"
+#include "value.h"
+
+static const char model_name[] = "mpi";
+enum { model_id = 'M' };
+
+struct model_spec model_mpi = {
+	.name = model_name,
+	.model = model_id,
+	.create = model_mpi_create,
+//	.connect = model_mpi_connect,
+	.event = model_mpi_event,
+	.probe = model_mpi_probe,
+	.finish = model_mpi_finish,
+};
+
+/* ----------------- channels ------------------ */
+
+static const char *chan_name[CH_MAX] = {
+	[CH_FUNCTION] = "function",
+};
+
+static const int chan_stack[CH_MAX] = {
+	[CH_FUNCTION] = 1,
+};
+
+/* ----------------- pvt ------------------ */
+
+static const int pvt_type[CH_MAX] = {
+	[CH_FUNCTION] = PRV_MPI_FUNCTION,
+};
+
+static const char *pcf_prefix[CH_MAX] = {
+	[CH_FUNCTION] = "MPI function",
+};
+
+static const struct pcf_value_label mpi_function_values[] = {
+	{ ST_MPI_INIT, "MPI_Init" },
+	{ ST_MPI_INIT_THREAD, "MPI_Init_thread" },
+	{ ST_MPI_FINALIZE, "MPI_Finalize" },
+	{ ST_MPI_WAIT, "MPI_Wait" },
+	{ ST_MPI_WAITALL, "MPI_Waitall" },
+	{ ST_MPI_WAITANY, "MPI_Waitany" },
+	{ ST_MPI_WAITSOME, "MPI_Waitsome" },
+	{ ST_MPI_TEST, "MPI_Test" },
+	{ ST_MPI_TESTALL, "MPI_Testall" },
+	{ ST_MPI_TESTANY, "MPI_Testany" },
+	{ ST_MPI_TESTSOME, "MPI_Testsome" },
+	{ ST_MPI_RECV, "MPI_Recv" },
+	{ ST_MPI_SEND, "MPI_Send" },
+	{ ST_MPI_BSEND, "MPI_Bsend" },
+	{ ST_MPI_RSEND, "MPI_Rsend" },
+	{ ST_MPI_SSEND, "MPI_Ssend" },
+	{ ST_MPI_SENDRECV, "MPI_Sendrecv" },
+	{ ST_MPI_SENDRECV_REPLACE, "MPI_Sendrecv_replace" },
+	{ ST_MPI_IRECV, "MPI_Irecv" },
+	{ ST_MPI_ISEND, "MPI_Isend" },
+	{ ST_MPI_IBSEND, "MPI_Ibsend" },
+	{ ST_MPI_IRSEND, "MPI_Irsend" },
+	{ ST_MPI_ISSEND, "MPI_Issend" },
+	{ ST_MPI_ISENDRECV, "MPI_Isendrecv" },
+	{ ST_MPI_ISENDRECV_REPLACE, "MPI_Isendrecv_replace" },
+	{ ST_MPI_ALLGATHER, "MPI_Allgather" },
+	{ ST_MPI_ALLREDUCE, "MPI_Allreduce" },
+	{ ST_MPI_ALLTOALL, "MPI_Alltoall" },
+	{ ST_MPI_BARRIER, "MPI_Barrier" },
+	{ ST_MPI_BCAST, "MPI_Bcast" },
+	{ ST_MPI_GATHER, "MPI_Gather" },
+	{ ST_MPI_REDUCE, "MPI_Reduce" },
+	{ ST_MPI_REDUCE_SCATTER, "MPI_Reduce_scatter" },
+	{ ST_MPI_REDUCE_SCATTER_BLOCK, "MPI_Reduce_scatter_block" },
+	{ ST_MPI_SCATTER, "MPI_Scatter" },
+	{ ST_MPI_SCAN, "MPI_Scan" },
+	{ ST_MPI_EXSCAN, "MPI_Exscan" },
+	{ ST_MPI_IALLGATHER, "MPI_Iallgather" },
+	{ ST_MPI_IALLREDUCE, "MPI_Iallreduce" },
+	{ ST_MPI_IALLTOALL, "MPI_Ialltoall" },
+	{ ST_MPI_IBARRIER, "MPI_Ibarrier" },
+	{ ST_MPI_IBCAST, "MPI_Ibcast" },
+	{ ST_MPI_IGATHER, "MPI_Igather" },
+	{ ST_MPI_IREDUCE, "MPI_Ireduce" },
+	{ ST_MPI_IREDUCE_SCATTER, "MPI_Ireduce_scatter" },
+	{ ST_MPI_IREDUCE_SCATTER_BLOCK, "MPI_Ireduce_scatter_block" },
+	{ ST_MPI_ISCATTER, "MPI_Iscatter" },
+	{ ST_MPI_ISCAN, "MPI_Iscan" },
+	{ ST_MPI_IEXSCAN, "MPI_Iexscan" },
+	{ -1, NULL },
+};
+
+static const struct pcf_value_label *pcf_labels[CH_MAX] = {
+	[CH_FUNCTION] = mpi_function_values,
+};
+
+static const long prv_flags[CH_MAX] = {
+	[CH_FUNCTION] = PRV_SKIPDUP,
+};
+
+static const struct model_pvt_spec pvt_spec = {
+	.type = pvt_type,
+	.prefix = pcf_prefix,
+	.label = pcf_labels,
+	.flags = prv_flags,
+};
+
+/* ----------------- tracking 
------------------ */ + +static const int th_track[CH_MAX] = { + [CH_FUNCTION] = TRACK_TH_RUN, +}; + +static const int cpu_track[CH_MAX] = { + [CH_FUNCTION] = TRACK_TH_RUN, +}; + +/* ----------------- chan_spec ------------------ */ + +static const struct model_chan_spec th_chan = { + .nch = CH_MAX, + .prefix = model_name, + .ch_names = chan_name, + .ch_stack = chan_stack, + .pvt = &pvt_spec, + .track = th_track, +}; + +static const struct model_chan_spec cpu_chan = { + .nch = CH_MAX, + .prefix = model_name, + .ch_names = chan_name, + .ch_stack = chan_stack, + .pvt = &pvt_spec, + .track = cpu_track, +}; + +/* ----------------- models ------------------ */ + +static const struct model_cpu_spec cpu_spec = { + .size = sizeof(struct mpi_cpu), + .chan = &cpu_chan, + .model = &model_mpi, +}; + +static const struct model_thread_spec th_spec = { + .size = sizeof(struct mpi_thread), + .chan = &th_chan, + .model = &model_mpi, +}; + +/* ----------------------------------------------------- */ + +int +model_mpi_probe(struct emu *emu) +{ + if (emu->system.nthreads == 0) + return 1; + + return 0; +} + +int +model_mpi_create(struct emu *emu) +{ + if (model_thread_create(emu, &th_spec) != 0) { + err("model_thread_init failed"); + return -1; + } + + if (model_cpu_create(emu, &cpu_spec) != 0) { + err("model_cpu_init failed"); + return -1; + } + + return 0; +} + +int +model_mpi_connect(struct emu *emu) +{ + if (model_thread_connect(emu, &th_spec) != 0) { + err("model_thread_connect failed"); + return -1; + } + + if (model_cpu_connect(emu, &cpu_spec) != 0) { + err("model_cpu_connect failed"); + return -1; + } + + return 0; +} + +static int +end_lint(struct emu *emu) +{ + /* Only run the check if we finished the complete trace */ + if (!emu->finished) + return 0; + + struct system *sys = &emu->system; + + /* Ensure we run out of function states */ + for (struct thread *t = sys->threads; t; t = t->gnext) { + struct mpi_thread *th = EXT(t, model_id); + struct chan *ch = &th->m.ch[CH_FUNCTION]; + int stacked = ch->data.stack.n; + if (stacked > 0) { + struct value top; + if (chan_read(ch, &top) != 0) { + err("chan_read failed for function"); + return -1; + } + + err("thread %d ended with %d stacked mpi functions", + t->tid, stacked); + return -1; + } + } + + return 0; +} + +int +model_mpi_finish(struct emu *emu) +{ + /* When running in linter mode perform additional checks */ + if (emu->args.linter_mode && end_lint(emu) != 0) { + err("end_lint failed"); + return -1; + } + + return 0; +}