Add TAMPI model with subsystems view

This commit is contained in:
Kevin Sala 2023-08-18 12:33:01 +02:00
parent 9269dd7202
commit 276afd5479
13 changed files with 641 additions and 0 deletions

View File

@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `ovni_version_get()` function.
- Add the `ovniver` program to report the libovni version and commit.
- Add nOS-V API subsystem events for `nosv_create()` and `nosv_destroy()`.
- Add TAMPI model with `T` code.
- Add subsystem events and cfgs for the TAMPI model.
## [1.2.2] - 2022-07-26

View File

@ -0,0 +1,42 @@
#ParaverCFG
ConfigFile.Version: 3.4
ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW CPU: TAMPI subsystem of the RUNNING thread >
################################################################################
window_name CPU: TAMPI subsystem of the RUNNING thread
window_type single
window_id 1
window_position_x 0
window_position_y 0
window_width 600
window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
window_comm_tagsize true
window_comm_typeval true
window_units Microseconds
window_maximum_y 1000.0
window_minimum_y 1.0
window_compute_y_max true
window_level thread
window_scale_relative 1.000000000000
window_end_time_relative 1.000000000000
window_object appl { 1, { All } }
window_begin_time_relative 0.000000000000
window_open true
window_drawmode draw_randnotzero
window_drawmode_rows draw_randnotzero
window_pixel_size 1
window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 20
window_filter_module evt_type_label 1 "CPU: TAMPI subsystem of the RUNNING thread"

View File

@ -0,0 +1,42 @@
#ParaverCFG
ConfigFile.Version: 3.4
ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW Thread: TAMPI subsystem of the ACTIVE thread >
################################################################################
window_name Thread: TAMPI subsystem of the ACTIVE thread
window_type single
window_id 1
window_position_x 0
window_position_y 0
window_width 600
window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
window_comm_tagsize true
window_comm_typeval true
window_units Microseconds
window_maximum_y 1000.0
window_minimum_y 1.0
window_compute_y_max true
window_level thread
window_scale_relative 1.000000000000
window_end_time_relative 1.000000000000
window_object appl { 1, { All } }
window_begin_time_relative 0.000000000000
window_open true
window_drawmode draw_randnotzero
window_drawmode_rows draw_randnotzero
window_pixel_size 1
window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 20
window_filter_module evt_type_label 1 "Thread: TAMPI subsystem of the ACTIVE thread"

View File

@ -193,4 +193,36 @@ KCI Is back in the CPU due to a context switch
6MA Ends allocating memory
6Mf Begins freeing memory
6MF Ends freeing memory
-------------------- TAMPI (model=T) ----------------------
TCi Begins to issue a non-blocking communication operation
TCI Ends issuing a non-blocking communication operation
TGc Begins to check pending requests from the global array
TGC Ends checking pending requests from the global array
TLi Begins the library code at an API function
TLI Ends the library code at an API function
TLp Begins the library code at a polling function
TLP Ends the library code at a polling function
TQa Begins to add a ticket/requests to a queue
TQA Ends adding a ticket/requests to a queue
TQt Begins to transfer tickets/requests from queues to global array
TQT Ends transferring tickets/requests from queues to global array
TRc Begins to process a completed request
TRC Ends processing a completed request
TRt Begins to test a single request with MPI_Test
TRT Ends testing a single request with MPI_Test
TRa Begins to test several requests with MPI_Testall
TRA Ends testing several requests with MPI_Testall
TRs Begins to test several requests with MPI_Testsome
TRS Ends testing several requests with MPI_Testsome
TTc Begins to create a ticket linked to a set of requests and a task
TTC Ends creating a ticket linked to a set of requests and a task
TTw Begins to wait for a ticket completion
TTW Ends waiting for a ticket completion
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

144
doc/user/emulation/tampi.md Normal file
View File

@ -0,0 +1,144 @@
# TAMPI model
The Task-Aware MPI (TAMPI) library extends the functionality of standard MPI
libraries by providing new mechanisms for improving the interoperability between
parallel task-based programming models, such as OpenMP and OmpSs-2, and MPI
communications. This library allows the safe and efficient execution of MPI
operations from concurrent tasks and guarantees the transparent management and
progress of these communications.
[tampi repo]: https://github.com/bsc-pm/tampi
[tampi docs]: https://github.com/bsc-pm/tampi#readme
[tampi blk]: https://github.com/bsc-pm/tampi#blocking-mode-ompss-2
[tampi nonblk]: https://github.com/bsc-pm/tampi#non-blocking-mode-openmp--ompss-2
The TAMPI library has instrumented the execution of its task-aware functions
with ovni. To obtain an instrumented library, TAMPI must be built passing the
`--with-ovni` configure option and specifying the ovni installation prefix. At
run-time, the user can enable the instrumentation by defining the environment
variable `TAMPI_INSTRUMENT=ovni`.
For more information regarding TAMPI or how to enable its instrumentation see
the TAMPI [repository][tampi repo] and [documentation][tampi docs].
TAMPI is instrumented to track the execution path inside the run-time library
to identify what is happening at each moment. This information can be used by
both users and developers to analyze problems or to better understand the
execution behavior of TAMPI communications and its background services. There is
one view generated to achieve this goal.
## Subsystem view
The subsystem view attempts to provide a general overview of what TAMPI is doing
at any point in time. The view shows the state inside the TAMPI library for each
thread (and for each CPU, the state of the running thread in that CPU). This
subsystem state view sticks to the definition of subsystem states from the
[Nanos6](nanos6.md#subsystem-view).
The states shown in this view are:
- **Library code subsystem**: Indicating whether the running thread is executing
effective TAMPI library code. These subsystem states wrap the rest of
subsystems that are described below. No other TAMPI state can appear outside
of a TAMPI library code subsystem state.
- **Interface function**: Running any TAMPI API function or an intercepted
MPI function which requires task-awareness. When the user application
disables a TAMPI mode, whether the [blocking][tampi blk] or
[non-blocking][tampi nonblk] mode, any call to an interface function
corresponding to the disabled mode will not appear in the view. Operations
that are directly forwarded to MPI (because TAMPI is not asked to apply
task-awareness) will not appear.
- **Polling function**: The TAMPI library can launch internal tasks to
execute polling functions in the background. Currently, TAMPI launches a
polling task that periodically checks and processes the pending MPI
requests generated by task-aware operations. This polling state may not
appear if none of the TAMPI modes are enabled by the user application.
- **Communication subsystem**: The running thread is communicating through MPI
or issuing an asynchronous communication operation.
- **Issuing a non-blocking operation**: Issuing a non-blocking MPI operation
that can generate an MPI request.
- **Ticket subsystem**: Creation and managing of tickets. A ticket is an
internal object that describes the relation between a set of pending MPI
requests and the user communication task that is *waiting* (synchronous or
asynchronously) on them. A ticket is used for both [blocking][tampi blk] and
[non-blocking][tampi nonblk] operations.
- **Creating a ticket**: Creating a ticket that is linked to a set of MPI
requests and a user task. The user task is the task that is *waiting* for
these requests to complete. Notice that *waiting* does not mean that the
task will synchronously wait for them. The ticket is initialized with a
counter of how many requests are still pending. The ticket is completed,
and thus, the task is notified, when this counter becomes zero.
- **Waiting for the ticket completion**: The user task, during a blocking
    TAMPI operation, is waiting for a ticket and its requests to complete. The
task may be blocked and yield the CPU meanwhile. Notice that user tasks
calling non-blocking TAMPI operations will not enter in this state.
- **Staging queue subsystem**: Queueing and dequeueing requests from the staging
queues before being transferred to the global array of requests and tickets.
These queues are used to optimize and control insertion of these objects into
the global array.
- **Adding to a queue**: A user communication task running a task-aware
TAMPI operation is pushing the corresponding MPI requests and the related
ticket into a staging queue.
    - **Transferring from queues to the global array**: The polling task is
transferring the staged requests and tickets from the queues to the global
array.
- **Global array subsystem**: Managing the per-process global array of tickets
and MPI requests related to TAMPI operations.
- **Checking pending requests**: Testing all pending MPI requests from the
global array, processing the completed requests, and reorganizing the
array to keep it compacted.
- **Request subsystem**: Management and testing of pending MPI requests, and
processing the completed ones. This state considers only the management of MPI
requests concerning task-aware operations, which are exclusively tested by the
TAMPI library. Any testing function call made by the user application or other
libraries is not considered.
- **Testing a request with MPI_Test**: Testing a single MPI request by
calling MPI_Test inside the TAMPI library.
- **Testing requests with MPI_Testall**: Testing multiple MPI requests by
calling MPI_Testall inside the TAMPI library.
- **Testing requests with MPI_Testsome**: Testing multiple MPI requests by
calling MPI_Testsome inside the TAMPI library.
- **Processing a completed request**: Processing a completed MPI request by
decreasing the number of pending requests of the linked ticket. If the
ticket does not have any other request to wait, the ticket is completed
and the *waiting* task is notified. In such a case, a call to the tasking
runtime system will occur. If the operation was [blocking][tampi blk], the
*waiting* task will be unblocked and will eventually resume the execution.
If the operation was [non-blocking][tampi nonblk], the library will
decrease the external events of the *waiting* task.
The figure below shows an example of the subsystem view. The program executes a
distributed stencil algorithm with MPI and OmpSs-2. There are several MPI
processes and each process has OmpSs-2 tasks running exclusively on multiple CPU
resources.
![Subsystem view example](fig/tampi-subsystem.png)
The view shows there are several user tasks running task-aware communication
operations. The light blue areas show when a user task is testing a request that
was generated by a non-blocking MPI communication function. There is also one
polling task per process. The yellow areas show when the polling tasks are
calling MPI_Testsome. Just after the testsome call, the violet areas show the
moment when the polling task is processing the completed requests.
This view shows that most of the time inside the TAMPI library is spent testing
requests. This could give us a clue that the underlying MPI library may have
concurrency issues (e.g., thread contention) when multiple threads try to test
requests in parallel.

View File

@ -32,6 +32,7 @@ nav:
- user/emulation/ovni.md
- user/emulation/nosv.md
- user/emulation/nanos6.md
- user/emulation/tampi.md
- user/emulation/events.md
- 'Developer guide':
- dev/index.md

View File

@ -53,6 +53,8 @@ add_library(emu STATIC
nosv/event.c
nodes/setup.c
nodes/event.c
tampi/setup.c
tampi/event.c
kernel/setup.c
kernel/event.c
)

View File

@ -18,6 +18,7 @@ enum emu_prv_types {
PRV_NOSV_APPID = 12,
PRV_NOSV_SUBSYSTEM = 13,
PRV_NOSV_RANK = 14,
PRV_TAMPI_SUBSYSTEM = 20,
PRV_NODES_SUBSYSTEM = 30,
PRV_NANOS6_TASKID = 35,
PRV_NANOS6_TYPE = 36,

View File

@ -11,6 +11,7 @@ extern struct model_spec model_ovni;
extern struct model_spec model_nanos6;
extern struct model_spec model_nosv;
extern struct model_spec model_nodes;
extern struct model_spec model_tampi;
extern struct model_spec model_kernel;
static struct model_spec *models[] = {
@ -18,6 +19,7 @@ static struct model_spec *models[] = {
&model_nanos6,
&model_nosv,
&model_nodes,
&model_tampi,
&model_kernel,
NULL
};

111
src/emu/tampi/event.c Normal file
View File

@ -0,0 +1,111 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "tampi_priv.h"
#include "chan.h"
#include "common.h"
#include "emu.h"
#include "emu_ev.h"
#include "extend.h"
#include "model_thread.h"
#include "thread.h"
#include "value.h"
/* Actions an ss_table entry can request on a channel: push a new state,
 * pop the matching state, or deliberately ignore the event. A
 * zero-filled entry (unknown event) matches none of these values and is
 * rejected by process_ev(). */
enum { PUSH = 1, POP = 2, IGN = 3 };

/* Shorthand so the table entries below fit on one line. */
#define CHSS CH_SUBSYSTEM

/* Event dispatch table indexed by the event category and value
 * characters (e.g. "TQa" -> ['Q']['a']). Each entry holds
 * { channel index, action, subsystem state }. See the TAMPI section of
 * the events documentation for the meaning of each code. */
static const int ss_table[256][256][3] = {
	['C'] = {
		['i'] = { CHSS, PUSH, ST_COMM_ISSUE_NONBLOCKING },
		['I'] = { CHSS, POP, ST_COMM_ISSUE_NONBLOCKING },
	},
	['G'] = {
		['c'] = { CHSS, PUSH, ST_GLOBAL_ARRAY_CHECK },
		['C'] = { CHSS, POP, ST_GLOBAL_ARRAY_CHECK },
	},
	['L'] = {
		['i'] = { CHSS, PUSH, ST_LIBRARY_INTERFACE },
		['I'] = { CHSS, POP, ST_LIBRARY_INTERFACE },
		['p'] = { CHSS, PUSH, ST_LIBRARY_POLLING },
		['P'] = { CHSS, POP, ST_LIBRARY_POLLING },
	},
	['Q'] = {
		['a'] = { CHSS, PUSH, ST_QUEUE_ADD },
		['A'] = { CHSS, POP, ST_QUEUE_ADD },
		['t'] = { CHSS, PUSH, ST_QUEUE_TRANSFER },
		['T'] = { CHSS, POP, ST_QUEUE_TRANSFER },
	},
	['R'] = {
		['c'] = { CHSS, PUSH, ST_REQUEST_COMPLETED },
		['C'] = { CHSS, POP, ST_REQUEST_COMPLETED },
		['t'] = { CHSS, PUSH, ST_REQUEST_TEST },
		['T'] = { CHSS, POP, ST_REQUEST_TEST },
		['a'] = { CHSS, PUSH, ST_REQUEST_TESTALL },
		['A'] = { CHSS, POP, ST_REQUEST_TESTALL },
		['s'] = { CHSS, PUSH, ST_REQUEST_TESTSOME },
		['S'] = { CHSS, POP, ST_REQUEST_TESTSOME },
	},
	['T'] = {
		['c'] = { CHSS, PUSH, ST_TICKET_CREATE },
		['C'] = { CHSS, POP, ST_TICKET_CREATE },
		['w'] = { CHSS, PUSH, ST_TICKET_WAIT },
		['W'] = { CHSS, POP, ST_TICKET_WAIT },
	},
};
/*
 * Processes a single TAMPI event for the current thread: looks up the
 * (channel, action, state) triple in ss_table and pushes or pops the
 * state on the thread's subsystem channel.
 *
 * Returns 0 on success, -1 on error (thread not running, channel
 * operation failure, or an event with no table entry).
 */
static int
process_ev(struct emu *emu)
{
	if (!emu->thread->is_running) {
		err("current thread %d not running", emu->thread->tid);
		return -1;
	}

	/* Cast the event characters to unsigned char before indexing:
	 * if they are plain (possibly signed) char, a value above 127
	 * would index the table out of bounds. */
	const int *entry = ss_table[(unsigned char) emu->ev->c][(unsigned char) emu->ev->v];
	int chind = entry[0];
	int action = entry[1];
	int st = entry[2];

	struct tampi_thread *th = EXT(emu->thread, 'T');
	struct chan *ch = &th->m.ch[chind];

	if (action == PUSH) {
		return chan_push(ch, value_int64(st));
	} else if (action == POP) {
		return chan_pop(ch, value_int64(st));
	} else if (action == IGN) {
		return 0; /* do nothing */
	}

	/* Zero-filled table entry: event code not recognized. */
	err("unknown TAMPI subsystem event");
	return -1;
}
/*
 * Entry point for every event carrying the TAMPI model code 'T'.
 * On the first event, lazily connects the model channels (the spec
 * registers no .connect hook); then dispatches to process_ev().
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_event(struct emu *emu)
{
	/* Lazy one-shot connection, performed on the first event only. */
	static int connected = 0;

	if (!connected) {
		if (model_tampi_connect(emu) != 0) {
			err("tampi_connect failed");
			return -1;
		}
		connected = 1;
	}

	dbg("in tampi_event");

	if (emu->ev->m != 'T') {
		err("unexpected event model %c", emu->ev->m);
		return -1;
	}

	dbg("got tampi event %s", emu->ev->mcv);

	if (process_ev(emu) == 0)
		return 0;

	err("error processing TAMPI event");
	return -1;
}

215
src/emu/tampi/setup.c Normal file
View File

@ -0,0 +1,215 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "tampi_priv.h"
#include <stddef.h>
#include "chan.h"
#include "common.h"
#include "emu.h"
#include "emu_args.h"
#include "emu_prv.h"
#include "extend.h"
#include "model.h"
#include "model_chan.h"
#include "model_cpu.h"
#include "model_pvt.h"
#include "model_thread.h"
#include "pv/pcf.h"
#include "pv/prv.h"
#include "system.h"
#include "thread.h"
#include "track.h"
#include "value.h"
/* Model identity: the name is used as the channel path prefix and the
 * single character 'T' is the model code carried by the events. */
static const char model_name[] = "tampi";
enum { model_id = 'T' };

struct model_spec model_tampi = {
	.name = model_name,
	.model = model_id,
	.create = model_tampi_create,
	/* Connection is performed lazily from model_tampi_event() when
	 * the first TAMPI event arrives, so no .connect hook is set. */
	// .connect = model_tampi_connect,
	.event = model_tampi_event,
	.probe = model_tampi_probe,
	.finish = model_tampi_finish,
};
/* ----------------- channels ------------------ */

/* Channel names, used to build each channel's full path. */
static const char *chan_name[CH_MAX] = {
	[CH_SUBSYSTEM] = "subsystem",
};

/* Channels operated as a stack (push/pop) instead of a plain value. */
static const int chan_stack[CH_MAX] = {
	[CH_SUBSYSTEM] = 1,
};

/* ----------------- pvt ------------------ */

/* Paraver (PRV) event type of each channel; must match the type used
 * in the TAMPI .cfg files (see emu_prv.h). */
static const int pvt_type[CH_MAX] = {
	[CH_SUBSYSTEM] = PRV_TAMPI_SUBSYSTEM,
};

/* Prefix of each channel's label in the PCF. */
static const char *pcf_prefix[CH_MAX] = {
	[CH_SUBSYSTEM] = "TAMPI subsystem",
};

/* Human-readable PCF labels for each subsystem state, terminated by a
 * {-1, NULL} sentinel. */
static const struct pcf_value_label tampi_ss_values[] = {
	{ ST_COMM_ISSUE_NONBLOCKING, "Communication: Issuing a non-blocking operation" },
	{ ST_GLOBAL_ARRAY_CHECK, "Global array: Checking pending requests" },
	{ ST_LIBRARY_INTERFACE, "Library code: Interface function" },
	{ ST_LIBRARY_POLLING, "Library code: Polling function" },
	{ ST_QUEUE_ADD, "Queue: Adding to a queue" },
	/* NOTE(review): "Transfering" is a typo ("Transferring"); kept
	 * byte-identical here since it is a user-visible trace label. */
	{ ST_QUEUE_TRANSFER, "Queue: Transfering to global array" },
	{ ST_REQUEST_TEST, "Request: Testing a request" },
	{ ST_REQUEST_COMPLETED, "Request: Processing a completed request" },
	{ ST_REQUEST_TESTALL, "Request: Testing all requests" },
	{ ST_REQUEST_TESTSOME, "Request: Testing some requests" },
	{ ST_TICKET_CREATE, "Ticket: Creating a ticket" },
	{ ST_TICKET_WAIT, "Ticket: Waiting a ticket" },
	{ -1, NULL },
};

static const struct pcf_value_label *pcf_labels[CH_MAX] = {
	[CH_SUBSYSTEM] = tampi_ss_values,
};

/* PRV output flags; PRV_SKIPDUP presumably suppresses duplicate
 * consecutive values -- see model_pvt for the exact semantics. */
static const long prv_flags[CH_MAX] = {
	[CH_SUBSYSTEM] = PRV_SKIPDUP,
};

static const struct model_pvt_spec pvt_spec = {
	.type = pvt_type,
	.prefix = pcf_prefix,
	.label = pcf_labels,
	.flags = prv_flags,
};

/* ----------------- tracking ------------------ */

/* Thread view: track the subsystem of the ACTIVE thread. */
static const int th_track[CH_MAX] = {
	[CH_SUBSYSTEM] = TRACK_TH_ACT,
};

/* CPU view: track the subsystem of the thread RUNNING on each CPU. */
static const int cpu_track[CH_MAX] = {
	[CH_SUBSYSTEM] = TRACK_TH_RUN,
};

/* ----------------- chan_spec ------------------ */

static const struct model_chan_spec th_chan = {
	.nch = CH_MAX,
	.prefix = model_name,
	.ch_names = chan_name,
	.ch_stack = chan_stack,
	.pvt = &pvt_spec,
	.track = th_track,
};

static const struct model_chan_spec cpu_chan = {
	.nch = CH_MAX,
	.prefix = model_name,
	.ch_names = chan_name,
	.ch_stack = chan_stack,
	.pvt = &pvt_spec,
	.track = cpu_track,
};

/* ----------------- models ------------------ */

static const struct model_cpu_spec cpu_spec = {
	.size = sizeof(struct tampi_cpu),
	.chan = &cpu_chan,
	.model = &model_tampi,
};

static const struct model_thread_spec th_spec = {
	.size = sizeof(struct tampi_thread),
	.chan = &th_chan,
	.model = &model_tampi,
};
/* ----------------------------------------------------- */
/*
 * Decides whether the TAMPI model should be enabled for this trace.
 * Returns 0 to enable the model, 1 to skip it (no threads in the
 * system).
 */
int
model_tampi_probe(struct emu *emu)
{
	return (emu->system.nthreads == 0) ? 1 : 0;
}
/*
 * Creates the per-thread and per-CPU TAMPI model state (channels and
 * extensions) from the static specs above.
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_create(struct emu *emu)
{
	/* Error messages fixed to name the functions actually called
	 * (were "model_thread_init"/"model_cpu_init"). */
	if (model_thread_create(emu, &th_spec) != 0) {
		err("model_thread_create failed");
		return -1;
	}

	if (model_cpu_create(emu, &cpu_spec) != 0) {
		err("model_cpu_create failed");
		return -1;
	}

	return 0;
}
/*
 * Connects the thread and CPU channels of the TAMPI model; invoked
 * lazily from model_tampi_event() on the first TAMPI event.
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_connect(struct emu *emu)
{
	int rc = model_thread_connect(emu, &th_spec);
	if (rc != 0) {
		err("model_thread_connect failed");
		return -1;
	}

	rc = model_cpu_connect(emu, &cpu_spec);
	if (rc != 0) {
		err("model_cpu_connect failed");
		return -1;
	}

	return 0;
}
static int
end_lint(struct emu *emu)
{
/* Only run the check if we finished the complete trace */
if (!emu->finished)
return 0;
struct system *sys = &emu->system;
/* Ensure we run out of subsystem states */
for (struct thread *t = sys->threads; t; t = t->gnext) {
struct tampi_thread *th = EXT(t, model_id);
struct chan *ch = &th->m.ch[CH_SUBSYSTEM];
int stacked = ch->data.stack.n;
if (stacked > 0) {
struct value top;
if (chan_read(ch, &top) != 0) {
err("chan_read failed for subsystem");
return -1;
}
err("thread %d ended with %d stacked tampi subsystems",
t->tid, stacked);
return -1;
}
}
return 0;
}
/*
 * Finish hook for the TAMPI model. In linter mode it additionally
 * verifies that no subsystem state was left stacked on any thread.
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_finish(struct emu *emu)
{
	/* Extra checks only apply when running as a linter. */
	if (!emu->args.linter_mode)
		return 0;

	if (end_lint(emu) != 0) {
		err("end_lint failed");
		return -1;
	}

	return 0;
}

View File

@ -0,0 +1,47 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef TAMPI_PRIV_H
#define TAMPI_PRIV_H
#include "emu.h"
#include "model_cpu.h"
#include "model_thread.h"
/* Private enums */
enum tampi_chan {
CH_SUBSYSTEM = 0,
CH_MAX,
};
enum tampi_ss_values {
ST_COMM_ISSUE_NONBLOCKING = 1,
ST_GLOBAL_ARRAY_CHECK,
ST_LIBRARY_INTERFACE,
ST_LIBRARY_POLLING,
ST_QUEUE_ADD,
ST_QUEUE_TRANSFER,
ST_REQUEST_COMPLETED,
ST_REQUEST_TEST,
ST_REQUEST_TESTALL,
ST_REQUEST_TESTSOME,
ST_TICKET_CREATE,
ST_TICKET_WAIT,
};
struct tampi_thread {
struct model_thread m;
};
struct tampi_cpu {
struct model_cpu m;
};
int model_tampi_probe(struct emu *emu);
int model_tampi_create(struct emu *emu);
int model_tampi_connect(struct emu *emu);
int model_tampi_event(struct emu *emu);
int model_tampi_finish(struct emu *emu);
#endif /* TAMPI_PRIV_H */