Add TAMPI model with subsystems view

This commit is contained in:
Kevin Sala 2023-08-18 12:33:01 +02:00
parent 9269dd7202
commit 276afd5479
13 changed files with 641 additions and 0 deletions

View File

@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `ovni_version_get()` function.
- Add the `ovniver` program to report the libovni version and commit.
- Add nOS-V API subsystem events for `nosv_create()` and `nosv_destroy()`.
- Add TAMPI model with `T` code.
- Add subsystem events and cfgs for the TAMPI model.
## [1.2.2] - 2022-07-26

View File

@ -0,0 +1,42 @@
#ParaverCFG
ConfigFile.Version: 3.4
ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW CPU: TAMPI subsystem of the RUNNING thread >
################################################################################
window_name CPU: TAMPI subsystem of the RUNNING thread
window_type single
window_id 1
window_position_x 0
window_position_y 0
window_width 600
window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
window_comm_tagsize true
window_comm_typeval true
window_units Microseconds
window_maximum_y 1000.0
window_minimum_y 1.0
window_compute_y_max true
window_level thread
window_scale_relative 1.000000000000
window_end_time_relative 1.000000000000
window_object appl { 1, { All } }
window_begin_time_relative 0.000000000000
window_open true
window_drawmode draw_randnotzero
window_drawmode_rows draw_randnotzero
window_pixel_size 1
window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 20
window_filter_module evt_type_label 1 "CPU: TAMPI subsystem of the RUNNING thread"

View File

@ -0,0 +1,42 @@
#ParaverCFG
ConfigFile.Version: 3.4
ConfigFile.NumWindows: 1
################################################################################
< NEW DISPLAYING WINDOW Thread: TAMPI subsystem of the ACTIVE thread >
################################################################################
window_name Thread: TAMPI subsystem of the ACTIVE thread
window_type single
window_id 1
window_position_x 0
window_position_y 0
window_width 600
window_height 150
window_comm_lines_enabled true
window_flags_enabled false
window_noncolor_mode true
window_logical_filtered true
window_physical_filtered false
window_comm_fromto true
window_comm_tagsize true
window_comm_typeval true
window_units Microseconds
window_maximum_y 1000.0
window_minimum_y 1.0
window_compute_y_max true
window_level thread
window_scale_relative 1.000000000000
window_end_time_relative 1.000000000000
window_object appl { 1, { All } }
window_begin_time_relative 0.000000000000
window_open true
window_drawmode draw_randnotzero
window_drawmode_rows draw_randnotzero
window_pixel_size 1
window_labels_to_draw 1
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
window_filter_module evt_type 1 20
window_filter_module evt_type_label 1 "Thread: TAMPI subsystem of the ACTIVE thread"

View File

@ -193,4 +193,36 @@ KCI Is back in the CPU due to a context switch
6MA Ends allocating memory
6Mf Begins freeing memory
6MF Ends freeing memory
-------------------- TAMPI (model=T) ----------------------
TCi Begins to issue a non-blocking communication operation
TCI Ends issuing a non-blocking communication operation
TGc Begins to check pending requests from the global array
TGC Ends checking pending requests from the global array
TLi Begins the library code at an API function
TLI Ends the library code at an API function
TLp Begins the library code at a polling function
TLP Ends the library code at a polling function
TQa Begins to add a ticket/requests to a queue
TQA Ends adding a ticket/requests to a queue
TQt Begins to transfer tickets/requests from queues to global array
TQT Ends transferring tickets/requests from queues to global array
TRc Begins to process a completed request
TRC Ends processing a completed request
TRt Begins to test a single request with MPI_Test
TRT Ends testing a single request with MPI_Test
TRa Begins to test several requests with MPI_Testall
TRA Ends testing several requests with MPI_Testall
TRs Begins to test several requests with MPI_Testsome
TRS Ends testing several requests with MPI_Testsome
TTc Begins to create a ticket linked to a set of requests and a task
TTC Ends creating a ticket linked to a set of requests and a task
TTw Begins to wait for a ticket completion
TTW Ends waiting for a ticket completion
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

144
doc/user/emulation/tampi.md Normal file
View File

@ -0,0 +1,144 @@
# TAMPI model
The Task-Aware MPI (TAMPI) library extends the functionality of standard MPI
libraries by providing new mechanisms for improving the interoperability between
parallel task-based programming models, such as OpenMP and OmpSs-2, and MPI
communications. This library allows the safe and efficient execution of MPI
operations from concurrent tasks and guarantees the transparent management and
progress of these communications.
[tampi repo]: https://github.com/bsc-pm/tampi
[tampi docs]: https://github.com/bsc-pm/tampi#readme
[tampi blk]: https://github.com/bsc-pm/tampi#blocking-mode-ompss-2
[tampi nonblk]: https://github.com/bsc-pm/tampi#non-blocking-mode-openmp--ompss-2
The TAMPI library has instrumented the execution of its task-aware functions
with ovni. To obtain an instrumented library, TAMPI must be built passing the
`--with-ovni` configure option and specifying the ovni installation prefix. At
run-time, the user can enable the instrumentation by defining the environment
variable `TAMPI_INSTRUMENT=ovni`.
For more information regarding TAMPI or how to enable its instrumentation see
the TAMPI [repository][tampi repo] and [documentation][tampi docs].
TAMPI is instrumented to track the execution path inside the run-time library
to identify what is happening at each moment. This information can be used by
both users and developers to analyze problems or to better understand the
execution behavior of TAMPI communications and its background services. There is
one view generated to achieve this goal.
## Subsystem view
The subsystem view attempts to provide a general overview of what TAMPI is doing
at any point in time. The view shows the state inside the TAMPI library for each
thread (and for each CPU, the state of the running thread in that CPU). This
subsystem state view sticks to the definition of subsystem states from the
[Nanos6](nanos6.md#subsystem-view).
The states shown in this view are:
- **Library code subsystem**: Indicating whether the running thread is executing
effective TAMPI library code. These subsystem states wrap the rest of
subsystems that are described below. No other TAMPI state can appear outside
of a TAMPI library code subsystem state.
- **Interface function**: Running any TAMPI API function or an intercepted
MPI function which requires task-awareness. When the user application
disables a TAMPI mode, whether the [blocking][tampi blk] or
[non-blocking][tampi nonblk] mode, any call to an interface function
corresponding to the disabled mode will not appear in the view. Operations
that are directly forwarded to MPI (because TAMPI is not asked to apply
task-awareness) will not appear.
- **Polling function**: The TAMPI library can launch internal tasks to
execute polling functions in the background. Currently, TAMPI launches a
polling task that periodically checks and processes the pending MPI
requests generated by task-aware operations. This polling state may not
appear if none of the TAMPI modes are enabled by the user application.
- **Communication subsystem**: The running thread is communicating through MPI
or issuing an asynchronous communication operation.
- **Issuing a non-blocking operation**: Issuing a non-blocking MPI operation
that can generate an MPI request.
- **Ticket subsystem**: Creation and managing of tickets. A ticket is an
internal object that describes the relation between a set of pending MPI
requests and the user communication task that is *waiting* (synchronous or
asynchronously) on them. A ticket is used for both [blocking][tampi blk] and
[non-blocking][tampi nonblk] operations.
- **Creating a ticket**: Creating a ticket that is linked to a set of MPI
requests and a user task. The user task is the task that is *waiting* for
these requests to complete. Notice that *waiting* does not mean that the
task will synchronously wait for them. The ticket is initialized with a
counter of how many requests are still pending. The ticket is completed,
and thus, the task is notified, when this counter becomes zero.
- **Waiting for the ticket completion**: The user task, during a blocking
    TAMPI operation, is waiting for a ticket and its requests to complete. The
task may be blocked and yield the CPU meanwhile. Notice that user tasks
calling non-blocking TAMPI operations will not enter in this state.
- **Staging queue subsystem**: Queueing and dequeueing requests from the staging
queues before being transferred to the global array of requests and tickets.
These queues are used to optimize and control insertion of these objects into
the global array.
- **Adding to a queue**: A user communication task running a task-aware
TAMPI operation is pushing the corresponding MPI requests and the related
ticket into a staging queue.
    - **Transferring from queues to the global array**: The polling task is
transferring the staged requests and tickets from the queues to the global
array.
- **Global array subsystem**: Managing the per-process global array of tickets
and MPI requests related to TAMPI operations.
- **Checking pending requests**: Testing all pending MPI requests from the
global array, processing the completed requests, and reorganizing the
array to keep it compacted.
- **Request subsystem**: Management and testing of pending MPI requests, and
processing the completed ones. This state considers only the management of MPI
requests concerning task-aware operations, which are exclusively tested by the
TAMPI library. Any testing function call made by the user application or other
libraries is not considered.
- **Testing a request with MPI_Test**: Testing a single MPI request by
calling MPI_Test inside the TAMPI library.
- **Testing requests with MPI_Testall**: Testing multiple MPI requests by
calling MPI_Testall inside the TAMPI library.
- **Testing requests with MPI_Testsome**: Testing multiple MPI requests by
calling MPI_Testsome inside the TAMPI library.
- **Processing a completed request**: Processing a completed MPI request by
decreasing the number of pending requests of the linked ticket. If the
ticket does not have any other request to wait, the ticket is completed
and the *waiting* task is notified. In such a case, a call to the tasking
runtime system will occur. If the operation was [blocking][tampi blk], the
*waiting* task will be unblocked and will eventually resume the execution.
If the operation was [non-blocking][tampi nonblk], the library will
decrease the external events of the *waiting* task.
The figure below shows an example of the subsystem view. The program executes a
distributed stencil algorithm with MPI and OmpSs-2. There are several MPI
processes and each process has OmpSs-2 tasks running exclusively on multiple CPU
resources.
![Subsystem view example](fig/tampi-subsystem.png)
The view shows there are several user tasks running task-aware communication
operations. The light blue areas show when a user task is testing a request that
was generated by a non-blocking MPI communication function. There is also one
polling task per process. The yellow areas show when the polling tasks are
calling MPI_Testsome. Just after the testsome call, the violet areas show the
moment when the polling task is processing the completed requests.
This view shows that most of the time inside the TAMPI library is spent testing
requests. This could give us a clue that the underlying MPI library may have
concurrency issues (e.g., thread contention) when multiple threads try to test
requests in parallel.

View File

@ -32,6 +32,7 @@ nav:
- user/emulation/ovni.md
- user/emulation/nosv.md
- user/emulation/nanos6.md
- user/emulation/tampi.md
- user/emulation/events.md
- 'Developer guide':
- dev/index.md

View File

@ -53,6 +53,8 @@ add_library(emu STATIC
nosv/event.c
nodes/setup.c
nodes/event.c
tampi/setup.c
tampi/event.c
kernel/setup.c
kernel/event.c
)

View File

@ -18,6 +18,7 @@ enum emu_prv_types {
PRV_NOSV_APPID = 12,
PRV_NOSV_SUBSYSTEM = 13,
PRV_NOSV_RANK = 14,
PRV_TAMPI_SUBSYSTEM = 20,
PRV_NODES_SUBSYSTEM = 30,
PRV_NANOS6_TASKID = 35,
PRV_NANOS6_TYPE = 36,

View File

@ -11,6 +11,7 @@ extern struct model_spec model_ovni;
extern struct model_spec model_nanos6;
extern struct model_spec model_nosv;
extern struct model_spec model_nodes;
extern struct model_spec model_tampi;
extern struct model_spec model_kernel;
static struct model_spec *models[] = {
@ -18,6 +19,7 @@ static struct model_spec *models[] = {
&model_nanos6,
&model_nosv,
&model_nodes,
&model_tampi,
&model_kernel,
NULL
};

111
src/emu/tampi/event.c Normal file
View File

@ -0,0 +1,111 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "tampi_priv.h"
#include "chan.h"
#include "common.h"
#include "emu.h"
#include "emu_ev.h"
#include "extend.h"
#include "model_thread.h"
#include "thread.h"
#include "value.h"
/* Actions an ss_table entry can request on a channel: push a new state,
 * pop the matching state, or deliberately ignore the event. A
 * zero-filled entry (unknown event) matches none of these values and is
 * rejected by process_ev(). */
enum { PUSH = 1, POP = 2, IGN = 3 };

/* Shorthand so the table entries below fit on one line. */
#define CHSS CH_SUBSYSTEM

/* Event dispatch table indexed by the event category and value
 * characters (e.g. "TQa" -> ['Q']['a']). Each entry holds
 * { channel index, action, subsystem state }. See the TAMPI section of
 * the events documentation for the meaning of each code. */
static const int ss_table[256][256][3] = {
	['C'] = {
		['i'] = { CHSS, PUSH, ST_COMM_ISSUE_NONBLOCKING },
		['I'] = { CHSS, POP, ST_COMM_ISSUE_NONBLOCKING },
	},
	['G'] = {
		['c'] = { CHSS, PUSH, ST_GLOBAL_ARRAY_CHECK },
		['C'] = { CHSS, POP, ST_GLOBAL_ARRAY_CHECK },
	},
	['L'] = {
		['i'] = { CHSS, PUSH, ST_LIBRARY_INTERFACE },
		['I'] = { CHSS, POP, ST_LIBRARY_INTERFACE },
		['p'] = { CHSS, PUSH, ST_LIBRARY_POLLING },
		['P'] = { CHSS, POP, ST_LIBRARY_POLLING },
	},
	['Q'] = {
		['a'] = { CHSS, PUSH, ST_QUEUE_ADD },
		['A'] = { CHSS, POP, ST_QUEUE_ADD },
		['t'] = { CHSS, PUSH, ST_QUEUE_TRANSFER },
		['T'] = { CHSS, POP, ST_QUEUE_TRANSFER },
	},
	['R'] = {
		['c'] = { CHSS, PUSH, ST_REQUEST_COMPLETED },
		['C'] = { CHSS, POP, ST_REQUEST_COMPLETED },
		['t'] = { CHSS, PUSH, ST_REQUEST_TEST },
		['T'] = { CHSS, POP, ST_REQUEST_TEST },
		['a'] = { CHSS, PUSH, ST_REQUEST_TESTALL },
		['A'] = { CHSS, POP, ST_REQUEST_TESTALL },
		['s'] = { CHSS, PUSH, ST_REQUEST_TESTSOME },
		['S'] = { CHSS, POP, ST_REQUEST_TESTSOME },
	},
	['T'] = {
		['c'] = { CHSS, PUSH, ST_TICKET_CREATE },
		['C'] = { CHSS, POP, ST_TICKET_CREATE },
		['w'] = { CHSS, PUSH, ST_TICKET_WAIT },
		['W'] = { CHSS, POP, ST_TICKET_WAIT },
	},
};
/*
 * Processes a single TAMPI event for the current thread: looks up the
 * (channel, action, state) triple in ss_table and pushes or pops the
 * state on the thread's subsystem channel.
 *
 * Returns 0 on success, -1 on error (thread not running, channel
 * operation failure, or an event with no table entry).
 */
static int
process_ev(struct emu *emu)
{
	if (!emu->thread->is_running) {
		err("current thread %d not running", emu->thread->tid);
		return -1;
	}

	/* Cast the event characters to unsigned char before indexing:
	 * if they are plain (possibly signed) char, a value above 127
	 * would index the table out of bounds. */
	const int *entry = ss_table[(unsigned char) emu->ev->c][(unsigned char) emu->ev->v];
	int chind = entry[0];
	int action = entry[1];
	int st = entry[2];

	struct tampi_thread *th = EXT(emu->thread, 'T');
	struct chan *ch = &th->m.ch[chind];

	if (action == PUSH) {
		return chan_push(ch, value_int64(st));
	} else if (action == POP) {
		return chan_pop(ch, value_int64(st));
	} else if (action == IGN) {
		return 0; /* do nothing */
	}

	/* Zero-filled table entry: event code not recognized. */
	err("unknown TAMPI subsystem event");
	return -1;
}
/*
 * Entry point for every event carrying the TAMPI model code 'T'.
 * On the first event, lazily connects the model channels (the spec
 * registers no .connect hook); then dispatches to process_ev().
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_event(struct emu *emu)
{
	/* Lazy one-shot connection, performed on the first event only. */
	static int connected = 0;

	if (!connected) {
		if (model_tampi_connect(emu) != 0) {
			err("tampi_connect failed");
			return -1;
		}
		connected = 1;
	}

	dbg("in tampi_event");

	if (emu->ev->m != 'T') {
		err("unexpected event model %c", emu->ev->m);
		return -1;
	}

	dbg("got tampi event %s", emu->ev->mcv);

	if (process_ev(emu) == 0)
		return 0;

	err("error processing TAMPI event");
	return -1;
}

215
src/emu/tampi/setup.c Normal file
View File

@ -0,0 +1,215 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include "tampi_priv.h"
#include <stddef.h>
#include "chan.h"
#include "common.h"
#include "emu.h"
#include "emu_args.h"
#include "emu_prv.h"
#include "extend.h"
#include "model.h"
#include "model_chan.h"
#include "model_cpu.h"
#include "model_pvt.h"
#include "model_thread.h"
#include "pv/pcf.h"
#include "pv/prv.h"
#include "system.h"
#include "thread.h"
#include "track.h"
#include "value.h"
/* Model identity: the name is used as the channel path prefix and the
 * single character 'T' is the model code carried by the events. */
static const char model_name[] = "tampi";
enum { model_id = 'T' };

struct model_spec model_tampi = {
	.name = model_name,
	.model = model_id,
	.create = model_tampi_create,
	/* Connection is performed lazily from model_tampi_event() when
	 * the first TAMPI event arrives, so no .connect hook is set. */
	// .connect = model_tampi_connect,
	.event = model_tampi_event,
	.probe = model_tampi_probe,
	.finish = model_tampi_finish,
};
/* ----------------- channels ------------------ */

/* Channel names, used to build each channel's full path. */
static const char *chan_name[CH_MAX] = {
	[CH_SUBSYSTEM] = "subsystem",
};

/* Channels operated as a stack (push/pop) instead of a plain value. */
static const int chan_stack[CH_MAX] = {
	[CH_SUBSYSTEM] = 1,
};

/* ----------------- pvt ------------------ */

/* Paraver (PRV) event type of each channel; must match the type used
 * in the TAMPI .cfg files (see emu_prv.h). */
static const int pvt_type[CH_MAX] = {
	[CH_SUBSYSTEM] = PRV_TAMPI_SUBSYSTEM,
};

/* Prefix of each channel's label in the PCF. */
static const char *pcf_prefix[CH_MAX] = {
	[CH_SUBSYSTEM] = "TAMPI subsystem",
};

/* Human-readable PCF labels for each subsystem state, terminated by a
 * {-1, NULL} sentinel. */
static const struct pcf_value_label tampi_ss_values[] = {
	{ ST_COMM_ISSUE_NONBLOCKING, "Communication: Issuing a non-blocking operation" },
	{ ST_GLOBAL_ARRAY_CHECK, "Global array: Checking pending requests" },
	{ ST_LIBRARY_INTERFACE, "Library code: Interface function" },
	{ ST_LIBRARY_POLLING, "Library code: Polling function" },
	{ ST_QUEUE_ADD, "Queue: Adding to a queue" },
	/* NOTE(review): "Transfering" is a typo ("Transferring"); kept
	 * byte-identical here since it is a user-visible trace label. */
	{ ST_QUEUE_TRANSFER, "Queue: Transfering to global array" },
	{ ST_REQUEST_TEST, "Request: Testing a request" },
	{ ST_REQUEST_COMPLETED, "Request: Processing a completed request" },
	{ ST_REQUEST_TESTALL, "Request: Testing all requests" },
	{ ST_REQUEST_TESTSOME, "Request: Testing some requests" },
	{ ST_TICKET_CREATE, "Ticket: Creating a ticket" },
	{ ST_TICKET_WAIT, "Ticket: Waiting a ticket" },
	{ -1, NULL },
};

static const struct pcf_value_label *pcf_labels[CH_MAX] = {
	[CH_SUBSYSTEM] = tampi_ss_values,
};

/* PRV output flags; PRV_SKIPDUP presumably suppresses duplicate
 * consecutive values -- see model_pvt for the exact semantics. */
static const long prv_flags[CH_MAX] = {
	[CH_SUBSYSTEM] = PRV_SKIPDUP,
};

static const struct model_pvt_spec pvt_spec = {
	.type = pvt_type,
	.prefix = pcf_prefix,
	.label = pcf_labels,
	.flags = prv_flags,
};

/* ----------------- tracking ------------------ */

/* Thread view: track the subsystem of the ACTIVE thread. */
static const int th_track[CH_MAX] = {
	[CH_SUBSYSTEM] = TRACK_TH_ACT,
};

/* CPU view: track the subsystem of the thread RUNNING on each CPU. */
static const int cpu_track[CH_MAX] = {
	[CH_SUBSYSTEM] = TRACK_TH_RUN,
};

/* ----------------- chan_spec ------------------ */

static const struct model_chan_spec th_chan = {
	.nch = CH_MAX,
	.prefix = model_name,
	.ch_names = chan_name,
	.ch_stack = chan_stack,
	.pvt = &pvt_spec,
	.track = th_track,
};

static const struct model_chan_spec cpu_chan = {
	.nch = CH_MAX,
	.prefix = model_name,
	.ch_names = chan_name,
	.ch_stack = chan_stack,
	.pvt = &pvt_spec,
	.track = cpu_track,
};

/* ----------------- models ------------------ */

static const struct model_cpu_spec cpu_spec = {
	.size = sizeof(struct tampi_cpu),
	.chan = &cpu_chan,
	.model = &model_tampi,
};

static const struct model_thread_spec th_spec = {
	.size = sizeof(struct tampi_thread),
	.chan = &th_chan,
	.model = &model_tampi,
};
/* ----------------------------------------------------- */
/*
 * Decides whether the TAMPI model should be enabled for this trace.
 * Returns 0 to enable the model, 1 to skip it (no threads in the
 * system).
 */
int
model_tampi_probe(struct emu *emu)
{
	return (emu->system.nthreads == 0) ? 1 : 0;
}
/*
 * Creates the per-thread and per-CPU TAMPI model state (channels and
 * extensions) from the static specs above.
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_create(struct emu *emu)
{
	/* Error messages fixed to name the functions actually called
	 * (were "model_thread_init"/"model_cpu_init"). */
	if (model_thread_create(emu, &th_spec) != 0) {
		err("model_thread_create failed");
		return -1;
	}

	if (model_cpu_create(emu, &cpu_spec) != 0) {
		err("model_cpu_create failed");
		return -1;
	}

	return 0;
}
/*
 * Connects the thread and CPU channels of the TAMPI model; invoked
 * lazily from model_tampi_event() on the first TAMPI event.
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_connect(struct emu *emu)
{
	int rc = model_thread_connect(emu, &th_spec);
	if (rc != 0) {
		err("model_thread_connect failed");
		return -1;
	}

	rc = model_cpu_connect(emu, &cpu_spec);
	if (rc != 0) {
		err("model_cpu_connect failed");
		return -1;
	}

	return 0;
}
static int
end_lint(struct emu *emu)
{
/* Only run the check if we finished the complete trace */
if (!emu->finished)
return 0;
struct system *sys = &emu->system;
/* Ensure we run out of subsystem states */
for (struct thread *t = sys->threads; t; t = t->gnext) {
struct tampi_thread *th = EXT(t, model_id);
struct chan *ch = &th->m.ch[CH_SUBSYSTEM];
int stacked = ch->data.stack.n;
if (stacked > 0) {
struct value top;
if (chan_read(ch, &top) != 0) {
err("chan_read failed for subsystem");
return -1;
}
err("thread %d ended with %d stacked tampi subsystems",
t->tid, stacked);
return -1;
}
}
return 0;
}
/*
 * Finish hook for the TAMPI model. In linter mode it additionally
 * verifies that no subsystem state was left stacked on any thread.
 * Returns 0 on success, -1 on error.
 */
int
model_tampi_finish(struct emu *emu)
{
	/* Extra checks only apply when running as a linter. */
	if (!emu->args.linter_mode)
		return 0;

	if (end_lint(emu) != 0) {
		err("end_lint failed");
		return -1;
	}

	return 0;
}

View File

@ -0,0 +1,47 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#ifndef TAMPI_PRIV_H
#define TAMPI_PRIV_H
#include "emu.h"
#include "model_cpu.h"
#include "model_thread.h"
/* Private enums */
enum tampi_chan {
CH_SUBSYSTEM = 0,
CH_MAX,
};
enum tampi_ss_values {
ST_COMM_ISSUE_NONBLOCKING = 1,
ST_GLOBAL_ARRAY_CHECK,
ST_LIBRARY_INTERFACE,
ST_LIBRARY_POLLING,
ST_QUEUE_ADD,
ST_QUEUE_TRANSFER,
ST_REQUEST_COMPLETED,
ST_REQUEST_TEST,
ST_REQUEST_TESTALL,
ST_REQUEST_TESTSOME,
ST_TICKET_CREATE,
ST_TICKET_WAIT,
};
struct tampi_thread {
struct model_thread m;
};
struct tampi_cpu {
struct model_cpu m;
};
int model_tampi_probe(struct emu *emu);
int model_tampi_create(struct emu *emu);
int model_tampi_connect(struct emu *emu);
int model_tampi_event(struct emu *emu);
int model_tampi_finish(struct emu *emu);
#endif /* TAMPI_PRIV_H */