Don't modify nOS-V subsystem state on task pause
In nOS-V, when a task was paused via the VTp event, two things were happening: 1) the task state was set to pause and 2) the subsystem state "Task: Running" was being popped. This causes a problem when a task calls nosv_submit() in blocking mode, as it will call nosv_pause() which will emit a VTp event from a subsystem different than "Task: Running". To solve this conflict, we handle the subsystems state and the task state separately with the VTp and VTr events. The subsystem state "Task: Running" no longer is connected to the state of the task and only shows if we entered the body of the task or not. It has now been renamed to "Task: In body". The new state "Task: In body" represents that the task body has begun the execution and is still in the stack, but the task may be paused. The subsystem is not changed by the VTp (pause) or VTr (resume) events. Fixes: https://pm.bsc.es/gitlab/rarias/ovni/-/issues/128
This commit is contained in:
parent
0df018cf5f
commit
4b4f1bd218
@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Add `OVNI_TRACEDIR` envar to change the trace directory (default is `ovni`).
|
||||
|
||||
### Changed
|
||||
|
||||
- Don't modify nOS-V subsystem state on task pause. The "Task: Running"
|
||||
state is now renamed to "Task: In body" to reflect the change.
|
||||
|
||||
## [1.3.0] - 2023-09-07
|
||||
|
||||
### Added
|
||||
|
@ -35,8 +35,8 @@ OU] Exits the region of past events (HACK)
|
||||
-------------------- nOS-V (model=V) ----------------------
|
||||
|
||||
VTc Creates a new task (punctual event)
|
||||
VTx Task execute
|
||||
VTe Task end
|
||||
VTx Task execute (enter task body)
|
||||
VTe Task end (exit task body)
|
||||
VTp Task pause
|
||||
VTr Task resume
|
||||
|
||||
|
@ -22,3 +22,9 @@ provides two desirable properties:
|
||||
For more details, see [this MR][1].
|
||||
|
||||
[1]: https://pm.bsc.es/gitlab/rarias/ovni/-/merge_requests/27
|
||||
|
||||
## Subsystem view
|
||||
|
||||
The subsystem view provides a simplified view on what is the nOS-V
|
||||
runtime doing over time. The view follows the same rules described in
|
||||
the [subsystem view of Nanos6](../nanos6/#subsystem_view).
|
||||
|
@ -120,11 +120,6 @@ chan_task_stopped(struct emu *emu)
|
||||
}
|
||||
}
|
||||
|
||||
if (chan_pop(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
|
||||
err("chan_pop subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -167,16 +162,12 @@ chan_task_running(struct emu *emu, struct task *task)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (chan_push(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
|
||||
err("chan_push subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
chan_task_switch(struct emu *emu, struct task *prev, struct task *next, char tr)
|
||||
chan_task_switch(struct emu *emu, struct task *prev, struct task *next)
|
||||
{
|
||||
struct nosv_thread *th = EXT(emu->thread, 'V');
|
||||
struct chan *ch = th->m.ch;
|
||||
@ -227,20 +218,6 @@ chan_task_switch(struct emu *emu, struct task *prev, struct task *next, char tr)
|
||||
}
|
||||
}
|
||||
|
||||
/* Update the subsystem depending if we are pushing (X) or
|
||||
* poping (E) a task from the stack */
|
||||
if (tr == 'X') {
|
||||
if (chan_push(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
|
||||
err("chan_push subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
} else if (tr == 'E') {
|
||||
if (chan_pop(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
|
||||
err("chan_pop subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -331,7 +308,7 @@ update_task_channels(struct emu *emu, char tr, struct task *prev, struct task *n
|
||||
/* Additional nested transitions */
|
||||
case 'X':
|
||||
case 'E':
|
||||
ret = chan_task_switch(emu, prev, next, tr);
|
||||
ret = chan_task_switch(emu, prev, next);
|
||||
break;
|
||||
default:
|
||||
err("unexpected transition value %c", tr);
|
||||
@ -347,22 +324,8 @@ update_task_channels(struct emu *emu, char tr, struct task *prev, struct task *n
|
||||
}
|
||||
|
||||
static int
|
||||
enforce_task_rules(struct emu *emu, char tr, struct task_stack *stack, struct task *next)
|
||||
enforce_task_rules(struct emu *emu, char tr, struct task *next)
|
||||
{
|
||||
struct nosv_thread *th = EXT(emu->thread, 'V');
|
||||
struct value ss;
|
||||
if (chan_read(&th->m.ch[CH_SUBSYSTEM], &ss) != 0) {
|
||||
err("chan_read failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* If there is no task in the stack, the subsystem must not be
|
||||
* running a task */
|
||||
if (stack->top == NULL && ss.type == VALUE_INT64 && ss.i == ST_TASK_RUNNING) {
|
||||
err("task stack empty but subsystem is Task running");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (tr != 'x' && tr != 'X')
|
||||
return 0;
|
||||
|
||||
@ -374,7 +337,14 @@ enforce_task_rules(struct emu *emu, char tr, struct task_stack *stack, struct ta
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ss.type == VALUE_INT64 && ss.i != ST_TASK_RUNNING) {
|
||||
struct nosv_thread *th = EXT(emu->thread, 'V');
|
||||
struct value ss;
|
||||
if (chan_read(&th->m.ch[CH_SUBSYSTEM], &ss) != 0) {
|
||||
err("chan_read failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ss.type == VALUE_INT64 && ss.i != ST_TASK_BODY) {
|
||||
err("wrong subsystem state on task begin");
|
||||
return -1;
|
||||
}
|
||||
@ -382,6 +352,28 @@ enforce_task_rules(struct emu *emu, char tr, struct task_stack *stack, struct ta
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
update_task_ss_channel(struct emu *emu, char tr)
|
||||
{
|
||||
struct nosv_thread *th = EXT(emu->thread, 'V');
|
||||
struct chan *ss = &th->m.ch[CH_SUBSYSTEM];
|
||||
|
||||
/* Only if starting or ending. Not changed when paused or resumed */
|
||||
if (tr == 'x') {
|
||||
if (chan_push(ss, value_int64(ST_TASK_BODY)) != 0) {
|
||||
err("chan_push subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
} else if (tr == 'e') {
|
||||
if (chan_pop(ss, value_int64(ST_TASK_BODY)) != 0) {
|
||||
err("chan_pop subsystem failed");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
update_task(struct emu *emu)
|
||||
{
|
||||
@ -398,6 +390,12 @@ update_task(struct emu *emu)
|
||||
|
||||
struct task *next = task_get_running(stack);
|
||||
|
||||
/* Update the subsystem channel */
|
||||
if (update_task_ss_channel(emu, emu->ev->v) != 0) {
|
||||
err("update_task_ss_channel failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char tr;
|
||||
int was_running = (prev != NULL);
|
||||
int runs_now = (next != NULL);
|
||||
@ -412,7 +410,7 @@ update_task(struct emu *emu)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (enforce_task_rules(emu, tr, stack, next) != 0) {
|
||||
if (enforce_task_rules(emu, tr, next) != 0) {
|
||||
err("enforce_task_rules failed");
|
||||
return -1;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ enum nosv_ss_values {
|
||||
ST_SCHED_SUBMITTING,
|
||||
ST_MEM_ALLOCATING,
|
||||
ST_MEM_FREEING,
|
||||
ST_TASK_RUNNING,
|
||||
ST_TASK_BODY,
|
||||
ST_API_CREATE,
|
||||
ST_API_DESTROY,
|
||||
ST_API_SUBMIT,
|
||||
|
@ -83,7 +83,7 @@ static const struct pcf_value_label nosv_ss_values[] = {
|
||||
{ ST_SCHED_SUBMITTING, "Scheduler: Submitting" },
|
||||
{ ST_MEM_ALLOCATING, "Memory: Allocating" },
|
||||
{ ST_MEM_FREEING, "Memory: Freeing" },
|
||||
{ ST_TASK_RUNNING, "Task: Running" },
|
||||
{ ST_TASK_BODY, "Task: In body" },
|
||||
{ ST_API_CREATE, "API: Create" },
|
||||
{ ST_API_DESTROY, "API: Destroy" },
|
||||
{ ST_API_SUBMIT, "API: Submit" },
|
||||
|
@ -9,3 +9,4 @@ test_emu(pause.c MP)
|
||||
test_emu(mp-rank.c MP)
|
||||
test_emu(switch-same-type.c)
|
||||
test_emu(multiple-segment.c MP NPROC 4)
|
||||
test_emu(task-pause-from-submit.c)
|
||||
|
68
test/emu/nosv/task-pause-from-submit.c
Normal file
68
test/emu/nosv/task-pause-from-submit.c
Normal file
@ -0,0 +1,68 @@
|
||||
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
|
||||
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
#include "emu_prv.h"
|
||||
#include "instr.h"
|
||||
#include "instr_nosv.h"
|
||||
#include "emu/nosv/nosv_priv.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
/* Tests the case where a task is paused from nosv_submit():
|
||||
*
|
||||
* A VTp event causes a pop of the subsystem channel, which is expected
|
||||
* to be ST_TASK_RUNNING. This is not true in the case of a blocking
|
||||
* submit (NOSV_SUBMIT_BLOCKING), which calls nosv_pause from inside
|
||||
* nosv_submit, causing the transition to happen from the unexpected
|
||||
* ST_API_SUBMIT state.
|
||||
*/
|
||||
|
||||
instr_start(0, 1);
|
||||
|
||||
/* Match the PRV line in the trace */
|
||||
FILE *f = fopen("match.sh", "w");
|
||||
if (f == NULL)
|
||||
die("fopen failed:");
|
||||
|
||||
uint32_t typeid = 100;
|
||||
instr_nosv_type_create(typeid);
|
||||
|
||||
instr_nosv_task_create(1, typeid);
|
||||
|
||||
instr_nosv_task_execute(1);
|
||||
|
||||
/* When starting a task, it must cause the subsystems to enter
|
||||
* the "Task: In body" state */
|
||||
int prvtype = PRV_NOSV_SUBSYSTEM;
|
||||
int st = ST_TASK_BODY;
|
||||
fprintf(f, "grep ':%ld:%d:%d$' ovni/thread.prv\n", get_delta(), prvtype, st);
|
||||
fprintf(f, "grep ':%ld:%d:%d$' ovni/cpu.prv\n", get_delta(), prvtype, st);
|
||||
|
||||
instr_nosv_submit_enter(); /* Blocking submit */
|
||||
instr_nosv_task_pause(1);
|
||||
|
||||
/* Should be left in the submit state, so no state transition in
|
||||
* subsystems view */
|
||||
fprintf(f, "! grep ':%ld:%d:[0-9]*$' ovni/thread.prv\n", get_delta(), prvtype);
|
||||
fprintf(f, "! grep ':%ld:%d:[0-9]*$' ovni/cpu.prv\n", get_delta(), prvtype);
|
||||
|
||||
/* But the task state must be set to pause, so the task id
|
||||
* must be null */
|
||||
prvtype = PRV_NOSV_TASKID;
|
||||
fprintf(f, "grep ':%ld:%d:0$' ovni/thread.prv\n", get_delta(), prvtype);
|
||||
fprintf(f, "grep ':%ld:%d:0$' ovni/cpu.prv\n", get_delta(), prvtype);
|
||||
|
||||
instr_nosv_submit_exit();
|
||||
instr_nosv_task_resume(1);
|
||||
instr_nosv_task_end(1);
|
||||
|
||||
fclose(f);
|
||||
|
||||
instr_end();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user