Don't modify nOS-V subsystem state on task pause

In nOS-V, when a task was paused via the VTp event, two things were
happening: 1) the task state was set to pause and 2) the subsystem state
"Task: Running" was being popped.

This causes a problem when a task calls nosv_submit() in blocking mode,
as it will call nosv_pause() which will emit a VTp event from a
subsystem different than "Task: Running".

To solve this conflict, we handle the subsystems state and the task
state separately with the VTp and VTr events. The subsystem state "Task:
Running" no longer is connected to the state of the task and only shows
if we entered the body of the task or not. It has now been renamed to
"Task: In body".

The new state "Task: In body" represents that the task body has begun
the execution and is still in the stack, but the task may be paused. The
subsystem is not changed by the VTp (pause) or VTr (resume) events.

Fixes: https://pm.bsc.es/gitlab/rarias/ovni/-/issues/128
This commit is contained in:
Rodrigo Arias 2023-07-27 19:21:19 +02:00 committed by Rodrigo Arias Mallo
parent 0df018cf5f
commit 4b4f1bd218
8 changed files with 124 additions and 46 deletions

View File

@ -11,6 +11,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `OVNI_TRACEDIR` envar to change the trace directory (default is `ovni`). - Add `OVNI_TRACEDIR` envar to change the trace directory (default is `ovni`).
### Changed
- Don't modify nOS-V subsystem state on task pause. The "Task: Running"
state is now renamed to "Task: In body" to reflect the change.
## [1.3.0] - 2023-09-07 ## [1.3.0] - 2023-09-07
### Added ### Added

View File

@ -35,8 +35,8 @@ OU] Exits the region of past events (HACK)
-------------------- nOS-V (model=V) ---------------------- -------------------- nOS-V (model=V) ----------------------
VTc Creates a new task (punctual event) VTc Creates a new task (punctual event)
VTx Task execute VTx Task execute (enter task body)
VTe Task end VTe Task end (exit task body)
VTp Task pause VTp Task pause
VTr Task resume VTr Task resume

View File

@ -22,3 +22,9 @@ provides two desirable properties:
For more details, see [this MR][1]. For more details, see [this MR][1].
[1]: https://pm.bsc.es/gitlab/rarias/ovni/-/merge_requests/27 [1]: https://pm.bsc.es/gitlab/rarias/ovni/-/merge_requests/27
## Subsystem view
The subsystem view provides a simplified view on what is the nOS-V
runtime doing over time. The view follows the same rules described in
the [subsystem view of Nanos6](../nanos6/#subsystem_view).

View File

@ -120,11 +120,6 @@ chan_task_stopped(struct emu *emu)
} }
} }
if (chan_pop(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
err("chan_pop subsystem failed");
return -1;
}
return 0; return 0;
} }
@ -167,16 +162,12 @@ chan_task_running(struct emu *emu, struct task *task)
return -1; return -1;
} }
} }
if (chan_push(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
err("chan_push subsystem failed");
return -1;
}
return 0; return 0;
} }
static int static int
chan_task_switch(struct emu *emu, struct task *prev, struct task *next, char tr) chan_task_switch(struct emu *emu, struct task *prev, struct task *next)
{ {
struct nosv_thread *th = EXT(emu->thread, 'V'); struct nosv_thread *th = EXT(emu->thread, 'V');
struct chan *ch = th->m.ch; struct chan *ch = th->m.ch;
@ -227,20 +218,6 @@ chan_task_switch(struct emu *emu, struct task *prev, struct task *next, char tr)
} }
} }
/* Update the subsystem depending if we are pushing (X) or
* poping (E) a task from the stack */
if (tr == 'X') {
if (chan_push(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
err("chan_push subsystem failed");
return -1;
}
} else if (tr == 'E') {
if (chan_pop(&ch[CH_SUBSYSTEM], value_int64(ST_TASK_RUNNING)) != 0) {
err("chan_pop subsystem failed");
return -1;
}
}
return 0; return 0;
} }
@ -331,7 +308,7 @@ update_task_channels(struct emu *emu, char tr, struct task *prev, struct task *n
/* Additional nested transitions */ /* Additional nested transitions */
case 'X': case 'X':
case 'E': case 'E':
ret = chan_task_switch(emu, prev, next, tr); ret = chan_task_switch(emu, prev, next);
break; break;
default: default:
err("unexpected transition value %c", tr); err("unexpected transition value %c", tr);
@ -347,22 +324,8 @@ update_task_channels(struct emu *emu, char tr, struct task *prev, struct task *n
} }
static int static int
enforce_task_rules(struct emu *emu, char tr, struct task_stack *stack, struct task *next) enforce_task_rules(struct emu *emu, char tr, struct task *next)
{ {
struct nosv_thread *th = EXT(emu->thread, 'V');
struct value ss;
if (chan_read(&th->m.ch[CH_SUBSYSTEM], &ss) != 0) {
err("chan_read failed");
return -1;
}
/* If there is no task in the stack, the subsystem must not be
* running a task */
if (stack->top == NULL && ss.type == VALUE_INT64 && ss.i == ST_TASK_RUNNING) {
err("task stack empty but subsystem is Task running");
return -1;
}
if (tr != 'x' && tr != 'X') if (tr != 'x' && tr != 'X')
return 0; return 0;
@ -374,7 +337,14 @@ enforce_task_rules(struct emu *emu, char tr, struct task_stack *stack, struct ta
return -1; return -1;
} }
if (ss.type == VALUE_INT64 && ss.i != ST_TASK_RUNNING) { struct nosv_thread *th = EXT(emu->thread, 'V');
struct value ss;
if (chan_read(&th->m.ch[CH_SUBSYSTEM], &ss) != 0) {
err("chan_read failed");
return -1;
}
if (ss.type == VALUE_INT64 && ss.i != ST_TASK_BODY) {
err("wrong subsystem state on task begin"); err("wrong subsystem state on task begin");
return -1; return -1;
} }
@ -382,6 +352,28 @@ enforce_task_rules(struct emu *emu, char tr, struct task_stack *stack, struct ta
return 0; return 0;
} }
static int
update_task_ss_channel(struct emu *emu, char tr)
{
struct nosv_thread *th = EXT(emu->thread, 'V');
struct chan *ss = &th->m.ch[CH_SUBSYSTEM];
/* Only if starting or ending. Not changed when paused or resumed */
if (tr == 'x') {
if (chan_push(ss, value_int64(ST_TASK_BODY)) != 0) {
err("chan_push subsystem failed");
return -1;
}
} else if (tr == 'e') {
if (chan_pop(ss, value_int64(ST_TASK_BODY)) != 0) {
err("chan_pop subsystem failed");
return -1;
}
}
return 0;
}
static int static int
update_task(struct emu *emu) update_task(struct emu *emu)
{ {
@ -398,6 +390,12 @@ update_task(struct emu *emu)
struct task *next = task_get_running(stack); struct task *next = task_get_running(stack);
/* Update the subsystem channel */
if (update_task_ss_channel(emu, emu->ev->v) != 0) {
err("update_task_ss_channel failed");
return -1;
}
char tr; char tr;
int was_running = (prev != NULL); int was_running = (prev != NULL);
int runs_now = (next != NULL); int runs_now = (next != NULL);
@ -412,7 +410,7 @@ update_task(struct emu *emu)
return -1; return -1;
} }
if (enforce_task_rules(emu, tr, stack, next) != 0) { if (enforce_task_rules(emu, tr, next) != 0) {
err("enforce_task_rules failed"); err("enforce_task_rules failed");
return -1; return -1;
} }

View File

@ -26,7 +26,7 @@ enum nosv_ss_values {
ST_SCHED_SUBMITTING, ST_SCHED_SUBMITTING,
ST_MEM_ALLOCATING, ST_MEM_ALLOCATING,
ST_MEM_FREEING, ST_MEM_FREEING,
ST_TASK_RUNNING, ST_TASK_BODY,
ST_API_CREATE, ST_API_CREATE,
ST_API_DESTROY, ST_API_DESTROY,
ST_API_SUBMIT, ST_API_SUBMIT,

View File

@ -83,7 +83,7 @@ static const struct pcf_value_label nosv_ss_values[] = {
{ ST_SCHED_SUBMITTING, "Scheduler: Submitting" }, { ST_SCHED_SUBMITTING, "Scheduler: Submitting" },
{ ST_MEM_ALLOCATING, "Memory: Allocating" }, { ST_MEM_ALLOCATING, "Memory: Allocating" },
{ ST_MEM_FREEING, "Memory: Freeing" }, { ST_MEM_FREEING, "Memory: Freeing" },
{ ST_TASK_RUNNING, "Task: Running" }, { ST_TASK_BODY, "Task: In body" },
{ ST_API_CREATE, "API: Create" }, { ST_API_CREATE, "API: Create" },
{ ST_API_DESTROY, "API: Destroy" }, { ST_API_DESTROY, "API: Destroy" },
{ ST_API_SUBMIT, "API: Submit" }, { ST_API_SUBMIT, "API: Submit" },

View File

@ -9,3 +9,4 @@ test_emu(pause.c MP)
test_emu(mp-rank.c MP) test_emu(mp-rank.c MP)
test_emu(switch-same-type.c) test_emu(switch-same-type.c)
test_emu(multiple-segment.c MP NPROC 4) test_emu(multiple-segment.c MP NPROC 4)
test_emu(task-pause-from-submit.c)

View File

@ -0,0 +1,68 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include <stdint.h>
#include <stdio.h>
#include "common.h"
#include "emu_prv.h"
#include "instr.h"
#include "instr_nosv.h"
#include "emu/nosv/nosv_priv.h"
int
main(void)
{
/* Tests the case where a task is paused from nosv_submit():
*
* A VTp event causes a pop of the subsystem channel, which is expected
* to be ST_TASK_RUNNING. This is not true in the case of a blocking
* submit (NOSV_SUBMIT_BLOCKING), which calls nosv_pause from inside
* nosv_submit, causing the transition to happen from the unexpected
* ST_API_SUBMIT state.
*/
instr_start(0, 1);
/* Match the PRV line in the trace */
FILE *f = fopen("match.sh", "w");
if (f == NULL)
die("fopen failed:");
uint32_t typeid = 100;
instr_nosv_type_create(typeid);
instr_nosv_task_create(1, typeid);
instr_nosv_task_execute(1);
/* When starting a task, it must cause the subsystems to enter
* the "Task: In body" state */
int prvtype = PRV_NOSV_SUBSYSTEM;
int st = ST_TASK_BODY;
fprintf(f, "grep ':%ld:%d:%d$' ovni/thread.prv\n", get_delta(), prvtype, st);
fprintf(f, "grep ':%ld:%d:%d$' ovni/cpu.prv\n", get_delta(), prvtype, st);
instr_nosv_submit_enter(); /* Blocking submit */
instr_nosv_task_pause(1);
/* Should be left in the submit state, so no state transition in
* subsystems view */
fprintf(f, "! grep ':%ld:%d:[0-9]*$' ovni/thread.prv\n", get_delta(), prvtype);
fprintf(f, "! grep ':%ld:%d:[0-9]*$' ovni/cpu.prv\n", get_delta(), prvtype);
/* But the task state must be set to pause, so the task id
* must be null */
prvtype = PRV_NOSV_TASKID;
fprintf(f, "grep ':%ld:%d:0$' ovni/thread.prv\n", get_delta(), prvtype);
fprintf(f, "grep ':%ld:%d:0$' ovni/cpu.prv\n", get_delta(), prvtype);
instr_nosv_submit_exit();
instr_nosv_task_resume(1);
instr_nosv_task_end(1);
fclose(f);
instr_end();
return 0;
}