From dbea90f5253ca717e9f54dadb96cfed44ebe8689 Mon Sep 17 00:00:00 2001
From: Rodrigo Arias
Date: Thu, 2 Feb 2023 14:02:41 +0100
Subject: [PATCH] Fail if the streams clock is too far apart

The current gate is set to 1 hour and it only checks the first event in
the stream. Added a test too.
---
 src/emu/player.c             | 51 +++++++++++++++++++++
 src/emu/system.c             |  3 --
 test/emu/ovni/CMakeLists.txt |  1 +
 test/emu/ovni/clockgate.c    | 89 ++++++++++++++++++++++++++++++++++++
 4 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 test/emu/ovni/clockgate.c

diff --git a/src/emu/player.c b/src/emu/player.c
index 3335576..91d3b82 100644
--- a/src/emu/player.c
+++ b/src/emu/player.c
@@ -53,6 +53,50 @@ step_stream(struct player *player, struct stream *stream)
 	return 0;
 }
 
+/* Checks that the clock of the current event of every stream is within
+ * one hour of the clock of the first stream in the list. Reports each
+ * offending stream with err(). Returns 0 if all streams pass or -1 if at
+ * least one stream is too far. */
+static int
+check_clock_gate(struct trace *trace)
+{
+	/* 1 hour in nanoseconds */
+	int64_t maxgate = 3600LL * 1000LL * 1000LL * 1000LL;
+	int64_t t0 = 0LL;
+	int first = 1;
+	int ret = 0;
+
+	struct stream *stream;
+	DL_FOREACH(trace->streams, stream) {
+		/* NOTE(review): assumes every stream has a current event
+		 * loaded; confirm for streams without events. */
+		struct ovni_ev *oev = stream_ev(stream);
+		int64_t sclock = stream_evclock(stream, oev);
+
+		/* The first stream is the reference for the others */
+		if (first) {
+			first = 0;
+			t0 = sclock;
+		}
+
+		int64_t delta = llabs(t0 - sclock);
+		if (delta > maxgate) {
+			double hdelta = ((double) delta) / (3600.0 * 1e9);
+			err("stream %s has starting clock too far: delta=%.2f h",
+					stream->relpath, hdelta);
+			/* Keep going to report all the offending streams */
+			ret = -1;
+		}
+	}
+
+	if (ret != 0) {
+		err("detected large clock gate, run 'ovnisync' to set the offsets");
+		return -1;
+	}
+
+	return 0;
+}
+
 int
 player_init(struct player *player, struct trace *trace)
 {
@@ -76,6 +120,13 @@ player_init(struct player *player, struct trace *trace)
 		}
 	}
 
+	/* Ensure the first event sclocks are not too far apart. Otherwise an
+	 * offset table is mandatory. */
+	if (check_clock_gate(trace) != 0) {
+		err("check_clock_gate failed\n");
+		return -1;
+	}
+
 	return 0;
 }
 
diff --git a/src/emu/system.c b/src/emu/system.c
index b7746f3..8cbfb74 100644
--- a/src/emu/system.c
+++ b/src/emu/system.c
@@ -406,9 +406,6 @@ init_offsets(struct system *sys, struct trace *trace)
 	if (n == 0 && sys->nlooms > 1) {
 		err("warning: no clock offset file loaded with %ld looms",
 				sys->nlooms);
-
-		if (sys->args->linter_mode)
-			abort();
 	}
 
 	for (int i = 0; i < n; i++) {
diff --git a/test/emu/ovni/CMakeLists.txt b/test/emu/ovni/CMakeLists.txt
index b6f281a..7ce765a 100644
--- a/test/emu/ovni/CMakeLists.txt
+++ b/test/emu/ovni/CMakeLists.txt
@@ -16,3 +16,4 @@ ovni_test(mp-simple.c MP)
 ovni_test(mp-rank.c MP)
 ovni_test(version-good.c)
 ovni_test(version-bad.c SHOULD_FAIL REGEX "version mismatch")
+ovni_test(clockgate.c MP SHOULD_FAIL REGEX "detected large clock gate")
diff --git a/test/emu/ovni/clockgate.c b/test/emu/ovni/clockgate.c
new file mode 100644
index 0000000..cc11885
--- /dev/null
+++ b/test/emu/ovni/clockgate.c
@@ -0,0 +1,89 @@
+/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
+ * SPDX-License-Identifier: GPL-3.0-or-later */
+
+#define _POSIX_C_SOURCE 200112L
+#define _GNU_SOURCE
+
+#include <limits.h>
+#include <linux/limits.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "common.h"
+#include "compat.h"
+#include "ovni.h"
+
+/* Offset added to the clock of the emitted event */
+int64_t delta = 0LL;
+
+/* Emits an "OHx" event with the current clock shifted by delta */
+static void
+thread_execute_delayed(int32_t cpu, int32_t creator_tid, uint64_t tag)
+{
+	struct ovni_ev ev = {0};
+	ovni_ev_set_mcv(&ev, "OHx");
+	ovni_ev_set_clock(&ev, ovni_clock_now() + delta);
+	ovni_payload_add(&ev, (uint8_t *) &cpu, sizeof(cpu));
+	ovni_payload_add(&ev, (uint8_t *) &creator_tid, sizeof(creator_tid));
+	ovni_payload_add(&ev, (uint8_t *) &tag, sizeof(tag));
+	ovni_ev_emit(&ev);
+}
+
+/* Initializes the process and thread of the given rank and emits its first
+ * event delayed by 2 hours per rank, so the first events of different ranks
+ * are about 2 hours apart, beyond the 1 hour clock gate of the emulator. */
+static inline void
+start_delayed(int rank, int nranks)
+{
+	char hostname[OVNI_MAX_HOSTNAME];
+	char rankname[OVNI_MAX_HOSTNAME + 64];
+
+	/* Bound both writes by the size of the destination buffer */
+	if (gethostname(hostname, sizeof(hostname)) != 0)
+		die("gethostname failed");
+
+	snprintf(rankname, sizeof(rankname), "%s.%d", hostname, rank);
+
+	ovni_version_check();
+	ovni_proc_init(1, rankname, getpid());
+	ovni_proc_set_rank(rank, nranks);
+	ovni_thread_init(gettid());
+
+	/* All ranks inform CPUs */
+	for (int i = 0; i < nranks; i++)
+		ovni_add_cpu(i, i);
+
+	int curcpu = rank;
+
+	dbg("thread %d has cpu %d (ncpus=%d)\n",
+			gettid(), curcpu, nranks);
+
+	delta = ((int64_t) rank) * 2LL * 3600LL * 1000LL * 1000LL * 1000LL;
+	thread_execute_delayed(curcpu, -1, 0);
+}
+
+int
+main(void)
+{
+	/* getenv() returns NULL when the variable is not set */
+	const char *rankstr = getenv("OVNI_RANK");
+	const char *nranksstr = getenv("OVNI_NRANKS");
+
+	if (rankstr == NULL || nranksstr == NULL)
+		die("OVNI_RANK and OVNI_NRANKS must be set");
+
+	int rank = atoi(rankstr);
+	int nranks = atoi(nranksstr);
+
+	start_delayed(rank, nranks);
+
+	ovni_flush();
+	ovni_thread_free();
+	ovni_proc_fini();
+
+	return 0;
+}