Fail if the stream clocks are too far apart

The gate is currently set to 1 hour, and only the first event of each
stream is checked. A test is added as well.
This commit is contained in:
Rodrigo Arias 2023-02-02 14:02:41 +01:00 committed by Rodrigo Arias Mallo
parent c5aa784caa
commit dbea90f525
4 changed files with 120 additions and 3 deletions

View File

@ -53,6 +53,42 @@ step_stream(struct player *player, struct stream *stream)
return 0;
}
/* Verifies that the clock of the current event of every stream in the
 * trace lies within one hour of the clock of the first stream visited.
 *
 * Every offending stream is reported with err(), so a single run lists
 * all of them. Returns 0 when all streams are within the gate and -1
 * otherwise. */
static int
check_clock_gate(struct trace *trace)
{
	/* 1 hour in nanoseconds */
	int64_t limit = 3600LL * 1000LL * 1000LL * 1000LL;
	int64_t reference = 0LL;
	int have_reference = 0;
	int nfar = 0;

	struct stream *s;
	DL_FOREACH(trace->streams, s) {
		int64_t clk = stream_evclock(s, stream_ev(s));

		/* The first stream visited provides the reference clock */
		if (!have_reference) {
			reference = clk;
			have_reference = 1;
		}

		int64_t gap = llabs(reference - clk);

		if (gap > limit) {
			/* Report the distance in hours */
			double hours = ((double) gap) / (3600.0 * 1e9);
			err("stream %s has starting clock too far: delta=%.2f h",
					s->relpath, hours);
			nfar++;
		}
	}

	if (nfar == 0)
		return 0;

	err("detected large clock gate, run 'ovnisync' to set the offsets");
	return -1;
}
int
player_init(struct player *player, struct trace *trace)
{
@ -76,6 +112,13 @@ player_init(struct player *player, struct trace *trace)
}
}
/* Ensure the first event sclocks are not too far apart. Otherwise an
* offset table is mandatory. */
if (check_clock_gate(trace) != 0) {
err("check_clock_gate failed\n");
return -1;
}
return 0;
}

View File

@ -406,9 +406,6 @@ init_offsets(struct system *sys, struct trace *trace)
if (n == 0 && sys->nlooms > 1) {
err("warning: no clock offset file loaded with %ld looms",
sys->nlooms);
if (sys->args->linter_mode)
abort();
}
for (int i = 0; i < n; i++) {

View File

@ -16,3 +16,4 @@ ovni_test(mp-simple.c MP)
ovni_test(mp-rank.c MP)
ovni_test(version-good.c)
ovni_test(version-bad.c SHOULD_FAIL REGEX "version mismatch")
ovni_test(clockgate.c MP SHOULD_FAIL REGEX "detected large clock gate")

76
test/emu/ovni/clockgate.c Normal file
View File

@ -0,0 +1,76 @@
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#define _POSIX_C_SOURCE 200112L
#define _GNU_SOURCE
#include <limits.h>
#include <linux/limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include "common.h"
#include "compat.h"
#include "ovni.h"
/* Shift added to the clock of the emitted event. It is set per rank in
 * start_delayed() and read in thread_execute_delayed(). Only used within
 * this file, so it has internal linkage. */
static int64_t delta = 0LL;
/* Emits an "OHx" event carrying cpu, creator_tid and tag as payload, in
 * that order. The event clock is the current ovni clock plus the
 * file-level `delta` shift. */
static void
thread_execute_delayed(int32_t cpu, int32_t creator_tid, uint64_t tag)
{
	struct ovni_ev ev = {0};

	ovni_ev_set_mcv(&ev, "OHx");

	/* Stamp the event with the shifted clock instead of the real one */
	ovni_ev_set_clock(&ev, ovni_clock_now() + delta);

	/* The payload is the three arguments, appended as raw bytes */
	ovni_payload_add(&ev, (uint8_t *) &cpu, sizeof cpu);
	ovni_payload_add(&ev, (uint8_t *) &creator_tid, sizeof creator_tid);
	ovni_payload_add(&ev, (uint8_t *) &tag, sizeof tag);

	ovni_ev_emit(&ev);
}
/* Initializes the ovni process and thread state for `rank` out of
 * `nranks` and emits a single thread-execute event whose clock is
 * shifted forward by two hours per rank, so that ranks other than 0
 * start with clocks far apart from rank 0.
 *
 * The process is named "<hostname>.<rank>". Calls die() if the hostname
 * cannot be obtained. */
static inline void
start_delayed(int rank, int nranks)
{
	char hostname[OVNI_MAX_HOSTNAME];
	char rankname[OVNI_MAX_HOSTNAME + 64];

	/* Bound the call by the real size of the buffer rather than by
	 * HOST_NAME_MAX, which is unrelated to OVNI_MAX_HOSTNAME */
	if (gethostname(hostname, sizeof(hostname)) != 0)
		die("gethostname failed");

	/* gethostname() may leave the buffer unterminated when the name
	 * is truncated */
	hostname[sizeof(hostname) - 1] = '\0';

	snprintf(rankname, sizeof(rankname), "%s.%d", hostname, rank);

	ovni_version_check();
	ovni_proc_init(1, rankname, getpid());
	ovni_proc_set_rank(rank, nranks);
	ovni_thread_init(gettid());

	/* All ranks inform CPUs */
	for (int i = 0; i < nranks; i++)
		ovni_add_cpu(i, i);

	int curcpu = rank;

	dbg("thread %d has cpu %d (ncpus=%d)\n",
			gettid(), curcpu, nranks);

	/* Two hours per rank, twice the one-hour gate, so any two ranks
	 * are far enough apart to trigger the emulator check */
	delta = ((int64_t) rank) * 2LL * 3600LL * 1000LL * 1000LL * 1000LL;
	thread_execute_delayed(curcpu, -1, 0);
}
/* Test entry point: reads the rank and the number of ranks from the
 * OVNI_RANK and OVNI_NRANKS environment variables, emits the delayed
 * event for this rank and finalizes the trace.
 *
 * Returns 0 on success, or 1 if either variable is not set. */
int
main(void)
{
	const char *rank_str = getenv("OVNI_RANK");
	const char *nranks_str = getenv("OVNI_NRANKS");

	/* getenv() returns NULL for unset variables and passing NULL to
	 * atoi() is undefined behaviour, so fail with a clear message */
	if (rank_str == NULL) {
		fprintf(stderr, "OVNI_RANK is not set\n");
		return 1;
	}

	if (nranks_str == NULL) {
		fprintf(stderr, "OVNI_NRANKS is not set\n");
		return 1;
	}

	int rank = atoi(rank_str);
	int nranks = atoi(nranks_str);

	start_delayed(rank, nranks);

	ovni_flush();
	ovni_thread_free();
	ovni_proc_fini();

	return 0;
}