Fail if the stream clocks are too far apart
The gate is currently set to 1 hour, and only the first event of each stream is checked. A test is added too.
This commit is contained in:
parent
c5aa784caa
commit
dbea90f525
@ -53,6 +53,42 @@ step_stream(struct player *player, struct stream *stream)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_clock_gate(struct trace *trace)
|
||||||
|
{
|
||||||
|
/* 1 hour in nanoseconds */
|
||||||
|
int64_t maxgate = 3600LL * 1000LL * 1000LL * 1000LL;
|
||||||
|
int64_t t0 = 0LL;
|
||||||
|
int first = 1;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
struct stream *stream;
|
||||||
|
DL_FOREACH(trace->streams, stream) {
|
||||||
|
struct ovni_ev *oev = stream_ev(stream);
|
||||||
|
int64_t sclock = stream_evclock(stream, oev);
|
||||||
|
|
||||||
|
if (first) {
|
||||||
|
first = 0;
|
||||||
|
t0 = sclock;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t delta = llabs(t0 - sclock);
|
||||||
|
if (delta > maxgate) {
|
||||||
|
double hdelta = ((double) delta) / (3600.0 * 1e9);
|
||||||
|
err("stream %s has starting clock too far: delta=%.2f h",
|
||||||
|
stream->relpath, hdelta);
|
||||||
|
ret = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret != 0) {
|
||||||
|
err("detected large clock gate, run 'ovnisync' to set the offsets");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
player_init(struct player *player, struct trace *trace)
|
player_init(struct player *player, struct trace *trace)
|
||||||
{
|
{
|
||||||
@ -76,6 +112,13 @@ player_init(struct player *player, struct trace *trace)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Ensure the first event sclocks are not too far apart. Otherwise an
|
||||||
|
* offset table is mandatory. */
|
||||||
|
if (check_clock_gate(trace) != 0) {
|
||||||
|
err("check_clock_gate failed\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -406,9 +406,6 @@ init_offsets(struct system *sys, struct trace *trace)
|
|||||||
if (n == 0 && sys->nlooms > 1) {
|
if (n == 0 && sys->nlooms > 1) {
|
||||||
err("warning: no clock offset file loaded with %ld looms",
|
err("warning: no clock offset file loaded with %ld looms",
|
||||||
sys->nlooms);
|
sys->nlooms);
|
||||||
|
|
||||||
if (sys->args->linter_mode)
|
|
||||||
abort();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
|
@ -16,3 +16,4 @@ ovni_test(mp-simple.c MP)
|
|||||||
ovni_test(mp-rank.c MP)
|
ovni_test(mp-rank.c MP)
|
||||||
ovni_test(version-good.c)
|
ovni_test(version-good.c)
|
||||||
ovni_test(version-bad.c SHOULD_FAIL REGEX "version mismatch")
|
ovni_test(version-bad.c SHOULD_FAIL REGEX "version mismatch")
|
||||||
|
ovni_test(clockgate.c MP SHOULD_FAIL REGEX "detected large clock gate")
|
||||||
|
76
test/emu/ovni/clockgate.c
Normal file
76
test/emu/ovni/clockgate.c
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/* Copyright (c) 2021-2023 Barcelona Supercomputing Center (BSC)
|
||||||
|
* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||||
|
|
||||||
|
#define _POSIX_C_SOURCE 200112L
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <linux/limits.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "compat.h"
|
||||||
|
#include "ovni.h"
|
||||||
|
|
||||||
|
/* Offset added to the current clock when stamping the emitted event. It
 * is set per rank in start_delayed() (presumably in nanoseconds, as it is
 * computed from hours * 3600 * 1e9). */
int64_t delta = 0LL;
|
||||||
|
|
||||||
|
static void
|
||||||
|
thread_execute_delayed(int32_t cpu, int32_t creator_tid, uint64_t tag)
|
||||||
|
{
|
||||||
|
struct ovni_ev ev = {0};
|
||||||
|
ovni_ev_set_mcv(&ev, "OHx");
|
||||||
|
ovni_ev_set_clock(&ev, ovni_clock_now() + delta);
|
||||||
|
ovni_payload_add(&ev, (uint8_t *) &cpu, sizeof(cpu));
|
||||||
|
ovni_payload_add(&ev, (uint8_t *) &creator_tid, sizeof(creator_tid));
|
||||||
|
ovni_payload_add(&ev, (uint8_t *) &tag, sizeof(tag));
|
||||||
|
ovni_ev_emit(&ev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
start_delayed(int rank, int nranks)
|
||||||
|
{
|
||||||
|
char hostname[OVNI_MAX_HOSTNAME];
|
||||||
|
char rankname[OVNI_MAX_HOSTNAME + 64];
|
||||||
|
|
||||||
|
if (gethostname(hostname, HOST_NAME_MAX) != 0)
|
||||||
|
die("gethostname failed");
|
||||||
|
|
||||||
|
sprintf(rankname, "%s.%d", hostname, rank);
|
||||||
|
|
||||||
|
ovni_version_check();
|
||||||
|
ovni_proc_init(1, rankname, getpid());
|
||||||
|
ovni_proc_set_rank(rank, nranks);
|
||||||
|
ovni_thread_init(gettid());
|
||||||
|
|
||||||
|
/* All ranks inform CPUs */
|
||||||
|
for (int i = 0; i < nranks; i++)
|
||||||
|
ovni_add_cpu(i, i);
|
||||||
|
|
||||||
|
int curcpu = rank;
|
||||||
|
|
||||||
|
dbg("thread %d has cpu %d (ncpus=%d)\n",
|
||||||
|
gettid(), curcpu, nranks);
|
||||||
|
|
||||||
|
delta = ((int64_t) rank) * 2LL * 3600LL * 1000LL * 1000LL * 1000LL;
|
||||||
|
thread_execute_delayed(curcpu, -1, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test entry point: reads the rank and number of ranks from the OVNI_RANK
 * and OVNI_NRANKS environment variables, emits the delayed event and then
 * flushes and closes the ovni thread and process. */
int
main(void)
{
	const char *rankstr = getenv("OVNI_RANK");
	const char *nranksstr = getenv("OVNI_NRANKS");

	/* Fail with a message instead of passing NULL to atoi() */
	if (rankstr == NULL)
		die("OVNI_RANK is not set");

	if (nranksstr == NULL)
		die("OVNI_NRANKS is not set");

	int rank = atoi(rankstr);
	int nranks = atoi(nranksstr);

	start_delayed(rank, nranks);

	ovni_flush();
	ovni_thread_free();
	ovni_proc_fini();

	return 0;
}
|
Loading…
Reference in New Issue
Block a user