diff --git a/Makefile b/Makefile index ffaccdd..f1baee7 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,8 @@ libovni.so: ovni.o parson.o ovni2prv: ovni2prv.c ovni.o parson.o ovnisync: ovnisync.c - mpicc -lm $^ -o $@ + #OMPI_CC=clang mpicc -fsanitize=memory -fsanitize-recover=memory -fno-omit-frame-pointer -g -O2 -lm $^ -o $@ + mpicc -g -O0 -lm $^ -o $@ clean: rm -f *.o $(BIN) diff --git a/ovnisync.c b/ovnisync.c index 3d0503e..a072ac1 100644 --- a/ovnisync.c +++ b/ovnisync.c @@ -5,20 +5,64 @@ #include #include #include +#include #include "ovni.h" +const char progname[] = "ovnisync"; + +struct offset { + /* All in nanoseconds */ + double delta_mean; + double delta_median; + double delta_var; + double delta_std; + + /* The value selected for the offset */ + int64_t offset; + + /* In seconds */ + double wall_t0; + double wall_t1; + + char hostname[OVNI_MAX_HOSTNAME]; + int rank; + + int nsamples; + + /* Flexible array */ + double clock_sample[]; +}; + +struct offset_table { + int nprocs; + struct offset *_offset; + struct offset **offset; +}; + +struct options { + int nsamples; + int ndrift_samples; + int drift_wait; /* in seconds */ + int verbose; +}; + static double -get_time() +get_time(clockid_t clock, int use_ns) { struct timespec tv; - if(clock_gettime(CLOCK_MONOTONIC, &tv) != 0) + if(clock_gettime(clock, &tv) != 0) { perror("clock_gettime failed"); exit(EXIT_FAILURE); } - return (double)(tv.tv_sec) * 1.0e-9 + - (double)tv.tv_nsec; + + if(use_ns) + return (double)(tv.tv_sec) * 1.0e9 + + (double)tv.tv_nsec; + + return (double)(tv.tv_sec) + + (double)tv.tv_nsec * 1.0e-9; } static int @@ -37,123 +81,339 @@ cmp_double(const void *pa, const void *pb) return 0; } -/* Called by rank 0 */ -static void -get_offset(double *timetable, char (*hosttable)[OVNI_MAX_HOSTNAME], int nproc, int nsamples) +void +usage(int argc, char *argv[]) { - int i, j; - double median, mean, var, std; - double *offset; - double *delta; - - offset = malloc(nproc * sizeof(double)); - delta = malloc(nsamples * sizeof(double)); - - if(!offset || !delta) - { - perror("malloc"); - exit(EXIT_FAILURE); - } - - /* We use as ref the clock in rank 0 */ - - printf("%-10s %-20s %-20s %-20s\n", "rank", "hostname", "offset_median", "offset_std"); - for(i=0; indrift_samples = 1; + options->nsamples = 100; + options->verbose = 0; + options->drift_wait = 5; - if(!argv[1]) - nsamples = 100; - else - nsamples = atoi(argv[1]); + while ((opt = getopt(argc, argv, "d:vn:w:")) != -1) { + switch (opt) { + case 'd': + options->ndrift_samples = atoi(optarg); + break; + case 'w': + options->drift_wait = atoi(optarg); + break; + case 'v': + options->verbose = 1; + break; + case 'n': + options->nsamples = atoi(optarg); + break; + default: /* '?' */ + usage(argc, argv); + } + } - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &nprocs); - - timetable = calloc(nsamples * nprocs, sizeof(double)); - hosttable = calloc(nprocs, sizeof(*hosttable)); - t = calloc(nsamples, sizeof(double)); - - if(!timetable || !t || !hosttable) + if (optind < argc) { - perror("calloc"); + fprintf(stderr, "error: unexpected extra arguments\n"); exit(EXIT_FAILURE); } +} - /* Warm up iterations */ - for(i=0; i<20; i++) - { - MPI_Barrier(MPI_COMM_WORLD); - get_time(); - } +void +get_clock_samples(struct offset *offset, int nsamples) +{ + int i; + + /* Keep the wall time as well */ + offset->wall_t0 = get_time(CLOCK_REALTIME, 0); + + offset->nsamples = nsamples; for(i=0; iclock_sample[i] = get_time(CLOCK_MONOTONIC, 1); } - if(gethostname(hosttable[rank], OVNI_MAX_HOSTNAME) != 0) + offset->wall_t1 = get_time(CLOCK_REALTIME, 0); +} + +void +fill_offset(struct offset *offset, int nsamples) +{ + int warmup_nsamples; + + /* Identify the rank */ + MPI_Comm_rank(MPI_COMM_WORLD, &offset->rank); + + /* Fill the host name */ + if(gethostname(offset->hostname, OVNI_MAX_HOSTNAME) != 0) { perror("gethostname"); exit(EXIT_FAILURE); } - void *sendbuff = (rank > 0) ? hosttable[rank] : MPI_IN_PLACE; + //printf("rank=%d hostname=%s\n", offset->rank, offset->hostname); - MPI_Gather(sendbuff, sizeof(*hosttable), MPI_CHAR, - hosttable[0], sizeof(*hosttable), MPI_CHAR, - 0, MPI_COMM_WORLD); + /* Warm up iterations */ + warmup_nsamples = nsamples >= 20 ? 20 : nsamples; + get_clock_samples(offset, warmup_nsamples); - MPI_Gather(t, nsamples, MPI_DOUBLE, - timetable, nsamples, MPI_DOUBLE, - 0, MPI_COMM_WORLD); + get_clock_samples(offset, nsamples); +} +void +offset_compute_delta(struct offset *ref, struct offset *cur, int nsamples) +{ + int i; + double *delta; + + delta = malloc(sizeof(double) * nsamples); + + if(delta == NULL) + { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for(i=0; iclock_sample[i] - cur->clock_sample[i]; + //printf("rank=%d sample=%d delta=%f ref=%f cur=%f\n", + // cur->rank, i, delta[i], ref->clock_sample[i], cur->clock_sample[i]); + } + + qsort(delta, nsamples, sizeof(double), cmp_double); + + cur->delta_median = delta[nsamples / 2]; + + for(cur->delta_mean=0, i=0; idelta_mean += delta[i]; + + cur->delta_mean /= nsamples; + + for(cur->delta_var=0, i=0; idelta_var += (delta[i] - cur->delta_mean) * + (delta[i] - cur->delta_mean); + + cur->delta_var /= (double) (nsamples - 1); + cur->delta_std = sqrt(cur->delta_var); + + /* The median is the selected metric for the offset */ + cur->offset = (int64_t) cur->delta_median; + + free(delta); +} + +size_t +offset_size(int nsamples) +{ + return sizeof(struct offset) + sizeof(double) * nsamples; +} + +struct offset * +table_get_offset(struct offset_table *table, int i, int nsamples) +{ + char *p; + + p = (char *) table->_offset; + p += i * offset_size(nsamples); + + return (struct offset *) p; +} + +struct offset_table * +build_offset_table(int nsamples, int rank) +{ + int i; + struct offset_table *table = NULL; + struct offset *offset = NULL; + void *sendbuf; + + /* The rank 0 must build the table */ if(rank == 0) - get_offset(timetable, hosttable, nprocs, nsamples); + { + table = malloc(sizeof(*table)); + + if(table == NULL) + { + perror("malloc"); + exit(EXIT_FAILURE); + } + + MPI_Comm_size(MPI_COMM_WORLD, &table->nprocs); + + table->_offset = calloc(table->nprocs, offset_size(nsamples)); + + if(table->_offset == NULL) + { + perror("malloc"); + exit(EXIT_FAILURE); + } + + table->offset = malloc(sizeof(struct offset *) * + table->nprocs); + + if(table->offset == NULL) + { + perror("malloc"); + exit(EXIT_FAILURE); + } + + for(i=0; inprocs; i++) + table->offset[i] = table_get_offset(table, i, nsamples); + + offset = table->offset[0]; + } + else + { + /* Otherwise just allocate one offset */ + offset = calloc(1, offset_size(nsamples)); + + if(offset == NULL) + { + perror("malloc"); + exit(EXIT_FAILURE); + } + + table = NULL; + } + + /* Each rank fills its own offset */ + fill_offset(offset, nsamples); + + sendbuf = rank == 0 ? MPI_IN_PLACE : offset; + + /* Then collect all the offsets into the rank 0 */ + MPI_Gather(sendbuf, offset_size(nsamples), MPI_CHAR, + offset, offset_size(nsamples), MPI_CHAR, + 0, MPI_COMM_WORLD); + + /* Finish the offsets by computing the deltas on rank 0 */ + if(rank == 0) + { + for(i=0; inprocs; i++) + { + offset_compute_delta(offset, table->offset[i], + nsamples); + } + } + + /* Cleanup for non-zero ranks */ + if(rank != 0) + free(offset); + + return table; +} + +void +print_drift_header(struct offset_table *table) +{ + int i; + //char buf[64]; + + printf("%-20s", "wallclock"); + + for(i=0; inprocs; i++) + { + //sprintf(buf, "rank%d", i); + printf(" %-20s", table->offset[i]->hostname); + } + + printf("\n"); +} + +void +print_drift_row(struct offset_table *table) +{ + int i; + char buf[64]; + + printf("%-20f", table->offset[0]->wall_t1); + + for(i=0; inprocs; i++) + printf(" %-20ld", table->offset[i]->offset); + + printf("\n"); +} + +void +print_table_detailed(struct offset_table *table) +{ + int i; + struct offset *offset; + + printf("%-10s %-20s %-20s %-20s %-20s\n", "rank", "hostname", "offset_median", "offset_mean", "offset_std"); + for(i=0; inprocs; i++) + { + offset = table->offset[i]; + printf("%-10d %-20s %-20ld %-20f %-20f\n", + i, offset->hostname, offset->offset, + offset->delta_mean, offset->delta_std); + } +} + +void +do_work(struct options *options, int rank) +{ + int drift_mode; + int i; + struct offset_table *table; + + drift_mode = options->ndrift_samples > 1 ? 1 : 0; + + for(i=0; indrift_samples; i++) + { + table = build_offset_table(options->nsamples, rank); + + if(rank == 0) + { + if(drift_mode) + { + if(i == 0) + print_drift_header(table); + + print_drift_row(table); + } + else + { + print_table_detailed(table); + } + + free(table->_offset); + free(table->offset); + free(table); + } + + if(drift_mode) + usleep(options->drift_wait * 1000 * 1000); + } +} + +int +main(int argc, char *argv[]) +{ + int rank; + struct options options; + + MPI_Init(&argc, &argv); + + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + parse_options(&options, argc, argv); + + do_work(&options, rank); - free(hosttable); - free(timetable); - free(t); MPI_Finalize(); + return 0; } diff --git a/plot-drift.py b/plot-drift.py new file mode 100644 index 0000000..df5c706 --- /dev/null +++ b/plot-drift.py @@ -0,0 +1,40 @@ +import numpy as np +import matplotlib.pyplot as plt +import sys + +if len(sys.argv) != 2: + print("missing drift file") + exit(1) + +fn = sys.argv[1] + +data = np.genfromtxt(fn, skip_header=1) + +with open(fn, 'r') as f: + lines = [n for n in f.readline().strip().split(" ") if n != ''] + +node_names = lines[1:] +nnodes=len(node_names) + +t = data[:,0] + +t -= t[0] + +plt.figure(figsize=(10,6)) + +for i in range(nnodes): + delta = data[:,i+1] + delta -= delta[0] + + delta /= 1000 + + plt.plot(t, delta, label="rank%d (%s)" % (i, node_names[i])) + +plt.title('Clock drift using %d nodes' % nnodes) +plt.xlabel('Relative wall clock time (s)') +plt.ylabel('Relative time delta (us)') +plt.legend() +plt.grid() + +#plt.show() +plt.savefig("drift.png")