2022-09-19 12:39:02 +02:00
|
|
|
/* Copyright (c) 2021-2022 Barcelona Supercomputing Center (BSC)
 * SPDX-License-Identifier: GPL-3.0-or-later */
|
2022-07-28 16:33:45 +02:00
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
#include "trace.h"
|
|
|
|
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
#include <dirent.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <linux/limits.h>
|
|
|
|
#include <stdatomic.h>
|
2021-11-18 11:55:28 +01:00
|
|
|
#include <stdint.h>
|
2022-09-29 15:34:44 +02:00
|
|
|
#include <stdio.h>
|
2021-11-18 11:55:28 +01:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <sys/stat.h>
|
2022-09-29 15:34:44 +02:00
|
|
|
#include <time.h>
|
|
|
|
#include <unistd.h>
|
2021-11-18 11:55:28 +01:00
|
|
|
|
|
|
|
/* Tests whether dirname has the form "<prefix>.<rest>".
 *
 * On a match returns 0 and, when str is not NULL, stores in *str a pointer to
 * the first character after the dot (it points inside dirname, nothing is
 * copied). Returns -1 when dirname does not start with prefix or when the
 * prefix is not immediately followed by a dot. */
static int
find_dir_prefix_str(const char *dirname, const char *prefix, const char **str)
{
	size_t plen = strlen(prefix);

	/* The name must begin with the prefix... */
	if (strncmp(dirname, prefix, plen) != 0)
		return -1;

	/* ...and a dot must come right after it */
	const char *rest = dirname + plen;

	if (*rest != '.')
		return -1;

	/* Skip the dot itself */
	rest++;

	if (str != NULL)
		*str = rest;

	return 0;
}
|
|
|
|
|
|
|
|
/* Parses the number of a directory entry named "<prefix>.<number>".
 *
 * Returns 0 and stores the value in *num when dirname matches the prefix (as
 * decided by find_dir_prefix_str()) and the text after the dot is a complete
 * base-10 integer that fits in an int. Returns -1 otherwise, leaving *num
 * untouched. */
static int
find_dir_prefix_int(const char *dirname, const char *prefix, int *num)
{
	const char *p;

	if (find_dir_prefix_str(dirname, prefix, &p) != 0)
		return -1;

	/* Convert the suffix string to a number. strtol() is used rather than
	 * atoi() so that a malformed suffix is reported instead of silently
	 * becoming 0, and so that overflow is detected instead of being
	 * undefined behavior */
	char *end;

	errno = 0;
	long value = strtol(p, &end, 10);

	/* Reject an empty suffix and any trailing characters */
	if (end == p || *end != '\0')
		return -1;

	/* Largest int value, derived from unsigned arithmetic so that no
	 * additional header is required */
	const long int_max = (long) (~0U >> 1);

	/* Reject values that overflow a long or that don't fit in an int */
	if (errno == ERANGE || value > int_max || value < -int_max - 1)
		return -1;

	*num = (int) value;

	return 0;
}
|
|
|
|
|
2022-09-22 11:56:48 +02:00
|
|
|
/* Counts the entries of the directory stream dir whose name matches
 * "<prefix>.<anything>", as decided by find_dir_prefix_str().
 *
 * Entries are read from the current position until readdir() returns NULL,
 * so the stream position is consumed by this call. */
static size_t
count_dir_prefix(DIR *dir, const char *prefix)
{
	size_t matches = 0;
	struct dirent *entry;

	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
		if (find_dir_prefix_str(entry->d_name, prefix, NULL) == 0)
			matches++;
	}

	return matches;
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
static int
|
|
|
|
load_thread(struct ovni_ethread *thread, struct ovni_eproc *proc, int index, int tid, char *filepath)
|
|
|
|
{
|
|
|
|
static int total_threads = 0;
|
|
|
|
|
|
|
|
thread->tid = tid;
|
|
|
|
thread->index = index;
|
|
|
|
thread->gindex = total_threads++;
|
|
|
|
thread->state = TH_ST_UNKNOWN;
|
|
|
|
thread->proc = proc;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (strlen(filepath) >= PATH_MAX) {
|
2021-11-29 16:02:09 +01:00
|
|
|
err("filepath too large: %s\n", filepath);
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2021-11-29 16:02:09 +01:00
|
|
|
|
|
|
|
strcpy(thread->tracefile, filepath);
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2021-12-10 18:20:31 +01:00
|
|
|
load_proc_metadata(struct ovni_eproc *proc, int *rank_enabled)
|
2021-11-18 11:55:28 +01:00
|
|
|
{
|
|
|
|
JSON_Object *meta;
|
|
|
|
|
|
|
|
meta = json_value_get_object(proc->meta);
|
2022-09-29 15:34:44 +02:00
|
|
|
if (meta == NULL)
|
2021-12-07 19:52:48 +01:00
|
|
|
die("load_proc_metadata: json_value_get_object() failed\n");
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2021-12-10 18:20:31 +01:00
|
|
|
JSON_Value *appid_val = json_object_get_value(meta, "app_id");
|
2022-09-29 15:34:44 +02:00
|
|
|
if (appid_val == NULL)
|
2021-12-10 18:20:31 +01:00
|
|
|
die("process %d is missing app_id in metadata\n", proc->pid);
|
|
|
|
|
|
|
|
proc->appid = (int) json_number(appid_val);
|
|
|
|
|
|
|
|
JSON_Value *rank_val = json_object_get_value(meta, "rank");
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (rank_val != NULL) {
|
2021-12-10 18:20:31 +01:00
|
|
|
proc->rank = (int) json_number(rank_val);
|
|
|
|
*rank_enabled = 1;
|
2022-09-29 15:34:44 +02:00
|
|
|
} else {
|
2021-12-10 18:20:31 +01:00
|
|
|
proc->rank = -1;
|
|
|
|
}
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|
|
|
|
|
2022-06-07 11:00:15 +02:00
|
|
|
static void
|
|
|
|
check_metadata_version(struct ovni_eproc *proc)
|
|
|
|
{
|
|
|
|
JSON_Object *meta = json_value_get_object(proc->meta);
|
2022-09-29 15:34:44 +02:00
|
|
|
if (meta == NULL)
|
2022-06-07 11:00:15 +02:00
|
|
|
die("check_metadata_version: json_value_get_object() failed\n");
|
|
|
|
|
|
|
|
JSON_Value *version_val = json_object_get_value(meta, "version");
|
2022-09-29 15:34:44 +02:00
|
|
|
if (version_val == NULL) {
|
2022-06-07 11:00:15 +02:00
|
|
|
die("process %d is missing attribute \"version\" in metadata\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
proc->pid);
|
2022-06-07 11:00:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int version = (int) json_number(version_val);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (version != OVNI_METADATA_VERSION) {
|
2022-06-07 11:00:15 +02:00
|
|
|
die("pid %d: metadata version mismatch %d (expected %d)\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
proc->pid, version,
|
|
|
|
OVNI_METADATA_VERSION);
|
2022-06-07 11:00:15 +02:00
|
|
|
}
|
2022-07-27 18:22:13 +02:00
|
|
|
|
|
|
|
JSON_Value *mversion_val = json_object_get_value(meta, "model_version");
|
2022-09-29 15:34:44 +02:00
|
|
|
if (mversion_val == NULL) {
|
2022-07-27 18:22:13 +02:00
|
|
|
die("process %d is missing attribute \"model_version\" in metadata\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
proc->pid);
|
2022-07-27 18:22:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
const char *mversion = json_string(mversion_val);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (strcmp(mversion, OVNI_MODEL_VERSION) != 0) {
|
2022-07-27 18:22:13 +02:00
|
|
|
die("pid %d: metadata model version mismatch '%s' (expected '%s')\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
proc->pid, mversion,
|
|
|
|
OVNI_MODEL_VERSION);
|
2022-07-27 18:22:13 +02:00
|
|
|
}
|
2022-06-07 11:00:15 +02:00
|
|
|
}
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-05 20:08:27 +02:00
|
|
|
/* qsort() comparator for int elements, sorting in ascending order.
 * Returns -1, 0 or +1 when *a is lower than, equal to or greater than *b. */
static int
compare_int(const void *a, const void *b)
{
	const int *lhs = a;
	const int *rhs = b;

	/* Difference of the two comparisons: never overflows, unlike a
	 * plain subtraction of the values */
	return (*lhs > *rhs) - (*lhs < *rhs);
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
static int
|
|
|
|
load_proc(struct ovni_eproc *proc, struct ovni_loom *loom, int index, int pid, char *procdir)
|
|
|
|
{
|
|
|
|
static int total_procs = 0;
|
|
|
|
|
|
|
|
struct dirent *dirent;
|
|
|
|
DIR *dir;
|
|
|
|
char path[PATH_MAX];
|
|
|
|
struct ovni_ethread *thread;
|
|
|
|
|
|
|
|
proc->pid = pid;
|
|
|
|
proc->index = index;
|
|
|
|
proc->gindex = total_procs++;
|
|
|
|
proc->loom = loom;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (snprintf(path, PATH_MAX, "%s/%s", procdir, "metadata.json") >= PATH_MAX) {
|
2021-11-18 11:55:28 +01:00
|
|
|
err("snprintf: path too large: %s\n", procdir);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
proc->meta = json_parse_file_with_comments(path);
|
2022-09-29 15:34:44 +02:00
|
|
|
if (proc->meta == NULL) {
|
2021-11-18 11:55:28 +01:00
|
|
|
err("error loading metadata from %s\n", path);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-06-07 11:00:15 +02:00
|
|
|
check_metadata_version(proc);
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
/* The appid is populated from the metadata */
|
2021-12-10 18:20:31 +01:00
|
|
|
load_proc_metadata(proc, &loom->rank_enabled);
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if ((dir = opendir(procdir)) == NULL) {
|
2021-11-18 11:55:28 +01:00
|
|
|
fprintf(stderr, "opendir %s failed: %s\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
procdir, strerror(errno));
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2021-11-18 16:20:20 +01:00
|
|
|
proc->nthreads = count_dir_prefix(dir, "thread");
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (proc->nthreads <= 0) {
|
2021-11-18 16:20:20 +01:00
|
|
|
err("cannot find any thread for process %d\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
proc->pid);
|
2021-11-18 16:20:20 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
proc->thread = calloc(proc->nthreads, sizeof(struct ovni_ethread));
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (proc->thread == NULL) {
|
2021-11-18 16:20:20 +01:00
|
|
|
perror("calloc failed");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-09-05 20:08:27 +02:00
|
|
|
int *tids;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if ((tids = calloc(proc->nthreads, sizeof(int))) == NULL) {
|
2022-09-05 20:08:27 +02:00
|
|
|
perror("calloc failed\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2021-11-18 16:20:20 +01:00
|
|
|
rewinddir(dir);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
for (size_t i = 0; i < proc->nthreads;) {
|
2022-09-05 20:08:27 +02:00
|
|
|
dirent = readdir(dir);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (dirent == NULL) {
|
2022-09-05 20:08:27 +02:00
|
|
|
err("inconsistent: readdir returned NULL\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (find_dir_prefix_int(dirent->d_name, "thread", &tids[i]) != 0)
|
2021-11-18 11:55:28 +01:00
|
|
|
continue;
|
|
|
|
|
2022-09-05 20:08:27 +02:00
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
closedir(dir);
|
|
|
|
|
|
|
|
/* Sort threads by ascending TID */
|
|
|
|
qsort(tids, proc->nthreads, sizeof(int), compare_int);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
for (size_t i = 0; i < proc->nthreads; i++) {
|
2022-09-05 20:08:27 +02:00
|
|
|
int tid = tids[i];
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (snprintf(path, PATH_MAX, "%s/thread.%d", procdir, tid) >= PATH_MAX) {
|
2021-11-18 11:55:28 +01:00
|
|
|
err("snprintf: path too large: %s\n", procdir);
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2021-11-18 16:20:20 +01:00
|
|
|
thread = &proc->thread[i];
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (load_thread(thread, proc, i, tid, path) != 0)
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-09-05 20:08:27 +02:00
|
|
|
free(tids);
|
2021-11-18 11:55:28 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2021-11-18 15:53:10 +01:00
|
|
|
load_loom(struct ovni_loom *loom, char *loomdir)
|
2021-11-18 11:55:28 +01:00
|
|
|
{
|
2021-11-18 16:20:20 +01:00
|
|
|
int pid;
|
|
|
|
size_t i;
|
2021-11-18 11:55:28 +01:00
|
|
|
char path[PATH_MAX];
|
|
|
|
DIR *dir;
|
|
|
|
struct dirent *dirent;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if ((dir = opendir(loomdir)) == NULL) {
|
2021-11-18 11:55:28 +01:00
|
|
|
fprintf(stderr, "opendir %s failed: %s\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
loomdir, strerror(errno));
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2021-12-10 18:20:31 +01:00
|
|
|
loom->rank_enabled = 0;
|
2021-11-18 15:53:10 +01:00
|
|
|
loom->nprocs = count_dir_prefix(dir, "proc");
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (loom->nprocs <= 0) {
|
2021-11-18 15:53:10 +01:00
|
|
|
err("cannot find any process directory in loom %s\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
loom->hostname);
|
2021-11-18 15:53:10 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
loom->proc = calloc(loom->nprocs, sizeof(struct ovni_eproc));
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (loom->proc == NULL) {
|
2021-11-18 15:53:10 +01:00
|
|
|
perror("calloc failed");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
rewinddir(dir);
|
|
|
|
|
|
|
|
i = 0;
|
2022-09-29 15:34:44 +02:00
|
|
|
while ((dirent = readdir(dir)) != NULL) {
|
|
|
|
if (find_dir_prefix_int(dirent->d_name, "proc", &pid) != 0)
|
2021-11-18 11:55:28 +01:00
|
|
|
continue;
|
|
|
|
|
|
|
|
sprintf(path, "%s/%s", loomdir, dirent->d_name);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (i >= loom->nprocs) {
|
2021-11-18 16:20:20 +01:00
|
|
|
err("more process than expected\n");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (load_proc(&loom->proc[i], loom, i, pid, path) != 0)
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
|
2021-11-18 15:53:10 +01:00
|
|
|
i++;
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (i != loom->nprocs) {
|
2021-11-18 16:20:20 +01:00
|
|
|
err("unexpected number of processes\n");
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
closedir(dir);
|
|
|
|
|
2021-12-10 18:20:31 +01:00
|
|
|
/* Ensure all process have the rank, if enabled in any */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (loom->rank_enabled) {
|
|
|
|
for (i = 0; i < loom->nprocs; i++) {
|
2021-12-10 18:20:31 +01:00
|
|
|
struct ovni_eproc *proc = &loom->proc[i];
|
2022-09-29 15:34:44 +02:00
|
|
|
if (proc->rank < 0) {
|
2021-12-10 18:20:31 +01:00
|
|
|
die("process %d is missing the rank\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
proc->pid);
|
2021-12-10 18:20:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2022-09-22 11:56:48 +02:00
|
|
|
compare_looms(const void *a, const void *b)
|
2021-11-18 11:55:28 +01:00
|
|
|
{
|
2022-09-22 11:56:48 +02:00
|
|
|
struct ovni_loom *la = (struct ovni_loom *) a;
|
|
|
|
struct ovni_loom *lb = (struct ovni_loom *) b;
|
|
|
|
return strcmp(la->dname, lb->dname);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Extracts the host part of a loom name into host, a buffer of n bytes.
 *
 * The host is the text of loom_name up to, and not including, the first dot
 * or the end of the string. The result is always NUL-terminated; a host
 * that needs n or more characters is reported through die(). */
static void
loom_to_host(const char *loom_name, char *host, int n)
{
	int len = 0;

	/* Copy until dot or end, without going past the buffer */
	while (len < n && loom_name[len] != '.' && loom_name[len] != '\0') {
		host[len] = loom_name[len];
		len++;
	}

	/* The buffer was filled up, no room left for the terminator */
	if (len == n)
		die("loom host name %s too long\n", loom_name);

	host[len] = '\0';
}
|
|
|
|
|
|
|
|
int
|
|
|
|
ovni_load_trace(struct ovni_trace *trace, char *tracedir)
|
|
|
|
{
|
|
|
|
DIR *dir;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if ((dir = opendir(tracedir)) == NULL) {
|
2021-11-18 14:05:19 +01:00
|
|
|
err("opendir %s failed: %s\n", tracedir, strerror(errno));
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-09-22 11:56:48 +02:00
|
|
|
trace->nlooms = count_dir_prefix(dir, "loom");
|
2021-11-18 14:05:19 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (trace->nlooms == 0) {
|
2021-11-18 14:05:19 +01:00
|
|
|
err("cannot find any loom in %s\n", tracedir);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace->loom = calloc(trace->nlooms, sizeof(struct ovni_loom));
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (trace->loom == NULL) {
|
2021-11-18 14:05:19 +01:00
|
|
|
perror("calloc failed\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2021-11-18 15:53:10 +01:00
|
|
|
rewinddir(dir);
|
2021-11-18 14:05:19 +01:00
|
|
|
|
2022-09-22 11:56:48 +02:00
|
|
|
size_t l = 0;
|
|
|
|
struct dirent *dirent;
|
2021-11-18 14:05:19 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
while ((dirent = readdir(dir)) != NULL) {
|
2022-09-22 11:56:48 +02:00
|
|
|
struct ovni_loom *loom = &trace->loom[l];
|
|
|
|
const char *loom_name;
|
2022-09-29 15:34:44 +02:00
|
|
|
if (find_dir_prefix_str(dirent->d_name, "loom", &loom_name) != 0) {
|
2021-11-18 11:55:28 +01:00
|
|
|
/* Ignore other files in tracedir */
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (l >= trace->nlooms) {
|
2021-11-18 14:05:19 +01:00
|
|
|
err("extra loom detected\n");
|
|
|
|
return -1;
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|
|
|
|
|
2022-09-22 11:56:48 +02:00
|
|
|
/* Copy the complete loom directory name to looms */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (snprintf(loom->dname, PATH_MAX, "%s", dirent->d_name) >= PATH_MAX) {
|
2022-09-22 11:56:48 +02:00
|
|
|
err("error: loom name %s too long\n", dirent->d_name);
|
2021-11-18 14:05:19 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2021-11-18 14:05:19 +01:00
|
|
|
l++;
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|
|
|
|
|
2021-11-18 14:05:19 +01:00
|
|
|
closedir(dir);
|
|
|
|
|
2022-09-22 11:56:48 +02:00
|
|
|
/* Sort the looms, so we get the hostnames in alphanumeric order */
|
|
|
|
qsort(trace->loom, trace->nlooms, sizeof(struct ovni_loom),
|
2022-09-29 15:34:44 +02:00
|
|
|
compare_looms);
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
for (size_t i = 0; i < trace->nlooms; i++) {
|
2022-09-22 11:56:48 +02:00
|
|
|
struct ovni_loom *loom = &trace->loom[i];
|
|
|
|
const char *name = NULL;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (find_dir_prefix_str(loom->dname, "loom", &name) != 0) {
|
2022-09-22 11:56:48 +02:00
|
|
|
err("error: mismatch for loom %s\n", loom->dname);
|
2021-11-18 14:05:19 +01:00
|
|
|
return -1;
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|
|
|
|
|
2022-09-22 11:56:48 +02:00
|
|
|
loom_to_host(name, loom->hostname, sizeof(loom->hostname));
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (snprintf(loom->path, PATH_MAX, "%s/%s",
|
|
|
|
tracedir, loom->dname)
|
|
|
|
>= PATH_MAX) {
|
2022-09-22 11:56:48 +02:00
|
|
|
err("error: loom path %s/%s too long\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
tracedir, loom->dname);
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
2022-09-22 11:56:48 +02:00
|
|
|
}
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (load_loom(loom, loom->path) != 0)
|
2022-09-22 11:56:48 +02:00
|
|
|
return -1;
|
|
|
|
}
|
2021-11-18 11:55:28 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2022-07-26 19:04:08 +02:00
|
|
|
check_stream_header(struct ovni_stream *stream)
|
2021-11-18 11:55:28 +01:00
|
|
|
{
|
2022-07-26 19:04:08 +02:00
|
|
|
int ret = 0;
|
2021-11-29 16:02:09 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->size < sizeof(struct ovni_stream_header)) {
|
2022-07-26 19:04:08 +02:00
|
|
|
err("stream %d: incomplete stream header\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
stream->tid);
|
2021-11-29 16:02:09 +01:00
|
|
|
return -1;
|
|
|
|
}
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-07-26 19:04:08 +02:00
|
|
|
struct ovni_stream_header *h =
|
|
|
|
(struct ovni_stream_header *) stream->buf;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (memcmp(h->magic, OVNI_STREAM_MAGIC, 4) != 0) {
|
2022-07-26 19:04:08 +02:00
|
|
|
char magic[5];
|
|
|
|
memcpy(magic, h->magic, 4);
|
|
|
|
magic[4] = '\0';
|
|
|
|
err("stream %d: wrong stream magic '%s' (expected '%s')\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
stream->tid, magic, OVNI_STREAM_MAGIC);
|
2022-07-26 19:04:08 +02:00
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (h->version != OVNI_STREAM_VERSION) {
|
2022-07-26 19:04:08 +02:00
|
|
|
err("stream %d: stream version mismatch %u (expected %u)\n",
|
2022-09-29 15:34:44 +02:00
|
|
|
stream->tid, h->version, OVNI_STREAM_VERSION);
|
2022-07-26 19:04:08 +02:00
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
load_stream_fd(struct ovni_stream *stream, int fd)
|
|
|
|
{
|
|
|
|
struct stat st;
|
2022-09-29 15:34:44 +02:00
|
|
|
if (fstat(fd, &st) < 0) {
|
2021-12-09 14:11:57 +01:00
|
|
|
perror("fstat failed");
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-07-26 19:04:08 +02:00
|
|
|
/* Error because it doesn't have the header */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (st.st_size == 0) {
|
2022-07-26 19:04:08 +02:00
|
|
|
err("stream %d is empty\n", stream->tid);
|
|
|
|
return -1;
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|
|
|
|
|
2022-07-26 19:04:08 +02:00
|
|
|
int prot = PROT_READ | PROT_WRITE;
|
|
|
|
stream->buf = mmap(NULL, st.st_size, prot, MAP_PRIVATE, fd, 0);
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->buf == MAP_FAILED) {
|
2021-12-09 14:11:57 +01:00
|
|
|
perror("mmap failed");
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-07-26 19:04:08 +02:00
|
|
|
stream->size = st.st_size;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
load_stream_buf(struct ovni_stream *stream, struct ovni_ethread *thread)
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if ((fd = open(thread->tracefile, O_RDWR)) == -1) {
|
2022-07-26 19:04:08 +02:00
|
|
|
perror("open failed");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (load_stream_fd(stream, fd) != 0)
|
2022-07-26 19:04:08 +02:00
|
|
|
return -1;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (check_stream_header(stream) != 0) {
|
2022-07-26 19:04:08 +02:00
|
|
|
err("stream %d: bad header\n", stream->tid);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
stream->offset = sizeof(struct ovni_stream_header);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->offset == stream->size)
|
2022-07-26 19:04:08 +02:00
|
|
|
stream->active = 0;
|
|
|
|
else
|
|
|
|
stream->active = 1;
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2021-11-29 16:02:09 +01:00
|
|
|
/* No need to keep the fd open */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (close(fd)) {
|
2021-12-09 14:11:57 +01:00
|
|
|
perror("close failed");
|
2021-11-29 16:02:09 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Populates the streams in a single array */
|
|
|
|
int
|
|
|
|
ovni_load_streams(struct ovni_trace *trace)
|
|
|
|
{
|
|
|
|
size_t i, j, k, s;
|
|
|
|
struct ovni_loom *loom;
|
|
|
|
struct ovni_eproc *proc;
|
|
|
|
struct ovni_ethread *thread;
|
|
|
|
struct ovni_stream *stream;
|
|
|
|
|
|
|
|
trace->nstreams = 0;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
for (i = 0; i < trace->nlooms; i++) {
|
2021-11-18 11:55:28 +01:00
|
|
|
loom = &trace->loom[i];
|
2022-09-29 15:34:44 +02:00
|
|
|
for (j = 0; j < loom->nprocs; j++) {
|
2021-11-18 11:55:28 +01:00
|
|
|
proc = &loom->proc[j];
|
2022-09-29 15:34:44 +02:00
|
|
|
for (k = 0; k < proc->nthreads; k++) {
|
2021-11-18 11:55:28 +01:00
|
|
|
trace->nstreams++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
trace->stream = calloc(trace->nstreams, sizeof(struct ovni_stream));
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (trace->stream == NULL) {
|
2021-12-09 14:11:57 +01:00
|
|
|
perror("calloc failed");
|
2021-11-18 11:55:28 +01:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
err("loaded %ld streams\n", trace->nstreams);
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
for (s = 0, i = 0; i < trace->nlooms; i++) {
|
2021-11-18 11:55:28 +01:00
|
|
|
loom = &trace->loom[i];
|
2022-09-29 15:34:44 +02:00
|
|
|
for (j = 0; j < loom->nprocs; j++) {
|
2021-11-18 11:55:28 +01:00
|
|
|
proc = &loom->proc[j];
|
2022-09-29 15:34:44 +02:00
|
|
|
for (k = 0; k < proc->nthreads; k++) {
|
2021-11-18 11:55:28 +01:00
|
|
|
thread = &proc->thread[k];
|
|
|
|
stream = &trace->stream[s++];
|
|
|
|
|
|
|
|
stream->tid = thread->tid;
|
2021-12-16 13:30:26 +01:00
|
|
|
stream->thread = thread;
|
|
|
|
stream->proc = proc;
|
|
|
|
stream->loom = loom;
|
2021-11-18 11:55:28 +01:00
|
|
|
stream->lastclock = 0;
|
|
|
|
stream->offset = 0;
|
|
|
|
stream->cur_ev = NULL;
|
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
if (load_stream_buf(stream, thread) != 0) {
|
2021-11-18 11:55:28 +01:00
|
|
|
err("load_stream_buf failed\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ovni_free_streams(struct ovni_trace *trace)
|
|
|
|
{
|
2022-09-29 15:34:44 +02:00
|
|
|
for (size_t i = 0; i < trace->nstreams; i++) {
|
2021-12-15 18:29:03 +01:00
|
|
|
struct ovni_stream *stream = &trace->stream[i];
|
2022-09-29 15:34:44 +02:00
|
|
|
if (munmap(stream->buf, stream->size) != 0)
|
2021-12-15 18:29:03 +01:00
|
|
|
die("munmap stream failed: %s\n", strerror(errno));
|
|
|
|
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
free(trace->stream);
|
|
|
|
}
|
|
|
|
|
2021-11-18 14:53:15 +01:00
|
|
|
void
|
|
|
|
ovni_free_trace(struct ovni_trace *trace)
|
|
|
|
{
|
2021-11-18 16:20:20 +01:00
|
|
|
size_t i, j;
|
2021-11-18 15:53:10 +01:00
|
|
|
|
2022-09-29 15:34:44 +02:00
|
|
|
for (i = 0; i < trace->nlooms; i++) {
|
|
|
|
for (j = 0; j < trace->loom[i].nprocs; j++) {
|
2021-11-18 16:20:20 +01:00
|
|
|
free(trace->loom[i].proc[j].thread);
|
|
|
|
}
|
|
|
|
|
2021-11-18 15:53:10 +01:00
|
|
|
free(trace->loom[i].proc);
|
2021-11-18 16:20:20 +01:00
|
|
|
}
|
2021-11-18 15:53:10 +01:00
|
|
|
|
2021-11-18 14:53:15 +01:00
|
|
|
free(trace->loom);
|
|
|
|
}
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
int
|
|
|
|
ovni_load_next_event(struct ovni_stream *stream)
|
|
|
|
{
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->active == 0) {
|
2021-11-18 11:55:28 +01:00
|
|
|
dbg("stream is inactive, cannot load more events\n");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2022-07-26 19:04:08 +02:00
|
|
|
/* Only step the offset if we have load an event */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->cur_ev != NULL)
|
2022-07-26 19:04:08 +02:00
|
|
|
stream->offset += ovni_ev_size(stream->cur_ev);
|
2021-11-18 11:55:28 +01:00
|
|
|
|
2021-12-07 19:52:48 +01:00
|
|
|
/* It cannot overflow, otherwise we are reading garbage */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->offset > stream->size)
|
2021-12-07 19:52:48 +01:00
|
|
|
die("ovni_load_next_event: stream offset exceeds size\n");
|
|
|
|
|
2021-11-18 11:55:28 +01:00
|
|
|
/* We have reached the end */
|
2022-09-29 15:34:44 +02:00
|
|
|
if (stream->offset == stream->size) {
|
2021-11-18 11:55:28 +01:00
|
|
|
stream->active = 0;
|
2022-07-26 19:04:08 +02:00
|
|
|
stream->cur_ev = NULL;
|
2021-11-18 11:55:28 +01:00
|
|
|
dbg("stream %d runs out of events\n", stream->tid);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
stream->cur_ev = (struct ovni_ev *) &stream->buf[stream->offset];
|
|
|
|
|
2022-07-26 19:04:08 +02:00
|
|
|
return 0;
|
2021-11-18 11:55:28 +01:00
|
|
|
}
|