Order looms and processes by rank if given

This commit is contained in:
Rodrigo Arias 2023-04-06 19:27:43 +02:00 committed by Rodrigo Arias Mallo
parent e69df2e5c8
commit 676e30b4ca
8 changed files with 259 additions and 10 deletions

View File

@ -152,6 +152,20 @@ by_pid(struct proc *p1, struct proc *p2)
return 0; return 0;
} }
/* Comparison callback that orders two processes by ascending rank.
 * Returns -1 when p1's rank is lower than p2's, +1 when it is greater
 * and 0 when both ranks are equal. */
static int
by_rank(struct proc *p1, struct proc *p2)
{
	int lhs = p1->rank;
	int rhs = p2->rank;

	/* Each comparison evaluates to 0 or 1, so their difference is
	 * exactly the sign of the ordering */
	return (lhs > rhs) - (lhs < rhs);
}
static int static int
by_phyid(struct cpu *c1, struct cpu *c2) by_phyid(struct cpu *c1, struct cpu *c2)
{ {
@ -169,13 +183,16 @@ by_phyid(struct cpu *c1, struct cpu *c2)
void void
loom_sort(struct loom *loom) loom_sort(struct loom *loom)
{ {
if (loom->rank_enabled)
HASH_SORT(loom->procs, by_rank);
else
HASH_SORT(loom->procs, by_pid); HASH_SORT(loom->procs, by_pid);
HASH_SORT(loom->cpus, by_phyid); HASH_SORT(loom->cpus, by_phyid);
for (struct proc *p = loom->procs; p; p = p->hh.next) { for (struct proc *p = loom->procs; p; p = p->hh.next)
proc_sort(p); proc_sort(p);
} }
}
int int
loom_init_end(struct loom *loom) loom_init_end(struct loom *loom)
@ -188,6 +205,21 @@ loom_init_end(struct loom *loom)
} }
} }
/* Ensure that all processes have a rank */
if (loom->rank_enabled) {
for (struct proc *p = loom->procs; p; p = p->hh.next) {
if (p->rank < 0) {
err("process %s has no rank information", p->id);
return -1;
}
if (p->rank < loom->rank_min)
loom->rank_min = p->rank;
}
} else {
loom->rank_min = -1;
}
/* Populate cpus_array */ /* Populate cpus_array */
loom->cpus_array = calloc(loom->ncpus, sizeof(struct cpu *)); loom->cpus_array = calloc(loom->ncpus, sizeof(struct cpu *));
if (loom->cpus_array == NULL) { if (loom->cpus_array == NULL) {
@ -249,6 +281,35 @@ loom_add_proc(struct loom *loom, struct proc *proc)
return -1; return -1;
} }
if (!proc->metadata_loaded) {
err("process %d hasn't loaded metadata", pid);
return -1;
}
if (loom->rank_enabled && proc->rank < 0) {
err("missing rank in process %d", pid);
return -1;
}
/* Check previous ranks if any */
if (!loom->rank_enabled && proc->rank >= 0) {
loom->rank_enabled = 1;
loom->rank_min = INT_MAX;
for (struct proc *p = loom->procs; p; p = p->hh.next) {
if (p->rank < 0) {
err("missing rank in process %d", p->pid);
return -1;
}
if (p->rank < loom->rank_min)
loom->rank_min = p->rank;
}
}
if (loom->rank_enabled && proc->rank < loom->rank_min)
loom->rank_min = proc->rank;
HASH_ADD_INT(loom->procs, pid, proc); HASH_ADD_INT(loom->procs, pid, proc);
loom->nprocs++; loom->nprocs++;

View File

@ -26,6 +26,7 @@ struct loom {
size_t ncpus; size_t ncpus;
size_t offset_ncpus; size_t offset_ncpus;
int rank_enabled; int rank_enabled;
int rank_min;
int64_t clock_offset; int64_t clock_offset;

View File

@ -84,11 +84,6 @@ create_proc(struct loom *loom, const char *tracedir, const char *relpath)
return NULL; return NULL;
} }
if (loom_add_proc(loom, proc) != 0) {
err("loom_add_proc failed");
return NULL;
}
/* Build metadata path */ /* Build metadata path */
char mpath[PATH_MAX]; char mpath[PATH_MAX];
@ -104,6 +99,11 @@ create_proc(struct loom *loom, const char *tracedir, const char *relpath)
return NULL; return NULL;
} }
if (loom_add_proc(loom, proc) != 0) {
err("loom_add_proc failed");
return NULL;
}
return proc; return proc;
} }
@ -212,15 +212,32 @@ create_system(struct system *sys, struct trace *trace)
} }
static int static int
cmp_loom(struct loom *a, struct loom *b) cmp_loom_id(struct loom *a, struct loom *b)
{ {
return strcmp(a->id, b->id); return strcmp(a->id, b->id);
} }
/* Comparison callback that orders two looms by ascending rank_min.
 * Returns -1, 0 or +1 when a's rank_min is respectively lower than,
 * equal to or greater than b's. */
static int
cmp_loom_rank(struct loom *a, struct loom *b)
{
	int ra = a->rank_min;
	int rb = b->rank_min;

	if (ra == rb)
		return 0;

	return ra < rb ? -1 : +1;
}
static void static void
sort_lpt(struct system *sys) sort_lpt(struct system *sys)
{ {
DL_SORT(sys->looms, cmp_loom); if (sys->sort_by_rank)
DL_SORT(sys->looms, cmp_loom_rank);
else
DL_SORT(sys->looms, cmp_loom_id);
for (struct loom *l = sys->looms; l; l = l->next) for (struct loom *l = sys->looms; l; l = l->next)
loom_sort(l); loom_sort(l);
@ -459,6 +476,29 @@ init_offsets(struct system *sys, struct trace *trace)
return 0; return 0;
} }
static int
set_sort_criteria(struct system *sys)
{
for (struct loom *l = sys->looms; l; l = l->next) {
if (l->rank_enabled) {
sys->sort_by_rank = 1;
break;
}
}
if (!sys->sort_by_rank)
return 0;
for (struct loom *l = sys->looms; l; l = l->next) {
if (!l->rank_enabled) {
err("missing rank for loom %s", l->id);
return -1;
}
}
return 0;
}
int int
system_init(struct system *sys, struct emu_args *args, struct trace *trace) system_init(struct system *sys, struct emu_args *args, struct trace *trace)
{ {
@ -471,6 +511,11 @@ system_init(struct system *sys, struct emu_args *args, struct trace *trace)
return -1; return -1;
} }
if (set_sort_criteria(sys) != 0) {
err("set_sort_criteria failed");
return -1;
}
/* Ensure they are sorted so they are easier to read */ /* Ensure they are sorted so they are easier to read */
sort_lpt(sys); sort_lpt(sys);

View File

@ -29,6 +29,8 @@ struct system {
size_t ncpus; /* Including virtual cpus */ size_t ncpus; /* Including virtual cpus */
size_t nphycpus; size_t nphycpus;
int sort_by_rank;
struct loom *looms; struct loom *looms;
struct proc *procs; struct proc *procs;
struct thread *threads; struct thread *threads;

View File

@ -17,3 +17,5 @@ test_emu(version-good.c)
test_emu(version-bad.c SHOULD_FAIL REGEX "version mismatch") test_emu(version-bad.c SHOULD_FAIL REGEX "version mismatch")
test_emu(clockgate.c MP SHOULD_FAIL REGEX "detected large clock gate") test_emu(clockgate.c MP SHOULD_FAIL REGEX "detected large clock gate")
test_emu(no-cpus.c SHOULD_FAIL REGEX "loom .* has no physical CPUs") test_emu(no-cpus.c SHOULD_FAIL REGEX "loom .* has no physical CPUs")
test_emu(sort-cpus-by-loom.c MP)
test_emu(sort-cpus-by-rank.c MP)

View File

@ -0,0 +1,69 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "common.h"
#include "compat.h"
#include "instr.h"
#include "ovni.h"
#define N 4
/* Ensures that in the CPU timeline, the order of the CPUs is given by the loom
* alphanumeric order, when no rank information is present. */
/* Emits a small trace for this rank without setting any rank information,
 * using a loom name whose numeric suffix is (nranks - rank). Rank 0 also
 * writes the "expected" CPU listing and the "match.sh" script that compares
 * it against the CPU rows found in ovni/cpu.row.
 *
 * Reads OVNI_RANK and OVNI_NRANKS from the environment. Returns 0 on
 * success; failures are reported through die(). */
int
main(void)
{
	/* getenv() returns NULL for unset variables and atoi(NULL) is
	 * undefined behavior, so check before converting */
	const char *rank_str = getenv("OVNI_RANK");
	if (rank_str == NULL)
		die("OVNI_RANK is not set");

	const char *nranks_str = getenv("OVNI_NRANKS");
	if (nranks_str == NULL)
		die("OVNI_NRANKS is not set");

	int rank = atoi(rank_str);
	int nranks = atoi(nranks_str);

	/* Each rank owns N consecutive physical CPU ids */
	int cpus[N];
	for (int i = 0; i < N; i++)
		cpus[i] = rank * N + i;

	/* Loom names go in the opposite direction of the ranks */
	char loom[128];
	int len = snprintf(loom, sizeof(loom), "loom.%04d", nranks - rank);
	if (len < 0 || (size_t) len >= sizeof(loom))
		die("snprintf failed");

	ovni_proc_init(1, loom, getpid());
	ovni_thread_init(get_tid());

	for (int i = 0; i < N; i++)
		ovni_add_cpu(i, cpus[i]);

	instr_thread_execute(-1, -1, 0);
	instr_end();

	/* Only one rank writes the files used for the comparison */
	if (rank == 0) {
		FILE *c = fopen("expected", "w");
		if (c == NULL)
			die("fopen failed:");

		/* The expected order should be increasing loom id but the CPUs
		 * should start from the end */
		for (int i = 0; i < nranks; i++) {
			int k = nranks - 1 - i;
			for (int j = k * N; j < (k + 1) * N; j++)
				fprintf(c, " CPU %d.%d\n", i, j);
		}

		/* fclose() flushes buffered data, so a failure here means
		 * the file may be incomplete */
		if (fclose(c) != 0)
			die("fclose failed:");

		FILE *f = fopen("match.sh", "w");
		if (f == NULL)
			die("fopen failed:");

		fprintf(f, "grep ' CPU' ovni/cpu.row > found\n");
		fprintf(f, "diff -y expected found\n");

		if (fclose(f) != 0)
			die("fclose failed:");
	}

	return 0;
}

View File

@ -0,0 +1,68 @@
/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
* SPDX-License-Identifier: GPL-3.0-or-later */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "common.h"
#include "compat.h"
#include "instr.h"
#include "ovni.h"
#define N 4
/* Ensures that in the CPU trace, the order of the CPUs is given by the minimum
* rank of the processes of that loom, when the rank information is present. */
/* Emits a small trace for this rank with the rank information set via
 * ovni_proc_set_rank(), using a loom name whose numeric suffix is
 * (nranks - rank). Rank 0 also writes the "expected" CPU listing and the
 * "match.sh" script that compares it against the CPU rows found in
 * ovni/cpu.row.
 *
 * Reads OVNI_RANK and OVNI_NRANKS from the environment. Returns 0 on
 * success; failures are reported through die(). */
int
main(void)
{
	/* getenv() returns NULL for unset variables and atoi(NULL) is
	 * undefined behavior, so check before converting */
	const char *rank_str = getenv("OVNI_RANK");
	if (rank_str == NULL)
		die("OVNI_RANK is not set");

	const char *nranks_str = getenv("OVNI_NRANKS");
	if (nranks_str == NULL)
		die("OVNI_NRANKS is not set");

	int rank = atoi(rank_str);
	int nranks = atoi(nranks_str);

	/* Each rank owns N consecutive physical CPU ids */
	int cpus[N];
	for (int i = 0; i < N; i++)
		cpus[i] = rank * N + i;

	/* Loom names go in the opposite direction of the ranks */
	char loom[128];
	int len = snprintf(loom, sizeof(loom), "loom.%04d", nranks - rank);
	if (len < 0 || (size_t) len >= sizeof(loom))
		die("snprintf failed");

	ovni_proc_init(1, loom, getpid());
	ovni_thread_init(get_tid());
	ovni_proc_set_rank(rank, nranks);

	for (int i = 0; i < N; i++)
		ovni_add_cpu(i, cpus[i]);

	instr_thread_execute(-1, -1, 0);
	instr_end();

	/* Only one rank writes the files used for the comparison */
	if (rank == 0) {
		FILE *c = fopen("expected", "w");
		if (c == NULL)
			die("fopen failed:");

		/* The expected order should be increasing rank and CPUs */
		for (int i = 0; i < nranks; i++) {
			for (int j = i * N; j < (i + 1) * N; j++)
				fprintf(c, " CPU %d.%d\n", i, j);
		}

		/* fclose() flushes buffered data, so a failure here means
		 * the file may be incomplete */
		if (fclose(c) != 0)
			die("fclose failed:");

		FILE *f = fopen("match.sh", "w");
		if (f == NULL)
			die("fopen failed:");

		fprintf(f, "grep ' CPU' ovni/cpu.row > found\n");
		fprintf(f, "diff -y expected found\n");

		if (fclose(f) != 0)
			die("fclose failed:");
	}

	return 0;
}

View File

@ -67,6 +67,7 @@ test_duplicate_procs(struct loom *loom)
struct proc proc; struct proc proc;
OK(loom_init_begin(loom, testloom)); OK(loom_init_begin(loom, testloom));
OK(proc_init_begin(&proc, testproc)); OK(proc_init_begin(&proc, testproc));
proc.metadata_loaded = 1;
OK(loom_add_proc(loom, &proc)); OK(loom_add_proc(loom, &proc));
ERR(loom_add_proc(loom, &proc)); ERR(loom_add_proc(loom, &proc));