bench6/src/ompss2/readywave.c

/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)
 * SPDX-License-Identifier: GPL-3.0-or-later */

#define _DEFAULT_SOURCE

#include "bench6.h"

#include <nanos6.h>
#include <nanos6/debug.h>
#include <errno.h>
#include <math.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

/* Program name shown in the help text */
static char progname[] = "bench6.readywave";
/* Number of CPUs; stays at -1 until main() fills it in from get_ncpus() */
static int ncpus = -1;
/* Number of warmup repetitions (-w) */
static long nwarm = 100L;
/* Number of measured repetitions (-r) */
static long nruns = 200L;
/* Number of quickie tasks created per CPU in each run (-t) */
static long ntasks_per_cpu = 1000L;
/* Per-CPU task size in nanoseconds (-s); each quickie works for
 * ncpus times this amount */
static double size_per_cpu_ns = 400.0;
/* Busy delay in milliseconds executed at the start of every run (-c) */
static double cooldown_ms = 0.0;
/* When non-zero (-S), the last size_per_cpu_ns of each quickie runs
 * while holding the global mutex */
static int sequential_sched = 0;

/* Gate for the blocker tasks: they spin while it is non-zero */
static atomic_int wait = 0;

/* Serializes the tail of the quickie tasks when sequential_sched is set */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* NOTE(review): not referenced anywhere in the visible code */
#define M_WORK 10000000L

/* Burn CPU by counting up to `loops`. The counter is volatile so the
 * compiler cannot drop the loop. Does nothing when loops <= 0. */
static void
busywork(long loops)
{
	volatile long spin = 0;

	while (spin < loops)
		spin++;
}

/* Keep the calling CPU busy for at least `ms` milliseconds.
 *
 * The clock is sampled with bench6_time(), whose result is treated as
 * seconds here, and re-sampled after every short burst of busywork()
 * until the deadline has passed.
 *
 * Returns the time actually spent, in milliseconds. */
static double
dummy_work(double ms)
{
	double t_begin = bench6_time();
	double deadline = t_begin + ms * 1e-3;
	double now = bench6_time();

	while (now < deadline) {
		busywork(100L);
		now = bench6_time();
	}

	return (now - t_begin) * 1e3;
}

/* Execute one repetition of the benchmark.
 *
 * Sequence:
 *   1. Spin for cooldown_ms milliseconds.
 *   2. Raise the `wait` gate and create 2*ncpus "blocker" tasks that
 *      spin until the gate is lowered.
 *   3. Create ntasks_per_cpu*ncpus "quickie" tasks. Each one works for
 *      ncpus*size_per_cpu_ns nanoseconds in total; with
 *      sequential_sched the last size_per_cpu_ns of that work is done
 *      while holding the global mutex.
 *   4. Lower the gate, take the start timestamp, wait for all the
 *      tasks and take the end timestamp.
 *
 * When `run` is negative (warmup) nothing is printed. Otherwise one
 * CSV row is written to stdout: run, ntasks_per_cpu, ncpus,
 * size_per_cpu_ns, elapsed time in ms and elapsed time in ns divided
 * by the total number of quickie tasks. */
static void
do_run(int run)
{
	/* Optional pause between runs; a no-op delay when cooldown_ms is 0 */
	dummy_work(cooldown_ms);

	/* Raise the gate before any blocker can observe it */
	atomic_store(&wait, 1);

	/* Cover 2 times the number of CPUs so no quickie starts before the
	 * blockers */
	for (long i = 0L; i < 2*ncpus; i++) {
		#pragma oss task label("blocker")
		{
			//fprintf(stderr, "blocker %d up\n", i);
			/* Wait until the creator finishes */
			while (atomic_load(&wait));
		}
	}

	/* Create the quick tasks */
	for (long i = 0L; i < ntasks_per_cpu * ncpus; i++) {
		#pragma oss task label("quickie")
		{
			/* dummy_work() takes milliseconds, hence the 1e-6
			 * factor applied to the nanosecond sizes */
			if (sequential_sched) {
				/* Parallel part of the work */
				dummy_work((ncpus - 1) * size_per_cpu_ns * 1e-6);

				/* Serialized part: only one task at a time */
				pthread_mutex_lock(&mutex);
				dummy_work(size_per_cpu_ns * 1e-6);
				pthread_mutex_unlock(&mutex);
			} else {
				dummy_work(ncpus * size_per_cpu_ns * 1e-6);
			}
		}
	}

	/* Release the blockers: wait was set to 1 above, so this brings it
	 * back to 0 */
	atomic_fetch_sub(&wait, 1);

	/* Start counting the time as the quickies will run now */
	double t0 = bench6_time();

	/* Wait until all the tasks created above have finished */
	#pragma oss taskwait

	/* And measure the end time */
	double t1 = bench6_time();

	/* Warmup run */
	if (run < 0)
		return;

	/* Columns match the CSV header printed by main() */
	printf("%d,%ld,%d,%.3f,%e,%e\n",
			run, ntasks_per_cpu,
			ncpus, size_per_cpu_ns,
			(t1 - t0) * 1e3,
			(t1 - t0) * 1e9 / ntasks_per_cpu / ncpus);
}

/* Warm up before the measured runs.
 *
 * First creates 5 tasks per CPU, each spinning for 20 ms, and waits
 * for them. Then performs nwarm unrecorded repetitions of do_run()
 * (a negative run number suppresses its output). Progress messages
 * go to stderr. */
static void
do_warmup(void)
{
	fprintf(stderr, "running %ld warmup iterations...\n", nwarm);

	/* Warm up all the workers */
	long nwarmup_tasks = 5*ncpus;
	long created = 0L;

	while (created < nwarmup_tasks) {
		#pragma oss task label("warmup")
		{
			dummy_work(20.0);
		}
		created++;
	}

	#pragma oss taskwait

	/* Unrecorded repetitions of the benchmark itself */
	int rep = 0;

	while (rep < nwarm) {
		do_run(-1);
		rep++;
	}

	#pragma oss taskwait
	fprintf(stderr, "warmup done\n");
}

/* Print the help text to stderr, including the current default of
 * every option.
 *
 * Always returns -1, so the result can be returned directly as the
 * exit value of the program. */
static int
usage(void)
{
	fprintf(stderr, "%s - Create a wave of ready tasks\n", progname);
	fprintf(stderr, "\n");
	/* Keep the synopsis in sync with the getopt() string in main() */
	fprintf(stderr, "Usage: %s [-w NWARM] [-r NRUNS] [-t NTASKS] [-s SIZE] [-c COOLDOWN] [-S]\n", progname);
	fprintf(stderr, "\n");
	fprintf(stderr, "Creates a large number of ready tasks to put pressure in the\n");
	fprintf(stderr, "scheduler server. First, 2*ncpus tasks block the cpus with\n");
	fprintf(stderr, "work until the creator worker finishes creating all the tasks.\n");
	fprintf(stderr, "Then, the blocker tasks are signaled to finish, and the quickie\n");
	fprintf(stderr, "tasks follow. The time is measured from the signal until they\n");
	fprintf(stderr, "all end.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "Options:\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "  -w   Number of warmup repetitions. These are used to remove\n");
	fprintf(stderr, "       the effect of the jemalloc contention while growing the\n");
	fprintf(stderr, "       arenas (default %ld).\n", nwarm);
	fprintf(stderr, "\n");
	fprintf(stderr, "  -r   Number of repetitions of the test (default %ld).\n", nruns);
	fprintf(stderr, "\n");
	fprintf(stderr, "  -t   Number of tasks per CPU to be created (default %ld).\n", ntasks_per_cpu);
	fprintf(stderr, "\n");
	fprintf(stderr, "  -s   Size of the tasks in ns per CPU (default %f).\n", size_per_cpu_ns);
	fprintf(stderr, "\n");
	fprintf(stderr, "  -c   Cooldown delay in milliseconds before a new run (default %f).\n", cooldown_ms);
	fprintf(stderr, "\n");
	fprintf(stderr, "  -S   Serve the tasks sequentially (default %s).\n",
			sequential_sched ? "yes" : "no");
	fprintf(stderr, "\n");

	return -1;
}

int
main(int argc, char *argv[])
{
	int opt;

	while ((opt = getopt(argc, argv, "hr:w:t:s:c:S")) != -1) {
		switch (opt) {
		case 'r':
			nruns = atol(optarg);
			break;
		case 'w':
			nwarm = atol(optarg);
			break;
		case 't':
			ntasks_per_cpu = atol(optarg);
			break;
		case 's':
			size_per_cpu_ns = atof(optarg);
			break;
		case 'c':
			cooldown_ms = atof(optarg);
			break;
		case 'S':
			sequential_sched = 1;
			break;
		case 'h': /* Fall through */
		default: /* '?' */
			return usage();
		}
	}

	ncpus = get_ncpus();

	printf("%s,%s,%s,%s,%s,%s\n", "run", "ntasks_per_cpu", "ncpus", "size_per_cpu_ns", "time_ms", "avg_serve_time_ns");
	do_warmup();

	for (int run = 0; run < nruns; run++)
		do_run(run);

	return 0;
}
Add readywave test 2023-03-13 18:33:08 +01:00			`/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC)`
			`* SPDX-License-Identifier: GPL-3.0-or-later */`

			`#define _DEFAULT_SOURCE`

			`#include "bench6.h"`

			`#include <nanos6.h>`
			`#include <nanos6/debug.h>`
			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <time.h>`
			`#include <sys/time.h>`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`#include <stdatomic.h>`
			`#include <math.h>`
			`#include <pthread.h>`
Add readywave test 2023-03-13 18:33:08 +01:00
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`static char progname[] = "bench6.readywave";`
Add readywave test 2023-03-13 18:33:08 +01:00			`static int ncpus = -1;`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`static long nwarm = 100L;`
			`static long nruns = 200L;`
			`static long ntasks_per_cpu = 1000L;`
			`static double size_per_cpu_ns = 400.0;`
			`static double cooldown_ms = 0.0;`
			`static int sequential_sched = 0;`
Add readywave test 2023-03-13 18:33:08 +01:00
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`static atomic_int wait = 0;`

			`pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;`
Add readywave test 2023-03-13 18:33:08 +01:00
			`#define M_WORK 10000000L`

			`static void`
			`busywork(long loops)`
			`{`
			`for (volatile long j = 0; j < loops; j++);`
			`}`

Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`static double`
Add readywave test 2023-03-13 18:33:08 +01:00			`dummy_work(double ms)`
			`{`
Fix ompss2 microbenchmarks 2023-05-22 18:57:18 +02:00			`double start = bench6_time();`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`double end = start + ms * 1e-3;`
			`double last;`
Fix ompss2 microbenchmarks 2023-05-22 18:57:18 +02:00			`while ((last = bench6_time()) < end) {`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`busywork(100L);`
			`}`

			`return (last - start) * 1e3;`
Add readywave test 2023-03-13 18:33:08 +01:00			`}`

			`static void`
			`do_run(int run)`
			`{`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`dummy_work(cooldown_ms);`
Add readywave test 2023-03-13 18:33:08 +01:00
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`atomic_store(&wait, 1);`
Add readywave test 2023-03-13 18:33:08 +01:00
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`/* Cover 2 times the number of CPUs so no quickie starts before the`
			`* blockers */`
			`for (long i = 0L; i < 2*ncpus; i++) {`
			`#pragma oss task label("blocker")`
			`{`
			`//fprintf(stderr, "blocker %d up\n", i);`
			`/* Wait until the creator finishes */`
			`while (atomic_load(&wait));`
			`}`
Add readywave test 2023-03-13 18:33:08 +01:00			`}`

Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`/* Create the quick tasks */`
Add readywave test 2023-03-13 18:33:08 +01:00			`for (long i = 0L; i < ntasks_per_cpu * ncpus; i++) {`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`#pragma oss task label("quickie")`
			`{`
			`if (sequential_sched) {`
			`dummy_work((ncpus - 1) * size_per_cpu_ns * 1e-6);`

			`pthread_mutex_lock(&mutex);`
			`dummy_work(size_per_cpu_ns * 1e-6);`
			`pthread_mutex_unlock(&mutex);`
			`} else {`
			`dummy_work(ncpus * size_per_cpu_ns * 1e-6);`
			`}`
			`}`
Add readywave test 2023-03-13 18:33:08 +01:00			`}`

Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`/* Release the blockers */`
			`atomic_fetch_sub(&wait, 1);`

			`/* Start counting the time as the quickies will run now */`
Fix ompss2 microbenchmarks 2023-05-22 18:57:18 +02:00			`double t0 = bench6_time();`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00
			`/* Wait until all tasks are ready */`
Add readywave test 2023-03-13 18:33:08 +01:00			`#pragma oss taskwait`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00
			`/* And measure the end time */`
Fix ompss2 microbenchmarks 2023-05-22 18:57:18 +02:00			`double t1 = bench6_time();`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00
			`/* Warmup run */`
			`if (run < 0)`
			`return;`

			`printf("%d,%ld,%d,%.3f,%e,%e\n",`
			`run, ntasks_per_cpu,`
			`ncpus, size_per_cpu_ns,`
			`(t1 - t0) * 1e3,`
			`(t1 - t0) * 1e9 / ntasks_per_cpu / ncpus);`
			`}`

			`static void`
			`do_warmup(void)`
			`{`
			`fprintf(stderr, "running %ld warmup iterations...\n", nwarm);`
			`/* Warm up all the workers */`
			`for (long i = 0L; i < 5*ncpus; i++) {`
			`#pragma oss task label("warmup")`
			`dummy_work(20.0);`
			`}`

			`#pragma oss taskwait`

			`for (int i = 0; i < nwarm; i++)`
			`do_run(-1);`

			`#pragma oss taskwait`
			`fprintf(stderr, "warmup done\n");`
Add readywave test 2023-03-13 18:33:08 +01:00			`}`

			`static int`
Add heat mini-app and use cmake 2023-05-18 19:42:16 +02:00			`usage(void)`
Add readywave test 2023-03-13 18:33:08 +01:00			`{`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`fprintf(stderr, "%s - Create a wave of ready rasks\n", progname);`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, "Usage: %s [-w NWARM] [-r NRUNS] [-t NTASKS] [-s SIZE]\n", progname);`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, "Creates a large number of ready tasks to put pressure in the\n");`
			`fprintf(stderr, "scheduler server. First, 2*ncpus tasks block the cpus with\n");`
			`fprintf(stderr, "work until the creator worker finishes creating all the tasks.\n");`
			`fprintf(stderr, "Then, the blocker tasks are signaled to finish, and the quickie\n");`
			`fprintf(stderr, "tasks follow. The time is measured from the signal until they\n");`
			`fprintf(stderr, "all end.\n");`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, "Options:\n");`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, " -w Number of warmup repetitions. These are used to remove\n");`
			`fprintf(stderr, " the effect of the jemalloc contention while growing the\n");`
			`fprintf(stderr, " arenas (default %ld).\n", nwarm);`
Add readywave test 2023-03-13 18:33:08 +01:00			`fprintf(stderr, "\n");`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`fprintf(stderr, " -r Number of repetitions of the test (default %ld).\n", nruns);`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, " -t Number of tasks per CPU to be created (default %ld).\n", ntasks_per_cpu);`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, " -s Size of the tasks in ns per CPU (default %f).\n", size_per_cpu_ns);`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, " -c Cooldown delay in milliseconds before a new run (default %f).\n", cooldown_ms);`
			`fprintf(stderr, "\n");`
			`fprintf(stderr, " -S Serve the tasks sequentially (default %s).\n",`
			`sequential_sched ? "yes" : "no");`
Add readywave test 2023-03-13 18:33:08 +01:00			`fprintf(stderr, "\n");`

			`return -1;`
			`}`

			`int`
			`main(int argc, char *argv[])`
			`{`
			`int opt;`

Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`while ((opt = getopt(argc, argv, "hr:w:t:s:c:S")) != -1) {`
Add readywave test 2023-03-13 18:33:08 +01:00			`switch (opt) {`
			`case 'r':`
			`nruns = atol(optarg);`
			`break;`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`case 'w':`
			`nwarm = atol(optarg);`
			`break;`
Add readywave test 2023-03-13 18:33:08 +01:00			`case 't':`
			`ntasks_per_cpu = atol(optarg);`
			`break;`
Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`case 's':`
			`size_per_cpu_ns = atof(optarg);`
			`break;`
			`case 'c':`
			`cooldown_ms = atof(optarg);`
			`break;`
			`case 'S':`
			`sequential_sched = 1;`
Add readywave test 2023-03-13 18:33:08 +01:00			`break;`
			`case 'h': /* Fall through */`
			`default: /* '?' */`
Add heat mini-app and use cmake 2023-05-18 19:42:16 +02:00			`return usage();`
Add readywave test 2023-03-13 18:33:08 +01:00			`}`
			`}`

			`ncpus = get_ncpus();`

Use blocker tasks in readywave Also adds some extra options to control the benchmark. 2023-03-20 13:37:25 +01:00			`printf("%s,%s,%s,%s,%s,%s\n", "run", "ntasks_per_cpu", "ncpus", "size_per_cpu_ns", "time_ms", "avg_serve_time_ns");`
			`do_warmup();`

Add readywave test 2023-03-13 18:33:08 +01:00			`for (int run = 0; run < nruns; run++)`
			`do_run(run);`

			`return 0;`
			`}`