Add README.md

Add OmpSs-2 simple example
Move personal shells to a custom directory
2026-02-04 12:00:19 +01:00 · 2026-02-04 11:54:30 +01:00 · 2026-02-04 10:25:22 +01:00 · 2026-02-03 18:24:02 +01:00 · 2026-01-29 14:31:21 +01:00 · 2025-10-30 10:44:27 +01:00
23 changed files with 1161 additions and 49 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,15 @@
 # Nix development shells
 This repository collects several examples of development environments to be used
 with `nix develop`.
 The environment is defined in the `flake.nix` file, and the exact commit of
 each input it depends on is pinned in the `flake.lock` file. Together, these
 two files provide all the information any user needs to reproduce the
 environment.
 Make sure both files are tracked by git so that you can see how your
 environment changes over time.
 To enter an environment, go to the directory with the `flake.nix` file and run
 `nix develop`. 
--- a/cuda/.gitignore
+++ b/cuda/.gitignore
@@ -0,0 +1 @@
 cudainfo
--- a/cuda/Makefile
+++ b/cuda/Makefile
@@ -0,0 +1,12 @@
 # Host C++ compiler handed to nvcc; can be overridden from the environment
 # or the command line (e.g. `make HOSTCXX=clang++`).
 HOSTCXX  ?= g++
 NVCC     := nvcc -ccbin $(HOSTCXX)
 CXXFLAGS := -m64 -Wno-deprecated-gpu-targets
 # `all` and `clean` do not produce files of the same name: declare them phony
 # so that a stray file called `all` or `clean` cannot disable them.
 .PHONY: all clean
 # Target rules
 all: cudainfo
 # Build the device-query program from its single source file.
 cudainfo: cudainfo.cpp
 	$(NVCC) $(CXXFLAGS) -o $@ $<
 # Remove the build products.
 clean:
 	rm -f cudainfo cudainfo.o
--- a/cuda/README.md
+++ b/cuda/README.md
@@ -0,0 +1,4 @@
 # CUDA example
 Run `nix develop` to load the environment and `make` to build the example CUDA
 program. Then run `./cudainfo` on the fox machine to test it.
--- a/cuda/cudainfo.cpp
+++ b/cuda/cudainfo.cpp
@@ -0,0 +1,600 @@
 /*
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */
 /* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
 // Shared Utilities (QA Testing)
 // std::system includes
 #include <memory>
 #include <iostream>
 #include <cuda_runtime.h>
 // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
 #define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
 // CUDA Runtime error messages
 #ifdef __DRIVER_TYPES_H__
 // Expands to one switch case that returns the enumerator's own spelling.
 #define CUDA_ERROR_NAME_CASE(code) case code: return #code
 // Translate a CUDA runtime error code into the name of its enumerator.
 // Codes that are not listed below yield "<unknown>".
 static const char *_cudaGetErrorEnum(cudaError_t error)
 {
    switch (error)
    {
        CUDA_ERROR_NAME_CASE(cudaSuccess);
        CUDA_ERROR_NAME_CASE(cudaErrorMissingConfiguration);
        CUDA_ERROR_NAME_CASE(cudaErrorMemoryAllocation);
        CUDA_ERROR_NAME_CASE(cudaErrorInitializationError);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchFailure);
        CUDA_ERROR_NAME_CASE(cudaErrorPriorLaunchFailure);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchTimeout);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchOutOfResources);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidDeviceFunction);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidConfiguration);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidDevice);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidValue);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidPitchValue);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidSymbol);
        CUDA_ERROR_NAME_CASE(cudaErrorMapBufferObjectFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorUnmapBufferObjectFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidHostPointer);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidDevicePointer);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidTexture);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidTextureBinding);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidChannelDescriptor);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidMemcpyDirection);
        CUDA_ERROR_NAME_CASE(cudaErrorAddressOfConstant);
        CUDA_ERROR_NAME_CASE(cudaErrorTextureFetchFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorTextureNotBound);
        CUDA_ERROR_NAME_CASE(cudaErrorSynchronizationError);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidFilterSetting);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidNormSetting);
        CUDA_ERROR_NAME_CASE(cudaErrorMixedDeviceExecution);
        CUDA_ERROR_NAME_CASE(cudaErrorCudartUnloading);
        CUDA_ERROR_NAME_CASE(cudaErrorUnknown);
        CUDA_ERROR_NAME_CASE(cudaErrorNotYetImplemented);
        CUDA_ERROR_NAME_CASE(cudaErrorMemoryValueTooLarge);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidResourceHandle);
        CUDA_ERROR_NAME_CASE(cudaErrorNotReady);
        CUDA_ERROR_NAME_CASE(cudaErrorInsufficientDriver);
        CUDA_ERROR_NAME_CASE(cudaErrorSetOnActiveProcess);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidSurface);
        CUDA_ERROR_NAME_CASE(cudaErrorNoDevice);
        CUDA_ERROR_NAME_CASE(cudaErrorECCUncorrectable);
        CUDA_ERROR_NAME_CASE(cudaErrorSharedObjectSymbolNotFound);
        CUDA_ERROR_NAME_CASE(cudaErrorSharedObjectInitFailed);
        CUDA_ERROR_NAME_CASE(cudaErrorUnsupportedLimit);
        CUDA_ERROR_NAME_CASE(cudaErrorDuplicateVariableName);
        CUDA_ERROR_NAME_CASE(cudaErrorDuplicateTextureName);
        CUDA_ERROR_NAME_CASE(cudaErrorDuplicateSurfaceName);
        CUDA_ERROR_NAME_CASE(cudaErrorDevicesUnavailable);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidKernelImage);
        CUDA_ERROR_NAME_CASE(cudaErrorNoKernelImageForDevice);
        CUDA_ERROR_NAME_CASE(cudaErrorIncompatibleDriverContext);
        CUDA_ERROR_NAME_CASE(cudaErrorPeerAccessAlreadyEnabled);
        CUDA_ERROR_NAME_CASE(cudaErrorPeerAccessNotEnabled);
        CUDA_ERROR_NAME_CASE(cudaErrorDeviceAlreadyInUse);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerDisabled);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerNotInitialized);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerAlreadyStarted);
        CUDA_ERROR_NAME_CASE(cudaErrorProfilerAlreadyStopped);
        /* Since CUDA 4.0 */
        CUDA_ERROR_NAME_CASE(cudaErrorAssert);
        CUDA_ERROR_NAME_CASE(cudaErrorTooManyPeers);
        CUDA_ERROR_NAME_CASE(cudaErrorHostMemoryAlreadyRegistered);
        CUDA_ERROR_NAME_CASE(cudaErrorHostMemoryNotRegistered);
        /* Since CUDA 5.0 */
        CUDA_ERROR_NAME_CASE(cudaErrorOperatingSystem);
        CUDA_ERROR_NAME_CASE(cudaErrorPeerAccessUnsupported);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchMaxDepthExceeded);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchFileScopedTex);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchFileScopedSurf);
        CUDA_ERROR_NAME_CASE(cudaErrorSyncDepthExceeded);
        CUDA_ERROR_NAME_CASE(cudaErrorLaunchPendingCountExceeded);
        CUDA_ERROR_NAME_CASE(cudaErrorNotPermitted);
        CUDA_ERROR_NAME_CASE(cudaErrorNotSupported);
        /* Since CUDA 6.0 */
        CUDA_ERROR_NAME_CASE(cudaErrorHardwareStackError);
        CUDA_ERROR_NAME_CASE(cudaErrorIllegalInstruction);
        CUDA_ERROR_NAME_CASE(cudaErrorMisalignedAddress);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidAddressSpace);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidPc);
        CUDA_ERROR_NAME_CASE(cudaErrorIllegalAddress);
        /* Since CUDA 6.5 */
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidPtx);
        CUDA_ERROR_NAME_CASE(cudaErrorInvalidGraphicsContext);
        CUDA_ERROR_NAME_CASE(cudaErrorStartupFailure);
        CUDA_ERROR_NAME_CASE(cudaErrorApiFailureBase);
    }
    // Reached only for codes without a case above.
    return "<unknown>";
 }
 #undef CUDA_ERROR_NAME_CASE
 #endif
 // Abort the program if a CUDA call reported an error.
 //
 //   result - value returned by the CUDA call; any non-zero value is an error
 //   func   - source text of the checked expression (see checkCudaErrors)
 //   file   - source file of the call site
 //   line   - source line of the call site
 //
 // On error, prints a diagnostic to stderr, calls cudaDeviceReset() and exits
 // with EXIT_FAILURE. Returns normally when result is zero.
 template< typename T >
 void check(T result, char const *const func, const char *const file, int const line)
 {
    if (!result)
    {
        return;
    }
    // The code is printed with %d, so hand printf an int (not an unsigned int).
    fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
            file, line, static_cast<int>(result), _cudaGetErrorEnum(result), func);
    // Make sure we call CUDA Device Reset before exiting
    cudaDeviceReset();
    exit(EXIT_FAILURE);
 }
 int *pArgc = NULL;
 char **pArgv = NULL;
 #if CUDART_VERSION < 5000
 // CUDA-C includes
 #include <cuda.h>
 // This function wraps the CUDA Driver API into a template function
 template <class T>
 inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
 {
    CUresult error =    cuDeviceGetAttribute(attribute, device_attribute, device);
    if (CUDA_SUCCESS != error) {
        fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
                error, __FILE__, __LINE__);
        // cudaDeviceReset causes the driver to clean up all state. While
        // not mandatory in normal operation, it is good practice.  It is also
        // needed to ensure correct operation when the application is being
        // profiled. Calling cudaDeviceReset causes all profile data to be
        // flushed before the application exits
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
 }
 #endif /* CUDART_VERSION < 5000 */
 // Beginning of GPU Architecture definitions
 // Map a compute capability (SM major.minor) to the number of CUDA cores per
 // multiprocessor.
 //
 //   major - SM major version
 //   minor - SM minor version
 //
 // Returns the core count for a known SM version. For an unknown version it
 // prints a notice and returns the value of the last entry in the table.
 inline int ConvertSMVer2Cores(int major, int minor)
 {
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
    typedef struct {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;
    const sSMtoCores nGpuArchCoresPerSM[] = {
        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128}, // Maxwell Generation (SM 5.3)
        { 0x60, 64 }, // Pascal Generation (SM 6.0)
        { 0x61, 128}, // Pascal Generation (SM 6.1)
        { 0x62, 128}, // Pascal Generation (SM 6.2)
        { 0x70, 64 }, // Volta Generation (SM 7.0)
        { 0x72, 64 }, // Volta Generation (SM 7.2)
        { 0x75, 64 }, // Turing Generation (SM 7.5)
        { 0x80, 64 }, // Ampere Generation (SM 8.0)
        { 0x86, 128}, // Ampere Generation (SM 8.6)
        { 0x87, 128}, // Ampere Generation (SM 8.7)
        { 0x89, 128}, // Ada Generation (SM 8.9)
        { 0x90, 128}, // Hopper Generation (SM 9.0)
        {   -1, -1 }  // Sentinel, must stay last
    };
    // Same 0xMm encoding as the table keys.
    const int wanted = (major << 4) + minor;
    int index = 0;
    while (nGpuArchCoresPerSM[index].SM != -1) {
        if (nGpuArchCoresPerSM[index].SM == wanted) {
            return nGpuArchCoresPerSM[index].Cores;
        }
        index++;
    }
    // If we don't find the values, we default use the previous one to run properly
    // (index now points at the sentinel, so index-1 is the last real entry).
    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
    return nGpuArchCoresPerSM[index-1].Cores;
 }
 ////////////////////////////////////////////////////////////////////////////////
 // Program main
 ////////////////////////////////////////////////////////////////////////////////
 // Entry point: enumerate the CUDA devices visible to the runtime, print the
 // properties of each one, report peer-to-peer access between P2P-capable
 // devices, and finish with a one-line summary.
 //
 // Returns 0 on success. Exits with EXIT_FAILURE if the device count cannot be
 // obtained or if a checked CUDA call fails.
 int
 main(int argc, char **argv)
 {
    pArgc = &argc;
    pArgv = argv;
    printf("%s Starting...\n\n", argv[0]);
    printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
    int deviceCount = 0;
    cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
    if (error_id != cudaSuccess) {
        printf("cudaGetDeviceCount failed: %s (%d)\n",
               cudaGetErrorString(error_id), (int) error_id);
        printf("Result = FAIL\n");
        exit(EXIT_FAILURE);
    }
    // This function call returns 0 if there are no CUDA capable devices.
    if (deviceCount == 0)
        printf("There are no available device(s) that support CUDA\n");
    else
        printf("Detected %d CUDA Capable device(s)\n", deviceCount);
    int dev, driverVersion = 0, runtimeVersion = 0;
    for (dev = 0; dev < deviceCount; ++dev) {
        // Stop on failure instead of printing an uninitialized property struct.
        checkCudaErrors(cudaSetDevice(dev));
        cudaDeviceProp deviceProp;
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
        printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
        // Console log
        cudaDriverGetVersion(&driverVersion);
        cudaRuntimeGetVersion(&runtimeVersion);
        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor);
        printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n",
                (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
        // Look the core count up once: the lookup prints a notice for unknown
        // SM versions, and that notice should not appear twice per device.
        const int coresPerSM = ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
        printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n",
               deviceProp.multiProcessorCount,
               coresPerSM,
               coresPerSM * deviceProp.multiProcessorCount);
        printf("  GPU Max Clock rate:                            %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
 #if CUDART_VERSION >= 5000
        // This is supported in CUDA 5.0 (runtime API device properties)
        printf("  Memory Clock rate:                             %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
        printf("  Memory Bus Width:                              %d-bit\n",   deviceProp.memoryBusWidth);
        if (deviceProp.l2CacheSize) {
            printf("  L2 Cache Size:                                 %d bytes\n", deviceProp.l2CacheSize);
        }
 #else
        // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
        int memoryClock;
        getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
        printf("  Memory Clock rate:                             %.0f Mhz\n", memoryClock * 1e-3f);
        int memBusWidth;
        getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
        printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);
        int L2CacheSize;
        getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
        if (L2CacheSize) {
            printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
        }
 #endif
        printf("  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
               deviceProp.maxTexture1D   , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
               deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
        printf("  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",
               deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
        printf("  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d layers\n",
               deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
        // Byte counts are cast to unsigned long long and printed with %llu,
        // the same way totalGlobalMem is printed above.
        printf("  Total amount of constant memory:               %llu bytes\n", (unsigned long long) deviceProp.totalConstMem);
        printf("  Total amount of shared memory per block:       %llu bytes\n", (unsigned long long) deviceProp.sharedMemPerBlock);
        printf("  Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
        printf("  Warp size:                                     %d\n", deviceProp.warpSize);
        printf("  Maximum number of threads per multiprocessor:  %d\n", deviceProp.maxThreadsPerMultiProcessor);
        printf("  Maximum number of threads per block:           %d\n", deviceProp.maxThreadsPerBlock);
        printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
               deviceProp.maxThreadsDim[0],
               deviceProp.maxThreadsDim[1],
               deviceProp.maxThreadsDim[2]);
        printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n",
               deviceProp.maxGridSize[0],
               deviceProp.maxGridSize[1],
               deviceProp.maxGridSize[2]);
        printf("  Maximum memory pitch:                          %llu bytes\n", (unsigned long long) deviceProp.memPitch);
        printf("  Texture alignment:                             %llu bytes\n", (unsigned long long) deviceProp.textureAlignment);
        printf("  Concurrent copy and kernel execution:          %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
        printf("  Run time limit on kernels:                     %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
        printf("  Integrated GPU sharing Host Memory:            %s\n", deviceProp.integrated ? "Yes" : "No");
        printf("  Support host page-locked memory mapping:       %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
        printf("  Alignment requirement for Surfaces:            %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
        printf("  Device has ECC support:                        %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        printf("  CUDA Device Driver Mode (TCC or WDDM):         %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
 #endif
        printf("  Device supports Unified Addressing (UVA):      %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
        printf("  Device PCI Domain ID / Bus ID / location ID:   %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
        const char *sComputeMode[] = {
            "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
            "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
            "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
            "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
            "Unknown",
            NULL
        };
        // Any value without a description above is reported as "Unknown"
        // rather than indexing past the table or printing the NULL entry.
        int computeModeIndex = deviceProp.computeMode;
        if (computeModeIndex < 0 || computeModeIndex > 4) {
            computeModeIndex = 4;
        }
        printf("  Compute Mode:\n");
        printf("     < %s >\n", sComputeMode[computeModeIndex]);
    }
    // If there are 2 or more GPUs, query to determine whether RDMA is supported
    if (deviceCount >= 2)
    {
        // prop[] and gpuid[] have a fixed capacity; never index past it.
        const int kMaxP2PDevices = 64;
        const int p2pCandidates = (deviceCount < kMaxP2PDevices) ? deviceCount : kMaxP2PDevices;
        cudaDeviceProp prop[kMaxP2PDevices];
        int gpuid[kMaxP2PDevices]; // device ids of the GPUs that can support P2P
        int gpu_p2p_count = 0;
        for (int i=0; i < p2pCandidates; i++)
        {
            checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
            // Only boards based on Fermi or later can support P2P
            if ((prop[i].major >= 2)
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
                // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to support this
                && prop[i].tccDriver
 #endif
               )
            {
                // This is an array of P2P capable GPUs
                gpuid[gpu_p2p_count++] = i;
            }
        }
        // Show all the combinations of support P2P GPUs
        if (gpu_p2p_count >= 2)
        {
            // Visit every ordered pair (source, peer) exactly once and skip
            // pairing a device with itself.
            for (int i = 0; i < gpu_p2p_count; i++)
            {
                for (int j = 0; j < gpu_p2p_count; j++)
                {
                    if (i == j)
                    {
                        continue;
                    }
                    int can_access_peer = 0;
                    checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer, gpuid[i], gpuid[j]));
                    printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
                           prop[gpuid[j]].name, gpuid[j] ,
                           can_access_peer ? "Yes" : "No");
                }
            }
        }
    }
    // csv masterlog info
    // *****************************
    // exe and CUDA driver name
    printf("\n");
    std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
    // Scratch buffer for the formatted fragments below; every formatting call
    // is bounded by its full size.
    char cTemp[128];
    // driver version
    sProfileString += ", CUDA Driver Version = ";
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    sprintf_s(cTemp, sizeof(cTemp), "%d.%d", driverVersion/1000, (driverVersion%100)/10);
 #else
    snprintf(cTemp, sizeof(cTemp), "%d.%d", driverVersion/1000, (driverVersion%100)/10);
 #endif
    sProfileString +=  cTemp;
    // Runtime version
    sProfileString += ", CUDA Runtime Version = ";
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    sprintf_s(cTemp, sizeof(cTemp), "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
 #else
    snprintf(cTemp, sizeof(cTemp), "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
 #endif
    sProfileString +=  cTemp;
    // Device count
    sProfileString += ", NumDevs = ";
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    sprintf_s(cTemp, sizeof(cTemp), "%d", deviceCount);
 #else
    snprintf(cTemp, sizeof(cTemp), "%d", deviceCount);
 #endif
    sProfileString += cTemp;
    // Print Out all device Names
    for (dev = 0; dev < deviceCount; ++dev)
    {
 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        sprintf_s(cTemp, sizeof(cTemp), ", Device%d = ", dev);
 #else
        snprintf(cTemp, sizeof(cTemp), ", Device%d = ", dev);
 #endif
        cudaDeviceProp deviceProp;
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
        sProfileString += cTemp;
        sProfileString += deviceProp.name;
    }
    sProfileString += "\n";
    printf("%s", sProfileString.c_str());
    printf("Result = PASS\n");
    // finish
    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    return 0;
 }
--- a/cuda/flake.lock
+++ b/cuda/flake.lock
@@ -0,0 +1,45 @@
 {
  "nodes": {
    "jungle": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      },
      "locked": {
        "lastModified": 1770128250,
        "narHash": "sha256-Kx3EwImhYCp4bLPNWGz4oL4IYVjkCLXwcVmXTY40MBc=",
        "ref": "refs/heads/master",
        "rev": "7a6e4232de0e181de97e099e600ffc3a964260e0",
        "revCount": 1536,
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      },
      "original": {
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1767634882,
        "narHash": "sha256-2GffSfQxe3sedHzK+sTKlYo/NTIAGzbFCIsNMUPAAnk=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "3c9db02515ef1d9b6b709fc60ba9a540957f661c",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-25.11",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "jungle": "jungle"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/cuda/flake.nix
+++ b/cuda/flake.nix
@@ -0,0 +1,43 @@
 {
   # Single input: the jungle flake; nixpkgs is taken from jungle's own inputs.
   inputs.jungle.url = "git+https://jungle.bsc.es/git/rarias/jungle";
   outputs = { self, jungle }:
   let
     system = "x86_64-linux";
     # Example overlay, for now empty
     customOverlay = final: prev: { };
     pkgs = import jungle.inputs.nixpkgs {
       inherit system;
       # Needed for CUDA
       config.allowUnfree = true;
       # First the jungle overlay with the BSC custom packages, then our
       # local changes on top.
       overlays = [ jungle.outputs.bscOverlay customOverlay ];
     };
   in {
     devShells.${system}.default = pkgs.mkShell {
       pname = "cuda-devshell";
       # Packages available inside the shell
       # (more at https://search.nixos.org/packages)
       packages = [
         pkgs.cudatoolkit # Required for nvcc
         (pkgs.lib.getOutput "static" pkgs.cudaPackages.cuda_cudart) # Required for -lcudart_static
         pkgs.cudaPackages.libcusparse
         pkgs.autoAddDriverRunpath
       ];
       # Packages listed here would contribute their build dependencies to
       # the shell as well; none for now.
       inputsFrom = [ ];
       shellHook = ''
         export CUDA_PATH=${pkgs.cudatoolkit}
         export LD_LIBRARY_PATH=/var/run/opengl-driver/lib
         export SMS=50
       '';
     };
   };
 }
--- a/custom/arnau/posv/flake.lock
+++ b/custom/arnau/posv/flake.lock
@@ -0,0 +1,45 @@
 {
  "nodes": {
    "jungle": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      },
      "locked": {
        "lastModified": 1760427467,
        "narHash": "sha256-DemQ+XT3BWXh8fr6UDfGNUB4ba0tGJXyep5/lg+gBD4=",
        "ref": "refs/heads/master",
        "rev": "4261d327c678e52abdd568a27168ea7cdd0484a0",
        "revCount": 1487,
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      },
      "original": {
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1752436162,
        "narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-25.05",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "jungle": "jungle"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/custom/arnau/posv/flake.nix
+++ b/custom/arnau/posv/flake.nix
@@ -0,0 +1,47 @@
 {
   # Fetch the list of packages for BSC
   inputs.jungle.url = "git+https://jungle.bsc.es/git/rarias/jungle";
   outputs = { self, jungle }:
   let
     system = "x86_64-linux";
     # Local package customizations, applied on top of the BSC overlay.
     customOverlay = final: prev: {
       # Disable GIL in python
       python314 = prev.python314.override { enableGIL = false; };
       # Use a custom nOS-V commit
       nosv = prev.nosv.override {
         useGit = true;
         gitUrl = "ssh://git@gitlab-internal.bsc.es/acinca/nos-v.git";
         gitBranch = "nosv_join";
         gitCommit = "33130d271a59d0794545e4a2a597a56951e428aa";
       };
     };
     pkgs = import jungle.inputs.nixpkgs {
       inherit system;
       # BSC packages first, then our changes above on top
       overlays = [ jungle.bscOverlay customOverlay ];
     };
     # Python for the develop shell, with setuptools included
     pythonWithSetuptools = pkgs.python314.withPackages (ps: [ ps.setuptools ]);
   in {
     devShells.${system}.default = pkgs.mkShell {
       pname = "devshell";
       # Set the NOSV_HOME to point to the current nosv package
       NOSV_HOME = pkgs.nosv;
       # These will be included in the environment with `nix develop`.
       buildInputs = [
         pythonWithSetuptools
         # Extra packages
         pkgs.gcc
         pkgs.cowsay
         pkgs.nosv
       ];
     };
   };
 }
--- a/custom/isabel/slurm/flake.lock
+++ b/custom/isabel/slurm/flake.lock
@@ -0,0 +1,45 @@
 {
  "nodes": {
    "jungle": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      },
      "locked": {
        "lastModified": 1760427467,
        "narHash": "sha256-DemQ+XT3BWXh8fr6UDfGNUB4ba0tGJXyep5/lg+gBD4=",
        "ref": "refs/heads/master",
        "rev": "4261d327c678e52abdd568a27168ea7cdd0484a0",
        "revCount": 1487,
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      },
      "original": {
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1752436162,
        "narHash": "sha256-Kt1UIPi7kZqkSc5HVj6UY5YLHHEzPBkgpNUByuyxtlw=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "dfcd5b901dbab46c9c6e80b265648481aafb01f8",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-25.05",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "jungle": "jungle"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/custom/isabel/slurm/flake.nix
+++ b/custom/isabel/slurm/flake.nix
@@ -1,8 +1,9 @@
 {
-  inputs.bscpkgs.url = "git+https://git.sr.ht/~rodarima/bscpkgs";
+  inputs.jungle.url = "git+https://jungle.bsc.es/git/rarias/jungle";
-  outputs = { self, bscpkgs }:  
+  outputs = { self, jungle }:
  let
-    nixpkgs = bscpkgs.inputs.nixpkgs;
+    nixpkgs = jungle.inputs.nixpkgs;
    lib = nixpkgs.lib;
    clusterOverlay = (final: prev: {
      # Use cluster llvm compiler repo
      clangOmpss2Unwrapped = prev.clangOmpss2Unwrapped.override {
@@ -11,12 +12,19 @@
        gitCommit = "151c260ba834826c01855da0a41fc203ffe4d025";
        gitBranch = "cluster";
      };
      # Configure MPICH to use UCX with multiple thread support
      mpich = prev.mpich.overrideAttrs (old: {
        configureFlags = (lib.remove "--with-device=ch4:ofi" old.configureFlags) ++ [
          "--with-device=ch4:ucx"
          "--enable-threads=multiple"
        ];
      });
    });
    pkgs = import nixpkgs {
      system = "x86_64-linux";
      overlays = [
-        # Apply bscpkgs to get our BSC custom packages
+        # Apply jungle overlay to get our BSC custom packages
-        bscpkgs.outputs.bscOverlay
+        jungle.outputs.bscOverlay
        # And on top apply our local changes to customize for cluster
        clusterOverlay
      ];
@@ -26,8 +34,7 @@
      pname = "devshell";
      buildInputs = with pkgs; [
        slurm.out slurm.dev gcc
-        clangOmpss2
+        clangOmpss2 mpich osumb
        nanos6
      ];
      inputsFrom = with pkgs; [
        nanos6
--- a/custom/isabel/slurm/llvmTest.c
+++ b/custom/isabel/slurm/llvmTest.c
--- a/custom/vincent/chol/.gitignore
+++ b/custom/vincent/chol/.gitignore
@@ -0,0 +1 @@
 out/
--- a/custom/vincent/chol/flake.lock
+++ b/custom/vincent/chol/flake.lock
@@ -0,0 +1,45 @@
 {
  "nodes": {
    "jungle": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      },
      "locked": {
        "lastModified": 1769187376,
        "narHash": "sha256-H8aMWt4OVwXWoUPPSZuj0eSq3Ur17nY62Ap+hYiQy3o=",
        "ref": "refs/heads/master",
        "rev": "deb0cd1488b8d72ad1395b25aa4dbbdf721274d9",
        "revCount": 1533,
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      },
      "original": {
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1767634882,
        "narHash": "sha256-2GffSfQxe3sedHzK+sTKlYo/NTIAGzbFCIsNMUPAAnk=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "3c9db02515ef1d9b6b709fc60ba9a540957f661c",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-25.11",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "jungle": "jungle"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/custom/vincent/chol/flake.nix
+++ b/custom/vincent/chol/flake.nix
@@ -0,0 +1,61 @@
 {
  inputs.jungle.url = "git+https://jungle.bsc.es/git/rarias/jungle";
  outputs = { self, jungle }:
  let
    # Single place to name the platform used by every output below
    system = "x86_64-linux";
    # Reuse the nixpkgs input that jungle itself is locked to
    nixpkgs = jungle.inputs.nixpkgs;
    # Attribute override shared by the BLAS/LAPACK packages: turn off all
    # hardening flags
    noHardening = old: {
      hardeningDisable = [ "all" ];
    };
    customOverlay = final: prev: {
      # Build blis for Fox architecture and without OpenMP
      amd-blis =
        (prev.amd-blis.override {
          withArchitecture = "zen4";
          withOpenMP = false;
        }).overrideAttrs noHardening;
      # Disable OpenMP in flame
      amd-libflame =
        (prev.amd-libflame.override {
          withOpenMP = false;
        }).overrideAttrs noHardening;
      # Build bench6 with blis: the build inputs are replaced as a whole and
      # the CBLAS/LAPACKE locations are appended to the existing cmake flags
      bench6 = prev.bench6.overrideAttrs (old: {
        buildInputs = with final; [
          bigotes
          openmp
          openmpv
          nanos6
          nodes
          nosv
          mpi
          tampi
          ovni
          amd-blis
          amd-libflame
        ];
        cmakeFlags = (old.cmakeFlags or []) ++ [
          "-DCBLAS_INCLUDE_DIR=${final.amd-blis}/include/blis"
          "-DLAPACKE_INCLUDE_DIR=${final.amd-libflame}/include"
          "-DLAPACKE_LIBRARY=${final.amd-libflame}/lib/liblapacke.so.3"
        ];
      });
    };
    # nixpkgs with the jungle overlay first and the local overrides on top
    pkgs = import nixpkgs {
      inherit system;
      overlays = [
        jungle.outputs.bscOverlay
        customOverlay
      ];
    };
  in {
    # Shell entered by `nix develop`
    devShells.${system}.default = pkgs.mkShell {
      pname = "devshell";
      packages = with pkgs; [
        bench6 bigotes ministat
      ];
    };
    # Expose the customized bench6 as a flake package as well
    packages.${system}.bench6 = pkgs.bench6;
  };
 }
--- a/custom/vincent/chol/run.sh
+++ b/custom/vincent/chol/run.sh
@@ -0,0 +1,20 @@
 #!/bin/sh
 # Run b6_cholesky_nodes under bigotes and summarize the log with ministat.
 #
 # Usage: ./run.sh
 # Without arguments the script re-executes itself inside `nix develop`,
 # passing a dummy "run" argument so the nested invocation skips this branch.
 if [ -z "${1:-}" ]; then
 	# Quote $0 so a script path containing spaces survives the re-exec
 	exec nix develop -c "$0" run
 	#exec srun -J chol -p fox --exclusive nix develop -c "$0" run
 fi
 set -eux
 size=$((32*1024))
 bs=512
 # Look up the binary in its own assignment: inside a pipeline a failed lookup
 # would be masked by awk's exit status and `set -e` would not stop the script,
 # leaving the results in a bare "out/" directory.
 b6path=$(command -v b6_cholesky_nodes)
 # Fourth '/'-separated field of the path; for a /nix/store/<name>/bin/...
 # path this is <name>, so each build gets its own output directory.
 b6dir=$(printf '%s\n' "$b6path" | awk -F/ '{print $4}')
 wdir="out/$b6dir"
 mkdir -p "$wdir"
 log="$wdir/b6_cholesky_nodes-$size-$bs.csv"
 bigotes -o "$log" -- b6_cholesky_nodes "$size" "$bs"
 ministat -w80 "$log"
--- a/isabel/slurm/flake.lock
+++ b/isabel/slurm/flake.lock
@@ -1,42 +0,0 @@
 {
  "nodes": {
    "bscpkgs": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      },
      "locked": {
        "lastModified": 1749650500,
        "narHash": "sha256-2MHfVPV6RA7qPSCtXh4+KK0F0UjN+J4z8//+n6NK7Xs=",
        "ref": "refs/heads/master",
        "rev": "9d1944c658929b6f98b3f3803fead4d1b91c4405",
        "revCount": 961,
        "type": "git",
        "url": "https://git.sr.ht/~rodarima/bscpkgs"
      },
      "original": {
        "type": "git",
        "url": "https://git.sr.ht/~rodarima/bscpkgs"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1736867362,
        "narHash": "sha256-i/UJ5I7HoqmFMwZEH6vAvBxOrjjOJNU739lnZnhUln8=",
        "path": "/nix/store/2csx2kkb2hxyxhhmg2xs9jfyypikwwk6-source",
        "rev": "9c6b49aeac36e2ed73a8c472f1546f6d9cf1addc",
        "type": "path"
      },
      "original": {
        "id": "nixpkgs",
        "type": "indirect"
      }
    },
    "root": {
      "inputs": {
        "bscpkgs": "bscpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/ompss2/.gitignore
+++ b/ompss2/.gitignore
@@ -0,0 +1,2 @@
 hello
 ovni/
--- a/ompss2/Makefile
+++ b/ompss2/Makefile
@@ -0,0 +1,13 @@
 # Build the OmpSs-2 hello example with clang
 CC=clang
 CFLAGS=-fompss-2
 # Default goal, built by make's implicit rule for C programs
 hello: hello.c
 # trace and clean never create files with those names; declaring them phony
 # keeps them working even if such a file appears in this directory
 .PHONY: trace clean
 # Run hello with ovni instrumentation enabled in nOS-V, then run ovniemu on
 # the resulting ovni/ directory and list the generated .prv files
 trace: hello
 	rm -rf ovni/
 	NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni" NOSV_APPID=1 ./hello
 	ovniemu ovni/
 	ls -l ovni/*.prv
 # Remove the binary and any trace output
 clean:
 	rm -rf hello ovni/
--- a/ompss2/README.md
+++ b/ompss2/README.md
@@ -0,0 +1,57 @@
 # OmpSs-2 environment
 This example shows how to include the LLVM compiler to build OmpSs-2 programs
 with the new NODES and nOS-V runtime. The package `clangOmpss2Nodes` already
 sets all the needed variables to locate the right runtime.
 Run `nix develop` then `make` to build the `hello` program:
    apex% nix develop
    apex$ make
    clang -fompss-2    hello.c   -o hello
    apex$ ./hello
    hello from task 1
    hello from task 0
    hello from task 2
    hello from task 3
    hello from task 4
    hello from task 7
    hello from task 8
    hello from task 6
    hello from task 9
    hello from task 5
 You can use `make trace` to run the hello program with instrumentation
 enabled. This generates an ovni trace, which `ovniemu` then converts to
 Paraver traces (`.prv` files):
    apex$ make trace
    rm -rf ovni/
    NOSV_CONFIG_OVERRIDE="instrumentation.version=ovni" NOSV_APPID=1 ./hello
    hello from task 1
    hello from task 0
    hello from task 3
    hello from task 4
    hello from task 5
    hello from task 6
    hello from task 2
    hello from task 8
    hello from task 7
    hello from task 9
    ovniemu ovni/
    ovniemu: INFO: loaded 58 streams
    ovniemu: INFO: sorting looms by name
    ovniemu: INFO: loaded 1 looms, 1 processes, 58 threads and 56 cpus
    ovniemu: INFO: generated with libovni version 1.13.0 commit 0643266
    ovniemu: INFO: the following 2 models are enabled:
    ovniemu: INFO:      ovni 1.1.0 'O' (18 events)
    ovniemu: INFO:      nosv 2.6.0 'V' (64 events)
    ovniemu: INFO: emulation starts
    ovniemu: INFO: apex.nosv-u1880-p598308 burst stats: median/avg/max =  77/ 81/333 ns
    ovniemu: WARN: ignoring old event OCn
    ovniemu: INFO: 100.0% done at avg 42 kev/s
    ovniemu: INFO: processed 711 input events in 0.02 s
    ovniemu: INFO: writing traces to disk, please wait
    ovniemu: INFO: emulation finished ok
    ls -l ovni/*.prv
    -rw-r--r-- 1 rarias Computational 48224 Feb  4 11:52 ovni/cpu.prv
    -rw-r--r-- 1 rarias Computational 33689 Feb  4 11:52 ovni/thread.prv
--- a/ompss2/flake.lock
+++ b/ompss2/flake.lock
@@ -0,0 +1,45 @@
 {
  "nodes": {
    "jungle": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      },
      "locked": {
        "lastModified": 1770128250,
        "narHash": "sha256-Kx3EwImhYCp4bLPNWGz4oL4IYVjkCLXwcVmXTY40MBc=",
        "ref": "refs/heads/master",
        "rev": "7a6e4232de0e181de97e099e600ffc3a964260e0",
        "revCount": 1536,
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      },
      "original": {
        "type": "git",
        "url": "https://jungle.bsc.es/git/rarias/jungle"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1767634882,
        "narHash": "sha256-2GffSfQxe3sedHzK+sTKlYo/NTIAGzbFCIsNMUPAAnk=",
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "3c9db02515ef1d9b6b709fc60ba9a540957f661c",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-25.11",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "jungle": "jungle"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/ompss2/flake.nix
+++ b/ompss2/flake.nix
@@ -0,0 +1,36 @@
 {
  inputs.jungle.url = "git+https://jungle.bsc.es/git/rarias/jungle";
  outputs = { self, jungle }:
  let
    # Platform used for the package set and the shell output
    system = "x86_64-linux";
    # Reuse the nixpkgs input that jungle itself is locked to
    nixpkgs = jungle.inputs.nixpkgs;
    # Example overlay, for now empty: add local package overrides here
    customOverlay = final: prev: { };
    pkgs = import nixpkgs {
      inherit system;
      # Overlays are listed in the order they are applied
      overlays = [
        # Apply jungle overlay to get our BSC custom packages
        jungle.outputs.bscOverlay
        # And on top apply our local changes
        customOverlay
      ];
    };
  in {
    # Shell entered by `nix develop`
    devShells.${system}.default = pkgs.mkShell {
      pname = "ompss2-devshell";
      # Include these packages in the shell
      packages = with pkgs; [
        clangOmpss2Nodes nodes nosv ovni
        # Optional: Add wxparaver to open .prv traces (needs a working $DISPLAY)
        # wxparaver
      ];
      # The dependencies needed to build these packages will be also included
      inputsFrom = with pkgs; [ ];
    };
  };
 }
--- a/ompss2/hello.c
+++ b/ompss2/hello.c
@@ -0,0 +1,10 @@
 #include <stdio.h>
 /* Create ten OmpSs-2 tasks; each one prints the loop index it was created
  * with. Always returns 0. */
 int main()
 {
 	for (int task = 0; task < 10; ++task) {
 		/* The pragma turns the following block into a task */
 		#pragma oss task
 		{
 			printf("hello from task %d\n", task);
 		}
 	}
 	return 0;
 }
Author	SHA1	Message	Date
Rodrigo Arias Mallo	1cf55785f2	Add README.md	2026-02-04 12:00:19 +01:00
Rodrigo Arias Mallo	7e55e255f9	Add OmpSs-2 simple example	2026-02-04 11:54:30 +01:00
Rodrigo Arias Mallo	150bdae46e	Move personal shells to a custom directory	2026-02-04 10:25:22 +01:00
Rodrigo Arias Mallo	0495bf0dee	Add CUDA shell example	2026-02-03 18:24:02 +01:00
Rodrigo Arias Mallo	0775e1ce73	Add Vincent reproducer for cholesky	2026-01-29 14:31:21 +01:00
Rodrigo Arias Mallo	9bae257774	Configure MPICH for MN5	2025-10-30 10:44:27 +01:00
Rodrigo Arias Mallo	676a0ced1c	Switch bscpkgs inputs to jungle	2025-10-14 10:55:06 +02:00
Rodrigo Arias Mallo	9457de1983	Add posv shell for Arnau	2025-10-01 13:32:38 +02:00
Rodrigo Arias Mallo	59c56db491	Remove nanos6 from buildInputs	2025-08-29 11:40:18 +02:00