Enable strictDeps on all our toplevel packages #12
| @ -1,4 +1,4 @@ | ||||
| { lib, config, ... }: | ||||
| { lib, config, pkgs, ... }: | ||||
| { | ||||
|   # Configure Nvidia driver to use with CUDA | ||||
|   hardware.nvidia.package = config.boot.kernelPackages.nvidiaPackages.production; | ||||
| @ -15,4 +15,6 @@ | ||||
|   programs.nix-required-mounts.allowedPatterns.nvidia-gpu.paths = [ | ||||
|     config.systemd.tmpfiles.settings.graphics-driver."/run/opengl-driver"."L+".argument | ||||
|   ]; | ||||
| 
 | ||||
|   environment.systemPackages = [ pkgs.cudainfo ]; | ||||
| } | ||||
|  | ||||
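After the module above is deployed, the new pieces can be sanity-checked from a shell on the host; a minimal sketch (the exact driver file names under /run/opengl-driver vary with the driver version, so the final grep is only illustrative):

    # cudainfo is on PATH via environment.systemPackages
    cudainfo
    # the symlink that nix-required-mounts is allowed to expose to "cuda" builds
    readlink /run/opengl-driver
    ls /run/opengl-driver/lib | grep -i libcuda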
							
								
								
									
pkgs/cudainfo/Makefile (new file, 12 lines)
							| @ -0,0 +1,12 @@ | ||||
| HOSTCXX  ?= g++ | ||||
| NVCC     := nvcc -ccbin $(HOSTCXX) | ||||
| CXXFLAGS := -m64 | ||||
| 
 | ||||
| # Target rules
 | ||||
| all: cudainfo | ||||
| 
 | ||||
| cudainfo: cudainfo.cpp | ||||
| 	$(NVCC) $(CXXFLAGS) -o $@ $< | ||||
| 
 | ||||
| clean: | ||||
| 	rm -f cudainfo cudainfo.o | ||||
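For reference, the default target of this Makefile reduces to a single nvcc invocation; a minimal sketch of the equivalent manual build, assuming nvcc and g++ are already on PATH (as they are inside the cudainfo build environment defined further below):

    # What `make` runs: nvcc compiles cudainfo.cpp, delegating host-side compilation to g++ via -ccbin
    nvcc -ccbin g++ -m64 -o cudainfo cudainfo.cpp
    # Prints the device query report; exits non-zero if the CUDA runtime cannot be initialised
    ./cudainfo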
							
								
								
									
pkgs/cudainfo/cudainfo.cpp (new file, 600 lines)
							| @ -0,0 +1,600 @@ | ||||
| /*
 | ||||
|  * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved. | ||||
|  * | ||||
|  * Please refer to the NVIDIA end user license agreement (EULA) associated | ||||
|  * with this source code for terms and conditions that govern your use of | ||||
|  * this software. Any use, reproduction, disclosure, or distribution of | ||||
|  * this software and related documentation outside the terms of the EULA | ||||
|  * is strictly prohibited. | ||||
|  * | ||||
|  */ | ||||
| /* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */ | ||||
| 
 | ||||
| // Shared Utilities (QA Testing)
 | ||||
| 
 | ||||
| // std::system includes
 | ||||
| #include <memory> | ||||
| #include <iostream> | ||||
| 
 | ||||
| #include <cuda_runtime.h> | ||||
| 
 | ||||
| // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
 | ||||
| #define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ ) | ||||
| 
 | ||||
| // CUDA Runtime error messages
 | ||||
| #ifdef __DRIVER_TYPES_H__ | ||||
| static const char *_cudaGetErrorEnum(cudaError_t error) | ||||
| { | ||||
|     switch (error) | ||||
|     { | ||||
|         case cudaSuccess: | ||||
|             return "cudaSuccess"; | ||||
| 
 | ||||
|         case cudaErrorMissingConfiguration: | ||||
|             return "cudaErrorMissingConfiguration"; | ||||
| 
 | ||||
|         case cudaErrorMemoryAllocation: | ||||
|             return "cudaErrorMemoryAllocation"; | ||||
| 
 | ||||
|         case cudaErrorInitializationError: | ||||
|             return "cudaErrorInitializationError"; | ||||
| 
 | ||||
|         case cudaErrorLaunchFailure: | ||||
|             return "cudaErrorLaunchFailure"; | ||||
| 
 | ||||
|         case cudaErrorPriorLaunchFailure: | ||||
|             return "cudaErrorPriorLaunchFailure"; | ||||
| 
 | ||||
|         case cudaErrorLaunchTimeout: | ||||
|             return "cudaErrorLaunchTimeout"; | ||||
| 
 | ||||
|         case cudaErrorLaunchOutOfResources: | ||||
|             return "cudaErrorLaunchOutOfResources"; | ||||
| 
 | ||||
|         case cudaErrorInvalidDeviceFunction: | ||||
|             return "cudaErrorInvalidDeviceFunction"; | ||||
| 
 | ||||
|         case cudaErrorInvalidConfiguration: | ||||
|             return "cudaErrorInvalidConfiguration"; | ||||
| 
 | ||||
|         case cudaErrorInvalidDevice: | ||||
|             return "cudaErrorInvalidDevice"; | ||||
| 
 | ||||
|         case cudaErrorInvalidValue: | ||||
|             return "cudaErrorInvalidValue"; | ||||
| 
 | ||||
|         case cudaErrorInvalidPitchValue: | ||||
|             return "cudaErrorInvalidPitchValue"; | ||||
| 
 | ||||
|         case cudaErrorInvalidSymbol: | ||||
|             return "cudaErrorInvalidSymbol"; | ||||
| 
 | ||||
|         case cudaErrorMapBufferObjectFailed: | ||||
|             return "cudaErrorMapBufferObjectFailed"; | ||||
| 
 | ||||
|         case cudaErrorUnmapBufferObjectFailed: | ||||
|             return "cudaErrorUnmapBufferObjectFailed"; | ||||
| 
 | ||||
|         case cudaErrorInvalidHostPointer: | ||||
|             return "cudaErrorInvalidHostPointer"; | ||||
| 
 | ||||
|         case cudaErrorInvalidDevicePointer: | ||||
|             return "cudaErrorInvalidDevicePointer"; | ||||
| 
 | ||||
|         case cudaErrorInvalidTexture: | ||||
|             return "cudaErrorInvalidTexture"; | ||||
| 
 | ||||
|         case cudaErrorInvalidTextureBinding: | ||||
|             return "cudaErrorInvalidTextureBinding"; | ||||
| 
 | ||||
|         case cudaErrorInvalidChannelDescriptor: | ||||
|             return "cudaErrorInvalidChannelDescriptor"; | ||||
| 
 | ||||
|         case cudaErrorInvalidMemcpyDirection: | ||||
|             return "cudaErrorInvalidMemcpyDirection"; | ||||
| 
 | ||||
|         case cudaErrorAddressOfConstant: | ||||
|             return "cudaErrorAddressOfConstant"; | ||||
| 
 | ||||
|         case cudaErrorTextureFetchFailed: | ||||
|             return "cudaErrorTextureFetchFailed"; | ||||
| 
 | ||||
|         case cudaErrorTextureNotBound: | ||||
|             return "cudaErrorTextureNotBound"; | ||||
| 
 | ||||
|         case cudaErrorSynchronizationError: | ||||
|             return "cudaErrorSynchronizationError"; | ||||
| 
 | ||||
|         case cudaErrorInvalidFilterSetting: | ||||
|             return "cudaErrorInvalidFilterSetting"; | ||||
| 
 | ||||
|         case cudaErrorInvalidNormSetting: | ||||
|             return "cudaErrorInvalidNormSetting"; | ||||
| 
 | ||||
|         case cudaErrorMixedDeviceExecution: | ||||
|             return "cudaErrorMixedDeviceExecution"; | ||||
| 
 | ||||
|         case cudaErrorCudartUnloading: | ||||
|             return "cudaErrorCudartUnloading"; | ||||
| 
 | ||||
|         case cudaErrorUnknown: | ||||
|             return "cudaErrorUnknown"; | ||||
| 
 | ||||
|         case cudaErrorNotYetImplemented: | ||||
|             return "cudaErrorNotYetImplemented"; | ||||
| 
 | ||||
|         case cudaErrorMemoryValueTooLarge: | ||||
|             return "cudaErrorMemoryValueTooLarge"; | ||||
| 
 | ||||
|         case cudaErrorInvalidResourceHandle: | ||||
|             return "cudaErrorInvalidResourceHandle"; | ||||
| 
 | ||||
|         case cudaErrorNotReady: | ||||
|             return "cudaErrorNotReady"; | ||||
| 
 | ||||
|         case cudaErrorInsufficientDriver: | ||||
|             return "cudaErrorInsufficientDriver"; | ||||
| 
 | ||||
|         case cudaErrorSetOnActiveProcess: | ||||
|             return "cudaErrorSetOnActiveProcess"; | ||||
| 
 | ||||
|         case cudaErrorInvalidSurface: | ||||
|             return "cudaErrorInvalidSurface"; | ||||
| 
 | ||||
|         case cudaErrorNoDevice: | ||||
|             return "cudaErrorNoDevice"; | ||||
| 
 | ||||
|         case cudaErrorECCUncorrectable: | ||||
|             return "cudaErrorECCUncorrectable"; | ||||
| 
 | ||||
|         case cudaErrorSharedObjectSymbolNotFound: | ||||
|             return "cudaErrorSharedObjectSymbolNotFound"; | ||||
| 
 | ||||
|         case cudaErrorSharedObjectInitFailed: | ||||
|             return "cudaErrorSharedObjectInitFailed"; | ||||
| 
 | ||||
|         case cudaErrorUnsupportedLimit: | ||||
|             return "cudaErrorUnsupportedLimit"; | ||||
| 
 | ||||
|         case cudaErrorDuplicateVariableName: | ||||
|             return "cudaErrorDuplicateVariableName"; | ||||
| 
 | ||||
|         case cudaErrorDuplicateTextureName: | ||||
|             return "cudaErrorDuplicateTextureName"; | ||||
| 
 | ||||
|         case cudaErrorDuplicateSurfaceName: | ||||
|             return "cudaErrorDuplicateSurfaceName"; | ||||
| 
 | ||||
|         case cudaErrorDevicesUnavailable: | ||||
|             return "cudaErrorDevicesUnavailable"; | ||||
| 
 | ||||
|         case cudaErrorInvalidKernelImage: | ||||
|             return "cudaErrorInvalidKernelImage"; | ||||
| 
 | ||||
|         case cudaErrorNoKernelImageForDevice: | ||||
|             return "cudaErrorNoKernelImageForDevice"; | ||||
| 
 | ||||
|         case cudaErrorIncompatibleDriverContext: | ||||
|             return "cudaErrorIncompatibleDriverContext"; | ||||
| 
 | ||||
|         case cudaErrorPeerAccessAlreadyEnabled: | ||||
|             return "cudaErrorPeerAccessAlreadyEnabled"; | ||||
| 
 | ||||
|         case cudaErrorPeerAccessNotEnabled: | ||||
|             return "cudaErrorPeerAccessNotEnabled"; | ||||
| 
 | ||||
|         case cudaErrorDeviceAlreadyInUse: | ||||
|             return "cudaErrorDeviceAlreadyInUse"; | ||||
| 
 | ||||
|         case cudaErrorProfilerDisabled: | ||||
|             return "cudaErrorProfilerDisabled"; | ||||
| 
 | ||||
|         case cudaErrorProfilerNotInitialized: | ||||
|             return "cudaErrorProfilerNotInitialized"; | ||||
| 
 | ||||
|         case cudaErrorProfilerAlreadyStarted: | ||||
|             return "cudaErrorProfilerAlreadyStarted"; | ||||
| 
 | ||||
|         case cudaErrorProfilerAlreadyStopped: | ||||
|             return "cudaErrorProfilerAlreadyStopped"; | ||||
| 
 | ||||
|         /* Since CUDA 4.0*/ | ||||
|         case cudaErrorAssert: | ||||
|             return "cudaErrorAssert"; | ||||
| 
 | ||||
|         case cudaErrorTooManyPeers: | ||||
|             return "cudaErrorTooManyPeers"; | ||||
| 
 | ||||
|         case cudaErrorHostMemoryAlreadyRegistered: | ||||
|             return "cudaErrorHostMemoryAlreadyRegistered"; | ||||
| 
 | ||||
|         case cudaErrorHostMemoryNotRegistered: | ||||
|             return "cudaErrorHostMemoryNotRegistered"; | ||||
| 
 | ||||
|         /* Since CUDA 5.0 */ | ||||
|         case cudaErrorOperatingSystem: | ||||
|             return "cudaErrorOperatingSystem"; | ||||
| 
 | ||||
|         case cudaErrorPeerAccessUnsupported: | ||||
|             return "cudaErrorPeerAccessUnsupported"; | ||||
| 
 | ||||
|         case cudaErrorLaunchMaxDepthExceeded: | ||||
|             return "cudaErrorLaunchMaxDepthExceeded"; | ||||
| 
 | ||||
|         case cudaErrorLaunchFileScopedTex: | ||||
|             return "cudaErrorLaunchFileScopedTex"; | ||||
| 
 | ||||
|         case cudaErrorLaunchFileScopedSurf: | ||||
|             return "cudaErrorLaunchFileScopedSurf"; | ||||
| 
 | ||||
|         case cudaErrorSyncDepthExceeded: | ||||
|             return "cudaErrorSyncDepthExceeded"; | ||||
| 
 | ||||
|         case cudaErrorLaunchPendingCountExceeded: | ||||
|             return "cudaErrorLaunchPendingCountExceeded"; | ||||
| 
 | ||||
|         case cudaErrorNotPermitted: | ||||
|             return "cudaErrorNotPermitted"; | ||||
| 
 | ||||
|         case cudaErrorNotSupported: | ||||
|             return "cudaErrorNotSupported"; | ||||
| 
 | ||||
|         /* Since CUDA 6.0 */ | ||||
|         case cudaErrorHardwareStackError: | ||||
|             return "cudaErrorHardwareStackError"; | ||||
| 
 | ||||
|         case cudaErrorIllegalInstruction: | ||||
|             return "cudaErrorIllegalInstruction"; | ||||
| 
 | ||||
|         case cudaErrorMisalignedAddress: | ||||
|             return "cudaErrorMisalignedAddress"; | ||||
| 
 | ||||
|         case cudaErrorInvalidAddressSpace: | ||||
|             return "cudaErrorInvalidAddressSpace"; | ||||
| 
 | ||||
|         case cudaErrorInvalidPc: | ||||
|             return "cudaErrorInvalidPc"; | ||||
| 
 | ||||
|         case cudaErrorIllegalAddress: | ||||
|             return "cudaErrorIllegalAddress"; | ||||
| 
 | ||||
|         /* Since CUDA 6.5*/ | ||||
|         case cudaErrorInvalidPtx: | ||||
|             return "cudaErrorInvalidPtx"; | ||||
| 
 | ||||
|         case cudaErrorInvalidGraphicsContext: | ||||
|             return "cudaErrorInvalidGraphicsContext"; | ||||
| 
 | ||||
|         case cudaErrorStartupFailure: | ||||
|             return "cudaErrorStartupFailure"; | ||||
| 
 | ||||
|         case cudaErrorApiFailureBase: | ||||
|             return "cudaErrorApiFailureBase"; | ||||
|     } | ||||
| 
 | ||||
|     return "<unknown>"; | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| template< typename T > | ||||
| void check(T result, char const *const func, const char *const file, int const line) | ||||
| { | ||||
|     if (result) | ||||
|     { | ||||
|         fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", | ||||
|                 file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); | ||||
|         cudaDeviceReset(); | ||||
|         // Make sure we call CUDA Device Reset before exiting
 | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| int *pArgc = NULL; | ||||
| char **pArgv = NULL; | ||||
| 
 | ||||
| #if CUDART_VERSION < 5000 | ||||
| 
 | ||||
| // CUDA-C includes
 | ||||
| #include <cuda.h> | ||||
| 
 | ||||
| // This function wraps the CUDA Driver API into a template function
 | ||||
| template <class T> | ||||
| inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device) | ||||
| { | ||||
|     CUresult error =    cuDeviceGetAttribute(attribute, device_attribute, device); | ||||
| 
 | ||||
|     if (CUDA_SUCCESS != error) { | ||||
|         fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n", | ||||
|                 error, __FILE__, __LINE__); | ||||
| 
 | ||||
|         // cudaDeviceReset causes the driver to clean up all state. While
 | ||||
|         // not mandatory in normal operation, it is good practice.  It is also
 | ||||
|         // needed to ensure correct operation when the application is being
 | ||||
|         // profiled. Calling cudaDeviceReset causes all profile data to be
 | ||||
|         // flushed before the application exits
 | ||||
|         cudaDeviceReset(); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #endif /* CUDART_VERSION < 5000 */ | ||||
| 
 | ||||
| // Beginning of GPU Architecture definitions
 | ||||
| inline int ConvertSMVer2Cores(int major, int minor) | ||||
| { | ||||
|     // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
 | ||||
|     typedef struct { | ||||
|         int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
 | ||||
|         int Cores; | ||||
|     } sSMtoCores; | ||||
| 
 | ||||
|     sSMtoCores nGpuArchCoresPerSM[] = { | ||||
|         { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
 | ||||
|         { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
 | ||||
|         { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
 | ||||
|         { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
 | ||||
|         { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
 | ||||
|         { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
 | ||||
|         { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
 | ||||
|         { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
 | ||||
|         {   -1, -1 } | ||||
|     }; | ||||
| 
 | ||||
|     int index = 0; | ||||
| 
 | ||||
|     while (nGpuArchCoresPerSM[index].SM != -1) { | ||||
|         if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) { | ||||
|             return nGpuArchCoresPerSM[index].Cores; | ||||
|         } | ||||
| 
 | ||||
|         index++; | ||||
|     } | ||||
| 
 | ||||
|     // If we don't find the values, we default to the previous one so the sample still runs
 | ||||
|     printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); | ||||
|     return nGpuArchCoresPerSM[index-1].Cores; | ||||
| } | ||||
| 
 | ||||
| ////////////////////////////////////////////////////////////////////////////////
 | ||||
| // Program main
 | ||||
| ////////////////////////////////////////////////////////////////////////////////
 | ||||
| int | ||||
| main(int argc, char **argv) | ||||
| { | ||||
|     pArgc = &argc; | ||||
|     pArgv = argv; | ||||
| 
 | ||||
|     printf("%s Starting...\n\n", argv[0]); | ||||
|     printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n"); | ||||
| 
 | ||||
|     int deviceCount = 0; | ||||
|     cudaError_t error_id = cudaGetDeviceCount(&deviceCount); | ||||
| 
 | ||||
|     if (error_id != cudaSuccess) { | ||||
|         printf("cudaGetDeviceCount failed: %s (%d)\n", | ||||
| 			cudaGetErrorString(error_id), (int) error_id); | ||||
|         printf("Result = FAIL\n"); | ||||
|         exit(EXIT_FAILURE); | ||||
|     } | ||||
| 
 | ||||
|     // This function call returns 0 if there are no CUDA capable devices.
 | ||||
|     if (deviceCount == 0) | ||||
|         printf("There are no available device(s) that support CUDA\n"); | ||||
|     else | ||||
|         printf("Detected %d CUDA Capable device(s)\n", deviceCount); | ||||
| 
 | ||||
|     int dev, driverVersion = 0, runtimeVersion = 0; | ||||
| 
 | ||||
|     for (dev = 0; dev < deviceCount; ++dev) { | ||||
|         cudaSetDevice(dev); | ||||
|         cudaDeviceProp deviceProp; | ||||
|         cudaGetDeviceProperties(&deviceProp, dev); | ||||
| 
 | ||||
|         printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name); | ||||
| 
 | ||||
|         // Console log
 | ||||
|         cudaDriverGetVersion(&driverVersion); | ||||
|         cudaRuntimeGetVersion(&runtimeVersion); | ||||
|         printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10); | ||||
|         printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor); | ||||
| 
 | ||||
|         printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n", | ||||
|                 (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem); | ||||
| 
 | ||||
|         printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n", | ||||
|                deviceProp.multiProcessorCount, | ||||
|                ConvertSMVer2Cores(deviceProp.major, deviceProp.minor), | ||||
|                ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount); | ||||
|         printf("  GPU Max Clock rate:                            %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f); | ||||
| 
 | ||||
| 
 | ||||
| #if CUDART_VERSION >= 5000 | ||||
|         // This is supported in CUDA 5.0 (runtime API device properties)
 | ||||
|         printf("  Memory Clock rate:                             %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f); | ||||
|         printf("  Memory Bus Width:                              %d-bit\n",   deviceProp.memoryBusWidth); | ||||
| 
 | ||||
|         if (deviceProp.l2CacheSize) { | ||||
|             printf("  L2 Cache Size:                                 %d bytes\n", deviceProp.l2CacheSize); | ||||
|         } | ||||
| 
 | ||||
| #else | ||||
|         // This is only available in CUDA 4.0-4.2 (these attributes were only exposed in the CUDA Driver API)
 | ||||
|         int memoryClock; | ||||
|         getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev); | ||||
|         printf("  Memory Clock rate:                             %.0f Mhz\n", memoryClock * 1e-3f); | ||||
|         int memBusWidth; | ||||
|         getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev); | ||||
|         printf("  Memory Bus Width:                              %d-bit\n", memBusWidth); | ||||
|         int L2CacheSize; | ||||
|         getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev); | ||||
| 
 | ||||
|         if (L2CacheSize) { | ||||
|             printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize); | ||||
|         } | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
|         printf("  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n", | ||||
|                deviceProp.maxTexture1D   , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1], | ||||
|                deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]); | ||||
|         printf("  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n", | ||||
|                deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]); | ||||
|         printf("  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d layers\n", | ||||
|                deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]); | ||||
| 
 | ||||
| 
 | ||||
|         printf("  Total amount of constant memory:               %lu bytes\n", deviceProp.totalConstMem); | ||||
|         printf("  Total amount of shared memory per block:       %lu bytes\n", deviceProp.sharedMemPerBlock); | ||||
|         printf("  Total number of registers available per block: %d\n", deviceProp.regsPerBlock); | ||||
|         printf("  Warp size:                                     %d\n", deviceProp.warpSize); | ||||
|         printf("  Maximum number of threads per multiprocessor:  %d\n", deviceProp.maxThreadsPerMultiProcessor); | ||||
|         printf("  Maximum number of threads per block:           %d\n", deviceProp.maxThreadsPerBlock); | ||||
|         printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n", | ||||
|                deviceProp.maxThreadsDim[0], | ||||
|                deviceProp.maxThreadsDim[1], | ||||
|                deviceProp.maxThreadsDim[2]); | ||||
|         printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n", | ||||
|                deviceProp.maxGridSize[0], | ||||
|                deviceProp.maxGridSize[1], | ||||
|                deviceProp.maxGridSize[2]); | ||||
|         printf("  Maximum memory pitch:                          %lu bytes\n", deviceProp.memPitch); | ||||
|         printf("  Texture alignment:                             %lu bytes\n", deviceProp.textureAlignment); | ||||
|         printf("  Concurrent copy and kernel execution:          %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount); | ||||
|         printf("  Run time limit on kernels:                     %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No"); | ||||
|         printf("  Integrated GPU sharing Host Memory:            %s\n", deviceProp.integrated ? "Yes" : "No"); | ||||
|         printf("  Support host page-locked memory mapping:       %s\n", deviceProp.canMapHostMemory ? "Yes" : "No"); | ||||
|         printf("  Alignment requirement for Surfaces:            %s\n", deviceProp.surfaceAlignment ? "Yes" : "No"); | ||||
|         printf("  Device has ECC support:                        %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled"); | ||||
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) | ||||
|         printf("  CUDA Device Driver Mode (TCC or WDDM):         %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)"); | ||||
| #endif | ||||
|         printf("  Device supports Unified Addressing (UVA):      %s\n", deviceProp.unifiedAddressing ? "Yes" : "No"); | ||||
|         printf("  Device PCI Domain ID / Bus ID / location ID:   %d / %d / %d\n", deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID); | ||||
| 
 | ||||
|         const char *sComputeMode[] = { | ||||
|             "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)", | ||||
|             "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)", | ||||
|             "Prohibited (no host thread can use ::cudaSetDevice() with this device)", | ||||
|             "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)", | ||||
|             "Unknown", | ||||
|             NULL | ||||
|         }; | ||||
|         printf("  Compute Mode:\n"); | ||||
|         printf("     < %s >\n", sComputeMode[deviceProp.computeMode]); | ||||
|     } | ||||
| 
 | ||||
|     // If there are 2 or more GPUs, query to determine whether peer-to-peer (P2P) access is supported
 | ||||
|     if (deviceCount >= 2) | ||||
|     { | ||||
|         cudaDeviceProp prop[64]; | ||||
|         int gpuid[64]; // we want to find the first two GPUs that can support P2P
 | ||||
|         int gpu_p2p_count = 0; | ||||
| 
 | ||||
|         for (int i=0; i < deviceCount; i++) | ||||
|         { | ||||
|             checkCudaErrors(cudaGetDeviceProperties(&prop[i], i)); | ||||
| 
 | ||||
|             // Only boards based on Fermi or later can support P2P
 | ||||
|             if ((prop[i].major >= 2) | ||||
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) | ||||
|                 // On Windows (64-bit), the Tesla Compute Cluster (TCC) driver for Windows must be enabled to support this
 | ||||
|                 && prop[i].tccDriver | ||||
| #endif | ||||
|                ) | ||||
|             { | ||||
|                 // This is an array of P2P capable GPUs
 | ||||
|                 gpuid[gpu_p2p_count++] = i; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         // Show all the combinations of support P2P GPUs
 | ||||
|         int can_access_peer_0_1, can_access_peer_1_0; | ||||
| 
 | ||||
|         if (gpu_p2p_count >= 2) | ||||
|         { | ||||
|             for (int i = 0; i < gpu_p2p_count-1; i++) | ||||
|             { | ||||
|                 for (int j = 1; j < gpu_p2p_count; j++) | ||||
|                 { | ||||
|                     checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j])); | ||||
|                     printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i], | ||||
|                            prop[gpuid[j]].name, gpuid[j] , | ||||
|                            can_access_peer_0_1 ? "Yes" : "No"); | ||||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             for (int j = 1; j < gpu_p2p_count; j++) | ||||
|             { | ||||
|                 for (int i = 0; i < gpu_p2p_count-1; i++) | ||||
|                 { | ||||
|                     checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i])); | ||||
|                     printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j], | ||||
|                            prop[gpuid[i]].name, gpuid[i] , | ||||
|                            can_access_peer_1_0 ? "Yes" : "No"); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // csv masterlog info
 | ||||
|     // *****************************
 | ||||
|     // exe and CUDA driver name
 | ||||
|     printf("\n"); | ||||
|     std::string sProfileString = "deviceQuery, CUDA Driver = CUDART"; | ||||
|     char cTemp[128]; | ||||
| 
 | ||||
|     // driver version
 | ||||
|     sProfileString += ", CUDA Driver Version = "; | ||||
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) | ||||
|     sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10); | ||||
| #else | ||||
|     sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10); | ||||
| #endif | ||||
|     sProfileString +=  cTemp; | ||||
| 
 | ||||
|     // Runtime version
 | ||||
|     sProfileString += ", CUDA Runtime Version = "; | ||||
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) | ||||
|     sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); | ||||
| #else | ||||
|     sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10); | ||||
| #endif | ||||
|     sProfileString +=  cTemp; | ||||
| 
 | ||||
|     // Device count
 | ||||
|     sProfileString += ", NumDevs = "; | ||||
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) | ||||
|     sprintf_s(cTemp, 10, "%d", deviceCount); | ||||
| #else | ||||
|     sprintf(cTemp, "%d", deviceCount); | ||||
| #endif | ||||
|     sProfileString += cTemp; | ||||
| 
 | ||||
|     // Print Out all device Names
 | ||||
|     for (dev = 0; dev < deviceCount; ++dev) | ||||
|     { | ||||
| #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) | ||||
|         sprintf_s(cTemp, 13, ", Device%d = ", dev); | ||||
| #else | ||||
|         sprintf(cTemp, ", Device%d = ", dev); | ||||
| #endif | ||||
|         cudaDeviceProp deviceProp; | ||||
|         cudaGetDeviceProperties(&deviceProp, dev); | ||||
|         sProfileString += cTemp; | ||||
|         sProfileString += deviceProp.name; | ||||
|     } | ||||
| 
 | ||||
|     sProfileString += "\n"; | ||||
|     printf("%s", sProfileString.c_str()); | ||||
| 
 | ||||
|     printf("Result = PASS\n"); | ||||
| 
 | ||||
|     // finish
 | ||||
|     // cudaDeviceReset causes the driver to clean up all state. While
 | ||||
|     // not mandatory in normal operation, it is good practice.  It is also
 | ||||
|     // needed to ensure correct operation when the application is being
 | ||||
|     // profiled. Calling cudaDeviceReset causes all profile data to be
 | ||||
|     // flushed before the application exits
 | ||||
|     cudaDeviceReset(); | ||||
|     return 0; | ||||
| } | ||||
							
								
								
									
pkgs/cudainfo/default.nix (new file, 43 lines)
							| @ -0,0 +1,43 @@ | ||||
| { | ||||
|   stdenv | ||||
| , cudatoolkit | ||||
| , cudaPackages | ||||
| , autoAddDriverRunpath | ||||
| , strace | ||||
| }: | ||||
| 
 | ||||
| stdenv.mkDerivation (finalAttrs: { | ||||
|   name = "cudainfo"; | ||||
|   src = ./.; | ||||
|   buildInputs = [ | ||||
|     cudatoolkit # Required for nvcc | ||||
|     cudaPackages.cuda_cudart.static # Required for -lcudart_static | ||||
|     autoAddDriverRunpath | ||||
|   ]; | ||||
|   installPhase = '' | ||||
|     mkdir -p $out/bin | ||||
|     cp -a cudainfo $out/bin | ||||
|   ''; | ||||
|   passthru.gpuCheck = stdenv.mkDerivation { | ||||
|     name = "cudainfo-test"; | ||||
|     requiredSystemFeatures = [ "cuda" ]; | ||||
|     dontBuild = true; | ||||
|     nativeCheckInputs = [ | ||||
|       finalAttrs.finalPackage # The cudainfo package from above | ||||
|       strace # If cudainfo fails, the checkPhase straces it to help debugging | ||||
|     ]; | ||||
|     dontUnpack = true; | ||||
|     doCheck = true; | ||||
|     checkPhase = '' | ||||
|       if ! cudainfo; then | ||||
|         set -x | ||||
|         cudainfo=$(command -v cudainfo) | ||||
|         ldd $cudainfo | ||||
|         readelf -d $cudainfo | ||||
|         strace -f $cudainfo | ||||
|         set +x | ||||
|       fi | ||||
|     ''; | ||||
|     installPhase = "touch $out"; | ||||
|   }; | ||||
| }) | ||||
| @ -52,4 +52,5 @@ final: prev: | ||||
|   prometheus-slurm-exporter = prev.callPackage ./slurm-exporter.nix { }; | ||||
|   meteocat-exporter = prev.callPackage ./meteocat-exporter/default.nix { }; | ||||
|   upc-qaire-exporter = prev.callPackage ./upc-qaire-exporter/default.nix { }; | ||||
|   cudainfo = prev.callPackage ./cudainfo/default.nix { }; | ||||
| } | ||||
|  | ||||
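With the overlay entry above, the package and its GPU smoke test are reachable as cudainfo and cudainfo.gpuCheck. A hedged sketch of exercising them (the nix-build attribute paths assume a top-level expression that applies this overlay, which is not shown in this diff; the check additionally needs a builder that advertises the "cuda" system feature):

    # Build the package itself
    nix-build -A cudainfo
    # Run the passthru GPU check on a machine with requiredSystemFeatures = [ "cuda" ]
    nix-build -A cudainfo.gpuCheck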