#include <stdio.h>
/* nvcc devicequery.cu ./a.out CUDA Device Query... There are 1 CUDA devices. CUDA Device #0 Major revision number: 1 Minor revision number: 1 Name: GeForce 8400 GS Total global memory: 267714560 Total shared memory per block: 16384 Total registers per block: 8192 Warp size: 32 Maximum memory pitch: 2147483647 Maximum threads per block: 512 Maximum dimension 0 of block: 512 Maximum dimension 1 of block: 512 Maximum dimension 2 of block: 64 Maximum dimension 0 of grid: 65535 Maximum dimension 1 of grid: 65535 Maximum dimension 2 of grid: 1 Clock rate: 1400000 Total constant memory: 65536 Texture alignment: 256 Concurrent copy and execution: No Number of multiprocessors: 1 Kernel execution timeout: Yes ======================================================= lspci | grep NVIDIA 01:00.0 VGA compatible controller: NVIDIA Corporation G98 [GeForce 8400 GS] (rev a1) ======================================================== HOT TO RUN ON euclid.cyi.ac.cy ssh epl372u02@euclid.cyi.ac.cy [epl372u02@euclid ~]$ module load gcccuda [epl372u02@euclid ~]$ nvcc devicequery.cu [epl372u02@euclid ~]$ qsub cudarun.pbs Where cudarun.pbs is: #!/bin/bash #PBS -N cudarun #PBS -j oe #PBS -q batch #PBS -l nodes=1:ppn=8 module load gcccuda ./a.out [epl372u02@euclid ~]$ cat cudarun.o4762 --------------------------------------------------------- Torque Prologue on Euclid cluster at The Cyprus Institute Tue Mar 18 17:55:11 EET 2014 --------------------------------------------------------- Job ID: 4762.euclid Job Name: cudarun Username: epl372u02 --------------------------------------------------------- CUDA Device Query... There are 2 CUDA devices. 
CUDA Device #0 Major revision number: 1 Minor revision number: 3 Name: Tesla T10 Processor Total global memory: 4294770688 Total shared memory per block: 16384 Total registers per block: 16384 Warp size: 32 Maximum memory pitch: 2147483647 Maximum threads per block: 512 Maximum dimension 0 of block: 512 Maximum dimension 1 of block: 512 Maximum dimension 2 of block: 64 Maximum dimension 0 of grid: 65535 Maximum dimension 1 of grid: 65535 Maximum dimension 2 of grid: 1 Clock rate: 1296000 Total constant memory: 65536 Texture alignment: 256 Concurrent copy and execution: Yes Number of multiprocessors: 30 Kernel execution timeout: No CUDA Device #1 Major revision number: 1 Minor revision number: 3 Name: Tesla T10 Processor Total global memory: 4294770688 Total shared memory per block: 16384 Total registers per block: 16384 Warp size: 32 Maximum memory pitch: 2147483647 Maximum threads per block: 512 Maximum dimension 0 of block: 512 Maximum dimension 1 of block: 512 Maximum dimension 2 of block: 64 Maximum dimension 0 of grid: 65535 Maximum dimension 1 of grid: 65535 Maximum dimension 2 of grid: 1 Clock rate: 1296000 Total constant memory: 65536 Texture alignment: 256 Concurrent copy and execution: Yes Number of multiprocessors: 30 Kernel execution timeout: No Press any key to exit... 
*/

/**
 * Print selected fields of a CUDA device property record to stdout,
 * one "label: value" pair per line.
 *
 * @param devProp  Property record to print, as filled in by
 *                 cudaGetDeviceProperties().
 *
 * The memory-size fields (totalGlobalMem, sharedMemPerBlock, memPitch,
 * totalConstMem, textureAlignment) are size_t and are therefore printed
 * with %zu; printing them with %u is a format/argument mismatch and
 * truncates values that do not fit in 32 bits.
 */
void printDevProp(cudaDeviceProp devProp)
{
    printf("Major revision number:         %d\n", devProp.major);
    printf("Minor revision number:         %d\n", devProp.minor);
    printf("Name:                          %s\n", devProp.name);
    printf("Total global memory:           %zu\n", devProp.totalGlobalMem);
    printf("Total shared memory per block: %zu\n", devProp.sharedMemPerBlock);
    printf("Total registers per block:     %d\n", devProp.regsPerBlock);
    printf("Warp size:                     %d\n", devProp.warpSize);
    printf("Maximum memory pitch:          %zu\n", devProp.memPitch);
    printf("Maximum threads per block:     %d\n", devProp.maxThreadsPerBlock);

    // Per-dimension limits: block dimensions first, then grid dimensions.
    for (int i = 0; i < 3; ++i)
        printf("Maximum dimension %d of block:  %d\n", i, devProp.maxThreadsDim[i]);
    for (int i = 0; i < 3; ++i)
        printf("Maximum dimension %d of grid:   %d\n", i, devProp.maxGridSize[i]);

    printf("Clock rate:                    %d\n", devProp.clockRate);
    printf("Total constant memory:         %zu\n", devProp.totalConstMem);
    printf("Texture alignment:             %zu\n", devProp.textureAlignment);
    printf("Concurrent copy and execution: %s\n", (devProp.deviceOverlap ? "Yes" : "No"));
    printf("Number of multiprocessors:     %d\n", devProp.multiProcessorCount);
    printf("Kernel execution timeout:      %s\n", (devProp.kernelExecTimeoutEnabled ? "Yes" : "No"));
}

/**
 * Enumerate the CUDA devices visible to the runtime and print the
 * properties of each one.
 *
 * @return 0 when every query succeeded; 1 when the device count could not
 *         be obtained or the properties of at least one device could not
 *         be read.
 */
int main()
{
    printf("CUDA Device Query...\n");

    // Number of CUDA devices. Initialised so it is never read as garbage,
    // and the call is checked because a failure leaves it meaningless.
    int devCount = 0;
    cudaError_t err = cudaGetDeviceCount(&devCount);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    printf("There are %d CUDA devices.\n", devCount);

    int status = 0;

    // Iterate through devices
    for (int i = 0; i < devCount; ++i)
    {
        printf("\nCUDA Device #%d\n", i);

        // Get device properties; skip the printout for a device whose
        // properties could not be read instead of printing an
        // uninitialised record.
        cudaDeviceProp devProp;
        err = cudaGetDeviceProperties(&devProp, i);
        if (err != cudaSuccess)
        {
            fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n", i, cudaGetErrorString(err));
            status = 1;
            continue;
        }
        printDevProp(devProp);
    }

    // Wait for one character on stdin before exiting. At end-of-file
    // getchar() returns immediately.
    printf("\nPress any key to exit...");
    getchar();

    return status;
}