diff --git a/lib/gpu/cudpp_mini/cudpp_maximal_launch.cpp b/lib/gpu/cudpp_mini/cudpp_maximal_launch.cpp index 904d101b3b..1a7542e68c 100644 --- a/lib/gpu/cudpp_mini/cudpp_maximal_launch.cpp +++ b/lib/gpu/cudpp_mini/cudpp_maximal_launch.cpp @@ -8,16 +8,7 @@ // in the root directory of this source distribution. // ------------------------------------------------------------- #include "cudpp_maximal_launch.h" - -inline size_t min(size_t x, size_t y) -{ - return (x <= y) ? x : y; -} - -inline size_t max(size_t x, size_t y) -{ - return (x >= y) ? x : y; -} +#include // computes next highest multiple of f from x inline size_t multiple(size_t x, size_t f) @@ -65,7 +56,7 @@ size_t maxBlocks(cudaFuncAttributes &attribs, size_t ctaLimitSMem = smemPerCTA > 0 ? devprop.sharedMemPerBlock / smemPerCTA : maxBlocksPerSM; size_t ctaLimitThreads = maxThreadsPerSM / threadsPerBlock; - return devprop.multiProcessorCount * min(ctaLimitRegs, min(ctaLimitSMem, min(ctaLimitThreads, maxBlocksPerSM))); + return devprop.multiProcessorCount * std::min(ctaLimitRegs, std::min(ctaLimitSMem, std::min(ctaLimitThreads, (size_t)maxBlocksPerSM))); } extern "C" @@ -80,15 +71,15 @@ size_t maxBlocksFromPointer(void* kernel, { err = cudaGetDeviceProperties(&devprop, deviceID); if (err != cudaSuccess) - return -1; + return (size_t)-1; cudaFuncAttributes attr; err = cudaFuncGetAttributes(&attr, (const char*)kernel); if (err != cudaSuccess) - return -1; + return (size_t)-1; return maxBlocks(attr, devprop, bytesDynamicSharedMem, threadsPerBlock); } - return -1; + return (size_t)-1; } diff --git a/lib/gpu/geryon/nvd_kernel.h b/lib/gpu/geryon/nvd_kernel.h index d03a715e1b..9293db2fe4 100644 --- a/lib/gpu/geryon/nvd_kernel.h +++ b/lib/gpu/geryon/nvd_kernel.h @@ -244,7 +244,7 @@ class UCL_Kernel { template inline void add_arg(const dtype* const arg) { #if CUDA_VERSION >= 4000 - _kernel_args[_num_args]=const_cast(arg); + _kernel_args[_num_args]=const_cast(arg); #else _param_size = (_param_size+__alignof(dtype)-1) & ~(__alignof(dtype)-1); CU_SAFE_CALL(cuParamSetv(_kernel,_param_size,(void*)arg,sizeof(dtype)));