Changes from Mike Brown.
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@5277 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
363
lib/gpu/cudpp_mini/cudpp_util.h
Normal file
363
lib/gpu/cudpp_mini/cudpp_util.h
Normal file
@ -0,0 +1,363 @@
|
||||
// -------------------------------------------------------------
|
||||
// cuDPP -- CUDA Data Parallel Primitives library
|
||||
// -------------------------------------------------------------
|
||||
// $Revision$
|
||||
// $Date$
|
||||
// -------------------------------------------------------------
|
||||
// This source code is distributed under the terms of license.txt in
|
||||
// the root directory of this source distribution.
|
||||
// -------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @file
|
||||
* cudpp_util.h
|
||||
*
|
||||
* @brief C++ utility functions and classes used internally to cuDPP
|
||||
*/
|
||||
|
||||
#ifndef __CUDPP_UTIL_H__
|
||||
#define __CUDPP_UTIL_H__
|
||||
|
||||
#ifdef WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cudpp.h>
|
||||
#include <limits.h>
|
||||
#include <float.h>
|
||||
|
||||
#if (CUDA_VERSION >= 3000)
|
||||
#define LAUNCH_BOUNDS(x) __launch_bounds__((x))
|
||||
#define LAUNCH_BOUNDS_MINBLOCKs(x, y) __launch_bounds__((x),(y))
|
||||
#else
|
||||
#define LAUNCH_BOUNDS(x)
|
||||
#define LAUNCH_BOUNDS_MINBLOCKS(x, y)
|
||||
#endif
|
||||
|
||||
|
||||
/** @brief Determine if \a n is a power of two.
|
||||
* @param n Value to be checked to see if it is a power of two
|
||||
* @returns True if \a n is a power of two, false otherwise
|
||||
*/
|
||||
inline bool
|
||||
isPowerOfTwo(int n)
|
||||
{
|
||||
return ((n&(n-1))==0) ;
|
||||
}
|
||||
|
||||
/** @brief Determine if an integer \a n is a multiple of an integer \a f.
|
||||
* @param n Multiple
|
||||
* @param f Factor
|
||||
* @returns True if \a n is a multiple of \a f, false otherwise
|
||||
*/
|
||||
inline bool
|
||||
isMultiple(int n, int f)
|
||||
{
|
||||
if (isPowerOfTwo(f))
|
||||
return ((n&(f-1))==0);
|
||||
else
|
||||
return (n%f==0);
|
||||
}
|
||||
|
||||
/** @brief Compute the smallest power of two larger than \a n.
|
||||
* @param n Input value
|
||||
* @returns The smallest power f two larger than \a n
|
||||
*/
|
||||
inline int
|
||||
ceilPow2(int n)
|
||||
{
|
||||
double log2n = log2((double)n);
|
||||
if (isPowerOfTwo(n))
|
||||
return n;
|
||||
else
|
||||
return 1 << (int)ceil(log2n);
|
||||
}
|
||||
|
||||
/** @brief Compute the largest power of two smaller than \a n.
|
||||
* @param n Input value
|
||||
* @returns The largest power of two smaller than \a n.
|
||||
*/
|
||||
inline int
|
||||
floorPow2(int n)
|
||||
{
|
||||
#ifdef WIN32
|
||||
// method 2
|
||||
return 1 << (int)_logb((float)n);
|
||||
#else
|
||||
// method 3
|
||||
int exp;
|
||||
frexp((float)n, &exp);
|
||||
return 1 << (exp - 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
/** @brief Returns the maximum value for type \a T.
|
||||
*
|
||||
* Implemented using template specialization on \a T.
|
||||
*/
|
||||
template <class T>
|
||||
__host__ __device__ inline T getMax() { return 0; }
|
||||
/** @brief Returns the minimum value for type \a T.
|
||||
*
|
||||
* Implemented using template specialization on \a T.
|
||||
*/
|
||||
template <class T>
|
||||
__host__ __device__ inline T getMin() { return 0; }
|
||||
// type specializations for the above
|
||||
// getMax
|
||||
template <> __host__ __device__ inline int getMax() { return INT_MAX; }
|
||||
template <> __host__ __device__ inline unsigned int getMax() { return INT_MAX; }
|
||||
template <> __host__ __device__ inline float getMax() { return FLT_MAX; }
|
||||
template <> __host__ __device__ inline char getMax() { return (char)INT_MAX; }
|
||||
template <> __host__ __device__ inline unsigned char getMax() { return (unsigned char)INT_MAX; }
|
||||
// getMin
|
||||
template <> __host__ __device__ inline int getMin() { return INT_MIN; }
|
||||
template <> __host__ __device__ inline unsigned int getMin() { return 0; }
|
||||
template <> __host__ __device__ inline float getMin() { return -FLT_MAX; }
|
||||
template <> __host__ __device__ inline char getMin() { return (char)INT_MIN; }
|
||||
template <> __host__ __device__ inline unsigned char getMin() { return (unsigned char)0; }
|
||||
|
||||
/** @brief Returns the maximum of three values.
|
||||
* @param a First value.
|
||||
* @param b Second value.
|
||||
* @param c Third value.
|
||||
* @returns The maximum of \a a, \a b and \a c.
|
||||
*/
|
||||
template<class T>
|
||||
inline int max3(T a, T b, T c)
|
||||
{
|
||||
return (a > b) ? ((a > c)? a : c) : ((b > c) ? b : c);
|
||||
}
|
||||
|
||||
/** @brief Utility template struct for generating small vector types from scalar types
|
||||
*
|
||||
* Given a base scalar type (\c int, \c float, etc.) and a vector length (1 through 4) as
|
||||
* template parameters, this struct defines a vector type (\c float3, \c int4, etc.) of the
|
||||
* specified length and base type. For example:
|
||||
* \code
|
||||
* template <class T>
|
||||
* __device__ void myKernel(T *data)
|
||||
* {
|
||||
* typeToVector<T,4>::Result myVec4; // create a vec4 of type T
|
||||
* myVec4 = (typeToVector<T,4>::Result*)data[0]; // load first element of data as a vec4
|
||||
* }
|
||||
* \endcode
|
||||
*
|
||||
* This functionality is implemented using template specialization. Currently specializations
|
||||
* for int, float, and unsigned int vectors of lengths 2-4 are defined. Note that this results
|
||||
* in types being generated at compile time -- there is no runtime cost. typeToVector is used by
|
||||
* the optimized scan \c __device__ functions in scan_cta.cu.
|
||||
*/
|
||||
template <typename T, int N>
|
||||
struct typeToVector
|
||||
{
|
||||
typedef T Result;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct typeToVector<int, 4>
|
||||
{
|
||||
typedef int4 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<unsigned int, 4>
|
||||
{
|
||||
typedef uint4 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<float, 4>
|
||||
{
|
||||
typedef float4 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<int, 3>
|
||||
{
|
||||
typedef int3 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<unsigned int, 3>
|
||||
{
|
||||
typedef uint3 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<float, 3>
|
||||
{
|
||||
typedef float3 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<int, 2>
|
||||
{
|
||||
typedef int2 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<unsigned int, 2>
|
||||
{
|
||||
typedef uint2 Result;
|
||||
};
|
||||
template<>
|
||||
struct typeToVector<float, 2>
|
||||
{
|
||||
typedef float2 Result;
|
||||
};
|
||||
|
||||
/** @brief Templatized operator class used by scan and segmented scan
|
||||
*
|
||||
* This Operator class is used to allow generic support of binary
|
||||
* associative operators in scan. It defines two member functions,
|
||||
* op() and identity(), that are used in place of + and 0 (for
|
||||
* example) in the scan and segmented scan code. Because this is
|
||||
* template code, all decisions in the code are made at compile
|
||||
* time, resulting in optimal operator code. Currently the operators
|
||||
* CUDPP_ADD, CUDPP_MULTIPLY, CUDPP_MIN, and CUDPP_MAX are supported.
|
||||
* Operator is implemented using template specialization for the
|
||||
* types \c int, \c unsigned int, and \c float.
|
||||
*/
|
||||
template <typename T, CUDPPOperator oper>
|
||||
class Operator
|
||||
{
|
||||
public:
|
||||
/** Applies the operator to operands \a a and \a b.
|
||||
* @param a First operand
|
||||
* @param b Second operand
|
||||
* @returns a OP b, where OP is defined by ::CUDPPOperator \a oper.
|
||||
*/
|
||||
static __device__ T op(const T a, const T b)
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
case CUDPP_ADD:
|
||||
return a + b;
|
||||
case CUDPP_MULTIPLY:
|
||||
return a * b;
|
||||
case CUDPP_MIN:
|
||||
return min(a, b);
|
||||
case CUDPP_MAX:
|
||||
return max(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the identity element defined for type \a T */
|
||||
static __device__ T identity() { return 0; }
|
||||
};
|
||||
|
||||
// specializations for different types
|
||||
template <CUDPPOperator oper>
|
||||
class Operator <int, oper>
|
||||
{
|
||||
public:
|
||||
static __device__ int op(const int a, const int b)
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
default:
|
||||
case CUDPP_ADD:
|
||||
return a + b;
|
||||
case CUDPP_MULTIPLY:
|
||||
return a * b;
|
||||
case CUDPP_MIN:
|
||||
return min(a, b);
|
||||
case CUDPP_MAX:
|
||||
return max(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
static __device__ int identity()
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
default:
|
||||
case CUDPP_ADD:
|
||||
return 0;
|
||||
case CUDPP_MULTIPLY:
|
||||
return 1;
|
||||
case CUDPP_MIN:
|
||||
return INT_MAX;
|
||||
case CUDPP_MAX:
|
||||
return INT_MIN;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <CUDPPOperator oper>
|
||||
class Operator <unsigned int, oper>
|
||||
{
|
||||
public:
|
||||
static __device__ unsigned int op(const unsigned int a, const unsigned int b)
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
default:
|
||||
case CUDPP_ADD:
|
||||
return a + b;
|
||||
case CUDPP_MULTIPLY:
|
||||
return a * b;
|
||||
case CUDPP_MIN:
|
||||
return min(a, b);
|
||||
case CUDPP_MAX:
|
||||
return max(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
static __device__ unsigned int identity()
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
default:
|
||||
case CUDPP_ADD:
|
||||
return 0;
|
||||
case CUDPP_MULTIPLY:
|
||||
return 1;
|
||||
case CUDPP_MIN:
|
||||
return UINT_MAX;
|
||||
case CUDPP_MAX:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <CUDPPOperator oper>
|
||||
class Operator <float, oper>
|
||||
{
|
||||
public:
|
||||
static __device__ float op(const float a, const float b)
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
default:
|
||||
case CUDPP_ADD:
|
||||
return a + b;
|
||||
case CUDPP_MULTIPLY:
|
||||
return a * b;
|
||||
case CUDPP_MIN:
|
||||
return min(a, b);
|
||||
case CUDPP_MAX:
|
||||
return max(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
static __device__ float identity()
|
||||
{
|
||||
switch (oper)
|
||||
{
|
||||
default:
|
||||
case CUDPP_ADD:
|
||||
return 0.0f;
|
||||
case CUDPP_MULTIPLY:
|
||||
return 1.0f;
|
||||
case CUDPP_MIN:
|
||||
return FLT_MAX;
|
||||
case CUDPP_MAX:
|
||||
return -FLT_MAX;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __CUDPP_UTIL_H__
|
||||
|
||||
// Leave this at the end of the file
|
||||
// Local Variables:
|
||||
// mode:c++
|
||||
// c-file-style: "NVIDIA"
|
||||
// End:
|
||||
Reference in New Issue
Block a user