//=====================================================================
// Copyright 2021 (c), Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
// Pull changes:
//     Fixed build of cmp_core on Mac.
//     (#164)
//=====================================================================

#ifndef CMP_MATH_VEC4_H
#define CMP_MATH_VEC4_H

// C4201 is MSVC's "nameless struct/union" extension warning; the vector
// classes below rely on anonymous structs inside anonymous unions.
#ifdef _MSC_VER
#pragma warning(disable : 4201)
#endif

//====================================================
// Vector Class definitions for CPU & Intrinsics
//====================================================

#if defined(__linux__) || defined(_WIN32) || defined(__APPLE__)

//============================================= VEC2 ==================================================

/// Two-component vector of T with component-wise arithmetic.
template <class T>
class Vec2T
{
public:
    T x;
    T y;

    // *****************************************
    //     Constructors
    // *****************************************

    /// Default constructor. Both components are set to (T)0.
    Vec2T()
        : x(static_cast<T>(0))
        , y(static_cast<T>(0))
    {
    }

    /// Value constructor
    Vec2T(const T& vx, const T& vy)
        : x(vx)
        , y(vy)
    {
    }

    /// Copy constructor
    Vec2T(const Vec2T<T>& val)
        : x(val.x)
        , y(val.y)
    {
    }

    /// Single value constructor.  Sets all components to the given value
    Vec2T(const T& v)
        : x(v)
        , y(v)
    {
    }

    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************

    /// Cast to const T*; the object itself is reinterpreted as an array of T.
    operator const T*() const
    {
        return reinterpret_cast<const T*>(this);
    }

    /// Cast to T*; the object itself is reinterpreted as an array of T.
    operator T*()
    {
        return reinterpret_cast<T*>(this);
    }

    /// Indexing (read-only). No bounds check is performed on i.
    const T& operator[](int i) const
    {
        return reinterpret_cast<const T*>(this)[i];
    }

    /// Indexing (mutable). No bounds check is performed on i.
    T& operator[](int i)
    {
        return reinterpret_cast<T*>(this)[i];
    }

    /// Assignment
    const Vec2T<T>& operator=(const Vec2T<T>& rhs)
    {
        x = rhs.x;
        y = rhs.y;
        return *this;
    }

    // *****************************************
    //     Comparison
    // *****************************************

    /// Equality comparison: true only when every component matches.
    bool operator==(const Vec2T<T>& rhs) const
    {
        return (x == rhs.x && y == rhs.y);
    }

    /// Inequality comparison: true when any component differs.
    bool operator!=(const Vec2T<T>& rhs) const
    {
        return (x != rhs.x || y != rhs.y);
    }

    // *****************************************
    //    Arithmetic
    // *****************************************

    /// Addition
    const Vec2T<T> operator+(const Vec2T<T>& rhs) const
    {
        return Vec2T<T>(x + rhs.x, y + rhs.y);
    }

    /// Subtraction
    const Vec2T<T> operator-(const Vec2T<T>& rhs) const
    {
        return Vec2T<T>(x - rhs.x, y - rhs.y);
    }

    /// Multiply (component-wise)
    const Vec2T<T> operator*(const Vec2T<T>& rhs) const
    {
        return Vec2T<T>(x * rhs.x, y * rhs.y);
    }

    /// Divide (component-wise)
    const Vec2T<T> operator/(const Vec2T<T>& rhs) const
    {
        return Vec2T<T>(x / rhs.x, y / rhs.y);
    }

    /// Multiply by scalar
    const Vec2T<T> operator*(const T& v) const
    {
        return Vec2T<T>(x * v, y * v);
    }

    /// Divide by scalar
    const Vec2T<T> operator/(const T& v) const
    {
        return Vec2T<T>(x / v, y / v);
    }

    /// Addition in-place
    Vec2T<T>& operator+=(const Vec2T<T>& rhs)
    {
        x += rhs.x;
        y += rhs.y;
        return *this;
    }

    /// Subtract in-place
    Vec2T<T>& operator-=(const Vec2T<T>& rhs)
    {
        x -= rhs.x;
        y -= rhs.y;
        return *this;
    }

    /// Scalar multiply in-place
    Vec2T<T>& operator*=(const T& v)
    {
        x *= v;
        y *= v;
        return *this;
    }

    /// Scalar divide in-place
    Vec2T<T>& operator/=(const T& v)
    {
        x /= v;
        y /= v;
        return *this;
    }
};

typedef Vec2T<float>        CMP_Vec2f;
typedef Vec2T<float>        CGU_Vec2f;
typedef Vec2T<float>        CGV_Vec2f;
typedef Vec2T<double>       CMP_Vec2d;
typedef Vec2T<int>          CMP_Vec2i;
typedef Vec2T<int>          CGU_Vec2i;
typedef Vec2T<unsigned int> CGU_Vec2ui;

//============================================= VEC3 ==================================================

/// Three-component vector of T with component-wise arithmetic.
/// The same storage can be addressed as x/y/z, as r/g/b, or (first two
/// components) as a Vec2T through rg.
template <class T>
class Vec3T
{
public:
    union
    {
        struct
        {
            T x, y, z;
        };
        struct
        {
            T r, g, b;
        };
        struct
        {
            Vec2T<T> rg;
        };
    };

    // *****************************************
    //     Constructors
    // *****************************************

    /// Default constructor. All components are set to (T)0.
    Vec3T()
        : x(static_cast<T>(0))
        , y(static_cast<T>(0))
        , z(static_cast<T>(0))
    {
    }

    /// Value constructor
    Vec3T(const T& vx, const T& vy, const T& vz)
        : x(vx)
        , y(vy)
        , z(vz)
    {
    }

    /// Copy constructor
    Vec3T(const Vec3T<T>& val)
        : x(val.x)
        , y(val.y)
        , z(val.z)
    {
    }

    /// Single value constructor.  Sets all components to the given value
    Vec3T(const T& v)
        : x(v)
        , y(v)
        , z(v)
    {
    }

    /// Array constructor.  Assumes a 3-component array (reads v[0]..v[2]).
    Vec3T(const T* v)
        : x(v[0])
        , y(v[1])
        , z(v[2])
    {
    }

    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************

    /// Cast to const T*; the object itself is reinterpreted as an array of T.
    operator const T*() const
    {
        return reinterpret_cast<const T*>(this);
    }

    /// Cast to T*; the object itself is reinterpreted as an array of T.
    operator T*()
    {
        return reinterpret_cast<T*>(this);
    }

    /// Assignment
    const Vec3T<T>& operator=(const Vec3T<T>& rhs)
    {
        x = rhs.x;
        y = rhs.y;
        z = rhs.z;
        return *this;
    }

    // *****************************************
    //     Comparison
    // *****************************************

    /// Equality comparison: true only when every component matches.
    bool operator==(const Vec3T<T>& rhs) const
    {
        return (x == rhs.x && y == rhs.y && z == rhs.z);
    }

    /// Inequality comparison: true when any component differs.
    bool operator!=(const Vec3T<T>& rhs) const
    {
        return (x != rhs.x || y != rhs.y || z != rhs.z);
    }

    // *****************************************
    //    Arithmetic
    // *****************************************

    /// Addition by vector
    const Vec3T<T> operator+(const Vec3T<T>& rhs) const
    {
        return Vec3T<T>(x + rhs.x, y + rhs.y, z + rhs.z);
    }

    /// Subtraction by vector
    const Vec3T<T> operator-(const Vec3T<T>& rhs) const
    {
        return Vec3T<T>(x - rhs.x, y - rhs.y, z - rhs.z);
    }

    /// Multiply by vector (component-wise)
    const Vec3T<T> operator*(const Vec3T<T>& rhs) const
    {
        return Vec3T<T>(x * rhs.x, y * rhs.y, z * rhs.z);
    }

    /// Divide by vector (component-wise)
    const Vec3T<T> operator/(const Vec3T<T>& rhs) const
    {
        return Vec3T<T>(x / rhs.x, y / rhs.y, z / rhs.z);
    }

    /// Multiply by scalar
    const Vec3T<T> operator*(const T& v) const
    {
        return Vec3T<T>(x * v, y * v, z * v);
    }

    /// Divide by scalar
    const Vec3T<T> operator/(const T& v) const
    {
        return Vec3T<T>(x / v, y / v, z / v);
    }

    /// Add by scalar
    const Vec3T<T> operator+(const T& v) const
    {
        return Vec3T<T>(x + v, y + v, z + v);
    }

    /// Subtract by scalar
    const Vec3T<T> operator-(const T& v) const
    {
        return Vec3T<T>(x - v, y - v, z - v);
    }

    /// Addition in-place
    Vec3T<T>& operator+=(const Vec3T<T>& rhs)
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        return *this;
    }

    /// Subtract in-place
    Vec3T<T>& operator-=(const Vec3T<T>& rhs)
    {
        x -= rhs.x;
        y -= rhs.y;
        z -= rhs.z;
        return *this;
    }

    /// Scalar multiply in-place
    Vec3T<T>& operator*=(const T& v)
    {
        x *= v;
        y *= v;
        z *= v;
        return *this;
    }

    /// Scalar Add in-place
    Vec3T<T>& operator+=(const T& v)
    {
        x += v;
        y += v;
        z += v;
        return *this;
    }

    /// Scalar divide in-place
    Vec3T<T>& operator/=(const T& v)
    {
        x /= v;
        y /= v;
        z /= v;
        return *this;
    }
};

typedef Vec3T<bool>          CGU_Vec3bool;
typedef Vec3T<float>         CGU_Vec3f;
typedef Vec3T<float>         CGV_Vec3f;
typedef Vec3T<int>           CGU_Vec3i;
typedef Vec3T<unsigned char> CGU_Vec3uc;
typedef Vec3T<unsigned char> CGV_Vec3uc;

typedef Vec3T<float>         CMP_Vec3f;
typedef Vec3T<double>        CMP_Vec3d;
typedef Vec3T<int>           CMP_Vec3i;
typedef Vec3T<unsigned char> CMP_Vec3uc;
typedef Vec3T<unsigned int>  CMP_Vec3ui;

//============================================= VEC4 ==================================================

/// Four-component vector of T with component-wise arithmetic.
/// The same storage can be addressed as x/y/z/w, as r/g/b/a, or (first three
/// components) as a Vec3T through rgb.
template <class T>
class Vec4T
{
public:
    union
    {
        struct
        {
            T x, y, z, w;
        };
        struct
        {
            T r, g, b, a;
        };
        struct
        {
            Vec3T<T> rgb;  // a is undefined (not part of this view)
        };
    };

    // *****************************************
    //     Constructors
    // *****************************************

    /// Default constructor. All components are set to (T)0.
    Vec4T()
        : x(static_cast<T>(0))
        , y(static_cast<T>(0))
        , z(static_cast<T>(0))
        , w(static_cast<T>(0))
    {
    }

    /// Value constructor
    Vec4T(const T& vx, const T& vy, const T& vz, const T& vw)
        : x(vx)
        , y(vy)
        , z(vz)
        , w(vw)
    {
    }

    /// Copy constructor
    Vec4T(const Vec4T<T>& val)
        : x(val.x)
        , y(val.y)
        , z(val.z)
        , w(val.w)
    {
    }

    /// Single value constructor.  Sets all components to the given value
    Vec4T(const T& v)
        : x(v)
        , y(v)
        , z(v)
        , w(v)
    {
    }

    /// Array constructor.  Assumes a 4-component array (reads v[0]..v[3]).
    Vec4T(const T* v)
        : x(v[0])
        , y(v[1])
        , z(v[2])
        , w(v[3])
    {
    }

    // *****************************************
    //     Conversions/Assignment/Indexing
    // *****************************************

    /// Cast to const T*; the object itself is reinterpreted as an array of T.
    operator const T*() const
    {
        return reinterpret_cast<const T*>(this);
    }

    /// Cast to T*; the object itself is reinterpreted as an array of T.
    operator T*()
    {
        return reinterpret_cast<T*>(this);
    }

    /// Scalar assignment. Sets all components to the given value.
    const Vec4T<T>& operator=(const T& v)
    {
        x = v;
        y = v;
        z = v;
        w = v;
        return *this;
    }

    /// Assignment
    const Vec4T<T>& operator=(const Vec4T<T>& rhs)
    {
        x = rhs.x;
        y = rhs.y;
        z = rhs.z;
        w = rhs.w;
        return *this;
    }

    // *****************************************
    //     Comparison
    // *****************************************

    /// Equality comparison: true only when every component matches.
    bool operator==(const Vec4T<T>& rhs) const
    {
        return (x == rhs.x && y == rhs.y && z == rhs.z && w == rhs.w);
    }

    /// Inequality comparison: true when any component differs.
    bool operator!=(const Vec4T<T>& rhs) const
    {
        return (x != rhs.x || y != rhs.y || z != rhs.z || w != rhs.w);
    }

    // *****************************************
    //    Arithmetic
    // *****************************************

    /// Addition by vector
    const Vec4T<T> operator+(const Vec4T<T>& rhs) const
    {
        return Vec4T<T>(x + rhs.x, y + rhs.y, z + rhs.z, w + rhs.w);
    }

    /// Subtraction by vector
    const Vec4T<T> operator-(const Vec4T<T>& rhs) const
    {
        return Vec4T<T>(x - rhs.x, y - rhs.y, z - rhs.z, w - rhs.w);
    }

    /// Multiply by vector (component-wise)
    const Vec4T<T> operator*(const Vec4T<T>& rhs) const
    {
        return Vec4T<T>(x * rhs.x, y * rhs.y, z * rhs.z, w * rhs.w);
    }

    /// Divide by vector (component-wise)
    const Vec4T<T> operator/(const Vec4T<T>& rhs) const
    {
        return Vec4T<T>(x / rhs.x, y / rhs.y, z / rhs.z, w / rhs.w);
    }

    /// Multiply by scalar RHS
    const Vec4T<T> operator*(const T& v) const
    {
        return Vec4T<T>(x * v, y * v, z * v, w * v);
    }

    /// Divide by scalar
    const Vec4T<T> operator/(const T& v) const
    {
        return Vec4T<T>(x / v, y / v, z / v, w / v);
    }

    /// Add by scalar
    const Vec4T<T> operator+(const T& v) const
    {
        return Vec4T<T>(x + v, y + v, z + v, w + v);
    }

    /// Subtract by scalar
    const Vec4T<T> operator-(const T& v) const
    {
        return Vec4T<T>(x - v, y - v, z - v, w - v);
    }

    /// Negation operator. Returns the negative of the vector.
    const Vec4T<T> operator-() const
    {
        return Vec4T<T>(-x, -y, -z, -w);
    }

    /// Left bit shift vector by a scalar (only usable when T supports <<).
    const Vec4T<T> operator<<(const T& v) const
    {
        return Vec4T<T>(x << v, y << v, z << v, w << v);
    }

    /// Right bit shift vector by a scalar (only usable when T supports >>).
    const Vec4T<T> operator>>(const T& v) const
    {
        return Vec4T<T>(x >> v, y >> v, z >> v, w >> v);
    }

    /// Addition in-place
    Vec4T<T>& operator+=(const Vec4T<T>& rhs)
    {
        x += rhs.x;
        y += rhs.y;
        z += rhs.z;
        w += rhs.w;
        return *this;
    }

    /// Subtract in-place
    Vec4T<T>& operator-=(const Vec4T<T>& rhs)
    {
        x -= rhs.x;
        y -= rhs.y;
        z -= rhs.z;
        w -= rhs.w;
        return *this;
    }

    /// Scalar multiply in-place
    Vec4T<T>& operator*=(const T& v)
    {
        x *= v;
        y *= v;
        z *= v;
        w *= v;
        return *this;
    }

    /// Scalar divide in-place
    Vec4T<T>& operator/=(const T& v)
    {
        x /= v;
        y /= v;
        z /= v;
        w /= v;
        return *this;
    }
};

// template <class T>
// std::ostream& operator<<(std::ostream& out, Vec4T<T>& v)
// {
//     return out << v.x << ", " << v.y << ", " << v.z;
// }

#ifdef CMP_USE_XMMINTRIN
// NOTE(review): the names of the three angle-bracket includes below were
// missing from this copy of the header; <algorithm>, <math.h> and <float.h>
// are a reconstruction -- confirm against the upstream file.
#include <algorithm>
#include "xmmintrin.h"
#include <math.h>
#include <float.h>

// SSE Vec4
#ifndef _WIN32
class CMP_SSEVec4f
#else
#include "intrin.h"
class __declspec(align(16)) CMP_SSEVec4f
#endif
{
public:
    union
    {
        __m128 vec128;  // float Vector 128 bits in total (16 Bytes) = array of 4 floats
#ifndef _WIN32
        float f32[4];  // per-lane view used for indexing on non-Windows builds
#endif
    };

    // constructors

    /// Default constructor. The register is left uninitialized.
    inline CMP_SSEVec4f()
    {
    }

    /// Builds the vector from four lanes given in x, y, z, w order.
    inline CMP_SSEVec4f(float x, float y, float z, float w)
        : vec128(_mm_setr_ps(x, y, z, w))
    {
    }

    /// Wraps an existing __m128 value.
    inline CMP_SSEVec4f(__m128 vec)
        : vec128(vec)
    {
    }

    /// Loads four floats from data using _mm_load_ps, so data must be
    /// 16-byte aligned.
    inline CMP_SSEVec4f(const float* data)
        : vec128(_mm_load_ps(data))
    {
    }

    /// Broadcasts scalar to all four lanes.
    inline CMP_SSEVec4f(float scalar)
        : vec128(_mm_load1_ps(&scalar))
    {
    }

    // copy and assignment
    inline CMP_SSEVec4f(const CMP_SSEVec4f& init)
        : vec128(init.vec128)
    {
    }

    inline const CMP_SSEVec4f& operator=(const CMP_SSEVec4f& rhs)
    {
        vec128 = rhs.vec128;
        return *this;
    }

    // conversion to m128 type for direct use in _mm intrinsics
    inline operator __m128()
    {
        return vec128;
    }

    inline operator const __m128() const
    {
        return vec128;
    }

    // indexing (no bounds check is performed on i)
#ifndef _WIN32
    inline const float& operator[](int i) const
    {
        return f32[i];
    }

    inline float& operator[](int i)
    {
        return f32[i];
    }
#else
    inline const float& operator[](int i) const
    {
        return vec128.m128_f32[i];
    }

    inline float& operator[](int i)
    {
        return vec128.m128_f32[i];
    }
#endif

    // addition
    inline CMP_SSEVec4f operator+(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_add_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f& operator+=(const CMP_SSEVec4f& rhs)
    {
        vec128 = _mm_add_ps(vec128, rhs.vec128);
        return *this;
    }

    // multiplication
    inline CMP_SSEVec4f operator*(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_mul_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f& operator*=(const CMP_SSEVec4f& rhs)
    {
        vec128 = _mm_mul_ps(vec128, rhs.vec128);
        return *this;
    }

    // scalar multiplication
    //inline CMP_SSEVec4f operator*( float rhs ) const { return CMP_SSEVec4f( _mm_mul_ps(vec128, _mm_load1_ps(&rhs)) ); };
    //inline CMP_SSEVec4f& operator*=( float rhs ) { vec128 = _mm_mul_ps(vec128, _mm_load1_ps(&rhs)); return *this; };

    // subtraction
    inline CMP_SSEVec4f operator-(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_sub_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f& operator-=(const CMP_SSEVec4f& rhs)
    {
        vec128 = _mm_sub_ps(vec128, rhs.vec128);
        return *this;
    }

    // division
    inline CMP_SSEVec4f operator/(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_div_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f& operator/=(const CMP_SSEVec4f& rhs)
    {
        vec128 = _mm_div_ps(vec128, rhs.vec128);
        return *this;
    }

    // scalar division
    inline CMP_SSEVec4f operator/(float rhs) const
    {
        return CMP_SSEVec4f(_mm_div_ps(vec128, _mm_load1_ps(&rhs)));
    }

    inline CMP_SSEVec4f& operator/=(float rhs)
    {
        vec128 = _mm_div_ps(vec128, _mm_load1_ps(&rhs));
        return *this;
    }

    // comparison
    // these return 0 or 0xffffffff in each component
    inline CMP_SSEVec4f operator<(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_cmplt_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f operator>(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_cmpgt_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f operator<=(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_cmple_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f operator>=(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_cmpge_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f operator==(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_cmpeq_ps(vec128, rhs.vec128));
    }

    // bitwise operators
    inline CMP_SSEVec4f operator|(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_or_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f operator&(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_and_ps(vec128, rhs.vec128));
    }

    inline CMP_SSEVec4f operator^(const CMP_SSEVec4f& rhs) const
    {
        return CMP_SSEVec4f(_mm_xor_ps(vec128, rhs.vec128));
    }

    inline const CMP_SSEVec4f& operator|=(const CMP_SSEVec4f& rhs)
    {
        vec128 = _mm_or_ps(vec128, rhs.vec128);
        return *this;
    }

    inline const CMP_SSEVec4f& operator&=(const CMP_SSEVec4f& rhs)
    {
        vec128 = _mm_and_ps(vec128, rhs.vec128);
        return *this;
    }

    // There is no bitwise-not instruction for SSE, so it is emulated by
    // xor-ing with an all-ones mask (0xffffffff per lane).
    // To get 0xffffffff, we compare 0 == 0.
    inline CMP_SSEVec4f operator~() const
    {
        __m128 zero     = _mm_setzero_ps();
        __m128 all_ones = _mm_cmpeq_ps(zero, zero);
        return CMP_SSEVec4f(_mm_xor_ps(all_ones, vec128));
    }
};
#endif  // CMP_USE_XMMINTRIN

typedef Vec4T<float>         CMP_Vec4f;
typedef Vec4T<double>        CMP_Vec4d;
typedef Vec4T<int>           CMP_Vec4i;
// NOTE(review): the original comment on CMP_Vec4ui read "unsigned 16 bit";
// the element type here is reconstructed from the "ui" suffix -- confirm.
typedef Vec4T<unsigned int>  CMP_Vec4ui;  // unsigned int x,y,z,w
typedef Vec4T<unsigned char> CMP_Vec4uc;  // unsigned 8 bit x,y,z,w
typedef Vec4T<unsigned char> CGU_Vec4uc;  // unsigned 8 bit x,y,z,w
typedef Vec4T<unsigned char> CGV_Vec4uc;  // unsigned 8 bit x,y,z,w

#endif  // __linux__ || _WIN32 || __APPLE__

#endif  // Header Guard