Add operators for simd::vector operations

2016-09-20 01:09:42 +02:00 · 2016-09-20 01:09:42 +02:00 · 1d3a80351a
commit 1d3a80351a
parent e0c9b34a5d
4 changed files with 108 additions and 8 deletions
--- a/include/vectorwrapper/vectorwrapper_simd.hpp
+++ b/include/vectorwrapper/vectorwrapper_simd.hpp
@ -26,27 +26,55 @@ namespace vwr {
 		template <typename V, std::size_t=::vwr::Vec<V>::dimensions, typename=typename ::vwr::Vec<V>::scalar_type>
 		class Vec;

+		template <typename V, std::size_t=::vwr::Vec<V>::dimensions, typename=typename ::vwr::Vec<V>::scalar_type>
+		class VecPack; //forward declaration; specialized for <V, 3, float> further down
+
 		template <typename V>
-		class Vec<V, 3, float> : public ::vwr::Vec<V, 3> {
-			typedef ::vwr::Vec<V, 3> base_class;
+		class Vec<V, 3, float> : public implem::VecBase<V>, public implem::VecAccessors<V, 3> { //specialization for 3-component float vectors
+			typedef ::vwr::implem::VecBase<V> base_class;
 		public:
 			static_assert(alignof(V) % 16 == 0, "Wrapped type must be aligned to 16");
 			static_assert(base_class::is_interleaved_mem == 0, "Expected tightly packed vector_type");

 			using typename base_class::vector_type;
+			typedef VecPack<V, 3, float> pack_type; //__m128 wrapper type returned by the binary operators
 			typedef float scalar_type;

 			Vec ( void ) = default;
 			Vec ( const Vec& ) = default;
+			Vec ( VecPack<V, 3, float> parPack ) __attribute__((always_inline)); //non-explicit, so an operator result can initialise a Vec directly
 			explicit Vec ( const vector_type& parIn ) : base_class(parIn) { }
 			explicit Vec ( const scalar_type parX ) : base_class(parX) { }
 			explicit Vec ( const base_class& parIn ) : base_class(parIn) { }

+			Vec& operator= ( VecPack<V, 3, float> parPack ) __attribute__((always_inline)); //stores the pack's lanes into this vector
+
 			template <typename V2> Vec& operator+= ( const Vec<V2, 3, scalar_type>& parOther );
 			template <typename V2> Vec& operator-= ( const Vec<V2, 3, scalar_type>& parOther );
 			template <typename V2> Vec& operator*= ( const Vec<V2, 3, scalar_type>& parOther );
 			template <typename V2> Vec& operator/= ( const Vec<V2, 3, scalar_type>& parOther );
 		};
+
+		template <typename V>
+		class VecPack<V, 3, float> { //thin wrapper around one __m128, used as operand and result of the SIMD operators
+		public:
+			VecPack ( const Vec<V, 3, float>& parVec ) __attribute__((always_inline)); //loads the vector's storage into a pack
+			VecPack ( __m128 parPack ) __attribute__((always_inline)); //wraps an existing __m128 value
+			VecPack ( float parValue ) __attribute__((always_inline)); //broadcasts parValue to every lane
+
+			__m128 pack; //the wrapped value; public so the operators can read it directly
+		};
+
+		//Lane-wise arithmetic operators. Each operand is converted to a
+		//VecPack<V, 3, float> (so Vec and VecPack operands can be mixed) and the
+		//result is returned as a VecPack, which converts implicitly back to Vec.
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator+ ( V1<V, 3, float> parLeft, V2<V, 3, float> parRight ) __attribute__((always_inline));
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator- ( V1<V, 3, float> parLeft, V2<V, 3, float> parRight ) __attribute__((always_inline));
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator* ( V1<V, 3, float> parLeft, V2<V, 3, float> parRight ) __attribute__((always_inline));
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator/ ( V1<V, 3, float> parLeft, V2<V, 3, float> parRight ) __attribute__((always_inline));
 	} //namespace simd
 } //namespace vwr

--- a/include/vectorwrapper/vectorwrapper_simd.inl
+++ b/include/vectorwrapper/vectorwrapper_simd.inl
@ -16,6 +16,16 @@

 namespace vwr {
 	namespace simd {
+		template <typename V>
+		inline Vec<V, 3, float>::Vec (VecPack<V, 3, float> parPack) {
+			_mm_store_ps(&this->x(), parPack.pack); //aligned 16-byte store starting at the x component
+		}
+
+		//Stores the lanes of parPack into this vector and returns *this so that
+		//assignments can be chained.
+		template <typename V>
+		inline Vec<V, 3, float>& Vec<V, 3, float>::operator= (VecPack<V, 3, float> parPack) {
+			_mm_store_ps(&this->x(), parPack.pack); //aligned 16-byte store starting at the x component
+			return *this; //required: flowing off the end of a non-void function is undefined behaviour
+		}
+
 		template <typename V>
 		template <typename V2>
 		auto Vec<V, 3, float>::operator+= (const Vec<V2, 3, scalar_type>& parOther) -> Vec& {
@ -43,5 +53,43 @@ namespace vwr {

 			return *this;
 		}
+
+		template <typename V>
+		inline VecPack<V, 3, float>::VecPack (const Vec<V, 3, float>& parVec) :
+			pack(_mm_load_ps(&parVec.x())) //aligned 16-byte load starting at the x component
+		{
+		}
+
+		template <typename V>
+		inline VecPack<V, 3, float>::VecPack (__m128 parPack) :
+			pack(parPack) //takes the value as-is; lets an intrinsic result convert to VecPack
+		{
+		}
+
+		//Builds a pack whose four lanes all hold parValue.
+		template <typename V>
+		inline VecPack<V, 3, float>::VecPack (float parValue) :
+			pack(_mm_set1_ps(parValue)) //broadcast by value; no need to take the parameter's address
+		{
+		}
+
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator+ (V1<V, 3, float> parLeft, V2<V, 3, float> parRight) {
+			//Convert both operands to packs, then add lane by lane.
+			const VecPack<V, 3, float> lhs(parLeft);
+			const VecPack<V, 3, float> rhs(parRight);
+			return VecPack<V, 3, float>(_mm_add_ps(lhs.pack, rhs.pack));
+		}
+
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator- (V1<V, 3, float> parLeft, V2<V, 3, float> parRight) {
+			//Convert both operands to packs, then subtract lane by lane.
+			const VecPack<V, 3, float> lhs(parLeft);
+			const VecPack<V, 3, float> rhs(parRight);
+			return VecPack<V, 3, float>(_mm_sub_ps(lhs.pack, rhs.pack));
+		}
+
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator* (V1<V, 3, float> parLeft, V2<V, 3, float> parRight) {
+			//Convert both operands to packs, then multiply lane by lane.
+			const VecPack<V, 3, float> lhs(parLeft);
+			const VecPack<V, 3, float> rhs(parRight);
+			return VecPack<V, 3, float>(_mm_mul_ps(lhs.pack, rhs.pack));
+		}
+
+		template <template <typename, std::size_t, typename> class V1, template <typename, std::size_t, typename> class V2, typename V>
+		inline VecPack<V, 3, float> operator/ (V1<V, 3, float> parLeft, V2<V, 3, float> parRight) {
+			//Convert both operands to packs, then divide lane by lane.
+			const VecPack<V, 3, float> lhs(parLeft);
+			const VecPack<V, 3, float> rhs(parRight);
+			return VecPack<V, 3, float>(_mm_div_ps(lhs.pack, rhs.pack));
+		}
 	} //namespace simd
 } //namespace vwr
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.6.4 FATAL_ERROR)
 include(CTest)

 set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O0 -std=c++11")
-set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -std=c++11")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -std=c++11 -g -fno-omit-frame-pointer") # NOTE(review): -g and -fno-omit-frame-pointer presumably added so optimized builds can be profiled; confirm

 add_subdirectory(gtest-1.7.0)
 set(GTEST_MAIN_CPP "${CMAKE_SOURCE_DIR}/gtest-1.7.0/src/gtest_main.cc")
--- a/test/speed/speed.cpp
+++ b/test/speed/speed.cpp
@ -2,6 +2,7 @@
 #include "vectorwrapper/vectorwrapper_simd.hpp"
 #include <gtest/gtest.h>
 #include <memory>
+#include <random>

 namespace vwr {
 	typedef std::aligned_storage<sizeof(float) * 3, 16>::type float3_storage;
@ -20,15 +21,38 @@ namespace vwr {
 } //namespace vwr

 TEST(vwr_speed, speed) {
+	std::minstd_rand randgen; //default-constructed (no explicit seed), so the values repeat on every run
+
 	typedef vwr::simd::Vec<vwr::float3_storage> simd_vec3;
 	static_assert(sizeof(simd_vec3) >= sizeof(float) * 3, "SIMD vector too small");

-	simd_vec3 v1(1.0f);
-	simd_vec3 v2(0.5f);
+	auto s1 = static_cast<float>(randgen()) / static_cast<float>(randgen.max()); //scalar reference for v1
+	auto s2 = static_cast<float>(randgen()) / static_cast<float>(randgen.max()); //scalar reference for v2
+	simd_vec3 v1(s1);
+	simd_vec3 v2(s2);

+	s1 += s2; //mirror every vector operation on the scalars, then compare component by component
 	v1 += v2;
+	EXPECT_FLOAT_EQ(s1, v1.x());
+	EXPECT_FLOAT_EQ(s1, v1.y());
+	EXPECT_FLOAT_EQ(s1, v1.z());

-	EXPECT_FLOAT_EQ(1.5f, v1.x());
-	EXPECT_FLOAT_EQ(1.5f, v1.y());
-	EXPECT_FLOAT_EQ(1.5f, v1.z());
+	auto s3 = s1 + s2;
+	simd_vec3 v3 = v1 + v2; //operator+ yields a pack that converts implicitly to simd_vec3
+	EXPECT_FLOAT_EQ(s3, v3.x());
+	EXPECT_FLOAT_EQ(s3, v3.y());
+	EXPECT_FLOAT_EQ(s3, v3.z());
+
+	auto s4 = (s3 - s2) * s1;
+	simd_vec3 v4 = (v3 - v2) * v1; //the intermediate (v3 - v2) pack is used directly as an operand
+	EXPECT_FLOAT_EQ(s4, v4.x());
+	EXPECT_FLOAT_EQ(s4, v4.y());
+	EXPECT_FLOAT_EQ(s4, v4.z());
+
+	simd_vec3::pack_type zeroeight(0.8f); //pack with 0.8f in every lane
+	auto s5 = s4 / s1 + 0.8f;
+	simd_vec3 v5 = v4 / v1 + zeroeight; //NOTE(review): the 4th lane of each vector is not set by this test but takes part in the division; confirm this is acceptable
+	EXPECT_FLOAT_EQ(s5, v5.x());
+	EXPECT_FLOAT_EQ(s5, v5.y());
+	EXPECT_FLOAT_EQ(s5, v5.z());
 }