xtensor_simd.hpp 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /***************************************************************************
  2. * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
  3. * Copyright (c) QuantStack *
  4. * *
  5. * Distributed under the terms of the BSD 3-Clause License. *
  6. * *
  7. * The full license is in the file LICENSE, distributed with this software. *
  8. ****************************************************************************/
  9. #ifndef XTENSOR_SIMD_HPP
  10. #define XTENSOR_SIMD_HPP
  11. #include <vector>
  12. #include <xtl/xdynamic_bitset.hpp>
  13. #include "xutils.hpp"
  14. #ifdef XTENSOR_USE_XSIMD
  15. #include <xsimd/xsimd.hpp>
  16. // #include <xsimd/memory/xsimd_load_store.hpp>
  17. #if defined(_MSV_VER) && (_MSV_VER < 1910)
  18. template <class T, class A>
  19. inline xsimd::batch_bool<T, A> isnan(const xsimd::batch<T, A>& b)
  20. {
  21. return xsimd::isnan(b);
  22. }
  23. #endif
  24. namespace xt_simd
  25. {
  26. template <class T, std::size_t A>
  27. using aligned_allocator = xsimd::aligned_allocator<T, A>;
  28. using aligned_mode = xsimd::aligned_mode;
  29. using unaligned_mode = xsimd::unaligned_mode;
  30. template <class A>
  31. using allocator_alignment = xsimd::allocator_alignment<A>;
  32. template <class A>
  33. using allocator_alignment_t = xsimd::allocator_alignment_t<A>;
  34. template <class C>
  35. using container_alignment = xsimd::container_alignment<C>;
  36. template <class C>
  37. using container_alignment_t = xsimd::container_alignment_t<C>;
  38. template <class T>
  39. using simd_traits = xsimd::simd_traits<T>;
  40. template <class T>
  41. using revert_simd_traits = xsimd::revert_simd_traits<T>;
  42. template <class T>
  43. using simd_type = xsimd::simd_type<T>;
  44. template <class T>
  45. using simd_bool_type = xsimd::simd_bool_type<T>;
  46. template <class T>
  47. using revert_simd_type = xsimd::revert_simd_type<T>;
  48. template <class T1, class T2>
  49. using simd_return_type = xsimd::simd_return_type<T1, T2>;
  50. using xsimd::broadcast_as;
  51. using xsimd::get_alignment_offset;
  52. using xsimd::load_as;
  53. using xsimd::select;
  54. using xsimd::store_as;
  55. template <class V>
  56. using is_batch_bool = xsimd::is_batch_bool<V>;
  57. template <class V>
  58. using is_batch_complex = xsimd::is_batch_complex<V>;
  59. template <class T1, class T2>
  60. using simd_condition = xsimd::detail::simd_condition<T1, T2>;
  61. }
  62. #else // XTENSOR_USE_XSIMD
  63. namespace xt_simd
  64. {
  65. template <class T, std::size_t A>
  66. class aligned_allocator;
  67. struct aligned_mode
  68. {
  69. };
  70. struct unaligned_mode
  71. {
  72. };
  73. template <class A>
  74. struct allocator_alignment
  75. {
  76. using type = unaligned_mode;
  77. };
  78. template <class A>
  79. using allocator_alignment_t = typename allocator_alignment<A>::type;
  80. template <class C>
  81. struct container_alignment
  82. {
  83. using type = unaligned_mode;
  84. };
  85. template <class C>
  86. using container_alignment_t = typename container_alignment<C>::type;
  87. template <class T>
  88. struct simd_traits
  89. {
  90. using type = T;
  91. using bool_type = bool;
  92. using batch_bool = bool;
  93. static constexpr std::size_t size = 1;
  94. };
  95. template <class T>
  96. struct revert_simd_traits
  97. {
  98. using type = T;
  99. static constexpr std::size_t size = simd_traits<type>::size;
  100. };
  101. template <class T>
  102. using simd_type = typename simd_traits<T>::type;
  103. template <class T>
  104. using simd_bool_type = typename simd_traits<T>::bool_type;
  105. template <class T>
  106. using revert_simd_type = typename revert_simd_traits<T>::type;
  107. template <class R, class T>
  108. inline simd_type<R> broadcast_as(const T& value)
  109. {
  110. return value;
  111. }
  112. template <class R, class T>
  113. inline simd_type<R> load_as(const T* src, aligned_mode)
  114. {
  115. return *src;
  116. }
  117. template <class R, class T>
  118. inline simd_type<R> load_as(const T* src, unaligned_mode)
  119. {
  120. return *src;
  121. }
  122. template <class R, class T>
  123. inline void store_as(R* dst, const simd_type<T>& src, aligned_mode)
  124. {
  125. *dst = src;
  126. }
  127. template <class R, class T>
  128. inline void store_as(R* dst, const simd_type<T>& src, unaligned_mode)
  129. {
  130. *dst = src;
  131. }
  132. template <class T>
  133. inline T select(bool cond, const T& t1, const T& t2)
  134. {
  135. return cond ? t1 : t2;
  136. }
  137. template <class T>
  138. inline std::size_t get_alignment_offset(const T* /*p*/, std::size_t size, std::size_t /*block_size*/)
  139. {
  140. return size;
  141. }
  142. template <class T1, class T2>
  143. using simd_return_type = simd_type<T2>;
  144. template <class V>
  145. struct is_batch_bool : std::false_type
  146. {
  147. };
  148. template <class V>
  149. struct is_batch_complex : std::false_type
  150. {
  151. };
  152. template <class T1, class T2>
  153. struct simd_condition : std::true_type
  154. {
  155. };
  156. }
  157. #endif // XTENSOR_USE_XSIMD
  158. namespace xt
  159. {
  160. using xt_simd::aligned_mode;
  161. using xt_simd::unaligned_mode;
  162. struct inner_aligned_mode
  163. {
  164. };
  165. namespace detail
  166. {
  167. template <class A1, class A2>
  168. struct driven_align_mode_impl
  169. {
  170. using type = std::conditional_t<std::is_same<A1, A2>::value, A1, ::xt_simd::unaligned_mode>;
  171. };
  172. template <class A>
  173. struct driven_align_mode_impl<inner_aligned_mode, A>
  174. {
  175. using type = A;
  176. };
  177. }
  178. template <class A1, class A2>
  179. struct driven_align_mode
  180. {
  181. using type = typename detail::driven_align_mode_impl<A1, A2>::type;
  182. };
  183. template <class A1, class A2>
  184. using driven_align_mode_t = typename detail::driven_align_mode_impl<A1, A2>::type;
  185. namespace detail
  186. {
  187. template <class E, class T, class = void>
  188. struct has_load_simd : std::false_type
  189. {
  190. };
  191. template <class E, class T>
  192. struct has_load_simd<
  193. E,
  194. T,
  195. void_t<decltype(std::declval<E>().template load_simd<aligned_mode, T>(typename E::size_type(0)))>>
  196. : std::true_type
  197. {
  198. };
  199. template <class E, class T, bool B = xt_simd::simd_condition<typename E::value_type, T>::value>
  200. struct has_simd_interface_impl : has_load_simd<E, T>
  201. {
  202. };
  203. template <class E, class T>
  204. struct has_simd_interface_impl<E, T, false> : std::false_type
  205. {
  206. };
  207. }
  208. template <class E, class T = typename std::decay_t<E>::value_type>
  209. struct has_simd_interface : detail::has_simd_interface_impl<E, T>
  210. {
  211. };
  212. template <class T>
  213. struct has_simd_type : std::integral_constant<bool, !std::is_same<T, xt_simd::simd_type<T>>::value>
  214. {
  215. };
  216. namespace detail
  217. {
  218. template <class F, class B, class = void>
  219. struct has_simd_apply_impl : std::false_type
  220. {
  221. };
  222. template <class F, class B>
  223. struct has_simd_apply_impl<F, B, void_t<decltype(&F::template simd_apply<B>)>> : std::true_type
  224. {
  225. };
  226. }
  227. template <class F, class B>
  228. struct has_simd_apply : detail::has_simd_apply_impl<F, B>
  229. {
  230. };
  231. template <class T>
  232. using bool_load_type = std::conditional_t<std::is_same<T, bool>::value, uint8_t, T>;
  233. template <class T>
  234. struct forbid_simd : std::false_type
  235. {
  236. };
  237. template <class A>
  238. struct forbid_simd<std::vector<bool, A>> : std::true_type
  239. {
  240. };
  241. template <class A>
  242. struct forbid_simd<const std::vector<bool, A>> : std::true_type
  243. {
  244. };
  245. template <class B, class A>
  246. struct forbid_simd<xtl::xdynamic_bitset<B, A>> : std::true_type
  247. {
  248. };
  249. template <class B, class A>
  250. struct forbid_simd<const xtl::xdynamic_bitset<B, A>> : std::true_type
  251. {
  252. };
  253. template <class C, class T1, class T2>
  254. struct container_simd_return_type
  255. : std::enable_if<!forbid_simd<C>::value, xt_simd::simd_return_type<T1, bool_load_type<T2>>>
  256. {
  257. };
  258. template <class C, class T1, class T2>
  259. using container_simd_return_type_t = typename container_simd_return_type<C, T1, T2>::type;
  260. }
  261. #endif