- /***************************************************************************
- * Copyright (c) Johan Mabille, Sylvain Corlay and Wolf Vollprecht *
- * Copyright (c) QuantStack *
- * *
- * Distributed under the terms of the BSD 3-Clause License. *
- * *
- * The full license is in the file LICENSE, distributed with this software. *
- ****************************************************************************/
- #ifndef XTENSOR_ASSIGN_HPP
- #define XTENSOR_ASSIGN_HPP
- #include <algorithm>
- #include <functional>
- #include <type_traits>
- #include <utility>
- #include <xtl/xcomplex.hpp>
- #include <xtl/xsequence.hpp>
- #include "xexpression.hpp"
- #include "xfunction.hpp"
- #include "xiterator.hpp"
- #include "xstrides.hpp"
- #include "xtensor_config.hpp"
- #include "xtensor_forward.hpp"
- #include "xutils.hpp"
- #if defined(XTENSOR_USE_TBB)
- #include <tbb/tbb.h>
- #endif
- namespace xt
- {
- /********************
- * Assign functions *
- ********************/
- template <class E1, class E2>
- void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);
- template <class E1, class E2>
- void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2);
- template <class E1, class E2>
- void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);
- template <class E1, class E2, class F>
- void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);
- template <class E1, class E2>
- void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);
- template <class E1, class E2>
- void strided_assign(E1& e1, const E2& e2, std::false_type /*disable*/);
- template <class E1, class E2>
- void strided_assign(E1& e1, const E2& e2, std::true_type /*enable*/);
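- // Usage sketch (illustrative comment only, not part of this header's API): the free
- // functions declared above are the entry points the container assignment semantics are
- // expected to forward to, roughly along the lines of
- //
- //     xt::xarray<double> a = {{1., 2.}, {3., 4.}};
- //     xt::xarray<double> b;
- //     b = a + a;   // resizes b, then dispatches through assign_xexpression(b, a + a)
- //     b += a;      // compound assignment goes through computed_assign instead
- //
- // Which concrete path (SIMD linear, strided loop, or stepper fallback) is taken is
- // decided at assignment time by xassign_traits below.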
- /************************
- * xexpression_assigner *
- ************************/
- template <class Tag>
- class xexpression_assigner_base;
- template <>
- class xexpression_assigner_base<xtensor_expression_tag>
- {
- public:
- template <class E1, class E2>
- static void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial);
- };
- template <class Tag>
- class xexpression_assigner : public xexpression_assigner_base<Tag>
- {
- public:
- using base_type = xexpression_assigner_base<Tag>;
- template <class E1, class E2>
- static void assign_xexpression(E1& e1, const E2& e2);
- template <class E1, class E2>
- static void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2);
- template <class E1, class E2, class F>
- static void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f);
- template <class E1, class E2>
- static void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2);
- private:
- template <class E1, class E2>
- static bool resize(E1& e1, const E2& e2);
- template <class E1, class F, class... CT>
- static bool resize(E1& e1, const xfunction<F, CT...>& e2);
- };
- /********************
- * stepper_assigner *
- ********************/
- template <class E1, class E2, layout_type L>
- class stepper_assigner
- {
- public:
- using lhs_iterator = typename E1::stepper;
- using rhs_iterator = typename E2::const_stepper;
- using shape_type = typename E1::shape_type;
- using index_type = xindex_type_t<shape_type>;
- using size_type = typename lhs_iterator::size_type;
- using difference_type = typename lhs_iterator::difference_type;
- stepper_assigner(E1& e1, const E2& e2);
- void run();
- void step(size_type i);
- void step(size_type i, size_type n);
- void reset(size_type i);
- void to_end(layout_type);
- private:
- E1& m_e1;
- lhs_iterator m_lhs;
- rhs_iterator m_rhs;
- index_type m_index;
- };
- /*******************
- * linear_assigner *
- *******************/
- template <bool simd_assign>
- class linear_assigner
- {
- public:
- template <class E1, class E2>
- static void run(E1& e1, const E2& e2);
- };
- template <>
- class linear_assigner<false>
- {
- public:
- template <class E1, class E2>
- static void run(E1& e1, const E2& e2);
- private:
- template <class E1, class E2>
- static void run_impl(E1& e1, const E2& e2, std::true_type);
- template <class E1, class E2>
- static void run_impl(E1& e1, const E2& e2, std::false_type);
- };
- /*************************
- * strided_loop_assigner *
- *************************/
- namespace strided_assign_detail
- {
- struct loop_sizes_t
- {
- bool can_do_strided_assign;
- bool is_row_major;
- std::size_t inner_loop_size;
- std::size_t outer_loop_size;
- std::size_t cut;
- std::size_t dimension;
- };
- }
- template <bool simd>
- class strided_loop_assigner
- {
- public:
- using loop_sizes_t = strided_assign_detail::loop_sizes_t;
- // loop_sizes_t fields: can_do_strided_assign, is_row_major, inner_loop_size, outer_loop_size, cut, dimension
- template <class E1, class E2>
- static void run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes);
- template <class E1, class E2>
- static loop_sizes_t get_loop_sizes(E1& e1, const E2& e2);
- template <class E1, class E2>
- static void run(E1& e1, const E2& e2);
- };
- /***********************************
- * Assign functions implementation *
- ***********************************/
- template <class E1, class E2>
- inline void assign_data(xexpression<E1>& e1, const xexpression<E2>& e2, bool trivial)
- {
- using tag = xexpression_tag_t<E1, E2>;
- xexpression_assigner<tag>::assign_data(e1, e2, trivial);
- }
- template <class E1, class E2>
- inline void assign_xexpression(xexpression<E1>& e1, const xexpression<E2>& e2)
- {
- xtl::mpl::static_if<has_assign_to<E1, E2>::value>(
- [&](auto self)
- {
- self(e2).derived_cast().assign_to(e1);
- },
- /*else*/
- [&](auto /*self*/)
- {
- using tag = xexpression_tag_t<E1, E2>;
- xexpression_assigner<tag>::assign_xexpression(e1, e2);
- }
- );
- }
- template <class E1, class E2>
- inline void computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
- {
- using tag = xexpression_tag_t<E1, E2>;
- xexpression_assigner<tag>::computed_assign(e1, e2);
- }
- template <class E1, class E2, class F>
- inline void scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
- {
- using tag = xexpression_tag_t<E1, E2>;
- xexpression_assigner<tag>::scalar_computed_assign(e1, e2, std::forward<F>(f));
- }
- template <class E1, class E2>
- inline void assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
- {
- using tag = xexpression_tag_t<E1, E2>;
- xexpression_assigner<tag>::assert_compatible_shape(e1, e2);
- }
- /***************************************
- * xexpression_assigner implementation *
- ***************************************/
- namespace detail
- {
- template <class E1, class E2>
- constexpr bool linear_static_layout()
- {
- // A row_major or column_major container with dimension <= 1 is treated as having
- // layout any, which enables some performance improvements, for example when
- // assigning a column-major vector to a row-major vector.
- return compute_layout(
- select_layout<E1::static_layout, typename E1::shape_type>::value,
- select_layout<E2::static_layout, typename E2::shape_type>::value
- )
- != layout_type::dynamic;
- }
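- // Illustration (comment only): following the rule above, assigning a 1-D column-major
- // tensor to a 1-D row-major tensor is still considered statically linear, e.g.
- // linear_static_layout<xt::xtensor<double, 1, xt::layout_type::row_major>,
- //                      xt::xtensor<double, 1, xt::layout_type::column_major>>()
- // is expected to be true, whereas mixing row-major and column-major containers of
- // rank >= 2 yields layout_type::dynamic and therefore false.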
- template <class E1, class E2>
- inline auto is_linear_assign(const E1& e1, const E2& e2)
- -> std::enable_if_t<has_strides<E1>::value, bool>
- {
- return (E1::contiguous_layout && E2::contiguous_layout && linear_static_layout<E1, E2>())
- || (e1.is_contiguous() && e2.has_linear_assign(e1.strides()));
- }
- template <class E1, class E2>
- inline auto is_linear_assign(const E1&, const E2&) -> std::enable_if_t<!has_strides<E1>::value, bool>
- {
- return false;
- }
- template <class E1, class E2>
- inline bool linear_dynamic_layout(const E1& e1, const E2& e2)
- {
- return e1.is_contiguous() && e2.is_contiguous()
- && compute_layout(e1.layout(), e2.layout()) != layout_type::dynamic;
- }
- template <class E, class = void>
- struct has_step_leading : std::false_type
- {
- };
- template <class E>
- struct has_step_leading<E, void_t<decltype(std::declval<E>().step_leading())>> : std::true_type
- {
- };
- template <class T>
- struct use_strided_loop
- {
- static constexpr bool stepper_deref()
- {
- return std::is_reference<typename T::stepper::reference>::value;
- }
- static constexpr bool value = has_strides<T>::value
- && has_step_leading<typename T::stepper>::value && stepper_deref();
- };
- template <class T>
- struct use_strided_loop<xscalar<T>>
- {
- static constexpr bool value = true;
- };
- template <class F, class... CT>
- struct use_strided_loop<xfunction<F, CT...>>
- {
- static constexpr bool value = xtl::conjunction<use_strided_loop<std::decay_t<CT>>...>::value;
- };
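- // Illustration (comment only): a strided container such as xt::xarray<double> is expected
- // to satisfy use_strided_loop (it has strides, its stepper exposes step_leading and
- // dereferences to a reference); xscalar is accepted unconditionally, and an xfunction
- // qualifies only if all of its arguments do.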
- /**
- * Considering the assignment LHS = RHS, if the requested value type used for
- * loading simd from RHS is not complex while LHS value_type is complex,
- * the assignment fails. The reason is that SIMD batches of complex values cannot
- * be implicitly instantiated from batches of scalar values.
- * Making the constructor implicit does not fix the issue since in the end,
- * the assignment is done with vec.store(buffer) where vec is a batch of scalars
- * and buffer an array of complex. SIMD batches of scalars do not provide overloads
- * of store that accept buffer of complex values and that SHOULD NOT CHANGE.
- * Load and store overloads must accept SCALAR BUFFERS ONLY.
- * Therefore, the solution is to explicitly force the instantiation of complex
- * batches in the assignment mechanism. A common situation that triggers this
- * issue is:
- * xt::xarray<double> rhs = { 1, 2, 3 };
- * xt::xarray<std::complex<double>> lhs = rhs;
- */
- template <class T1, class T2>
- struct conditional_promote_to_complex
- {
- static constexpr bool cond = xtl::is_gen_complex<T1>::value && !xtl::is_gen_complex<T2>::value;
- // Alternative: use std::complex<T2> or xcomplex<T2, T2, bool> depending on T1
- using type = std::conditional_t<cond, T1, T2>;
- };
- template <class T1, class T2>
- using conditional_promote_to_complex_t = typename conditional_promote_to_complex<T1, T2>::type;
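- // Illustration (comment only): with the definition above,
- // conditional_promote_to_complex_t<std::complex<double>, double> is std::complex<double>,
- // forcing complex SIMD batches for the RHS loads, while
- // conditional_promote_to_complex_t<double, double> and
- // conditional_promote_to_complex_t<std::complex<double>, std::complex<double>>
- // are simply double and std::complex<double> respectively.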
- }
- template <class E1, class E2>
- class xassign_traits
- {
- private:
- using e1_value_type = typename E1::value_type;
- using e2_value_type = typename E2::value_type;
- template <class T>
- using is_bool = std::is_same<T, bool>;
- static constexpr bool is_bool_conversion()
- {
- return is_bool<e2_value_type>::value && !is_bool<e1_value_type>::value;
- }
- static constexpr bool contiguous_layout()
- {
- return E1::contiguous_layout && E2::contiguous_layout;
- }
- static constexpr bool convertible_types()
- {
- return std::is_convertible<e2_value_type, e1_value_type>::value && !is_bool_conversion();
- }
- static constexpr bool use_xsimd()
- {
- return xt_simd::simd_traits<int8_t>::size > 1;
- }
- template <class T>
- static constexpr bool simd_size_impl()
- {
- return xt_simd::simd_traits<T>::size > 1 || (is_bool<T>::value && use_xsimd());
- }
- static constexpr bool simd_size()
- {
- return simd_size_impl<e1_value_type>() && simd_size_impl<e2_value_type>();
- }
- static constexpr bool simd_interface()
- {
- return has_simd_interface<E1, requested_value_type>()
- && has_simd_interface<E2, requested_value_type>();
- }
- public:
- // constexpr methods are used instead of constexpr data members to avoid the need for
- // namespace-scope definitions of such data members (which would be required since they are odr-used).
- static constexpr bool simd_assign()
- {
- return convertible_types() && simd_size() && simd_interface();
- }
- static constexpr bool linear_assign(const E1& e1, const E2& e2, bool trivial)
- {
- return trivial && detail::is_linear_assign(e1, e2);
- }
- static constexpr bool strided_assign()
- {
- return detail::use_strided_loop<E1>::value && detail::use_strided_loop<E2>::value;
- }
- static constexpr bool simd_linear_assign()
- {
- return contiguous_layout() && simd_assign();
- }
- static constexpr bool simd_strided_assign()
- {
- return strided_assign() && simd_assign();
- }
- static constexpr bool simd_linear_assign(const E1& e1, const E2& e2)
- {
- return simd_assign() && detail::linear_dynamic_layout(e1, e2);
- }
- using e2_requested_value_type = std::
- conditional_t<is_bool<e2_value_type>::value, typename E2::bool_load_type, e2_value_type>;
- using requested_value_type = detail::conditional_promote_to_complex_t<e1_value_type, e2_requested_value_type>;
- };
- template <class E1, class E2>
- inline void xexpression_assigner_base<xtensor_expression_tag>::assign_data(
- xexpression<E1>& e1,
- const xexpression<E2>& e2,
- bool trivial
- )
- {
- E1& de1 = e1.derived_cast();
- const E2& de2 = e2.derived_cast();
- using traits = xassign_traits<E1, E2>;
- bool linear_assign = traits::linear_assign(de1, de2, trivial);
- constexpr bool simd_assign = traits::simd_assign();
- constexpr bool simd_linear_assign = traits::simd_linear_assign();
- constexpr bool simd_strided_assign = traits::simd_strided_assign();
- if (linear_assign)
- {
- if (simd_linear_assign || traits::simd_linear_assign(de1, de2))
- {
- // Do not use linear_assigner<true> here since it will make the compiler
- // instantiate this branch even if the runtime condition is false, resulting
- // in compilation error for expressions that do not provide a SIMD interface.
- // simd_assign is true if simd_linear_assign() or simd_linear_assign(de1, de2)
- // is true.
- linear_assigner<simd_assign>::run(de1, de2);
- }
- else
- {
- linear_assigner<false>::run(de1, de2);
- }
- }
- else if (simd_strided_assign)
- {
- strided_loop_assigner<simd_strided_assign>::run(de1, de2);
- }
- else
- {
- stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(de1, de2).run();
- }
- }
- template <class Tag>
- template <class E1, class E2>
- inline void xexpression_assigner<Tag>::assign_xexpression(E1& e1, const E2& e2)
- {
- bool trivial_broadcast = resize(e1.derived_cast(), e2.derived_cast());
- base_type::assign_data(e1, e2, trivial_broadcast);
- }
- template <class Tag>
- template <class E1, class E2>
- inline void xexpression_assigner<Tag>::computed_assign(xexpression<E1>& e1, const xexpression<E2>& e2)
- {
- using shape_type = typename E1::shape_type;
- using comparator_type = std::greater<typename shape_type::value_type>;
- using size_type = typename E1::size_type;
- E1& de1 = e1.derived_cast();
- const E2& de2 = e2.derived_cast();
- size_type dim2 = de2.dimension();
- shape_type shape = uninitialized_shape<shape_type>(dim2);
- bool trivial_broadcast = de2.broadcast_shape(shape, true);
- auto&& de1_shape = de1.shape();
- if (dim2 > de1.dimension()
- || std::lexicographical_compare(
- shape.begin(),
- shape.end(),
- de1_shape.begin(),
- de1_shape.end(),
- comparator_type()
- ))
- {
- typename E1::temporary_type tmp(shape);
- base_type::assign_data(tmp, e2, trivial_broadcast);
- de1.assign_temporary(std::move(tmp));
- }
- else
- {
- base_type::assign_data(e1, e2, trivial_broadcast);
- }
- }
- template <class Tag>
- template <class E1, class E2, class F>
- inline void xexpression_assigner<Tag>::scalar_computed_assign(xexpression<E1>& e1, const E2& e2, F&& f)
- {
- E1& d = e1.derived_cast();
- using size_type = typename E1::size_type;
- auto dst = d.storage().begin();
- for (size_type i = d.size(); i > 0; --i)
- {
- *dst = f(*dst, e2);
- ++dst;
- }
- }
- template <class Tag>
- template <class E1, class E2>
- inline void
- xexpression_assigner<Tag>::assert_compatible_shape(const xexpression<E1>& e1, const xexpression<E2>& e2)
- {
- const E1& de1 = e1.derived_cast();
- const E2& de2 = e2.derived_cast();
- if (!broadcastable(de2.shape(), de1.shape()))
- {
- throw_broadcast_error(de2.shape(), de1.shape());
- }
- }
- namespace detail
- {
- template <bool B, class... CT>
- struct static_trivial_broadcast;
- template <class... CT>
- struct static_trivial_broadcast<true, CT...>
- {
- static constexpr bool value = detail::promote_index<typename std::decay_t<CT>::shape_type...>::value;
- };
- template <class... CT>
- struct static_trivial_broadcast<false, CT...>
- {
- static constexpr bool value = false;
- };
- }
- template <class Tag>
- template <class E1, class E2>
- inline bool xexpression_assigner<Tag>::resize(E1& e1, const E2& e2)
- {
- // If our RHS is not a xfunction, we know that the RHS is at least potentially trivial
- // We check the strides of the RHS in detail::is_trivial_broadcast to see if they match up!
- // So we can skip a shape copy and a call to broadcast_shape(...)
- e1.resize(e2.shape());
- return true;
- }
- template <class Tag>
- template <class E1, class F, class... CT>
- inline bool xexpression_assigner<Tag>::resize(E1& e1, const xfunction<F, CT...>& e2)
- {
- return xtl::mpl::static_if<detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value>(
- [&](auto /*self*/)
- {
- /*
- * If the shape of the xfunction is statically known, we can compute the broadcast triviality
- * at compile time plus we can resize right away.
- */
- // resize in case LHS is not a fixed size container. If it is, this is a NOP
- e1.resize(typename xfunction<F, CT...>::shape_type{});
- return detail::static_trivial_broadcast<
- detail::is_fixed<typename xfunction<F, CT...>::shape_type>::value,
- CT...>::value;
- },
- /* else */
- [&](auto /*self*/)
- {
- using index_type = xindex_type_t<typename E1::shape_type>;
- using size_type = typename E1::size_type;
- size_type size = e2.dimension();
- index_type shape = uninitialized_shape<index_type>(size);
- bool trivial_broadcast = e2.broadcast_shape(shape, true);
- e1.resize(std::move(shape));
- return trivial_broadcast;
- }
- );
- }
- /***********************************
- * stepper_assigner implementation *
- ***********************************/
- template <class FROM, class TO>
- struct is_narrowing_conversion
- {
- using argument_type = std::decay_t<FROM>;
- using result_type = std::decay_t<TO>;
- static const bool value = xtl::is_arithmetic<result_type>::value
- && (sizeof(result_type) < sizeof(argument_type)
- || (xtl::is_integral<result_type>::value
- && std::is_floating_point<argument_type>::value));
- };
- template <class FROM, class TO>
- struct has_sign_conversion
- {
- using argument_type = std::decay_t<FROM>;
- using result_type = std::decay_t<TO>;
- static const bool value = xtl::is_signed<argument_type>::value != xtl::is_signed<result_type>::value;
- };
- template <class FROM, class TO>
- struct has_assign_conversion
- {
- using argument_type = std::decay_t<FROM>;
- using result_type = std::decay_t<TO>;
- static const bool value = is_narrowing_conversion<argument_type, result_type>::value
- || has_sign_conversion<argument_type, result_type>::value;
- };
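- // Illustration (comment only, assuming xtl::is_signed/xtl::is_integral follow their std
- // counterparts for builtin types): has_assign_conversion<double, int>::value is true
- // (floating point to integral is narrowing), has_assign_conversion<int, unsigned>::value
- // is true (sign mismatch), and has_assign_conversion<float, double>::value is false,
- // so no conditional_cast is needed for the latter.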
- template <class E1, class E2, layout_type L>
- inline stepper_assigner<E1, E2, L>::stepper_assigner(E1& e1, const E2& e2)
- : m_e1(e1)
- , m_lhs(e1.stepper_begin(e1.shape()))
- , m_rhs(e2.stepper_begin(e1.shape()))
- , m_index(xtl::make_sequence<index_type>(e1.shape().size(), size_type(0)))
- {
- }
- template <class E1, class E2, layout_type L>
- inline void stepper_assigner<E1, E2, L>::run()
- {
- using tmp_size_type = typename E1::size_type;
- using argument_type = std::decay_t<decltype(*m_rhs)>;
- using result_type = std::decay_t<decltype(*m_lhs)>;
- constexpr bool needs_cast = has_assign_conversion<argument_type, result_type>::value;
- tmp_size_type s = m_e1.size();
- for (tmp_size_type i = 0; i < s; ++i)
- {
- *m_lhs = conditional_cast<needs_cast, result_type>(*m_rhs);
- stepper_tools<L>::increment_stepper(*this, m_index, m_e1.shape());
- }
- }
- template <class E1, class E2, layout_type L>
- inline void stepper_assigner<E1, E2, L>::step(size_type i)
- {
- m_lhs.step(i);
- m_rhs.step(i);
- }
- template <class E1, class E2, layout_type L>
- inline void stepper_assigner<E1, E2, L>::step(size_type i, size_type n)
- {
- m_lhs.step(i, n);
- m_rhs.step(i, n);
- }
- template <class E1, class E2, layout_type L>
- inline void stepper_assigner<E1, E2, L>::reset(size_type i)
- {
- m_lhs.reset(i);
- m_rhs.reset(i);
- }
- template <class E1, class E2, layout_type L>
- inline void stepper_assigner<E1, E2, L>::to_end(layout_type l)
- {
- m_lhs.to_end(l);
- m_rhs.to_end(l);
- }
- /**********************************
- * linear_assigner implementation *
- **********************************/
- template <bool simd_assign>
- template <class E1, class E2>
- inline void linear_assigner<simd_assign>::run(E1& e1, const E2& e2)
- {
- using lhs_align_mode = xt_simd::container_alignment_t<E1>;
- constexpr bool is_aligned = std::is_same<lhs_align_mode, aligned_mode>::value;
- using rhs_align_mode = std::conditional_t<is_aligned, inner_aligned_mode, unaligned_mode>;
- using e1_value_type = typename E1::value_type;
- using e2_value_type = typename E2::value_type;
- using value_type = typename xassign_traits<E1, E2>::requested_value_type;
- using simd_type = xt_simd::simd_type<value_type>;
- using size_type = typename E1::size_type;
- size_type size = e1.size();
- constexpr size_type simd_size = simd_type::size;
- constexpr bool needs_cast = has_assign_conversion<e1_value_type, e2_value_type>::value;
- size_type align_begin = is_aligned ? 0 : xt_simd::get_alignment_offset(e1.data(), size, simd_size);
- size_type align_end = align_begin + ((size - align_begin) & ~(simd_size - 1));
- for (size_type i = 0; i < align_begin; ++i)
- {
- e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
- }
- #if defined(XTENSOR_USE_TBB)
- if (size >= XTENSOR_TBB_THRESHOLD)
- {
- tbb::static_partitioner ap;
- tbb::parallel_for(
- align_begin,
- align_end,
- simd_size,
- [&e1, &e2](size_t i)
- {
- e1.template store_simd<lhs_align_mode>(
- i,
- e2.template load_simd<rhs_align_mode, value_type>(i)
- );
- },
- ap
- );
- }
- else
- {
- for (size_type i = align_begin; i < align_end; i += simd_size)
- {
- e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
- }
- }
- #elif defined(XTENSOR_USE_OPENMP)
- if (size >= size_type(XTENSOR_OPENMP_TRESHOLD))
- {
- #pragma omp parallel for default(none) shared(align_begin, align_end, e1, e2)
- #ifndef _WIN32
- for (size_type i = align_begin; i < align_end; i += simd_size)
- {
- e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
- }
- #else
- for (auto i = static_cast<std::ptrdiff_t>(align_begin); i < static_cast<std::ptrdiff_t>(align_end);
- i += static_cast<std::ptrdiff_t>(simd_size))
- {
- size_type ui = static_cast<size_type>(i);
- e1.template store_simd<lhs_align_mode>(ui, e2.template load_simd<rhs_align_mode, value_type>(ui));
- }
- #endif
- }
- else
- {
- for (size_type i = align_begin; i < align_end; i += simd_size)
- {
- e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
- }
- }
- #else
- for (size_type i = align_begin; i < align_end; i += simd_size)
- {
- e1.template store_simd<lhs_align_mode>(i, e2.template load_simd<rhs_align_mode, value_type>(i));
- }
- #endif
- for (size_type i = align_end; i < size; ++i)
- {
- e1.data_element(i) = conditional_cast<needs_cast, e1_value_type>(e2.data_element(i));
- }
- }
- template <class E1, class E2>
- inline void linear_assigner<false>::run(E1& e1, const E2& e2)
- {
- using is_convertible = std::
- is_convertible<typename std::decay_t<E2>::value_type, typename std::decay_t<E1>::value_type>;
- // If the types are not compatible, this function is still instantiated but never called.
- // To avoid compilation problems in this effectively unused code, the corresponding
- // run_impl overload is empty in that case.
- run_impl(e1, e2, is_convertible());
- }
- template <class E1, class E2>
- inline void linear_assigner<false>::run_impl(E1& e1, const E2& e2, std::true_type /*is_convertible*/)
- {
- using value_type = typename E1::value_type;
- using size_type = typename E1::size_type;
- auto src = linear_begin(e2);
- auto dst = linear_begin(e1);
- size_type n = e1.size();
- #if defined(XTENSOR_USE_TBB)
- tbb::static_partitioner sp;
- tbb::parallel_for(
- std::ptrdiff_t(0),
- static_cast<std::ptrdiff_t>(n),
- [&](std::ptrdiff_t i)
- {
- *(dst + i) = static_cast<value_type>(*(src + i));
- },
- sp
- );
- #elif defined(XTENSOR_USE_OPENMP)
- if (n >= XTENSOR_OPENMP_TRESHOLD)
- {
- #pragma omp parallel for default(none) shared(src, dst, n)
- for (std::ptrdiff_t i = std::ptrdiff_t(0); i < static_cast<std::ptrdiff_t>(n); i++)
- {
- *(dst + i) = static_cast<value_type>(*(src + i));
- }
- }
- else
- {
- for (; n > size_type(0); --n)
- {
- *dst = static_cast<value_type>(*src);
- ++src;
- ++dst;
- }
- }
- #else
- for (; n > size_type(0); --n)
- {
- *dst = static_cast<value_type>(*src);
- ++src;
- ++dst;
- }
- #endif
- }
- template <class E1, class E2>
- inline void linear_assigner<false>::run_impl(E1&, const E2&, std::false_type /*is_convertible*/)
- {
- XTENSOR_PRECONDITION(false, "Internal error: linear_assigner called with unrelated types.");
- }
- /****************************************
- * strided_loop_assigner implementation *
- ****************************************/
- namespace strided_assign_detail
- {
- template <layout_type layout>
- struct idx_tools;
- template <>
- struct idx_tools<layout_type::row_major>
- {
- template <class T>
- static void next_idx(T& outer_index, T& outer_shape)
- {
- auto i = outer_index.size();
- for (; i > 0; --i)
- {
- if (outer_index[i - 1] + 1 >= outer_shape[i - 1])
- {
- outer_index[i - 1] = 0;
- }
- else
- {
- outer_index[i - 1]++;
- break;
- }
- }
- }
- template <class T>
- static void nth_idx(size_t n, T& outer_index, const T& outer_shape)
- {
- dynamic_shape<std::size_t> stride_sizes;
- xt::resize_container(stride_sizes, outer_shape.size());
- // compute strides
- using size_type = typename T::size_type;
- for (size_type i = outer_shape.size(); i > 0; i--)
- {
- stride_sizes[i - 1] = (i == outer_shape.size()) ? 1 : stride_sizes[i] * outer_shape[i];
- }
- // compute index
- for (size_type i = 0; i < outer_shape.size(); i++)
- {
- auto d_idx = n / stride_sizes[i];
- outer_index[i] = d_idx;
- n -= d_idx * stride_sizes[i];
- }
- }
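- // Worked example (comment only): for outer_shape = {2, 3, 4} the strides computed above
- // are {12, 4, 1}, so nth_idx(7, outer_index, outer_shape) yields outer_index = {0, 1, 3},
- // i.e. 7 = 0 * 12 + 1 * 4 + 3 * 1.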
- };
- template <>
- struct idx_tools<layout_type::column_major>
- {
- template <class T>
- static void next_idx(T& outer_index, T& outer_shape)
- {
- using size_type = typename T::size_type;
- size_type i = 0;
- auto sz = outer_index.size();
- for (; i < sz; ++i)
- {
- if (outer_index[i] + 1 >= outer_shape[i])
- {
- outer_index[i] = 0;
- }
- else
- {
- outer_index[i]++;
- break;
- }
- }
- }
- template <class T>
- static void nth_idx(size_t n, T& outer_index, const T& outer_shape)
- {
- dynamic_shape<std::size_t> stride_sizes;
- xt::resize_container(stride_sizes, outer_shape.size());
- using size_type = typename T::size_type;
- // compute required strides
- for (size_type i = 0; i < outer_shape.size(); i++)
- {
- stride_sizes[i] = (i == 0) ? 1 : stride_sizes[i - 1] * outer_shape[i - 1];
- }
- // compute index
- for (size_type i = outer_shape.size(); i > 0;)
- {
- i--;
- auto d_idx = n / stride_sizes[i];
- outer_index[i] = d_idx;
- n -= d_idx * stride_sizes[i];
- }
- }
- };
- template <layout_type L, class S>
- struct check_strides_functor
- {
- using strides_type = S;
- check_strides_functor(const S& strides)
- : m_cut(L == layout_type::row_major ? 0 : strides.size())
- , m_strides(strides)
- {
- }
- template <class T, layout_type LE = L>
- std::enable_if_t<LE == layout_type::row_major, std::size_t> operator()(const T& el)
- {
- // All dimensions less than var have differing strides
- auto var = check_strides_overlap<layout_type::row_major>::get(m_strides, el.strides());
- if (var > m_cut)
- {
- m_cut = var;
- }
- return m_cut;
- }
- template <class T, layout_type LE = L>
- std::enable_if_t<LE == layout_type::column_major, std::size_t> operator()(const T& el)
- {
- auto var = check_strides_overlap<layout_type::column_major>::get(m_strides, el.strides());
- // All dimensions >= var have differing strides
- if (var < m_cut)
- {
- m_cut = var;
- }
- return m_cut;
- }
- template <class T>
- std::size_t operator()(const xt::xscalar<T>& /*el*/)
- {
- return m_cut;
- }
- template <class F, class... CT>
- std::size_t operator()(const xt::xfunction<F, CT...>& xf)
- {
- xt::for_each(*this, xf.arguments());
- return m_cut;
- }
- private:
- std::size_t m_cut;
- const strides_type& m_strides;
- };
- template <bool possible = true, class E1, class E2, std::enable_if_t<!has_strides<E1>::value || !possible, bool> = true>
- loop_sizes_t get_loop_sizes(const E1& e1, const E2&)
- {
- return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
- }
- template <bool possible = true, class E1, class E2, std::enable_if_t<has_strides<E1>::value && possible, bool> = true>
- loop_sizes_t get_loop_sizes(const E1& e1, const E2& e2)
- {
- using shape_value_type = typename E1::shape_type::value_type;
- bool is_row_major = true;
- // Try to find a row-major scheme first, where the outer loop runs over the first N = `cut`
- // dimensions and the inner loop runs over the remaining trailing dimensions.
- auto is_zero = [](auto i)
- {
- return i == 0;
- };
- auto&& strides = e1.strides();
- auto it_bwd = std::find_if_not(strides.rbegin(), strides.rend(), is_zero);
- bool de1_row_contiguous = it_bwd != strides.rend() && *it_bwd == 1;
- auto it_fwd = std::find_if_not(strides.begin(), strides.end(), is_zero);
- bool de1_col_contiguous = it_fwd != strides.end() && *it_fwd == 1;
- if (de1_row_contiguous)
- {
- is_row_major = true;
- }
- else if (de1_col_contiguous)
- {
- is_row_major = false;
- }
- else
- {
- // No strided loop possible.
- return {false, true, 1, e1.size(), e1.dimension(), e1.dimension()};
- }
- // Cut is the number of dimensions in the outer loop
- std::size_t cut = 0;
- if (is_row_major)
- {
- auto csf = check_strides_functor<layout_type::row_major, decltype(e1.strides())>(e1.strides());
- cut = csf(e2);
- // Ensure that at most one dimension is handled by the inner loop.
- if (cut < e1.strides().size() - 1)
- {
- // Only make the inner loop go over one dimension by default for now
- cut = e1.strides().size() - 1;
- }
- }
- else if (!is_row_major)
- {
- auto csf = check_strides_functor<layout_type::column_major, decltype(e1.strides())>(e1.strides());
- cut = csf(e2);
- if (cut > 1)
- {
- // Only make the inner loop go over one dimension by default for now
- cut = 1;
- }
- } // no other case needed: a layout that is neither row- nor column-contiguous already returned the fallback above
- std::size_t outer_loop_size = static_cast<std::size_t>(std::accumulate(
- e1.shape().begin(),
- e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
- shape_value_type(1),
- std::multiplies<shape_value_type>{}
- ));
- std::size_t inner_loop_size = static_cast<std::size_t>(std::accumulate(
- e1.shape().begin() + static_cast<std::ptrdiff_t>(cut),
- e1.shape().end(),
- shape_value_type(1),
- std::multiplies<shape_value_type>{}
- ));
- if (!is_row_major)
- {
- std::swap(outer_loop_size, inner_loop_size);
- }
- return {inner_loop_size > 1, is_row_major, inner_loop_size, outer_loop_size, cut, e1.dimension()};
- }
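- // Worked example (comment only, assuming a contiguous row-major LHS of shape {3, 4, 5}
- // assigned from an expression whose strides match in the last dimension): the row-major
- // branch raises cut to dimension - 1 = 2, so outer_loop_size = 3 * 4 = 12 and
- // inner_loop_size = 5, and the returned loop_sizes_t is {true, true, 5, 12, 2, 3}.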
- }
- template <bool simd>
- template <class E1, class E2>
- inline strided_assign_detail::loop_sizes_t strided_loop_assigner<simd>::get_loop_sizes(E1& e1, const E2& e2)
- {
- return strided_assign_detail::get_loop_sizes<simd>(e1, e2);
- }
- #define strided_parallel_assign
- template <bool simd>
- template <class E1, class E2>
- inline void strided_loop_assigner<simd>::run(E1& e1, const E2& e2, const loop_sizes_t& loop_sizes)
- {
- bool is_row_major = loop_sizes.is_row_major;
- std::size_t inner_loop_size = loop_sizes.inner_loop_size;
- std::size_t outer_loop_size = loop_sizes.outer_loop_size;
- std::size_t cut = loop_sizes.cut;
- // TODO can we get rid of this and use `shape_type`?
- dynamic_shape<std::size_t> idx, max_shape;
- if (is_row_major)
- {
- xt::resize_container(idx, cut);
- max_shape.assign(e1.shape().begin(), e1.shape().begin() + static_cast<std::ptrdiff_t>(cut));
- }
- else
- {
- xt::resize_container(idx, e1.shape().size() - cut);
- max_shape.assign(e1.shape().begin() + static_cast<std::ptrdiff_t>(cut), e1.shape().end());
- }
- // add this when we have std::array index!
- // std::fill(idx.begin(), idx.end(), 0);
- using e1_value_type = typename E1::value_type;
- using e2_value_type = typename E2::value_type;
- constexpr bool needs_cast = has_assign_conversion<e1_value_type, e2_value_type>::value;
- using value_type = typename xassign_traits<E1, E2>::requested_value_type;
- using simd_type = std::conditional_t<
- std::is_same<e1_value_type, bool>::value,
- xt_simd::simd_bool_type<value_type>,
- xt_simd::simd_type<value_type>>;
- std::size_t simd_size = inner_loop_size / simd_type::size;
- std::size_t simd_rest = inner_loop_size % simd_type::size;
- auto fct_stepper = e2.stepper_begin(e1.shape());
- auto res_stepper = e1.stepper_begin(e1.shape());
- // TODO in 1D case this is ambiguous -- could be RM or CM.
- // Use default layout to make decision
- std::size_t step_dim = 0;
- if (!is_row_major) // column major case
- {
- step_dim = cut;
- }
- #if defined(XTENSOR_USE_OPENMP) && defined(strided_parallel_assign)
- if (outer_loop_size >= XTENSOR_OPENMP_TRESHOLD / inner_loop_size)
- {
- std::size_t first_step = true;
- #pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper, res_stepper, idx)
- for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
- {
- if (first_step)
- {
- is_row_major
- ? strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape)
- : strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(ox, idx, max_shape);
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- res_stepper.step(i + step_dim, idx[i]);
- }
- first_step = false;
- }
- for (std::size_t i = 0; i < simd_size; ++i)
- {
- res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
- }
- for (std::size_t i = 0; i < simd_rest; ++i)
- {
- *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
- res_stepper.step_leading();
- fct_stepper.step_leading();
- }
- // next unaligned index
- is_row_major
- ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
- : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
- fct_stepper.to_begin();
- // need to step E1 as well if not contiguous assign (e.g. view)
- if (!E1::contiguous_layout)
- {
- res_stepper.to_begin();
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- res_stepper.step(i + step_dim, idx[i]);
- }
- }
- else
- {
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- }
- }
- }
- }
- else
- {
- #elif defined(strided_parallel_assign) && defined(XTENSOR_USE_TBB)
- if (outer_loop_size > XTENSOR_TBB_THRESHOLD / inner_loop_size)
- {
- tbb::static_partitioner sp;
- tbb::parallel_for(
- tbb::blocked_range<size_t>(0ul, outer_loop_size),
- [&e1, &e2, is_row_major, step_dim, simd_size, simd_rest, &max_shape, &idx_ = idx](
- const tbb::blocked_range<size_t>& r
- )
- {
- auto idx = idx_;
- auto fct_stepper = e2.stepper_begin(e1.shape());
- auto res_stepper = e1.stepper_begin(e1.shape());
- std::size_t first_step = true;
- // #pragma omp parallel for schedule(static) firstprivate(first_step, fct_stepper,
- // res_stepper, idx)
- for (std::size_t ox = r.begin(); ox < r.end(); ++ox)
- {
- if (first_step)
- {
- is_row_major
- ? strided_assign_detail::idx_tools<layout_type::row_major>::nth_idx(ox, idx, max_shape)
- : strided_assign_detail::idx_tools<layout_type::column_major>::nth_idx(
- ox,
- idx,
- max_shape
- );
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- res_stepper.step(i + step_dim, idx[i]);
- }
- first_step = false;
- }
- for (std::size_t i = 0; i < simd_size; ++i)
- {
- res_stepper.template store_simd(fct_stepper.template step_simd<value_type>());
- }
- for (std::size_t i = 0; i < simd_rest; ++i)
- {
- *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
- res_stepper.step_leading();
- fct_stepper.step_leading();
- }
- // next unaligned index
- is_row_major
- ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
- : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
- fct_stepper.to_begin();
- // need to step E1 as well if not contiguous assign (e.g. view)
- if (!E1::contiguous_layout)
- {
- res_stepper.to_begin();
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- res_stepper.step(i + step_dim, idx[i]);
- }
- }
- else
- {
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- }
- }
- }
- },
- sp
- );
- }
- else
- {
- #endif
- for (std::size_t ox = 0; ox < outer_loop_size; ++ox)
- {
- for (std::size_t i = 0; i < simd_size; ++i)
- {
- res_stepper.store_simd(fct_stepper.template step_simd<value_type>());
- }
- for (std::size_t i = 0; i < simd_rest; ++i)
- {
- *(res_stepper) = conditional_cast<needs_cast, e1_value_type>(*(fct_stepper));
- res_stepper.step_leading();
- fct_stepper.step_leading();
- }
- is_row_major
- ? strided_assign_detail::idx_tools<layout_type::row_major>::next_idx(idx, max_shape)
- : strided_assign_detail::idx_tools<layout_type::column_major>::next_idx(idx, max_shape);
- fct_stepper.to_begin();
- // need to step E1 as well if not contiguous assign (e.g. view)
- if (!E1::contiguous_layout)
- {
- res_stepper.to_begin();
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- res_stepper.step(i + step_dim, idx[i]);
- }
- }
- else
- {
- for (std::size_t i = 0; i < idx.size(); ++i)
- {
- fct_stepper.step(i + step_dim, idx[i]);
- }
- }
- }
- #if (defined(XTENSOR_USE_OPENMP) || defined(XTENSOR_USE_TBB)) && defined(strided_parallel_assign)
- }
- #endif
- }
- template <>
- template <class E1, class E2>
- inline void strided_loop_assigner<true>::run(E1& e1, const E2& e2)
- {
- strided_assign_detail::loop_sizes_t loop_sizes = strided_loop_assigner<true>::get_loop_sizes(e1, e2);
- if (loop_sizes.can_do_strided_assign)
- {
- run(e1, e2, loop_sizes);
- }
- else
- {
- // trigger the fallback assigner
- stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
- }
- }
- template <>
- template <class E1, class E2>
- inline void strided_loop_assigner<false>::run(E1& /*e1*/, const E2& /*e2*/, const loop_sizes_t&)
- {
- }
- template <>
- template <class E1, class E2>
- inline void strided_loop_assigner<false>::run(E1& e1, const E2& e2)
- {
- // trigger the fallback assigner
- stepper_assigner<E1, E2, default_assignable_layout(E1::static_layout)>(e1, e2).run();
- }
- }
- #endif