PartialReduxEvaluator.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_PARTIALREDUX_H
#define EIGEN_PARTIALREDUX_H

#include "./InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

/* Logic deciding a strategy for unrolling of vectorized paths */
template<typename Func, typename Evaluator>
struct packetwise_redux_traits
{
  enum {
    OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
    Cost = OuterSize == Dynamic ? HugeCost
         : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost,
    Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
  };
};

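// The panel reduction is fully unrolled only when the estimated cost (OuterSize
// packet reads plus OuterSize-1 functor applications) stays within
// EIGEN_UNROLLING_LIMIT; dynamically sized panels always take the looped path.
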
/* Value to be returned when size==0, by default let's return 0 */
template<typename PacketType,typename Func>
PacketType packetwise_redux_empty_value(const Func& ) {
  const typename unpacket_traits<PacketType>::type zero(0);
  return pset1<PacketType>(zero);
}

/* For products the default is 1 */
template<typename PacketType,typename Scalar>
PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) {
  return pset1<PacketType>(Scalar(1));
}

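// These overloads supply the identity element of the reduction for empty panels:
// 0 for the default (additive) case and 1 for scalar_product_op, matching the
// conventions of sum() and prod() on empty objects.
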
/* Perform the actual reduction */
template<typename Func, typename Evaluator,
         int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling
>
struct packetwise_redux_impl;

/* Perform the actual reduction with unrolling */
template<typename Func, typename Evaluator>
struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
{
  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
  typedef typename Evaluator::Scalar Scalar;

  template<typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  PacketType run(const Evaluator &eval, const Func& func, Index /*size*/)
  {
    return redux_vec_unroller<Func, Evaluator, 0, packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,func);
  }
};

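// redux_vec_unroller recursively splits the [0, OuterSize) range at compile time,
// so the whole panel reduction expands into a fixed tree of packetOp calls with
// no runtime loop.
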
/* Add a specialization of redux_vec_unroller for size==0 at compile time.
 * This specialization is not required for general reductions, which is
 * why it is defined here.
 */
template<typename Func, typename Evaluator, Index Start>
struct redux_vec_unroller<Func, Evaluator, Start, 0>
{
  template<typename PacketType>
  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f)
  {
    return packetwise_redux_empty_value<PacketType>(f);
  }
};

/* Perform the actual reduction for dynamic sizes */
template<typename Func, typename Evaluator>
struct packetwise_redux_impl<Func, Evaluator, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename PacketType>
  static PacketType run(const Evaluator &eval, const Func& func, Index size)
  {
    if(size==0)
      return packetwise_redux_empty_value<PacketType>(func);

    const Index size4 = (size-1)&(~3);
    PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0);
    Index i = 1;
    // This loop is optimized for instruction pipelining:
    // - each iteration generates two independent instructions
    // - thanks to branch prediction and out-of-order execution we have independent instructions across loop iterations
    for(; i<size4; i+=4)
      p = func.packetOp(p,
            func.packetOp(
              func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+0,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+1,0)),
              func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+2,0),eval.template packetByOuterInner<Unaligned,PacketType>(i+3,0))));
    for(; i<size; ++i)
      p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0));
    return p;
  }
};

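// Example of the loop bounds above: for size==10, size4 = (10-1) & ~3 = 8, so the
// 4-way loop runs for i = 1 and i = 5 (covering outer indices 1..8 as two balanced
// packetOp trees), the scalar tail loop handles index 9, and index 0 seeded p.
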
template< typename ArgType, typename MemberOp, int Direction>
struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
{
  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
  typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
  typedef add_const_on_value_type_t<ArgTypeNested> ConstArgTypeNested;
  typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
  typedef typename ArgType::Scalar InputScalar;
  typedef typename XprType::Scalar Scalar;
  enum {
    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
  };
  typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
  enum {
    CoeffReadCost = TraversalSize==Dynamic ? HugeCost
                  : TraversalSize==0 ? 1
                  : int(TraversalSize) * int(evaluator<ArgType>::CoeffReadCost) + int(CostOpType::value),

    ArgFlags_ = evaluator<ArgType>::Flags,

    Vectorizable_ = bool(int(ArgFlags_)&PacketAccessBit)
                  && bool(MemberOp::Vectorizable)
                  && (Direction==int(Vertical) ? bool(ArgFlags_&RowMajorBit) : (ArgFlags_&RowMajorBit)==0)
                  && (TraversalSize!=0),

    Flags = (traits<XprType>::Flags&RowMajorBit)
          | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
          | (Vectorizable_ ? PacketAccessBit : 0)
          | LinearAccessBit,

    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
  };

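  // A partial reduction can be vectorized only when packets run along the
  // non-reduced direction: column-wise (Vertical) reductions need a row-major
  // argument and row-wise ones a column-major argument, so that a single packet
  // spans several results at once.
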
  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Scalar coeff(Index i, Index j) const
  {
    return coeff(Direction==Vertical ? j : i);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Scalar coeff(Index index) const
  {
    return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
  }

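  // Scalar path: each coefficient of the partial-reduction expression is the
  // member functor applied to the corresponding column (Vertical) or row
  // (Horizontal) of the nested expression.
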
  template<int LoadMode,typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketType packet(Index i, Index j) const
  {
    return packet<LoadMode,PacketType>(Direction==Vertical ? j : i);
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
  PacketType packet(Index idx) const
  {
    enum { PacketSize = internal::unpacket_traits<PacketType>::size };
    typedef Block<const ArgTypeNestedCleaned,
                  Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
                  Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime),
                  true /* InnerPanel */> PanelType;

    PanelType panel(m_arg,
                    Direction==Vertical ? 0 : idx,
                    Direction==Vertical ? idx : 0,
                    Direction==Vertical ? m_arg.rows() : Index(PacketSize),
                    Direction==Vertical ? Index(PacketSize) : m_arg.cols());

    // FIXME
    // See bug 1612: currently, if PacketSize==1 (e.g. complex<double> with 128-bit registers),
    // the storage order of the panel gets reversed and methods like packetByOuterInner no longer
    // make sense in this context. So let's just bypass "vectorization" in this case:
    if(PacketSize==1)
      return internal::pset1<PacketType>(coeff(idx));

    typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;
    PanelEvaluator panel_eval(panel);
    typedef typename MemberOp::BinaryOp BinaryOp;
    PacketType p = internal::packetwise_redux_impl<BinaryOp,PanelEvaluator>::template run<PacketType>(panel_eval,m_functor.binaryFunc(),m_arg.outerSize());
    return p;
  }

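  // Packet path: a PacketSize-wide panel of full columns (Vertical) or rows is
  // extracted as an inner-panel Block, and packetwise_redux_impl folds its outer
  // dimension with the member functor's underlying binary op, producing
  // PacketSize partial results in one packet.
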
protected:
  ConstArgTypeNested m_arg;
  const MemberOp m_functor;
};
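
// Illustrative usage (not part of this header): this evaluator backs partial
// reductions such as
//   Eigen::MatrixXf A = Eigen::MatrixXf::Random(4, 3);
//   Eigen::RowVectorXf colSums = A.colwise().sum();   // evaluates a PartialReduxExpr
//   Eigen::VectorXf    rowNorms = A.rowwise().norm(); // likewise, row-wise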

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_PARTIALREDUX_H