#ifndef EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
#define EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H
namespace Eigen {
namespace internal {

template<typename ReverseDimensions, typename XprType>
struct traits<TensorReverseOp<ReverseDimensions, XprType> >
    : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
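
// For example, with XprType = Tensor<float, 3> the traits above yield
// Scalar = float, NumDimensions = 3 and the Layout of the argument
// expression, so a reversed expression advertises exactly the same
// characteristics as its input to the rest of the framework.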
template<typename ReverseDimensions, typename XprType>
struct eval<TensorReverseOp<ReverseDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReverseOp<ReverseDimensions, XprType>& type;
};
template<typename ReverseDimensions, typename XprType>
struct nested<TensorReverseOp<ReverseDimensions, XprType>, 1,
              typename eval<TensorReverseOp<ReverseDimensions, XprType> >::type>
{
  typedef TensorReverseOp<ReverseDimensions, XprType> type;
};

}  // end namespace internal
template<typename ReverseDimensions, typename XprType>
class TensorReverseOp : public TensorBase<TensorReverseOp<ReverseDimensions,
                                          XprType>, WriteAccessors>
{
  public:
    typedef TensorBase<TensorReverseOp<ReverseDimensions, XprType>, WriteAccessors> Base;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReverseOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReverseOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReverseOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReverseOp(
        const XprType& expr, const ReverseDimensions& reverse_dims)
        : m_xpr(expr), m_reverse_dims(reverse_dims) {}

    EIGEN_DEVICE_FUNC const ReverseDimensions& reverse() const { return m_reverse_dims; }
    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>&
    expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorReverseOp)

  protected:
    typename XprType::Nested m_xpr;
    const ReverseDimensions m_reverse_dims;
};
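
// A minimal usage sketch (sizes and values are illustrative only, and assume
// the unsupported Tensor module is included):
//
//   Eigen::Tensor<float, 2> a(3, 4);
//   a.setRandom();
//   Eigen::array<bool, 2> rev{{true, false}};
//   Eigen::Tensor<float, 2> b = a.reverse(rev);  // b(i, j) == a(2 - i, j)
//
// TensorBase::reverse() builds a TensorReverseOp expression; nothing is
// evaluated until the expression is assigned to a tensor or a device.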
// Eval as rvalue
template<typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device>
{
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  typedef internal::TensorIntDivisor<Index> IndexDivisor;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = NumDims > 0,
    PreferBlockAccess = true,
    RawAccess = false
  };

  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index> TensorBlock;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_reverse(op.reverse()), m_device(device) {
    // Compute output dimensions and the strides used by reverseIndex().
    m_dimensions = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i-1] * m_dimensions[i-1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    } else {
      m_strides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i+1] * m_dimensions[i+1];
        if (m_strides[i] > 0) m_fastStrides[i] = IndexDivisor(m_strides[i]);
      }
    }
  }
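
  // For example, for dimensions (2, 3, 4) the strides are {1, 2, 6} in
  // column-major order and {12, 4, 1} in row-major order; m_fastStrides
  // caches TensorIntDivisor helpers so the divisions in reverseIndex() can
  // be replaced by cheaper multiply/shift sequences.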
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS
  // Map an index in the reversed (output) tensor to the corresponding index
  // in the underlying input tensor.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index reverseIndex(Index index) const {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[0]) {
        inputIndex += (m_dimensions[0] - index - 1);
      } else {
        inputIndex += index;
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 1; ++i) {
        Index idx = index / m_fastStrides[i];
        index -= idx * m_strides[i];
        if (m_reverse[i]) {
          idx = m_dimensions[i] - idx - 1;
        }
        inputIndex += idx * m_strides[i];
      }
      if (m_reverse[NumDims-1]) {
        inputIndex += (m_dimensions[NumDims-1] - index - 1);
      } else {
        inputIndex += index;
      }
    }
    return inputIndex;
  }
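
  // For example, assuming a column-major tensor of dimensions (4, 3) with
  // m_reverse = {true, false} and strides {1, 4}: output index 5, element
  // (1, 1), decomposes into index 1 along dimension 1 (not reversed) and
  // index 1 along dimension 0 (mapped to 4 - 1 - 1 = 2), so reverseIndex(5)
  // returns 2 + 1 * 4 = 6, i.e. element (2, 1) of the input.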
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
    return m_impl.coeff(reverseIndex(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const {
    // Gather PacketSize coefficients one at a time through reverseIndex().
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index + i);
    }
    return internal::pload<PacketReturnType>(values);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    // Block iteration is cheap, but every coefficient pays for an individual
    // reverse index computation, hence the extra compute cost per coefficient.
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size)
        .addCostPerCoeff({0, 0, 24});
  }
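
  // The {0, 0, 24} triple roughly corresponds to TensorOpCost's
  // (bytes_loaded, bytes_stored, compute_cycles): no extra memory traffic,
  // but an estimated 24 compute cycles per output coefficient for the
  // reverse index computation.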
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool = false) const {
    static const bool isColMajor =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);
    static const Index inner_dim_idx = isColMajor ? 0 : NumDims - 1;
    const bool inner_dim_reversed = m_reverse[inner_dim_idx];

    // Offset into the output block buffer.
    Index block_offset = 0;
    // Offset into the input tensor.
    Index input_offset = reverseIndex(desc.offset());
    // Initialize the block iterator state. Dimensions in this array are
    // always in inner-most -> outer-most order (column-major layout).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = isColMajor ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].count = 0;
      it[i].reverse = m_reverse[dim];

      it[i].block_stride =
          i == 0 ? 1 : (it[i - 1].size * it[i - 1].block_stride);
      it[i].block_span = it[i].block_stride * (it[i].size - 1);

      it[i].input_stride = m_strides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      if (it[i].reverse) {
        // Walk reversed input dimensions backwards.
        it[i].input_stride = -1 * it[i].input_stride;
        it[i].input_span = -1 * it[i].input_span;
      }
    }
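
    // For example, assuming a column-major tensor of dimensions (4, 3) with
    // only dimension 0 reversed and a block covering the whole tensor:
    //   it[0] = {size: 4, block_stride: 1, input_stride: -1}
    //   it[1] = {size: 3, block_stride: 4, input_stride:  4}
    // and input_offset = reverseIndex(0) = 3, so the first inner copy below
    // reads input coefficients 3, 2, 1, 0.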
    // If multiple inner dimensions have the same reverse flag, check whether
    // they can be merged into a single (virtual) inner dimension.
    int effective_inner_dim = 0;
    for (int i = 1; i < NumDims; ++i) {
      if (it[i].reverse != it[effective_inner_dim].reverse) break;
      if (it[i].block_stride != it[effective_inner_dim].size) break;
      if (it[i].block_stride != numext::abs(it[i].input_stride)) break;

      it[i].size = it[effective_inner_dim].size * it[i].size;

      it[i].block_stride = 1;
      it[i].input_stride = (inner_dim_reversed ? -1 : 1);

      it[i].block_span = it[i].block_stride * (it[i].size - 1);
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      effective_inner_dim = i;
    }
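
    // With the same (4, 3) column-major example and no reversed dimensions,
    // it[1].block_stride == it[0].size == numext::abs(it[1].input_stride) == 4,
    // so both dimensions merge into one effective inner dimension of size 12
    // that is copied in a single run. If only dimension 0 is reversed, the
    // differing reverse flags stop the merge and the copy proceeds column by
    // column.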
    eigen_assert(it[effective_inner_dim].block_stride == 1);
    eigen_assert(it[effective_inner_dim].input_stride ==
                 (inner_dim_reversed ? -1 : 1));

    const Index inner_dim_size = it[effective_inner_dim].size;
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();
    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      // Copy the inner-most dimension from the (possibly reversed) input.
      Index dst = block_offset;
      Index src = input_offset;

      if (inner_dim_reversed) {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          --src;
        }
      } else {
        for (Index i = 0; i < inner_dim_size; ++i) {
          block_buffer[dst] = m_impl.coeff(src);
          ++dst;
          ++src;
        }
      }

      // A 1d (effective) block needs only the single inner-most copy above.
      if ((NumDims - effective_inner_dim) == 1) break;
      // Update offsets for the next inner-most copy.
      for (Index i = effective_inner_dim + 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          block_offset += it[i].block_stride;
          input_offset += it[i].input_stride;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        block_offset -= it[i].block_span;
        input_offset -= it[i].input_span;
      }
    }
    return block_storage.AsTensorMaterializedBlock();
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
                                     2 * TensorOpCost::MulCost<Index>() +
                                     TensorOpCost::DivCost<Index>());
    for (int i = 0; i < NumDims; ++i) {
      if (m_reverse[i]) {
        compute_cost += 2 * TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, false /* vectorized */, PacketSize);
  }
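
  // In other words, every output coefficient pays for the index decomposition
  // (NumDims * (2 * AddCost + 2 * MulCost + DivCost)) plus 2 * AddCost per
  // reversed dimension, on top of the cost of evaluating the argument
  // expression itself.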
  EIGEN_DEVICE_FUNC typename Storage::Type data() const { return NULL; }

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  array<IndexDivisor, NumDims> m_fastStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  ReverseDimensions m_reverse;
  const Device EIGEN_DEVICE_REF m_device;

 private:
  // Per-dimension iteration state used by block() above.
  struct BlockIteratorState {
    Index size;
    Index count;
    bool reverse;
    Index block_stride;
    Index block_span;
    Index input_stride;
    Index input_span;
  };
};
// Eval as lvalue
template <typename ReverseDimensions, typename ArgType, typename Device>
struct TensorEvaluator<TensorReverseOp<ReverseDimensions, ArgType>, Device>
    : public TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                             Device> {
  typedef TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>,
                          Device> Base;
  typedef TensorReverseOp<ReverseDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int NumDims = internal::array_size<ReverseDimensions>::value;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    IsAligned = false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess = false,
    PreferBlockAccess = false,
    RawAccess = false
  };

  typedef internal::TensorBlockNotImplemented TensorBlock;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) const {
    return this->m_impl.coeffRef(this->reverseIndex(index));
  }
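
  // A write-side usage sketch (illustrative, with hypothetical tensors dst
  // and src of matching shape): since TensorReverseOp has WriteAccessors, a
  // reversed view may appear on the left-hand side of an assignment, e.g.
  //
  //   Eigen::array<bool, 2> rev{{false, true}};
  //   dst.reverse(rev) = src;  // dst(i, cols - 1 - j) = src(i, j)
  //
  // Such writes are routed through coeffRef() above and writePacket() below.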
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) const {
    // Scatter the packet through coeffRef(), one coefficient at a time.
    EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize];
    internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < PacketSize; ++i) {
      this->coeffRef(index + i) = values[i];
    }
  }
};

}  // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_REVERSE_H