#ifndef EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
#define EIGEN_CXX11_TENSOR_TENSOR_PADDING_H
namespace Eigen {
namespace internal {

template<typename PaddingDimensions, typename XprType>
struct traits<TensorPaddingOp<PaddingDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename PaddingDimensions, typename XprType>
struct eval<TensorPaddingOp<PaddingDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorPaddingOp<PaddingDimensions, XprType>& type;
};
template<typename PaddingDimensions, typename XprType>
struct nested<TensorPaddingOp<PaddingDimensions, XprType>, 1, typename eval<TensorPaddingOp<PaddingDimensions, XprType> >::type>
{
  typedef TensorPaddingOp<PaddingDimensions, XprType> type;
};

}  // end namespace internal
template<typename PaddingDimensions, typename XprType>
class TensorPaddingOp : public TensorBase<TensorPaddingOp<PaddingDimensions, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorPaddingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorPaddingOp>::Index Index;

    EIGEN_DEVICE_FUNC TensorPaddingOp(const XprType& expr, const PaddingDimensions& padding_dims, const Scalar padding_value)
        : m_xpr(expr), m_padding_dims(padding_dims), m_padding_value(padding_value) {}

    EIGEN_DEVICE_FUNC const PaddingDimensions& padding() const { return m_padding_dims; }
    EIGEN_DEVICE_FUNC Scalar padding_value() const { return m_padding_value; }
    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PaddingDimensions m_padding_dims;
    const Scalar m_padding_value;
};
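// Usage sketch (illustrative, not part of this header): users normally create a
// TensorPaddingOp through TensorBase::pad(), either with the default padding
// value of zero or with an explicit value:
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setRandom();
//   Eigen::array<std::pair<int, int>, 2> paddings;
//   paddings[0] = std::make_pair(1, 2);   // dim 0: 1 value before, 2 after
//   paddings[1] = std::make_pair(0, 1);   // dim 1: 0 values before, 1 after
//   Eigen::Tensor<float, 2> zero_padded = input.pad(paddings);
//   Eigen::Tensor<float, 2> padded      = input.pad(paddings, -1.0f);
//
// The result has dimensions (2+1+2) x (3+0+1) = 5 x 4.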
template<typename PaddingDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device>
{
  typedef TensorPaddingOp<PaddingDimensions, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<PaddingDimensions>::value;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef std::remove_const_t<Scalar> ScalarNoConst;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  enum { PreferBlockAccess = true };

  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index> TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_padding(op.padding()),
        m_paddingValue(op.padding_value()), m_device(device)
  {
    // Compute the padded dimensions: each dimension grows by the amount of
    // padding requested before (first) and after (second) it.
    m_dimensions = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      m_dimensions[i] += m_padding[i].first + m_padding[i].second;
    }

    // Precompute input and output strides for the index arithmetic in coeff()
    // and the packet paths. m_outputStrides has NumDims+1 entries so that the
    // packet paths can look one stride past each dimension.
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
      m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
    } else {
      m_inputStrides[NumDims - 1] = 1;
      m_outputStrides[NumDims] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i+1] = m_outputStrides[i+2] * m_dimensions[i+1];
      }
      m_outputStrides[0] = m_outputStrides[1] * m_dimensions[0];
    }
  }
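  // Illustrative example (not part of the original source): for a column-major
  // 2x3 input with paddings {(1,2), (0,1)}, the padded dimensions are {5, 4},
  // the input strides are {1, 2}, and the output strides are {1, 5, 20}. The
  // extra entry m_outputStrides[NumDims] (here 20, the total output size) is
  // what the vectorized paths use to bound the outermost dimension.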
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS
  // Map a linear index in the padded output to the corresponding input
  // coefficient, peeling off one dimension at a time. Any coordinate that falls
  // into a padded range short-circuits to the padding value.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_outputStrides[i];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      if (isPaddingAtIndexForDim(index, 0)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[0].first);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_outputStrides[i+1];
        if (isPaddingAtIndexForDim(idx, i)) {
          return m_paddingValue;
        }
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      if (isPaddingAtIndexForDim(index, NumDims-1)) {
        return m_paddingValue;
      }
      inputIndex += (index - m_padding[NumDims-1].first);
    }
    return m_impl.coeff(inputIndex);
  }
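  // Worked example (illustrative): with the column-major 2x3 input padded by
  // {(1,2), (0,1)} from above (output 5x4, output strides {1, 5, 20}, input
  // strides {1, 2}), output index 7 has coordinates (2, 1). Neither coordinate
  // falls in a padded range, so inputIndex = (1-0)*2 + (2-1)*1 = 3, i.e. the
  // value of input(1, 1). Output index 3 has coordinates (3, 0); since
  // 3 >= 5-2 it lies in the right padding of dim 0, so m_paddingValue is
  // returned without touching the input.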
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      return packetColMajor(index);
    }
    return packetRowMajor(index);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    TensorOpCost cost = m_impl.costPerCoeff(vectorized);
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < NumDims; ++i)
        updateCostPerDimension(cost, i, i == 0);
    } else {
      for (int i = NumDims - 1; i >= 0; --i)
        updateCostPerDimension(cost, i, i == NumDims - 1);
    }
    return cost;
  }

  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
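  // Note (illustrative): skewed<Scalar>(target_size) asks for blocks whose
  // materialized size is roughly target_size bytes, skewed towards the inner
  // dimension so that the copies in block() below stay mostly linear. With a
  // 1 MiB last-level cache and float scalars, for example, that is on the
  // order of 256K coefficients per block. merge() combines this with whatever
  // the input expression itself requires.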
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    // If one of the dimensions is zero, return an empty block view.
    if (desc.size() == 0) {
      return TensorBlock(internal::TensorBlockKind::kView, NULL, desc.dimensions());
    }

    static const bool IsColMajor = Layout == static_cast<int>(ColMajor);
    const int inner_dim_idx = IsColMajor ? 0 : NumDims - 1;

    Index offset = desc.offset();

    // Compute offsets in the output tensor corresponding to desc.offset().
    DSizes<Index, NumDims> output_offsets;
    for (int i = NumDims - 1; i > 0; --i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      const int stride_dim = IsColMajor ? dim : dim + 1;
      output_offsets[dim] = offset / m_outputStrides[stride_dim];
      offset -= output_offsets[dim] * m_outputStrides[stride_dim];
    }
    output_offsets[inner_dim_idx] = offset;

    // Offsets in the input corresponding to the output offsets.
    DSizes<Index, NumDims> input_offsets = output_offsets;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offsets[dim] = input_offsets[dim] - m_padding[dim].first;
    }

    // Offset into the input buffer. It might point outside of the input buffer
    // at this point; it is corrected as the iteration loop below advances.
    Index input_offset = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      input_offset += input_offsets[dim] * m_inputStrides[dim];
    }

    // The destination buffer is indexed from 0 and has the same dimensions as
    // the requested block.
    Index output_offset = 0;
    const DSizes<Index, NumDims> output_strides =
        internal::strides<Layout>(desc.dimensions());

    // Iteration state for every dimension except the innermost one.
    array<BlockIteratorState, NumDims - 1> it;
    for (int i = 0; i < NumDims - 1; ++i) {
      const int dim = IsColMajor ? i + 1 : NumDims - i - 2;
      it[i].count = 0;
      it[i].size = desc.dimension(dim);

      it[i].input_stride = m_inputStrides[dim];
      it[i].input_span = it[i].input_stride * (it[i].size - 1);

      it[i].output_stride = output_strides[dim];
      it[i].output_span = it[i].output_stride * (it[i].size - 1);
    }

    const Index input_inner_dim_size =
        static_cast<Index>(m_impl.dimensions()[inner_dim_idx]);

    // Total output size.
    const Index output_size = desc.size();

    // We fill the inner dimension of this size in the output. It can be larger
    // than the inner dimension of the input, so we may have to pad before
    // and/or after copying values from the input.
    const Index output_inner_dim_size = desc.dimension(inner_dim_idx);

    // How many values to fill with padding BEFORE reading from the input inner
    // dimension.
    const Index output_inner_pad_before_size =
        input_offsets[inner_dim_idx] < 0
            ? numext::mini(numext::abs(input_offsets[inner_dim_idx]),
                           output_inner_dim_size)
            : 0;

    // How many values we can actually copy from the input inner dimension.
    const Index output_inner_copy_size = numext::mini(
        // Want to copy from the input.
        (output_inner_dim_size - output_inner_pad_before_size),
        // Can copy from the input.
        numext::maxi(input_inner_dim_size - (input_offsets[inner_dim_idx] +
                                             output_inner_pad_before_size),
                     Index(0)));

    // How many values to fill with padding AFTER reading from the input inner
    // dimension.
    const Index output_inner_pad_after_size =
        (output_inner_dim_size - output_inner_copy_size -
         output_inner_pad_before_size);

    // Sanity check: the three pieces must add up to the output inner dimension.
    eigen_assert(output_inner_dim_size ==
                 (output_inner_pad_before_size + output_inner_copy_size +
                  output_inner_pad_after_size));

    // Keep track of the current coordinates and padding state in the output.
    DSizes<Index, NumDims> output_coord = output_offsets;
    array<bool, NumDims> output_padded;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
    }

    typedef internal::StridedLinearBufferCopy<ScalarNoConst, Index> LinCopy;

    // Prepare storage for the materialized padding result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);

    // When possible, squeeze writes for the innermost (non-padded) dimension
    // with the first padded dimension. This reduces the number of LinCopy calls
    // and makes better use of vector instructions.
    const bool squeeze_writes =
        NumDims > 1 &&
        // The inner dimension is not padded.
        (input_inner_dim_size == m_dimensions[inner_dim_idx]) &&
        // The block covers the full inner dimension.
        (input_inner_dim_size == output_inner_dim_size);

    const int squeeze_dim = IsColMajor ? inner_dim_idx + 1 : inner_dim_idx - 1;

    // Maximum coordinate on the squeeze dimension that we can write to.
    const Index squeeze_max_coord =
        squeeze_writes ? numext::mini(
                             // Max non-padded element in the squeeze dim.
                             static_cast<Index>(m_dimensions[squeeze_dim] -
                                                m_padding[squeeze_dim].second),
                             // Max element of the output buffer in the squeeze dim.
                             static_cast<Index>(output_offsets[squeeze_dim] +
                                                desc.dimension(squeeze_dim)))
                       : static_cast<Index>(0);

    // Iterate copying data from m_impl.data() into the output buffer.
    for (Index size = 0; size < output_size;) {
      // Detect if we are in a padded region (excluding the innermost dimension).
      bool is_padded = false;
      for (int j = 1; j < NumDims; ++j) {
        const int dim = IsColMajor ? j : NumDims - j - 1;
        is_padded = output_padded[dim];
        if (is_padded) break;
      }

      if (is_padded) {
        // Fill the whole innermost dimension with the padding value.
        size += output_inner_dim_size;

        LinCopy::template Run<LinCopy::Kind::FillLinear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(0, 0, &m_paddingValue),
            output_inner_dim_size);

      } else if (squeeze_writes) {
        // Squeeze multiple reads from the innermost dimensions.
        const Index squeeze_num = squeeze_max_coord - output_coord[squeeze_dim];
        size += output_inner_dim_size * squeeze_num;

        // Copy `squeeze_num` inner dimensions from the input to the output.
        LinCopy::template Run<LinCopy::Kind::Linear>(
            typename LinCopy::Dst(output_offset, 1, block_storage.data()),
            typename LinCopy::Src(input_offset, 1, m_impl.data()),
            output_inner_dim_size * squeeze_num);

        // Advance the iteration state by `squeeze_num - 1` inner dimensions;
        // the loop epilogue below accounts for the last one.
        it[0].count += (squeeze_num - 1);
        input_offset += it[0].input_stride * (squeeze_num - 1);
        output_offset += it[0].output_stride * (squeeze_num - 1);
        output_coord[squeeze_dim] += (squeeze_num - 1);

      } else {
        // Single innermost dimension: pad before, copy, pad after.
        size += output_inner_dim_size;

        {  // Fill with padding before copying from the input inner dimension.
          const Index out = output_offset;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_before_size);
        }

        {  // Copy data from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size;
          const Index in = input_offset + output_inner_pad_before_size;

          eigen_assert(output_inner_copy_size == 0 || m_impl.data() != NULL);

          LinCopy::template Run<LinCopy::Kind::Linear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(in, 1, m_impl.data()),
              output_inner_copy_size);
        }

        {  // Fill with padding after copying from the input inner dimension.
          const Index out = output_offset + output_inner_pad_before_size +
                            output_inner_copy_size;

          LinCopy::template Run<LinCopy::Kind::FillLinear>(
              typename LinCopy::Dst(out, 1, block_storage.data()),
              typename LinCopy::Src(0, 0, &m_paddingValue),
              output_inner_pad_after_size);
        }
      }

      // Update offsets and coordinates for the next inner dimension.
      for (int j = 0; j < NumDims - 1; ++j) {
        const int dim = IsColMajor ? j + 1 : NumDims - j - 2;

        if (++it[j].count < it[j].size) {
          input_offset += it[j].input_stride;
          output_offset += it[j].output_stride;
          output_coord[dim] += 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
          break;
        } else {
          it[j].count = 0;
          input_offset -= it[j].input_span;
          output_offset -= it[j].output_span;
          output_coord[dim] -= it[j].size - 1;
          output_padded[dim] = isPaddingAtIndexForDim(output_coord[dim], dim);
        }
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
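  // Summary of the copy loop above (illustrative): every output "row" along the
  // inner dimension is assembled as [pad_before | copy from input | pad_after].
  // For example, an output inner dimension of size 5 with input inner size 2
  // and a left padding of 1 (block starting at coordinate 0) splits into
  // pad_before = 1, copy = 2, pad_after = 2. Rows whose outer coordinates fall
  // entirely inside a padded region are produced by a single FillLinear call,
  // and when the inner dimension is not padded at all, consecutive rows are
  // squeezed into one Linear copy.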
 private:
  struct BlockIteratorState {
    Index count;
    Index size;
    Index input_stride;
    Index input_span;
    Index output_stride;
    Index output_span;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isPaddingAtIndexForDim(
      Index index, int dim_index) const {
    return (!internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index < m_padding[dim_index].first) ||
           (!internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0) &&
            index >= m_dimensions[dim_index] - m_padding[dim_index].second);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isLeftPaddingCompileTimeZero(int dim_index) const {
    return internal::index_pair_first_statically_eq<PaddingDimensions>(dim_index, 0);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isRightPaddingCompileTimeZero(int dim_index) const {
    return internal::index_pair_second_statically_eq<PaddingDimensions>(dim_index, 0);
  }
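  // Note: these predicates can only return true when PaddingDimensions encodes
  // the padding at compile time (e.g. an Eigen::IndexPairList with
  // type2indexpair entries). For a runtime Eigen::array of pairs,
  // index_pair_first/second_statically_eq cannot prove anything and returns
  // false, so both padding bounds are checked at run time in
  // isPaddingAtIndexForDim() and in the packet paths below.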
  void updateCostPerDimension(TensorOpCost& cost, int i, bool first) const {
    const double in = static_cast<double>(m_impl.dimensions()[i]);
    const double out = in + m_padding[i].first + m_padding[i].second;
    if (out == 0)
      return;
    const double reduction = in / out;
    cost *= reduction;
    if (first) {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                           reduction * (1 * TensorOpCost::AddCost<Index>()));
    } else {
      cost += TensorOpCost(0, 0, 2 * TensorOpCost::AddCost<Index>() +
                                 2 * TensorOpCost::MulCost<Index>() +
                           reduction * (2 * TensorOpCost::MulCost<Index>() +
                                        1 * TensorOpCost::DivCost<Index>()));
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetColMajor(Index index) const
  {
    const Index initialIndex = index;
    Index inputIndex = 0;
    for (int i = NumDims - 1; i > 0; --i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
      const Index lastPaddedRight = m_outputStrides[i+1];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // All the coefficients are in the left padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // All the coefficients are in the right padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) ||
               (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // All the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      else {
        // The packet straddles a padding boundary.
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index firstIdx = index;
    const Index lastIdx = index + PacketSize - 1;
    const Index lastPaddedLeft = m_padding[0].first;
    const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
    const Index lastPaddedRight = m_outputStrides[1];

    if (!isLeftPaddingCompileTimeZero(0) && lastIdx < lastPaddedLeft) {
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(0) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(0) && isRightPaddingCompileTimeZero(0)) ||
             (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      inputIndex += (index - m_padding[0].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    return packetWithPossibleZero(initialIndex);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    const Index initialIndex = index;
    Index inputIndex = 0;
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index firstIdx = index;
      const Index lastIdx = index + PacketSize - 1;
      const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i+1];
      const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i+1];
      const Index lastPaddedRight = m_outputStrides[i];

      if (!isLeftPaddingCompileTimeZero(i) && lastIdx < lastPaddedLeft) {
        // All the coefficients are in the left padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if (!isRightPaddingCompileTimeZero(i) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
        // All the coefficients are in the right padding zone.
        return internal::pset1<PacketReturnType>(m_paddingValue);
      }
      else if ((isLeftPaddingCompileTimeZero(i) && isRightPaddingCompileTimeZero(i)) ||
               (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
        // All the coefficients are between the two padding zones.
        const Index idx = index / m_outputStrides[i+1];
        inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
        index -= idx * m_outputStrides[i+1];
      }
      else {
        // The packet straddles a padding boundary.
        return packetWithPossibleZero(initialIndex);
      }
    }

    const Index firstIdx = index;
    const Index lastIdx = index + PacketSize - 1;
    const Index lastPaddedLeft = m_padding[NumDims-1].first;
    const Index firstPaddedRight = (m_dimensions[NumDims-1] - m_padding[NumDims-1].second);
    const Index lastPaddedRight = m_outputStrides[NumDims-1];

    if (!isLeftPaddingCompileTimeZero(NumDims-1) && lastIdx < lastPaddedLeft) {
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if (!isRightPaddingCompileTimeZero(NumDims-1) && firstIdx >= firstPaddedRight && lastIdx < lastPaddedRight) {
      return internal::pset1<PacketReturnType>(m_paddingValue);
    }
    else if ((isLeftPaddingCompileTimeZero(NumDims-1) && isRightPaddingCompileTimeZero(NumDims-1)) ||
             (firstIdx >= lastPaddedLeft && lastIdx < firstPaddedRight)) {
      inputIndex += (index - m_padding[NumDims-1].first);
      return m_impl.template packet<Unaligned>(inputIndex);
    }
    return packetWithPossibleZero(initialIndex);
  }
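  // The vectorized paths above distinguish three cases per dimension
  // (illustrative summary): if the whole packet falls inside a padded region,
  // the result is pset1(m_paddingValue); if it falls entirely between the two
  // padded regions of every dimension, a single unaligned packet load is issued
  // on the input; if it straddles a padding boundary, packetWithPossibleZero()
  // falls back to gathering the PacketSize coefficients one by one via coeff().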
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
  {
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    for (int i = 0; i < PacketSize; ++i) {
      values[i] = coeff(index+i);
    }
    PacketReturnType rslt = internal::pload<PacketReturnType>(values);
    return rslt;
  }

  DSizes<Index, NumDims> m_dimensions;
  array<Index, NumDims+1> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  PaddingDimensions m_padding;

  Scalar m_paddingValue;

  const Device EIGEN_DEVICE_REF m_device;
};

}  // end namespace Eigen

#endif  // EIGEN_CXX11_TENSOR_TENSOR_PADDING_H