4 #ifndef EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
5 #define EIGEN_CXX11_TENSOR_TENSOR_VOLUME_PATCH_H
28 template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols,
typename XprType>
29 struct traits<TensorVolumePatchOp<Planes, Rows, Cols, XprType> > :
public traits<XprType>
31 typedef std::remove_const_t<typename XprType::Scalar> Scalar;
33 typedef typename XprTraits::StorageKind StorageKind;
34 typedef typename XprTraits::Index
Index;
35 typedef typename XprType::Nested Nested;
36 typedef std::remove_reference_t<Nested> Nested_;
37 static constexpr
int NumDimensions = XprTraits::NumDimensions + 1;
38 static constexpr
int Layout = XprTraits::Layout;
39 typedef typename XprTraits::PointerType PointerType;
43 template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols,
typename XprType>
44 struct eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType>,
Eigen::Dense>
46 typedef const TensorVolumePatchOp<Planes, Rows, Cols, XprType>& type;
49 template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols,
typename XprType>
50 struct nested<TensorVolumePatchOp<Planes, Rows, Cols, XprType>, 1, typename eval<TensorVolumePatchOp<Planes, Rows, Cols, XprType> >::type>
52 typedef TensorVolumePatchOp<Planes, Rows, Cols, XprType> type;
57 template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols,
typename XprType>
61 typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Scalar
Scalar;
64 typedef typename Eigen::internal::nested<TensorVolumePatchOp>::type
Nested;
65 typedef typename Eigen::internal::traits<TensorVolumePatchOp>::StorageKind
StorageKind;
66 typedef typename Eigen::internal::traits<TensorVolumePatchOp>::Index
Index;
170 template<DenseIndex Planes, DenseIndex Rows, DenseIndex Cols,
typename ArgType,
typename Device>
175 static constexpr
int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
176 static constexpr
int NumDims = NumInputDims + 1;
178 typedef std::remove_const_t<typename XprType::Scalar>
Scalar;
200 m_impl(op.expression(), device)
210 m_inputDepth = input_dims[0];
211 m_inputPlanes = input_dims[1];
212 m_inputRows = input_dims[2];
213 m_inputCols = input_dims[3];
215 m_inputDepth = input_dims[NumInputDims-1];
216 m_inputPlanes = input_dims[NumInputDims-2];
217 m_inputRows = input_dims[NumInputDims-3];
218 m_inputCols = input_dims[NumInputDims-4];
234 m_input_planes_eff = (m_inputPlanes - 1) * m_plane_inflate_strides + 1;
235 m_input_rows_eff = (m_inputRows - 1) * m_row_inflate_strides + 1;
236 m_input_cols_eff = (m_inputCols - 1) * m_col_inflate_strides + 1;
252 m_outputPlanes =
numext::ceil((m_input_planes_eff - m_patch_planes_eff + 1.f) /
static_cast<float>(m_plane_strides));
253 m_outputRows =
numext::ceil((m_input_rows_eff - m_patch_rows_eff + 1.f) /
static_cast<float>(m_row_strides));
254 m_outputCols =
numext::ceil((m_input_cols_eff - m_patch_cols_eff + 1.f) /
static_cast<float>(m_col_strides));
255 m_planePaddingTop = 0;
257 m_colPaddingLeft = 0;
260 m_outputPlanes =
numext::ceil(m_input_planes_eff /
static_cast<float>(m_plane_strides));
261 m_outputRows =
numext::ceil(m_input_rows_eff /
static_cast<float>(m_row_strides));
262 m_outputCols =
numext::ceil(m_input_cols_eff /
static_cast<float>(m_col_strides));
263 const Index dz = (m_outputPlanes - 1) * m_plane_strides + m_patch_planes_eff - m_input_planes_eff;
264 const Index dy = (m_outputRows - 1) * m_row_strides + m_patch_rows_eff - m_input_rows_eff;
265 const Index dx = (m_outputCols - 1) * m_col_strides + m_patch_cols_eff - m_input_cols_eff;
266 m_planePaddingTop = dz / 2;
267 m_rowPaddingTop = dy / 2;
268 m_colPaddingLeft = dx / 2;
288 m_dimensions[0] = input_dims[0];
292 m_dimensions[4] = m_outputPlanes * m_outputRows * m_outputCols;
293 for (
int i = 5;
i < NumDims; ++
i) {
294 m_dimensions[
i] = input_dims[
i-1];
304 m_dimensions[NumDims-1] = input_dims[NumInputDims-1];
308 m_dimensions[NumDims-5] = m_outputPlanes * m_outputRows * m_outputCols;
309 for (
int i = NumDims-6;
i >= 0; --
i) {
310 m_dimensions[
i] = input_dims[
i];
316 m_rowStride = m_dimensions[1];
317 m_colStride = m_dimensions[2] * m_rowStride;
318 m_patchStride = m_colStride * m_dimensions[3] * m_dimensions[0];
319 m_otherStride = m_patchStride * m_dimensions[4];
321 m_rowStride = m_dimensions[NumDims-2];
322 m_colStride = m_dimensions[NumDims-3] * m_rowStride;
323 m_patchStride = m_colStride * m_dimensions[NumDims-4] * m_dimensions[NumDims-1];
324 m_otherStride = m_patchStride * m_dimensions[NumDims-5];
328 m_planeInputStride = m_inputDepth;
329 m_rowInputStride = m_inputDepth * m_inputPlanes;
330 m_colInputStride = m_inputDepth * m_inputRows * m_inputPlanes;
331 m_otherInputStride = m_inputDepth * m_inputRows * m_inputCols * m_inputPlanes;
333 m_outputPlanesRows = m_outputPlanes * m_outputRows;
336 m_fastOtherStride = internal::TensorIntDivisor<Index>(m_otherStride);
338 m_fastPatchStride = internal::TensorIntDivisor<Index>(m_patchStride);
339 m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
340 m_fastRowStride = internal::TensorIntDivisor<Index>(m_rowStride);
341 m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_row_inflate_strides);
342 m_fastInputColStride = internal::TensorIntDivisor<Index>(m_col_inflate_strides);
343 m_fastInputPlaneStride = internal::TensorIntDivisor<Index>(m_plane_inflate_strides);
344 m_fastInputColsEff = internal::TensorIntDivisor<Index>(m_input_cols_eff);
345 m_fastOutputPlanes = internal::TensorIntDivisor<Index>(m_outputPlanes);
346 m_fastOutputPlanesRows = internal::TensorIntDivisor<Index>(m_outputPlanesRows);
349 m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[0]);
351 m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
358 m_impl.evalSubExprsIfNeeded(NULL);
369 const Index patchIndex = index / m_fastPatchStride;
373 const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastOutputDepth;
376 const Index otherIndex = (NumDims == 5) ? 0 : index / m_fastOtherStride;
377 const Index patch3DIndex = (NumDims == 5) ? patchIndex : (index - otherIndex * m_otherStride) / m_fastPatchStride;
380 const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
381 const Index colOffset = patchOffset / m_fastColStride;
382 const Index inputCol = colIndex * m_col_strides + colOffset * m_in_col_strides - m_colPaddingLeft;
383 const Index origInputCol = (m_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
384 if (inputCol < 0 || inputCol >= m_input_cols_eff ||
385 ((m_col_inflate_strides != 1) && (inputCol != origInputCol * m_col_inflate_strides))) {
386 return Scalar(m_paddingValue);
390 const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
391 const Index rowOffset = (patchOffset - colOffset * m_colStride) / m_fastRowStride;
392 const Index inputRow = rowIndex * m_row_strides + rowOffset * m_in_row_strides - m_rowPaddingTop;
393 const Index origInputRow = (m_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
394 if (inputRow < 0 || inputRow >= m_input_rows_eff ||
395 ((m_row_inflate_strides != 1) && (inputRow != origInputRow * m_row_inflate_strides))) {
396 return Scalar(m_paddingValue);
400 const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
401 const Index planeOffset = patchOffset - colOffset * m_colStride - rowOffset * m_rowStride;
402 const Index inputPlane = planeIndex * m_plane_strides + planeOffset * m_in_plane_strides - m_planePaddingTop;
403 const Index origInputPlane = (m_plane_inflate_strides == 1) ? inputPlane : ((inputPlane >= 0) ? (inputPlane / m_fastInputPlaneStride) : 0);
404 if (inputPlane < 0 || inputPlane >= m_input_planes_eff ||
405 ((m_plane_inflate_strides != 1) && (inputPlane != origInputPlane * m_plane_inflate_strides))) {
406 return Scalar(m_paddingValue);
409 const int depth_index =
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor) ? 0 : NumDims - 1;
410 const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
412 const Index inputIndex = depth +
413 origInputRow * m_rowInputStride +
414 origInputCol * m_colInputStride +
415 origInputPlane * m_planeInputStride +
416 otherIndex * m_otherInputStride;
418 return m_impl.coeff(inputIndex);
421 template<
int LoadMode>
426 if (m_in_row_strides != 1 || m_in_col_strides != 1 || m_row_inflate_strides != 1 || m_col_inflate_strides != 1 ||
427 m_in_plane_strides != 1 || m_plane_inflate_strides != 1) {
428 return packetWithPossibleZero(index);
432 const Index patchIndex = indices[0] / m_fastPatchStride;
433 if (patchIndex != indices[1] / m_fastPatchStride) {
434 return packetWithPossibleZero(index);
436 const Index otherIndex = (NumDims == 5) ? 0 : indices[0] / m_fastOtherStride;
437 eigen_assert(otherIndex == indices[1] / m_fastOtherStride);
440 const Index patchOffsets[2] = {(indices[0] - patchIndex * m_patchStride) / m_fastOutputDepth,
441 (indices[1] - patchIndex * m_patchStride) / m_fastOutputDepth};
443 const Index patch3DIndex = (NumDims == 5) ? patchIndex : (indices[0] - otherIndex * m_otherStride) / m_fastPatchStride;
444 eigen_assert(patch3DIndex == (indices[1] - otherIndex * m_otherStride) / m_fastPatchStride);
446 const Index colIndex = patch3DIndex / m_fastOutputPlanesRows;
447 const Index colOffsets[2] = {
448 patchOffsets[0] / m_fastColStride,
449 patchOffsets[1] / m_fastColStride};
452 const Index inputCols[2] = {
453 colIndex * m_col_strides + colOffsets[0] - m_colPaddingLeft,
454 colIndex * m_col_strides + colOffsets[1] - m_colPaddingLeft};
455 if (inputCols[1] < 0 || inputCols[0] >= m_inputCols) {
456 return internal::pset1<PacketReturnType>(
Scalar(m_paddingValue));
459 if (inputCols[0] != inputCols[1]) {
460 return packetWithPossibleZero(index);
463 const Index rowIndex = (patch3DIndex - colIndex * m_outputPlanesRows) / m_fastOutputPlanes;
464 const Index rowOffsets[2] = {
465 (patchOffsets[0] - colOffsets[0] * m_colStride) / m_fastRowStride,
466 (patchOffsets[1] - colOffsets[1] * m_colStride) / m_fastRowStride};
469 const Index inputRows[2] = {
470 rowIndex * m_row_strides + rowOffsets[0] - m_rowPaddingTop,
471 rowIndex * m_row_strides + rowOffsets[1] - m_rowPaddingTop};
473 if (inputRows[1] < 0 || inputRows[0] >= m_inputRows) {
474 return internal::pset1<PacketReturnType>(
Scalar(m_paddingValue));
477 if (inputRows[0] != inputRows[1]) {
478 return packetWithPossibleZero(index);
481 const Index planeIndex = (patch3DIndex - m_outputPlanes * (colIndex * m_outputRows + rowIndex));
482 const Index planeOffsets[2] = {
483 patchOffsets[0] - colOffsets[0] * m_colStride - rowOffsets[0] * m_rowStride,
484 patchOffsets[1] - colOffsets[1] * m_colStride - rowOffsets[1] * m_rowStride};
486 const Index inputPlanes[2] = {
487 planeIndex * m_plane_strides + planeOffsets[0] - m_planePaddingTop,
488 planeIndex * m_plane_strides + planeOffsets[1] - m_planePaddingTop};
490 if (inputPlanes[1] < 0 || inputPlanes[0] >= m_inputPlanes) {
491 return internal::pset1<PacketReturnType>(
Scalar(m_paddingValue));
494 if (inputPlanes[0] >= 0 && inputPlanes[1] < m_inputPlanes) {
496 const int depth_index =
static_cast<int>(
Layout) ==
static_cast<int>(
ColMajor) ? 0 : NumDims - 1;
497 const Index depth = index - (index / m_fastOutputDepth) * m_dimensions[depth_index];
498 const Index inputIndex = depth +
499 inputRows[0] * m_rowInputStride +
500 inputCols[0] * m_colInputStride +
501 m_planeInputStride * inputPlanes[0] +
502 otherIndex * m_otherInputStride;
503 return m_impl.template packet<Unaligned>(inputIndex);
506 return packetWithPossibleZero(index);
511 const double compute_cost =
512 10 * TensorOpCost::DivCost<Index>() + 21 * TensorOpCost::MulCost<Index>() +
513 8 * TensorOpCost::AddCost<Index>();
#define EIGEN_UNROLL_LOOP
#define EIGEN_DEVICE_FUNC
#define EIGEN_STATIC_ASSERT(X, MSG)
DenseIndex padding_right() const
const DenseIndex m_plane_strides
XprType::CoeffReturnType CoeffReturnType
DenseIndex padding_bottom_z() const
const DenseIndex m_patch_planes
const DenseIndex m_row_inflate_strides
const DenseIndex m_patch_rows
DenseIndex in_plane_strides() const
DenseIndex patch_rows() const
DenseIndex patch_cols() const
const DenseIndex m_row_strides
DenseIndex col_strides() const
Eigen::internal::traits< TensorVolumePatchOp >::Index Index
DenseIndex plane_strides() const
PaddingType padding_type() const
const DenseIndex m_padding_bottom_z
TensorVolumePatchOp(const XprType &expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, PaddingType padding_type, Scalar padding_value)
TensorVolumePatchOp(const XprType &expr, DenseIndex patch_planes, DenseIndex patch_rows, DenseIndex patch_cols, DenseIndex plane_strides, DenseIndex row_strides, DenseIndex col_strides, DenseIndex in_plane_strides, DenseIndex in_row_strides, DenseIndex in_col_strides, DenseIndex plane_inflate_strides, DenseIndex row_inflate_strides, DenseIndex col_inflate_strides, DenseIndex padding_top_z, DenseIndex padding_bottom_z, DenseIndex padding_top, DenseIndex padding_bottom, DenseIndex padding_left, DenseIndex padding_right, Scalar padding_value)
DenseIndex in_row_strides() const
const DenseIndex m_plane_inflate_strides
const DenseIndex m_in_row_strides
const DenseIndex m_col_strides
DenseIndex in_col_strides() const
const internal::remove_all_t< typename XprType::Nested > & expression() const
Eigen::internal::nested< TensorVolumePatchOp >::type Nested
const PaddingType m_padding_type
const DenseIndex m_padding_right
DenseIndex patch_planes() const
Scalar padding_value() const
const DenseIndex m_padding_bottom
Eigen::NumTraits< Scalar >::Real RealScalar
DenseIndex padding_bottom() const
const DenseIndex m_padding_left
const DenseIndex m_padding_top
const Scalar m_padding_value
DenseIndex padding_top() const
bool padding_explicit() const
DenseIndex padding_left() const
Eigen::internal::traits< TensorVolumePatchOp >::Scalar Scalar
DenseIndex row_inflate_strides() const
Eigen::internal::traits< TensorVolumePatchOp >::StorageKind StorageKind
const DenseIndex m_col_inflate_strides
const DenseIndex m_patch_cols
DenseIndex plane_inflate_strides() const
const DenseIndex m_in_col_strides
DenseIndex padding_top_z() const
DenseIndex col_inflate_strides() const
const DenseIndex m_padding_top_z
const DenseIndex m_in_plane_strides
DenseIndex row_strides() const
const bool m_padding_explicit
typename remove_all< T >::type remove_all_t
Scalar() ceil(const Scalar &x)
: TensorContractionSycl.h provides various tensor contraction kernels for the SYCL backend
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex
internal::packet_traits< Scalar >::type type
XprType::CoeffReturnType CoeffReturnType
internal::TensorIntDivisor< Index > m_fastPatchStride
internal::TensorIntDivisor< Index > m_fastOutputDepth
PacketReturnType packetWithPossibleZero(Index index) const
internal::TensorIntDivisor< Index > m_fastInputRowStride
Index userPlaneStride() const
internal::TensorIntDivisor< Index > m_fastInputColStride
Index colPaddingLeft() const
Index planePaddingTop() const
TensorEvaluator(const XprType &op, const Device &device)
Index m_plane_inflate_strides
internal::TensorIntDivisor< Index > m_fastInputPlaneStride
const TensorEvaluator< ArgType, Device > & impl() const
Index colInflateStride() const
std::remove_const_t< typename XprType::Scalar > Scalar
Index rowPaddingTop() const
Index userInPlaneStride() const
DSizes< Index, NumDims > Dimensions
EvaluatorPointerType data() const
const Dimensions & dimensions() const
TensorEvaluator< ArgType, Device > m_impl
Index m_row_inflate_strides
PacketType< CoeffReturnType, Device >::type PacketReturnType
Index rowInflateStride() const
Index userInColStride() const
internal::TensorIntDivisor< Index > m_fastRowStride
StorageMemory< CoeffReturnType, Device > Storage
Index userColStride() const
internal::TensorIntDivisor< Index > m_fastOutputPlanes
bool evalSubExprsIfNeeded(EvaluatorPointerType)
Storage::Type EvaluatorPointerType
internal::TensorIntDivisor< Index > m_fastOtherStride
internal::TensorIntDivisor< Index > m_fastOutputPlanesRows
Index userRowStride() const
Index m_col_inflate_strides
internal::TensorIntDivisor< Index > m_fastInputColsEff
Index outputPlanes() const
TensorOpCost costPerCoeff(bool vectorized) const
internal::TensorIntDivisor< Index > m_fastColStride
PacketReturnType packet(Index index) const
CoeffReturnType coeff(Index index) const
internal::TensorBlockNotImplemented TensorBlock
Index planeInflateStride() const
TensorVolumePatchOp< Planes, Rows, Cols, ArgType > XprType
Index userInRowStride() const
A cost model used to limit the number of threads used for evaluating a tensor expression.
const Dimensions & dimensions() const
static constexpr int Layout
CoeffReturnType coeff(Index index) const
static constexpr int PacketSize