#ifndef EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
#define EIGEN_CXX11_TENSOR_TENSOR_PATCH_H
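
// TensorPatchOp extracts every (overlapping) patch of a fixed size from an input tensor and
// exposes the patches along one extra dimension. A minimal usage sketch (illustrative only,
// assuming the extract_patches() entry point on TensorBase; shapes shown for the ColMajor case):
//
//   Eigen::Tensor<float, 2> input(4, 4);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> patch_dims{{2, 2}};
//   // Result has rank 3: 2x2 patches, and (4-2+1)*(4-2+1) = 9 of them along the last dimension.
//   Eigen::Tensor<float, 3> patches = input.extract_patches(patch_dims);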
namespace Eigen {
namespace internal {

template<typename PatchDim, typename XprType>
struct traits<TensorPatchOp<PatchDim, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions + 1;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename PatchDim, typename XprType>
struct eval<TensorPatchOp<PatchDim, XprType>, Eigen::Dense>
{
  typedef const TensorPatchOp<PatchDim, XprType>& type;
};
template<typename PatchDim, typename XprType>
struct nested<TensorPatchOp<PatchDim, XprType>, 1, typename eval<TensorPatchOp<PatchDim, XprType> >::type>
{
  typedef TensorPatchOp<PatchDim, XprType> type;
};

}  // end namespace internal
template<typename PatchDim, typename XprType>
class TensorPatchOp : public TensorBase<TensorPatchOp<PatchDim, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorPatchOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorPatchOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorPatchOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorPatchOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorPatchOp(const XprType& expr, const PatchDim& patch_dims)
        : m_xpr(expr), m_patch_dims(patch_dims) {}

    EIGEN_DEVICE_FUNC
    const PatchDim& patch_dims() const { return m_patch_dims; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const PatchDim m_patch_dims;
};
// Eval as rvalue
template<typename PatchDim, typename ArgType, typename Device>
struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device>
{
  typedef TensorPatchOp<PatchDim, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;

  typedef internal::TensorBlockNotImplemented TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device)
  {
    Index num_patches = 1;
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const PatchDim& patch_dims = op.patch_dims();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < NumDims-1; ++i) {
        m_dimensions[i] = patch_dims[i];
        num_patches *= (input_dims[i] - patch_dims[i] + 1);
      }
      m_dimensions[NumDims-1] = num_patches;
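      // E.g. (illustrative): a 4x4 input with 2x2 patches gives m_dimensions = [2, 2, 9],
      // the 9 = (4-2+1)*(4-2+1) overlapping patch positions being indexed by the last dimension.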
      m_inputStrides[0] = 1;
      m_patchStrides[0] = 1;
      for (int i = 1; i < NumDims-1; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_patchStrides[i] = m_patchStrides[i-1] * (input_dims[i-1] - patch_dims[i-1] + 1);
      }
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
    } else {
      for (int i = 0; i < NumDims-1; ++i) {
        m_dimensions[i+1] = patch_dims[i];
        num_patches *= (input_dims[i] - patch_dims[i] + 1);
      }
      m_dimensions[0] = num_patches;
      m_inputStrides[NumDims-2] = 1;
      m_patchStrides[NumDims-2] = 1;
      for (int i = NumDims-3; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_patchStrides[i] = m_patchStrides[i+1] * (input_dims[i+1] - patch_dims[i+1] + 1);
      }
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims-2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
      }
    }
  }
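  // Illustrative values for the quantities computed above (ColMajor, 4x4 input, 2x2 patches):
  // m_inputStrides = [1, 4], m_patchStrides = [1, 3] (three patch positions per dimension),
  // and m_outputStrides = [1, 2, 4] over the 2x2x9 output.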
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType /*data*/) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
    // Find the location of the first element of the patch.
    Index patchIndex = index / m_outputStrides[output_stride_index];
    // Find the offset of the element wrt the location of the first element.
    Index patchOffset = index - patchIndex * m_outputStrides[output_stride_index];
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 2; i > 0; --i) {
        const Index patchIdx = patchIndex / m_patchStrides[i];
        patchIndex -= patchIdx * m_patchStrides[i];
        const Index offsetIdx = patchOffset / m_outputStrides[i];
        patchOffset -= offsetIdx * m_outputStrides[i];
        inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 2; ++i) {
        const Index patchIdx = patchIndex / m_patchStrides[i];
        patchIndex -= patchIdx * m_patchStrides[i];
        const Index offsetIdx = patchOffset / m_outputStrides[i+1];
        patchOffset -= offsetIdx * m_outputStrides[i+1];
        inputIndex += (patchIdx + offsetIdx) * m_inputStrides[i];
      }
    }
    inputIndex += (patchIndex + patchOffset);
    return m_impl.coeff(inputIndex);
  }
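  // Illustrative trace of coeff() (ColMajor, 4x4 input, 2x2 patches): output index 5 is
  // entry (1,0) of patch #1, and the decomposition above yields input index 2, i.e. input(2,0).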
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0;
    Index indices[2] = {index, index + PacketSize - 1};
    Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index],
                             indices[1] / m_outputStrides[output_stride_index]};
    Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index],
                             indices[1] - patchIndices[1] * m_outputStrides[output_stride_index]};
    Index inputIndices[2] = {0, 0};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 2; i > 0; --i) {
        const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
                                   patchIndices[1] / m_patchStrides[i]};
        patchIndices[0] -= patchIdx[0] * m_patchStrides[i];
        patchIndices[1] -= patchIdx[1] * m_patchStrides[i];

        const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i],
                                    patchOffsets[1] / m_outputStrides[i]};
        patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i];
        patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i];

        inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i];
        inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
      }
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < NumDims - 2; ++i) {
        const Index patchIdx[2] = {patchIndices[0] / m_patchStrides[i],
                                   patchIndices[1] / m_patchStrides[i]};
        patchIndices[0] -= patchIdx[0] * m_patchStrides[i];
        patchIndices[1] -= patchIdx[1] * m_patchStrides[i];

        const Index offsetIdx[2] = {patchOffsets[0] / m_outputStrides[i+1],
                                    patchOffsets[1] / m_outputStrides[i+1]};
        patchOffsets[0] -= offsetIdx[0] * m_outputStrides[i+1];
        patchOffsets[1] -= offsetIdx[1] * m_outputStrides[i+1];

        inputIndices[0] += (patchIdx[0] + offsetIdx[0]) * m_inputStrides[i];
        inputIndices[1] += (patchIdx[1] + offsetIdx[1]) * m_inputStrides[i];
      }
    }
    inputIndices[0] += (patchIndices[0] + patchOffsets[0]);
    inputIndices[1] += (patchIndices[1] + patchOffsets[1]);
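    // At this point inputIndices[0] and inputIndices[1] hold the input offsets of the first
    // and last coefficient of the packet. If they are exactly PacketSize-1 apart, the packet
    // is contiguous in the input and can be loaded in one shot; otherwise it is gathered
    // coefficient by coefficient.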
    if (inputIndices[1] - inputIndices[0] == PacketSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[PacketSize-1] = m_impl.coeff(inputIndices[1]);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    const double compute_cost = NumDims * (TensorOpCost::DivCost<Index>() +
                                           TensorOpCost::MulCost<Index>() +
                                           2 * TensorOpCost::AddCost<Index>());
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }
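  // The compute cost above charges one division, one multiplication and two additions per
  // dimension, matching the per-dimension index decomposition performed in coeff() and packet().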
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

 protected:
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims-1> m_inputStrides;
  array<Index, NumDims-1> m_patchStrides;

  TensorEvaluator<ArgType, Device> m_impl;
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_PATCH_H