#ifndef EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H
#define EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H

namespace Eigen {

// A chip is a thin slice of a tensor, corresponding for example to a row or a
// column of a 2-d tensor: one dimension is fixed at a given offset, and the
// resulting expression has one fewer dimension than its argument.

namespace internal {
template<DenseIndex DimId, typename XprType>
struct traits<TensorChippingOp<DimId, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions - 1;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<DenseIndex DimId, typename XprType>
struct eval<TensorChippingOp<DimId, XprType>, Eigen::Dense>
{
  typedef const TensorChippingOp<DimId, XprType> EIGEN_DEVICE_REF type;
};
template<DenseIndex DimId, typename XprType>
struct nested<TensorChippingOp<DimId, XprType>, 1, typename eval<TensorChippingOp<DimId, XprType> >::type>
{
  typedef TensorChippingOp<DimId, XprType> type;
};

// DimensionId carries the dimension to chip. When DimId is a compile-time
// constant (t.chip<Dim>(offset)) the value is encoded in the type; when DimId
// is Dynamic (t.chip(offset, dim)) the dimension is stored at run time.
template <DenseIndex DimId>
struct DimensionId
{
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DimensionId(DenseIndex dim) {
    EIGEN_UNUSED_VARIABLE(dim);
    eigen_assert(dim == DimId);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DenseIndex actualDim() const {
    return DimId;
  }
};

}  // end namespace internal
template<DenseIndex DimId, typename XprType>
class TensorChippingOp : public TensorBase<TensorChippingOp<DimId, XprType> >
{
  public:
    typedef TensorBase<TensorChippingOp<DimId, XprType> > Base;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorChippingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorChippingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorChippingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorChippingOp(const XprType& expr, const Index offset, const Index dim)
        : m_xpr(expr), m_offset(offset), m_dim(dim) {}

    EIGEN_DEVICE_FUNC const Index offset() const { return m_offset; }
    EIGEN_DEVICE_FUNC const Index dim() const { return m_dim.actualDim(); }
    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

    EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(TensorChippingOp)

  protected:
    typename XprType::Nested m_xpr;
    const Index m_offset;
    const internal::DimensionId<DimId> m_dim;
};
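// Usage sketch, assuming the public API of the CXX11 Tensor module: chipping
// is exposed through TensorBase::chip(), which builds a TensorChippingOp.
// Fixing dimension 1 at offset 2 of a rank-3 tensor yields a rank-2 expression:
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 3> input(4, 5, 6);
//   input.setRandom();
//   Eigen::Tensor<float, 2> slice  = input.chip<1>(2);  // dim fixed at compile time
//   Eigen::Tensor<float, 2> slice2 = input.chip(2, 1);  // dim chosen at run time
//   // slice(i, k) == input(i, 2, k) for all valid i, k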
// Eval as rvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static constexpr int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef std::remove_const_t<Scalar> ScalarNoConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  enum {
    // Chipping the outer-most dimension is trivial: the chip is a contiguous
    // slab of the input, so a single constant offset is enough.
    IsOuterChipping = (Layout == ColMajor && DimId == NumInputDims - 1) ||
                      (Layout == RowMajor && DimId == 0),
    // Chipping the inner-most dimension produces a strided view of the input.
    IsInnerChipping = (Layout == ColMajor && DimId == 0) ||
                      (Layout == RowMajor && DimId == NumInputDims - 1)
  };

  // Tensor block evaluation machinery (see TensorBlock.h).
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef internal::TensorBlockDescriptor<NumInputDims, Index> ArgTensorBlockDesc;
  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;
  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims, Layout, Index>
      TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dim(op.dim()), m_device(device)
  {
    EIGEN_STATIC_ASSERT((NumInputDims >= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
    eigen_assert(NumInputDims > m_dim.actualDim());

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    eigen_assert(op.offset() < input_dims[m_dim.actualDim()]);

    // The output dimensions are the input dimensions with the chipped
    // dimension removed.
    int j = 0;
    for (int i = 0; i < NumInputDims; ++i) {
      if (i != m_dim.actualDim()) {
        m_dimensions[j] = input_dims[i];
        ++j;
      }
    }

    // m_stride is the output stride at the chipped dimension; m_inputStride is
    // the matching input stride (m_stride times the chipped extent).
    m_stride = 1;
    m_inputStride = 1;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = 0; i < m_dim.actualDim(); ++i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    } else {
      for (int i = NumInputDims - 1; i > m_dim.actualDim(); --i) {
        m_stride *= input_dims[i];
        m_inputStride *= input_dims[i];
      }
    }
    m_inputStride *= input_dims[m_dim.actualDim()];
    m_inputOffset = m_stride * op.offset();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
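  // Worked example (illustrative numbers, not taken from the file): for a
  // ColMajor input of dimensions 4x5x6 chipped at dim == 1 with offset == 2,
  // the constructor above computes
  //
  //   m_dimensions  = {4, 6}
  //   m_stride      = 4           // product of input dims before the chip dim
  //   m_inputStride = 4 * 5 = 20  // m_stride times the chipped extent
  //   m_inputOffset = 4 * 2 = 8   // m_stride * offset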
  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    if (isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      eigen_assert(m_stride == 1);
      Index inputIndex = index * m_inputStride + m_inputOffset;
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        values[i] = m_impl.coeff(inputIndex);
        inputIndex += m_inputStride;
      }
      return internal::pload<PacketReturnType>(values);
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so avoid the integer division.
      eigen_assert(m_stride > index);
      return m_impl.template packet<LoadMode>(index + m_inputOffset);
    } else {
      const Index idx = index / m_stride;
      const Index rem = index - idx * m_stride;
      if (rem + PacketSize <= m_stride) {
        // The whole packet lies within one chip "column" of the input.
        Index inputIndex = idx * m_inputStride + m_inputOffset + rem;
        return m_impl.template packet<LoadMode>(inputIndex);
      } else {
        // The packet crosses a stride boundary: fall back to scalar loads.
        EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          values[i] = coeff(index);
          ++index;
        }
        return internal::pload<PacketReturnType>(values);
      }
    }
  }
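  // Example of the packet paths above, reusing the illustrative numbers from
  // the constructor sketch (m_stride == 4, m_inputStride == 20,
  // m_inputOffset == 8, PacketSize == 4):
  //
  //   packet(0): idx == 0, rem == 0, rem + PacketSize <= m_stride,
  //              so input coefficients 8..11 are loaded in one packet.
  //   packet(2): idx == 0, rem == 2, rem + PacketSize == 6 > m_stride,
  //              so the load straddles two input columns and takes the
  //              per-coefficient fallback path.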
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    double cost = 0;
    if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
         m_dim.actualDim() == 0) ||
        (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
         m_dim.actualDim() == NumInputDims - 1)) {
      // Inner-most chipping: one multiply and one add of index math per coefficient.
      cost += TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) &&
                m_dim.actualDim() == NumInputDims - 1) ||
               (static_cast<int>(Layout) == static_cast<int>(RowMajor) &&
                m_dim.actualDim() == 0)) {
      // Outer-most chipping: only a constant offset is added.
      cost += TensorOpCost::AddCost<Index>();
    } else {
      // General case: a division plus several multiplies/adds per coefficient.
      cost += 3 * TensorOpCost::MulCost<Index>() + TensorOpCost::DivCost<Index>() +
              3 * TensorOpCost::AddCost<Index>();
    }

    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.lastLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
        m_impl.getResourceRequirements());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool root_of_expr_ast = false) const {
    const Index chip_dim = m_dim.actualDim();

    // The input block has the same extents as the output block, with the
    // chipped dimension re-inserted with extent 1.
    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i]
            = i < chip_dim ? desc.dimension(i)
            : i > chip_dim ? desc.dimension(i - 1)
            : 1;
    }

    ArgTensorBlockDesc arg_desc(srcCoeff(desc.offset()), input_block_dims);

    // Try to reuse the destination buffer for materializing the argument block.
    if (desc.HasDestinationBuffer()) {
      DSizes<Index, NumInputDims> arg_destination_strides;
      for (int i = 0; i < NumInputDims; ++i) {
        arg_destination_strides[i]
              = i < chip_dim ? desc.destination().strides()[i]
              : i > chip_dim ? desc.destination().strides()[i - 1]
              : 0;  // for dimensions of size 1 the stride is never used
      }

      arg_desc.template AddDestinationBuffer<Layout>(
          desc.destination().template data<ScalarNoConst>(),
          arg_destination_strides);
    }

    ArgTensorBlock arg_block = m_impl.block(arg_desc, scratch, root_of_expr_ast);
    if (!arg_desc.HasDestinationBuffer()) desc.DropDestinationBuffer();

    if (arg_block.data() != NULL) {
      // Forward the argument block buffer if possible.
      return TensorBlock(arg_block.kind(), arg_block.data(), desc.dimensions());
    } else {
      // Otherwise materialize the argument block into freshly prepared storage.
      const typename TensorBlock::Storage block_storage =
          TensorBlock::prepareStorage(desc, scratch);

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumInputDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(
              arg_desc.dimensions(),
              internal::strides<Layout>(arg_desc.dimensions()),
              block_storage.data()),
          arg_block.expr());

      return block_storage.AsTensorMaterializedBlock();
    }
  }
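  // For the illustrative 4x5x6 input chipped at dim == 1, a requested 4x6
  // output block is therefore evaluated as a 4x1x6 block of the argument:
  //
  //   desc.dimensions() == {4, 6}
  //   input_block_dims  == {4, 1, 6}
  //   arg_desc offset   == srcCoeff(desc.offset())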
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Storage::Type data() const {
    typename Storage::Type result = constCast(m_impl.data());
    if (isOuterChipping() && result) {
      // An outer chip of raw tensor data is itself contiguous raw data.
      return result + m_inputOffset;
    } else {
      return NULL;
    }
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex;
    if (isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      eigen_assert(m_stride == 1);
      inputIndex = index * m_inputStride + m_inputOffset;
    } else if (isOuterChipping()) {
      // m_stride is always greater than index, so avoid the integer division.
      eigen_assert(m_stride > index);
      inputIndex = index + m_inputOffset;
    } else {
      const Index idx = index / m_stride;
      inputIndex = idx * m_inputStride + m_inputOffset;
      index -= idx * m_stride;
      inputIndex += index;
    }
    return inputIndex;
  }
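  // Tracing srcCoeff with the illustrative 4x5x6 / chip dim 1 / offset 2
  // numbers (m_stride == 4, m_inputStride == 20, m_inputOffset == 8): the
  // output coefficient at index 6, i.e. (i, k) == (2, 1), maps to
  //
  //   idx        == 6 / 4 == 1
  //   inputIndex == 1 * 20 + 8 + (6 - 4) == 30,
  //
  // which is the ColMajor linear index of input(2, 2, 1).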
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isInnerChipping() const {
    return IsInnerChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == 0) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == NumInputDims - 1);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool isOuterChipping() const {
    return IsOuterChipping ||
           (static_cast<int>(Layout) == ColMajor && m_dim.actualDim() == NumInputDims - 1) ||
           (static_cast<int>(Layout) == RowMajor && m_dim.actualDim() == 0);
  }

  Dimensions m_dimensions;
  Index m_stride;
  Index m_inputOffset;
  Index m_inputStride;
  TensorEvaluator<ArgType, Device> m_impl;
  const internal::DimensionId<DimId> m_dim;
  const Device EIGEN_DEVICE_REF m_device;
};
// Eval as lvalue
template<DenseIndex DimId, typename ArgType, typename Device>
struct TensorEvaluator<TensorChippingOp<DimId, ArgType>, Device>
  : public TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device> Base;
  typedef TensorChippingOp<DimId, ArgType> XprType;
  static constexpr int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  static constexpr int NumDims = NumInputDims - 1;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;

  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : Base(op, device) {}
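  // Usage sketch for the lvalue path (illustrative): assigning through a chip
  // writes into the corresponding slice of the underlying tensor.
  //
  //   Eigen::Tensor<float, 3> t(4, 5, 6);
  //   t.setZero();
  //   Eigen::Tensor<float, 2> plane(4, 6);
  //   plane.setConstant(1.0f);
  //   t.chip<1>(2) = plane;  // afterwards t(i, 2, k) == 1.0f for all i, k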
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index) const
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x) const
  {
    if (this->isInnerChipping()) {
      // m_stride is equal to 1, so avoid the integer division.
      eigen_assert(this->m_stride == 1);
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      Index inputIndex = index * this->m_inputStride + this->m_inputOffset;
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        this->m_impl.coeffRef(inputIndex) = values[i];
        inputIndex += this->m_inputStride;
      }
    } else if (this->isOuterChipping()) {
      // m_stride is always greater than index, so avoid the integer division.
      eigen_assert(this->m_stride > index);
      this->m_impl.template writePacket<StoreMode>(index + this->m_inputOffset, x);
    } else {
      const Index idx = index / this->m_stride;
      const Index rem = index - idx * this->m_stride;
      if (rem + PacketSize <= this->m_stride) {
        const Index inputIndex = idx * this->m_inputStride + this->m_inputOffset + rem;
        this->m_impl.template writePacket<StoreMode>(inputIndex, x);
      } else {
        // The packet crosses a stride boundary: fall back to scalar stores.
        EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
        internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
        EIGEN_UNROLL_LOOP
        for (int i = 0; i < PacketSize; ++i) {
          this->coeffRef(index) = values[i];
          ++index;
        }
      }
    }
  }
  template <typename TensorBlock>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writeBlock(
      const TensorBlockDesc& desc, const TensorBlock& block) {
    eigen_assert(this->m_impl.data() != NULL);

    const Index chip_dim = this->m_dim.actualDim();

    // Re-insert the chipped dimension (with extent 1) into the block shape.
    DSizes<Index, NumInputDims> input_block_dims;
    for (int i = 0; i < NumInputDims; ++i) {
      input_block_dims[i] = i < chip_dim ? desc.dimension(i)
                          : i > chip_dim ? desc.dimension(i - 1)
                          : 1;
    }

    typedef TensorReshapingOp<const DSizes<Index, NumInputDims>,
                              const typename TensorBlock::XprType>
        TensorBlockExpr;

    typedef internal::TensorBlockAssignment<Scalar, NumInputDims,
                                            TensorBlockExpr, Index>
        TensorBlockAssign;

    TensorBlockAssign::Run(
        TensorBlockAssign::target(
            input_block_dims,
            internal::strides<Layout>(this->m_impl.dimensions()),
            this->m_impl.data(), this->srcCoeff(desc.offset())),
        block.expr().reshape(input_block_dims));
  }
};

} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_CHIPPING_H