#ifndef EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
#define EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H
template<typename Broadcast, typename XprType>
struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template<typename Broadcast, typename XprType>
struct eval<TensorBroadcastingOp<Broadcast, XprType>, Eigen::Dense>
{
  typedef const TensorBroadcastingOp<Broadcast, XprType> EIGEN_DEVICE_REF type;
};
template<typename Broadcast, typename XprType>
struct nested<TensorBroadcastingOp<Broadcast, XprType>, 1, typename eval<TensorBroadcastingOp<Broadcast, XprType> >::type>
{
  typedef TensorBroadcastingOp<Broadcast, XprType> type;
};
template <typename Dims>
struct is_input_scalar {
  static const bool value = false;
};
template <>
struct is_input_scalar<Sizes<> > {
  static const bool value = true;
};
#ifndef EIGEN_EMULATE_CXX11_META_H
template <typename std::ptrdiff_t... Indices>
struct is_input_scalar<Sizes<Indices...> > {
  static const bool value = (Sizes<Indices...>::total_size == 1);
};
#endif
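// is_input_scalar detects inputs that hold exactly one coefficient, so that
// coeff()/packet() below can short-circuit to m_impl.coeff(0) instead of
// computing an input index. Illustrative values (a sketch, not part of the
// original header):
//   is_input_scalar<Sizes<> >::value          == true
//   is_input_scalar<Sizes<1, 1> >::value      == true   // total_size == 1
//   is_input_scalar<Sizes<2, 3> >::value      == false
//   is_input_scalar<DSizes<Index, 2> >::value == false  // primary template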
template<typename Broadcast, typename XprType>
class TensorBroadcastingOp : public TensorBase<TensorBroadcastingOp<Broadcast, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorBroadcastingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorBroadcastingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorBroadcastingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBroadcastingOp(const XprType& expr, const Broadcast& broadcast)
        : m_xpr(expr), m_broadcast(broadcast) {}

    EIGEN_DEVICE_FUNC
    const Broadcast& broadcast() const { return m_broadcast; }

    EIGEN_DEVICE_FUNC
    const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Broadcast m_broadcast;
};
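// Usage sketch (not part of the original header): a TensorBroadcastingOp is
// normally created through TensorBase::broadcast(); each output dimension is
// the input dimension multiplied by the corresponding broadcast factor.
//
//   Eigen::Tensor<float, 2> input(2, 3);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> bcast = {{2, 3}};
//   Eigen::Tensor<float, 2> tiled = input.broadcast(bcast);  // 4 x 9 result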
// Evaluator for the broadcasting expression (eval as rvalue).
template<typename Broadcast, typename ArgType, typename Device>
struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
{
  typedef TensorBroadcastingOp<Broadcast, ArgType> XprType;
  typedef typename XprType::Index Index;
  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  static constexpr int PacketSize = PacketType<CoeffReturnType, Device>::size;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;

  // All non-static data members must share the same access control so the
  // evaluator stays standard layout.
  protected:
  bool isCopy, nByOne, oneByN;
  public:
  typedef StorageMemory<CoeffReturnType, Device> Storage;
  typedef typename Storage::Type EvaluatorPointerType;
  enum {
    IsAligned         = true,
    PacketAccess      = TensorEvaluator<ArgType, Device>::PacketAccess,
    BlockAccess       = TensorEvaluator<ArgType, Device>::BlockAccess,
    PreferBlockAccess = true,
    CoordAccess       = false,
    RawAccess         = false
  };
  typedef std::remove_const_t<Scalar> ScalarNoConst;

  // Block-based broadcasting uses a trick with 2x the tensor rank and zero
  // input strides; see the block() method below.
  typedef DSizes<Index, 2 * NumDims> BroadcastDimensions;

  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;

  typedef typename TensorEvaluator<const ArgType, Device>::TensorBlock ArgTensorBlock;

  typedef typename internal::TensorMaterializedBlock<ScalarNoConst, NumDims,
                                                     Layout, Index>
      TensorBlock;
  //===--------------------------------------------------------------------===//
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : isCopy(false), nByOne(false), oneByN(false),
        m_device(device), m_broadcast(op.broadcast()), m_impl(op.expression(), device)
  {
    // The broadcasting op doesn't change the rank of the tensor. One can't
    // broadcast a scalar and store the result in a scalar; reshape the scalar
    // into a one-element tensor of rank >= 1 first, then broadcast that.
    EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    const InputDimensions& input_dims = m_impl.dimensions();
    isCopy = true;
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i] * m_broadcast[i];
      if (m_broadcast[i] != 1) {
        isCopy = false;
      }
    }
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
        m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims-2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
        m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
      }
    }
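    // Worked example (a sketch, not part of the original header): for a
    // column-major 2x3 input broadcast by {2, 3}, m_dimensions is {4, 9},
    // m_inputStrides is {1, 2} and m_outputStrides is {1, 4}.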
    if (input_dims[0] == 1) {
      oneByN = true;
      for (int i = 1; i < NumDims; ++i) {
        if (m_broadcast[i] != 1) {
          oneByN = false;
          break;
        }
      }
    } else if (input_dims[NumDims-1] == 1) {
      nByOne = true;
      for (int i = 0; i < NumDims-1; ++i) {
        if (m_broadcast[i] != 1) {
          nByOne = false;
          break;
        }
      }
    }
    // Handle the special NCHW-like format where the input shape is
    // [1, N..., 1] and the broadcast shape is [N, 1..., N].
    if (!oneByN && !nByOne) {
      if (input_dims[0] == 1 && input_dims[NumDims-1] == 1 && NumDims > 2) {
        nByOne = true;
        oneByN = true;
        for (int i = 1; i < NumDims-1; ++i) {
          if (m_broadcast[i] != 1) {
            nByOne = false;
            oneByN = false;
            break;
          }
        }
      }
    }
  }
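  // Fast-path classification, derived from the checks above (illustrative):
  //   oneByN: input_dims[0] == 1 and only dimension 0 is broadcast,
  //           e.g. a 1x5 input broadcast by {4, 1};
  //   nByOne: input_dims[NumDims-1] == 1 and only the last dimension is
  //           broadcast, e.g. a 5x1 input broadcast by {1, 4};
  //   both flags set marks the [1, N..., 1] special case handled above.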
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }
#ifdef EIGEN_USE_THREADS
  template <typename EvalSubExprsCallback>
  EIGEN_STRONG_INLINE void evalSubExprsIfNeededAsync(
      EvaluatorPointerType, EvalSubExprsCallback done) {
    m_impl.evalSubExprsIfNeededAsync(nullptr, [done](bool) { done(true); });
  }
#endif  // EIGEN_USE_THREADS
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const
  {
    if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
      return m_impl.coeff(0);
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      if (isCopy) {
        return m_impl.coeff(index);
      } else {
        return coeffColMajor(index);
      }
    } else {
      if (isCopy) {
        return m_impl.coeff(index);
      } else {
        return coeffRowMajor(index);
      }
    }
  }
  // TODO: attempt to speed this up; the integer divisions and modulos are slow.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexColMajor(Index index) const {
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[0]);
      }
    }
    return inputIndex;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffColMajor(Index index) const
  {
    return m_impl.coeff(indexColMajor(index));
  }
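  // Worked example (a sketch): with the column-major 2x3 input broadcast by
  // {2, 3} from above (output 4x9, m_outputStrides {1, 4}, m_inputStrides
  // {1, 2}), output index 30 is coordinate (2, 7); the matching input
  // coordinate is (2 % 2, 7 % 3) = (0, 1), i.e. input index 0 + 1 * 2 = 2.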
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index indexRowMajor(Index index) const {
    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    if (internal::index_statically_eq<Broadcast>(NumDims - 1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims - 1]);
      inputIndex += index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims - 1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims - 1] == 0);
      } else {
        inputIndex += (index % m_impl.dimensions()[NumDims - 1]);
      }
    }
    return inputIndex;
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffRowMajor(Index index) const
  {
    return m_impl.coeff(indexRowMajor(index));
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const
  {
    if (internal::is_input_scalar<internal::remove_all_t<InputDimensions>>::value) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(0));
    }
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      if (isCopy) {
#ifdef EIGEN_GPU_COMPILE_PHASE
        // Unaligned loads are used on the GPU path regardless of LoadMode.
        return m_impl.template packet<Unaligned>(index);
#else
        return m_impl.template packet<LoadMode>(index);
#endif
      } else if (oneByN && !nByOne) {
        return packetNByOne<LoadMode>(index);
      } else if (!oneByN && nByOne) {
        return packetOneByN<LoadMode>(index);
      } else if (oneByN && nByOne) {
        return packetOneByNByOne<LoadMode>(index);
      } else {
        return packetColMajor<LoadMode>(index);
      }
    } else {
      if (isCopy) {
#ifdef EIGEN_GPU_COMPILE_PHASE
        return m_impl.template packet<Unaligned>(index);
#else
        return m_impl.template packet<LoadMode>(index);
#endif
      } else if (oneByN && !nByOne) {
        return packetOneByN<LoadMode>(index);
      } else if (!oneByN && nByOne) {
        return packetNByOne<LoadMode>(index);
      } else if (oneByN && nByOne) {
        return packetOneByNByOne<LoadMode>(index);
      } else {
        return packetRowMajor<LoadMode>(index);
      }
    }
  }
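  // Dispatch summary (derived from the branches above): a pure copy forwards
  // to the argument's packet(), the one-by-N / N-by-one fast paths avoid the
  // generic per-dimension div/mod arithmetic, and everything else falls back
  // to packetColMajor() / packetRowMajor().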
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packetOneByNByOne(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    Index startDim, endDim;
    Index inputIndex, outputOffset, batchedIndex;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      startDim = NumDims - 1;
      endDim = 1;
    } else {
      startDim = 0;
      endDim = NumDims - 2;
    }

    batchedIndex = index % m_outputStrides[startDim];
    inputIndex   = batchedIndex / m_outputStrides[endDim];
    outputOffset = batchedIndex % m_outputStrides[endDim];

    if (outputOffset + PacketSize <= m_outputStrides[endDim]) {
      values[0] = m_impl.coeff(inputIndex);
      return internal::pload1<PacketReturnType>(values);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
        if (outputOffset + cur < m_outputStrides[endDim]) {
          values[i] = m_impl.coeff(inputIndex);
        } else {
          ++inputIndex;
          inputIndex = (inputIndex == m_inputStrides[startDim] ? 0 : inputIndex);
          values[i] = m_impl.coeff(inputIndex);
          outputOffset = 0;
          cur = 0;
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }
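  // packetOneByNByOne handles the [1, N..., 1] case: within one run of
  // m_outputStrides[endDim] consecutive output coefficients the same input
  // coefficient repeats, so whole packets can often be filled with pload1.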
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packetOneByN(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
        m_inputStrides[NumDims - 1] : m_inputStrides[0];
    Index inputIndex = index % M;
    if (inputIndex + PacketSize <= M) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        if (inputIndex > M - 1) {
          inputIndex = 0;
        }
        values[i] = m_impl.coeff(inputIndex++);
      }
      return internal::pload<PacketReturnType>(values);
    }
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packetNByOne(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
    Index M = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ?
        m_broadcast[0] : m_broadcast[NumDims - 1];

    Index inputIndex   = index / M;
    Index outputOffset = index % M;
    if (outputOffset + PacketSize <= M) {
      return internal::pset1<PacketReturnType>(m_impl.coeff(inputIndex));
    } else {
      EIGEN_UNROLL_LOOP
      for (int i = 0; i < PacketSize; ++i) {
        if (outputOffset < M) {
          values[i] = m_impl.coeff(inputIndex);
          ++outputOffset;
        } else {
          values[i] = m_impl.coeff(++inputIndex);
          outputOffset = 1;  // next offset
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }
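  // In flattened terms (derived from the two methods above): packetOneByN
  // reads back-to-back copies of the whole input, [v0..vN, v0..vN, ...],
  // while packetNByOne reads each input coefficient repeated M times in a
  // row, [v0, v0, ..., v1, v1, ...].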
  // Ignore the LoadMode and always use unaligned loads, since the alignment
  // cannot be guaranteed at compile time.
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packetColMajor(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = NumDims - 1; i > 0; --i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(0, 1)) {
      eigen_assert(index < m_impl.dimensions()[0]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(0, 1)) {
        eigen_assert(index % m_impl.dimensions()[0] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[0];
      }
    }
    inputIndex += innermostLoc;

    // Load the packet directly from the input when it does not cross the
    // boundary of the innermost input dimension.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[0]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize; ++i) {
        if (innermostLoc + i < m_impl.dimensions()[0]) {
          values[i] = m_impl.coeff(inputIndex + i);
        } else {
          values[i] = coeffColMajor(originalIndex + i);
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }
  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packetRowMajor(Index index) const
  {
    eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());

    const Index originalIndex = index;

    Index inputIndex = 0;
    EIGEN_UNROLL_LOOP
    for (int i = 0; i < NumDims - 1; ++i) {
      const Index idx = index / m_outputStrides[i];
      if (internal::index_statically_eq<Broadcast>(i, 1)) {
        eigen_assert(idx < m_impl.dimensions()[i]);
        inputIndex += idx * m_inputStrides[i];
      } else {
        if (internal::index_statically_eq<InputDimensions>(i, 1)) {
          eigen_assert(idx % m_impl.dimensions()[i] == 0);
        } else {
          inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
        }
      }
      index -= idx * m_outputStrides[i];
    }
    Index innermostLoc;
    if (internal::index_statically_eq<Broadcast>(NumDims-1, 1)) {
      eigen_assert(index < m_impl.dimensions()[NumDims-1]);
      innermostLoc = index;
    } else {
      if (internal::index_statically_eq<InputDimensions>(NumDims-1, 1)) {
        eigen_assert(index % m_impl.dimensions()[NumDims-1] == 0);
        innermostLoc = 0;
      } else {
        innermostLoc = index % m_impl.dimensions()[NumDims-1];
      }
    }
    inputIndex += innermostLoc;

    // Load the packet directly from the input when it does not cross the
    // boundary of the innermost input dimension.
    if (innermostLoc + PacketSize <= m_impl.dimensions()[NumDims-1]) {
      return m_impl.template packet<Unaligned>(inputIndex);
    } else {
      EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[PacketSize];
      values[0] = m_impl.coeff(inputIndex);
      EIGEN_UNROLL_LOOP
      for (int i = 1; i < PacketSize; ++i) {
        if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
          values[i] = m_impl.coeff(inputIndex + i);
        } else {
          values[i] = coeffRowMajor(originalIndex + i);
        }
      }
      return internal::pload<PacketReturnType>(values);
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
  costPerCoeff(bool vectorized) const {
    double compute_cost = TensorOpCost::AddCost<Index>();
    if (!isCopy && NumDims > 0) {
      EIGEN_UNROLL_LOOP
      for (int i = NumDims - 1; i > 0; --i) {
        compute_cost += TensorOpCost::DivCost<Index>();
        if (internal::index_statically_eq<Broadcast>(i, 1)) {
          compute_cost +=
              TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
        } else {
          if (!internal::index_statically_eq<InputDimensions>(i, 1)) {
            compute_cost += TensorOpCost::MulCost<Index>() +
                            TensorOpCost::ModCost<Index>() +
                            TensorOpCost::AddCost<Index>();
          }
        }
        compute_cost +=
            TensorOpCost::MulCost<Index>() + TensorOpCost::AddCost<Index>();
      }
    }
    return m_impl.costPerCoeff(vectorized) +
           TensorOpCost(0, 0, compute_cost, vectorized, PacketSize);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    // Skew the block size towards the first-level cache of the device.
    const size_t target_size = m_device.firstLevelCacheSize();
    return internal::TensorBlockResourceRequirements::merge(
        m_impl.getResourceRequirements(),
        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch,
        bool /*root_of_expr_ast*/ = false) const {
    BlockBroadcastingParams params = blockBroadcastingParams(desc);

    if (params.inner_dim_size == 0 || params.bcast_dim_size == 0) {
      return emptyBlock();
    }

    // Prepare storage for the materialized broadcasting result.
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    ScalarNoConst* materialized_output = block_storage.data();

    // We potentially will need to materialize input blocks.
    size_t materialized_input_size = 0;
    ScalarNoConst* materialized_input = NULL;
    // Initialize the block broadcasting iterator state for the dimensions
    // outside the broadcasting dimension.
    array<BlockBroadcastingIteratorState, NumDims> it;
    int idx = 0;

    for (int i = params.inner_dim_count + 1; i < NumDims; ++i) {
      const Index dim = IsColMajor ? i : NumDims - 1 - i;
      it[idx].size = params.output_dims[dim];
      it[idx].count = 0;
      it[idx].output_stride = m_outputStrides[dim];
      it[idx].output_span = it[idx].output_stride * (it[idx].size - 1);
      idx++;
    }
    // Write output into the beginning of `materialized_output`.
    Index output_offset = 0;

    // Fill the output block by broadcasting along the bcast dimension while
    // iterating over the outer dimensions.
    const Index output_size = NumDims == 0 ? 1 : params.output_dims.TotalSize();

    for (Index num_output_coeffs = 0; num_output_coeffs < output_size;) {
      ScalarNoConst* bcast_output = materialized_output + num_output_coeffs;
      Index bcast_offset = desc.offset() + output_offset;

      // Broadcast along the bcast dimension; returns the number of
      // coefficients written.
      num_output_coeffs += BroadcastBlockAlongBcastDim(
          params, bcast_offset, scratch, bcast_output, &materialized_input,
          &materialized_input_size);
      // Switch to the next outer dimension.
      for (int j = 0; j < idx; ++j) {
        if (++it[j].count < it[j].size) {
          output_offset += it[j].output_stride;
          break;
        }
        it[j].count = 0;
        output_offset -= it[j].output_span;
      }
    }

    return block_storage.AsTensorMaterializedBlock();
  }
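  // Block evaluation strategy (derived from block() and the helpers below):
  // every input dimension is expanded into a pair of dimensions in a
  // 2 * NumDims "bcast block". One half of the pair walks the input with its
  // real stride; the other half has size m_broadcast[dim] and input stride 0,
  // so the same input data is re-read m_broadcast[dim] times while
  // TensorBlockIO::Copy writes it out at the output strides.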
  EIGEN_DEVICE_FUNC EvaluatorPointerType data() const { return NULL; }

  const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

  EIGEN_DEVICE_FUNC Broadcast functor() const { return m_broadcast; }
  static constexpr bool IsColMajor =
      static_cast<int>(Layout) == static_cast<int>(ColMajor);
  struct BlockBroadcastingParams {
    Dimensions input_dims;      // input expression dimensions
    Dimensions output_dims;     // output block sizes
    Dimensions output_strides;  // output block strides

    int inner_dim_count;   // number of inner dimensions with matching size
    int bcast_dim;         // broadcasting dimension index
    Index bcast_dim_size;  // broadcasting dimension size
    Index inner_dim_size;  // inner dimensions size

    // Block sizes and strides for the input block.
    Dimensions input_block_sizes;
    Dimensions input_block_strides;

    // Block sizes and strides for the broadcasted (2 * NumDims) block.
    BroadcastDimensions bcast_block_sizes;
    BroadcastDimensions bcast_block_strides;
    BroadcastDimensions bcast_input_strides;
  };
  struct BlockBroadcastingIteratorState {
    Index size;
    Index count;
    Index output_stride;
    Index output_span;
  };
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlockBroadcastingParams
  blockBroadcastingParams(TensorBlockDesc& desc) const {
    BlockBroadcastingParams params;

    params.input_dims = Dimensions(m_impl.dimensions());

    // Output block sizes and strides.
    params.output_dims = desc.dimensions();
    params.output_strides = internal::strides<Layout>(params.output_dims);

    // Find the broadcasting dimension (the first dimension whose output block
    // size differs from the expression size).
    params.bcast_dim = 0;
    params.bcast_dim_size = 1;
    params.inner_dim_size = 1;

    // Count the inner dimensions that have the same size in the block and in
    // the broadcast expression.
    params.inner_dim_count = 0;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;

      if (params.output_dims[dim] == m_dimensions[dim]) {
        params.inner_dim_size *= params.output_dims[dim];
        ++params.inner_dim_count;
        continue;
      }

      // The first non-matching dimension is the broadcasting dimension.
      eigen_assert(params.output_dims[dim] < m_dimensions[dim]);
      params.bcast_dim = dim;
      params.bcast_dim_size = params.output_dims[dim];
      break;
    }
    // Calculate the input block sizes and strides.
    for (int i = 0; i < params.inner_dim_count; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      params.input_block_sizes[dim] = params.input_dims[dim];
    }
    for (int i = params.inner_dim_count; i < NumDims; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;
      params.input_block_sizes[dim] = 1;
    }
    params.input_block_strides =
        internal::strides<Layout>(params.input_block_sizes);
    // Pair each inner dimension with an extra "broadcast" dimension of size
    // m_broadcast[dim] whose input stride is zero.
    for (int i = 0; i < params.inner_dim_count; ++i) {
      const int dim = IsColMajor ? i : NumDims - i - 1;

      const int copy_dim = IsColMajor ? 2 * i : 2 * NumDims - 2 * i - 1;
      const int broadcast_dim = IsColMajor ? copy_dim + 1 : copy_dim - 1;

      params.bcast_block_sizes[copy_dim] = params.input_dims[dim];
      params.bcast_block_sizes[broadcast_dim] = m_broadcast[dim];
      params.bcast_block_strides[copy_dim] = params.output_strides[dim];
      params.bcast_block_strides[broadcast_dim] =
          params.output_strides[dim] * params.input_dims[dim];
      params.bcast_input_strides[copy_dim] = params.input_block_strides[dim];
      params.bcast_input_strides[broadcast_dim] = 0;
    }
    // The remaining dimensions of the bcast block are trivial.
    for (int i = 2 * params.inner_dim_count; i < 2 * NumDims; ++i) {
      const int dim = IsColMajor ? i : 2 * NumDims - i - 1;
      params.bcast_block_sizes[dim] = 1;
      params.bcast_block_strides[dim] = 0;
      params.bcast_input_strides[dim] = 0;
    }

    return params;
  }
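  // Worked example (a sketch): broadcasting the column-major 2x3 input by
  // {2, 3} with a block covering the whole 4x9 output (inner_dim_count == 2)
  // yields bcast_block_sizes {2, 2, 3, 3}, bcast_input_strides {1, 0, 2, 0}
  // and bcast_block_strides {1, 2, 4, 12}: the zero input strides re-read
  // each input coefficient while it is written at the output strides.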
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index BroadcastBlockAlongBcastDim(
      BlockBroadcastingParams params, Index bcast_offset,
      TensorBlockScratch& scratch, ScalarNoConst* materialized_output,
      ScalarNoConst** materialized_input,
      size_t* materialized_input_size) const {
    if (params.bcast_dim_size == 1) {
      // We just need one block read using the ready-to-go bcast params.
      return BroadcastBlock(
          params.input_block_sizes, params.input_block_strides,
          params.bcast_block_sizes, params.bcast_block_strides,
          params.bcast_input_strides, bcast_offset, 0, scratch,
          materialized_output, materialized_input, materialized_input_size);

    } else if (params.input_dims[params.bcast_dim] == 1) {
      // The broadcast dimension has size one in the input: replicate it with a
      // zero input stride.
      const int broadcast_bcast_dim =
          IsColMajor ? 2 * params.inner_dim_count + 1
                     : 2 * NumDims - 2 * params.inner_dim_count - 2;

      params.bcast_block_sizes[broadcast_bcast_dim] = params.bcast_dim_size;
      params.bcast_input_strides[broadcast_bcast_dim] = 0;
      params.bcast_block_strides[broadcast_bcast_dim] =
          params.output_strides[params.bcast_dim];

      return BroadcastBlock(
          params.input_block_sizes, params.input_block_strides,
          params.bcast_block_sizes, params.bcast_block_strides,
          params.bcast_input_strides, bcast_offset, 0, scratch,
          materialized_output, materialized_input, materialized_input_size);
    } else {
      // Keep track of the total number of coefficients written to the output.
      Index num_output_coeffs = 0;

      // The general case: the output slice along bcast_dim may start and end
      // in the middle of an input period, so split it into (1) a head up to
      // the first multiple of the input size, (2) a body of whole input
      // periods, and (3) a tail after the last multiple.

      // Position of the output block along the broadcast dimension.
      const Index bcast_dim_left_index =
          bcast_offset / m_outputStrides[params.bcast_dim];

      const Index input_bcast_dim_size = params.input_dims[params.bcast_dim];

      // First multiple of the input size at or after bcast_dim_left_index.
      const Index first_multiple =
          divup<Index>(bcast_dim_left_index, input_bcast_dim_size) *
          input_bcast_dim_size;

      if (first_multiple <= bcast_dim_left_index + params.bcast_dim_size) {
        // The last multiple of the input size inside the output slice.
        const Index last_multiple =
            (bcast_dim_left_index + params.bcast_dim_size) /
            input_bcast_dim_size * input_bcast_dim_size;
        const int copy_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count
                       : 2 * NumDims - 2 * params.inner_dim_count - 1;
        const int broadcast_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count + 1
                       : 2 * NumDims - 2 * params.inner_dim_count - 2;

        if (first_multiple > bcast_dim_left_index) {
          const Index head_size = first_multiple - bcast_dim_left_index;
          params.input_block_sizes[params.bcast_dim] = head_size;
          params.bcast_block_sizes[copy_bcast_dim] = head_size;
          params.bcast_input_strides[copy_bcast_dim] =
              params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] =
              params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = 1;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] *
              params.input_dims[params.bcast_dim];

          num_output_coeffs += BroadcastBlock(
              params.input_block_sizes, params.input_block_strides,
              params.bcast_block_sizes, params.bcast_block_strides,
              params.bcast_input_strides, bcast_offset, 0, scratch,
              materialized_output, materialized_input, materialized_input_size);
        }
        if (first_multiple < last_multiple) {
          params.input_block_sizes[params.bcast_dim] = input_bcast_dim_size;
          params.bcast_block_sizes[copy_bcast_dim] = input_bcast_dim_size;
          params.bcast_input_strides[copy_bcast_dim] =
              params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] =
              params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] =
              (last_multiple - first_multiple) / input_bcast_dim_size;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] *
              params.input_dims[params.bcast_dim];
          const Index offset = (first_multiple - bcast_dim_left_index) *
                               m_outputStrides[params.bcast_dim];

          num_output_coeffs += BroadcastBlock(
              params.input_block_sizes, params.input_block_strides,
              params.bcast_block_sizes, params.bcast_block_strides,
              params.bcast_input_strides, bcast_offset, offset, scratch,
              materialized_output, materialized_input, materialized_input_size);
        }
        if (last_multiple < bcast_dim_left_index + params.bcast_dim_size) {
          const Index tail_size =
              bcast_dim_left_index + params.bcast_dim_size - last_multiple;
          params.input_block_sizes[params.bcast_dim] = tail_size;
          params.bcast_block_sizes[copy_bcast_dim] = tail_size;
          params.bcast_input_strides[copy_bcast_dim] =
              params.input_block_strides[params.bcast_dim];
          params.bcast_block_strides[copy_bcast_dim] =
              params.output_strides[params.bcast_dim];
          params.bcast_block_sizes[broadcast_bcast_dim] = 1;
          params.bcast_input_strides[broadcast_bcast_dim] = 0;
          params.bcast_block_strides[broadcast_bcast_dim] =
              params.output_strides[params.bcast_dim] *
              params.input_dims[params.bcast_dim];
          const Index offset = (last_multiple - bcast_dim_left_index) *
                               m_outputStrides[params.bcast_dim];

          num_output_coeffs += BroadcastBlock(
              params.input_block_sizes, params.input_block_strides,
              params.bcast_block_sizes, params.bcast_block_strides,
              params.bcast_input_strides, bcast_offset, offset, scratch,
              materialized_output, materialized_input, materialized_input_size);
        }
      } else {
        // No multiple of the input size falls inside the output slice, so the
        // whole slice is processed with a single read.
        const int copy_bcast_dim =
            IsColMajor ? 2 * params.inner_dim_count
                       : 2 * NumDims - 2 * params.inner_dim_count - 1;
        params.input_block_sizes[params.bcast_dim] = params.bcast_dim_size;
        params.bcast_block_sizes[copy_bcast_dim] = params.bcast_dim_size;
        params.bcast_input_strides[copy_bcast_dim] =
            params.input_block_strides[params.bcast_dim];
        params.bcast_block_strides[copy_bcast_dim] =
            params.output_strides[params.bcast_dim];

        num_output_coeffs += BroadcastBlock(
            params.input_block_sizes, params.input_block_strides,
            params.bcast_block_sizes, params.bcast_block_strides,
            params.bcast_input_strides, bcast_offset, 0, scratch,
            materialized_output, materialized_input, materialized_input_size);
      }

      return num_output_coeffs;
    }
  }
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index BroadcastBlock(
      const Dimensions& input_block_sizes,
      const Dimensions& input_block_strides,
      const BroadcastDimensions& bcast_block_sizes,
      const BroadcastDimensions& bcast_block_strides,
      const BroadcastDimensions& bcast_input_strides, Index bcast_offset,
      Index offset, TensorBlockScratch& scratch,
      ScalarNoConst* materialized_output, ScalarNoConst** materialized_input,
      size_t* materialized_input_size) const {
    // Read the input block that corresponds to this part of the output.
    const Index input_offset = bcast_offset + offset;
    TensorBlockDesc input_desc(
        IsColMajor ? indexColMajor(input_offset) : indexRowMajor(input_offset),
        input_block_sizes);

    ArgTensorBlock input_block = m_impl.block(input_desc, scratch);

    // Materialize the input block into a temporary buffer only if it does not
    // already expose raw data.
    const ScalarNoConst* input_buffer = NULL;

    if (input_block.data() != NULL) {
      // The input block already has raw data; no need to materialize it.
      input_buffer = input_block.data();

    } else {
      // Do a block assignment into a temporary buffer, reusing a previously
      // allocated buffer when it is large enough.
      const size_t input_total_size = input_block_sizes.TotalSize();
      if (*materialized_input == NULL ||
          *materialized_input_size < input_total_size) {
        *materialized_input_size = input_total_size;
        void* mem = scratch.allocate(*materialized_input_size * sizeof(Scalar));
        *materialized_input = static_cast<ScalarNoConst*>(mem);
      }

      typedef internal::TensorBlockAssignment<
          ScalarNoConst, NumDims, typename ArgTensorBlock::XprType, Index>
          TensorBlockAssignment;

      TensorBlockAssignment::Run(
          TensorBlockAssignment::target(input_block_sizes, input_block_strides,
                                        *materialized_input),
          input_block.expr());

      input_buffer = *materialized_input;
    }

    // Copy data from the materialized input block to the materialized output,
    // using the broadcast block strides (strides with zeroes).
    typedef internal::TensorBlockIO<ScalarNoConst, Index, 2 * NumDims, Layout>
        TensorBlockIO;

    typename TensorBlockIO::Src src(bcast_input_strides, input_buffer);
    typename TensorBlockIO::Dst dst(bcast_block_sizes, bcast_block_strides,
                                    materialized_output + offset);

    return TensorBlockIO::Copy(dst, src);
  }
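  // BroadcastBlock reads one input block (materializing it into scratch
  // memory only when the argument block exposes no raw data) and then
  // replicates it into the output with a single strided TensorBlockIO::Copy,
  // relying on the zero entries in bcast_input_strides.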
 protected:
  const Device EIGEN_DEVICE_REF m_device;
  const std::remove_reference_t<Broadcast> m_broadcast;
  Dimensions m_dimensions;
  array<Index, NumDims> m_outputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
};

#endif // EIGEN_CXX11_TENSOR_TENSOR_BROADCASTING_H