10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
25 template<
typename Axis,
typename LhsXprType,
typename RhsXprType>
26 struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >
29 typedef typename promote_storage_type<
typename LhsXprType::Scalar,
30 typename RhsXprType::Scalar>::ret Scalar;
31 typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
33 typedef typename promote_index_type<typename traits<LhsXprType>::Index,
35 typedef typename LhsXprType::Nested LhsNested;
36 typedef typename RhsXprType::Nested RhsNested;
37 typedef std::remove_reference_t<LhsNested> LhsNested_;
38 typedef std::remove_reference_t<RhsNested> RhsNested_;
42 typedef std::conditional_t<Pointer_type_promotion<typename LhsXprType::Scalar, Scalar>::val,
46 template<
typename Axis,
typename LhsXprType,
typename RhsXprType>
47 struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>,
Eigen::Dense>
49 typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type;
52 template<
typename Axis,
typename LhsXprType,
typename RhsXprType>
53 struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type>
55 typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type;
61 template<
typename Axis,
typename LhsXprType,
typename RhsXprType>
66 typedef typename internal::traits<TensorConcatenationOp>::Scalar
Scalar;
67 typedef typename internal::traits<TensorConcatenationOp>::StorageKind
StorageKind;
68 typedef typename internal::traits<TensorConcatenationOp>::Index
Index;
69 typedef typename internal::nested<TensorConcatenationOp>::type
Nested;
70 typedef typename internal::promote_storage_type<
typename LhsXprType::CoeffReturnType,
96 template<
typename Axis,
typename LeftArgType,
typename RightArgType,
typename Device>
101 static constexpr
int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
102 static constexpr
int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
125 : m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis())
132 const Dimensions& lhs_dims = m_leftImpl.dimensions();
133 const Dimensions& rhs_dims = m_rightImpl.dimensions();
136 for (;
i < m_axis; ++
i) {
139 m_dimensions[
i] = lhs_dims[
i];
143 m_dimensions[
i] = lhs_dims[
i] + rhs_dims[
i];
144 for (++
i;
i < NumDims; ++
i) {
147 m_dimensions[
i] = lhs_dims[
i];
152 m_leftStrides[0] = 1;
153 m_rightStrides[0] = 1;
154 m_outputStrides[0] = 1;
156 for (
int j = 1;
j < NumDims; ++
j) {
157 m_leftStrides[
j] = m_leftStrides[
j-1] * lhs_dims[
j-1];
158 m_rightStrides[
j] = m_rightStrides[
j-1] * rhs_dims[
j-1];
159 m_outputStrides[
j] = m_outputStrides[
j-1] * m_dimensions[
j-1];
162 m_leftStrides[NumDims - 1] = 1;
163 m_rightStrides[NumDims - 1] = 1;
164 m_outputStrides[NumDims - 1] = 1;
166 for (
int j = NumDims - 2;
j >= 0; --
j) {
167 m_leftStrides[
j] = m_leftStrides[
j+1] * lhs_dims[
j+1];
168 m_rightStrides[
j] = m_rightStrides[
j+1] * rhs_dims[
j+1];
169 m_outputStrides[
j] = m_outputStrides[
j+1] * m_dimensions[
j+1];
179 m_leftImpl.evalSubExprsIfNeeded(NULL);
180 m_rightImpl.evalSubExprsIfNeeded(NULL);
186 m_leftImpl.cleanup();
187 m_rightImpl.cleanup();
197 for (
int i = NumDims - 1;
i > 0; --
i) {
198 subs[
i] = index / m_outputStrides[
i];
199 index -= subs[
i] * m_outputStrides[
i];
203 for (
int i = 0;
i < NumDims - 1; ++
i) {
204 subs[
i] = index / m_outputStrides[
i];
205 index -= subs[
i] * m_outputStrides[
i];
207 subs[NumDims - 1] = index;
210 const Dimensions& left_dims = m_leftImpl.dimensions();
211 if (subs[m_axis] < left_dims[m_axis]) {
214 left_index = subs[0];
216 for (
int i = 1;
i < NumDims; ++
i) {
217 left_index += (subs[
i] % left_dims[
i]) * m_leftStrides[
i];
220 left_index = subs[NumDims - 1];
222 for (
int i = NumDims - 2;
i >= 0; --
i) {
223 left_index += (subs[
i] % left_dims[
i]) * m_leftStrides[
i];
226 return m_leftImpl.coeff(left_index);
228 subs[m_axis] -= left_dims[m_axis];
229 const Dimensions& right_dims = m_rightImpl.dimensions();
232 right_index = subs[0];
234 for (
int i = 1;
i < NumDims; ++
i) {
235 right_index += (subs[
i] % right_dims[
i]) * m_rightStrides[
i];
238 right_index = subs[NumDims - 1];
240 for (
int i = NumDims - 2;
i >= 0; --
i) {
241 right_index += (subs[
i] % right_dims[
i]) * m_rightStrides[
i];
244 return m_rightImpl.coeff(right_index);
249 template<
int LoadMode>
258 for (
int i = 0;
i < packetSize; ++
i) {
267 const double compute_cost = NumDims * (2 * TensorOpCost::AddCost<Index>() +
268 2 * TensorOpCost::MulCost<Index>() +
269 TensorOpCost::DivCost<Index>() +
270 TensorOpCost::ModCost<Index>());
271 const double lhs_size = m_leftImpl.dimensions().TotalSize();
272 const double rhs_size = m_rightImpl.dimensions().TotalSize();
273 return (lhs_size / (lhs_size + rhs_size)) *
274 m_leftImpl.costPerCoeff(vectorized) +
275 (rhs_size / (lhs_size + rhs_size)) *
276 m_rightImpl.costPerCoeff(vectorized) +
293 template<
typename Axis,
typename LeftArgType,
typename RightArgType,
typename Device>
295 :
public TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
330 for (
int i = Base::NumDims - 1;
i > 0; --
i) {
331 subs[
i] = index / this->m_outputStrides[
i];
332 index -= subs[
i] * this->m_outputStrides[
i];
336 const Dimensions& left_dims = this->m_leftImpl.dimensions();
337 if (subs[this->m_axis] < left_dims[this->m_axis]) {
338 Index left_index = subs[0];
339 for (
int i = 1;
i < Base::NumDims; ++
i) {
340 left_index += (subs[
i] % left_dims[
i]) * this->m_leftStrides[
i];
342 return this->m_leftImpl.coeffRef(left_index);
344 subs[this->m_axis] -= left_dims[this->m_axis];
345 const Dimensions& right_dims = this->m_rightImpl.dimensions();
346 Index right_index = subs[0];
347 for (
int i = 1;
i < Base::NumDims; ++
i) {
348 right_index += (subs[
i] % right_dims[
i]) * this->m_rightStrides[
i];
350 return this->m_rightImpl.coeffRef(right_index);
362 internal::pstore<CoeffReturnType, PacketReturnType>(values,
x);
363 for (
int i = 0;
i < packetSize; ++
i) {
#define EIGEN_UNROLL_LOOP
#define EIGEN_DEVICE_FUNC
#define EIGEN_STATIC_ASSERT(X, MSG)
#define EIGEN_TENSOR_INHERIT_ASSIGNMENT_OPERATORS(Derived)
Tensor concatenation class.
internal::traits< TensorConcatenationOp >::StorageKind StorageKind
const internal::remove_all_t< typename RhsXprType::Nested > & rhsExpression() const
TensorBase< TensorConcatenationOp< Axis, LhsXprType, RhsXprType >, WriteAccessors > Base
internal::nested< TensorConcatenationOp >::type Nested
internal::traits< TensorConcatenationOp >::Index Index
RhsXprType::Nested m_rhs_xpr
NumTraits< Scalar >::Real RealScalar
const Axis & axis() const
TensorConcatenationOp(const LhsXprType &lhs, const RhsXprType &rhs, Axis axis)
const internal::remove_all_t< typename LhsXprType::Nested > & lhsExpression() const
internal::traits< TensorConcatenationOp >::Scalar Scalar
internal::promote_storage_type< typename LhsXprType::CoeffReturnType, typename RhsXprType::CoeffReturnType >::ret CoeffReturnType
LhsXprType::Nested m_lhs_xpr
typename remove_all< T >::type remove_all_t
: TensorContractionSycl.h, provides various tensor contraction kernels for the SYCL backend
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
internal::packet_traits< Scalar >::type type
TensorConcatenationOp< Axis, LeftArgType, RightArgType > XprType
internal::TensorBlockNotImplemented TensorBlock
XprType::CoeffReturnType CoeffReturnType
PacketType< CoeffReturnType, Device >::type PacketReturnType
TensorEvaluator(XprType &op, const Device &device)
void writePacket(Index index, const PacketReturnType &x) const
Base::Dimensions Dimensions
TensorEvaluator< const TensorConcatenationOp< Axis, LeftArgType, RightArgType >, Device > Base
CoeffReturnType & coeffRef(Index index) const
TensorOpCost costPerCoeff(bool vectorized) const
bool evalSubExprsIfNeeded(EvaluatorPointerType)
TensorEvaluator(const XprType &op, const Device &device)
TensorEvaluator< RightArgType, Device > m_rightImpl
PacketType< CoeffReturnType, Device >::type PacketReturnType
const Dimensions & dimensions() const
TensorEvaluator< LeftArgType, Device > m_leftImpl
array< Index, NumDims > m_rightStrides
CoeffReturnType coeff(Index index) const
StorageMemory< CoeffReturnType, Device > Storage
array< Index, NumDims > m_leftStrides
array< Index, NumDims > m_outputStrides
EvaluatorPointerType data() const
TensorConcatenationOp< Axis, LeftArgType, RightArgType > XprType
internal::TensorBlockNotImplemented TensorBlock
XprType::CoeffReturnType CoeffReturnType
Storage::Type EvaluatorPointerType
PacketReturnType packet(Index index) const
DSizes< Index, NumDims > Dimensions
A cost model used to limit the number of threads used for evaluating a tensor expression.
const Dimensions & dimensions() const
static constexpr int Layout
CoeffReturnType coeff(Index index) const
CoeffReturnType & coeffRef(Index index) const