#ifndef EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
#define EIGEN_CXX11_TENSOR_TENSOR_GENERATOR_H
template <typename Generator, typename XprType>
struct traits<TensorGeneratorOp<Generator, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef std::remove_reference_t<Nested> Nested_;
  static constexpr int NumDimensions = XprTraits::NumDimensions;
  static constexpr int Layout = XprTraits::Layout;
  typedef typename XprTraits::PointerType PointerType;
};
template <typename Generator, typename XprType>
struct eval<TensorGeneratorOp<Generator, XprType>, Eigen::Dense>
{
  typedef const TensorGeneratorOp<Generator, XprType>& type;
};
template <typename Generator, typename XprType>
struct nested<TensorGeneratorOp<Generator, XprType>, 1, typename eval<TensorGeneratorOp<Generator, XprType> >::type>
{
  typedef TensorGeneratorOp<Generator, XprType> type;
};
template <typename Generator, typename XprType>
class TensorGeneratorOp : public TensorBase<TensorGeneratorOp<Generator, XprType>, ReadOnlyAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorGeneratorOp>::Scalar Scalar;
    typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorGeneratorOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorGeneratorOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorGeneratorOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
    TensorGeneratorOp(const XprType& expr, const Generator& generator)
        : m_xpr(expr), m_generator(generator) {}

    EIGEN_DEVICE_FUNC const Generator& generator() const { return m_generator; }
    EIGEN_DEVICE_FUNC const internal::remove_all_t<typename XprType::Nested>& expression() const { return m_xpr; }

  protected:
    typename XprType::Nested m_xpr;
    const Generator m_generator;
};
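// Usage sketch (illustrative only; the generator type, element type and sizes
// below are hypothetical). A generator is any functor callable with the
// per-coefficient coordinates; a TensorGeneratorOp is normally obtained via
// the Tensor::generate() API rather than constructed directly:
//
//   struct IdentityGenerator {
//     float operator()(const Eigen::array<Eigen::Index, 2>& coords) const {
//       return coords[0] == coords[1] ? 1.0f : 0.0f;
//     }
//   };
//
//   Eigen::Tensor<float, 2> input(4, 4);
//   Eigen::Tensor<float, 2> eye = input.generate(IdentityGenerator());
//
// Only the shape of `input` matters: every coefficient of the result is
// produced by calling the generator with that coefficient's coordinates.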
// Eval as rvalue
template <typename Generator, typename ArgType, typename Device>
struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
{
  typedef TensorGeneratorOp<Generator, ArgType> XprType;
  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions Dimensions;
  static constexpr int NumDims = internal::array_size<Dimensions>::value;
  static constexpr int Layout = TensorEvaluator<ArgType, Device>::Layout;
  enum {
    PreferBlockAccess = true,
  };
  typedef internal::TensorBlockDescriptor<NumDims, Index> TensorBlockDesc;
  typedef internal::TensorBlockScratchAllocator<Device> TensorBlockScratch;
  typedef typename internal::TensorMaterializedBlock<CoeffReturnType, NumDims,
                                                     Layout, Index> TensorBlock;
  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_device(device), m_generator(op.generator())
  {
    TensorEvaluator<ArgType, Device> argImpl(op.expression(), device);
    m_dimensions = argImpl.dimensions();

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
      }
    }
  }
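  // Stride layout example (illustrative values): for m_dimensions = {2, 3, 4}
  // the column-major branch yields m_strides = {1, 2, 6}, while the row-major
  // branch yields m_strides = {12, 4, 1}; in both cases the inner-most
  // (fastest varying) dimension has stride 1.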
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    array<Index, NumDims> coords;
    extract_coordinates(index, coords);
    return m_generator(coords);
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = PacketType<CoeffReturnType, Device>::size;
    EIGEN_ALIGN_MAX std::remove_const_t<CoeffReturnType> values[packetSize];
    for (int i = 0; i < packetSize; ++i) {
      values[i] = coeff(index + i);
    }
    return internal::pload<PacketReturnType>(values);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  internal::TensorBlockResourceRequirements getResourceRequirements() const {
    const size_t target_size = m_device.firstLevelCacheSize();
    return internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size);
  }
  struct BlockIteratorState {
    Index stride, span, size, count;
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
  block(TensorBlockDesc& desc, TensorBlockScratch& scratch, bool = false) const {
    static const bool is_col_major =
        static_cast<int>(Layout) == static_cast<int>(ColMajor);

    // Spatial coordinates of the first block element.
    array<Index, NumDims> coords;
    extract_coordinates(desc.offset(), coords);
    array<Index, NumDims> initial_coords = coords;

    // Offset into the output block buffer.
    Index offset = 0;

    // Initialize the output block iterator state. Dimensions in this array are
    // always ordered inner-most to outer-most (column-major order).
    array<BlockIteratorState, NumDims> it;
    for (int i = 0; i < NumDims; ++i) {
      const int dim = is_col_major ? i : NumDims - 1 - i;
      it[i].size = desc.dimension(dim);
      it[i].stride = i == 0 ? 1 : (it[i - 1].size * it[i - 1].stride);
      it[i].span = it[i].stride * (it[i].size - 1);
      it[i].count = 0;
    }
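    // Illustrative values (hypothetical block shape): for a column-major block
    // of dimensions {4, 3} the loop above produces
    //   it[0] = { stride: 1, size: 4, span: 3, count: 0 }
    //   it[1] = { stride: 4, size: 3, span: 8, count: 0 }
    // so the inner-most dimension always advances with stride 1 in the output
    // buffer.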
    const typename TensorBlock::Storage block_storage =
        TensorBlock::prepareStorage(desc, scratch);
    CoeffReturnType* block_buffer = block_storage.data();
    static const int packet_size = PacketType<CoeffReturnType, Device>::size;
    static const int inner_dim = is_col_major ? 0 : NumDims - 1;
    const Index inner_dim_size = it[0].size;
    const Index inner_dim_vectorized = inner_dim_size - packet_size;
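    // Example of the split below (hypothetical sizes): with inner_dim_size = 10
    // and packet_size = 4, inner_dim_vectorized = 6, so the first inner loop
    // runs for i = 0 and i = 4 (coefficients 0..7) and the scalar tail loop
    // handles i = 8 and i = 9.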
    while (it[NumDims - 1].count < it[NumDims - 1].size) {
      Index i = 0;
      // Generate data for the vectorized part of the inner-most dimension.
      for (; i <= inner_dim_vectorized; i += packet_size) {
        for (Index j = 0; j < packet_size; ++j) {
          array<Index, NumDims> j_coords = coords;  // break loop dependence
          j_coords[inner_dim] += j;
          *(block_buffer + offset + i + j) = m_generator(j_coords);
        }
        coords[inner_dim] += packet_size;
      }
      // Generate the remaining coefficients of the inner-most dimension.
      for (; i < inner_dim_size; ++i) {
        *(block_buffer + offset + i) = m_generator(coords);
        coords[inner_dim]++;
      }
      coords[inner_dim] = initial_coords[inner_dim];
      if (NumDims == 1) break;
      // Update offset.
      for (i = 1; i < NumDims; ++i) {
        if (++it[i].count < it[i].size) {
          offset += it[i].stride;
          coords[is_col_major ? i : NumDims - 1 - i]++;
          break;
        }
        if (i != NumDims - 1) it[i].count = 0;
        coords[is_col_major ? i : NumDims - 1 - i] =
            initial_coords[is_col_major ? i : NumDims - 1 - i];
        offset -= it[i].span;
      }
    }
    return block_storage.AsTensorMaterializedBlock();
  }
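  // Traversal sketch for the {4, 3} block from the example above (illustrative):
  // each pass of the while loop fills one inner-most strip of 4 coefficients at
  // offsets 0..3, 4..7 and 8..11; between strips the update loop advances
  // it[1].count, adds it[1].stride (= 4) to the offset and bumps the outer
  // coordinate, until it[1].count reaches it[1].size (= 3) and the while loop
  // terminates.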
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
    return TensorOpCost(0, 0, TensorOpCost::AddCost<Scalar>() +
                              TensorOpCost::MulCost<Scalar>());
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void extract_coordinates(Index index, array<Index, NumDims>& coords) const {
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[0] = index;
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fast_strides[i];
        index -= idx * m_strides[i];
        coords[i] = idx;
      }
      coords[NumDims - 1] = index;
    }
  }
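  // Worked example (illustrative): with column-major dims {2, 3, 4} the strides
  // are {1, 2, 6}. For index = 17: i = 2 gives idx = 17 / 6 = 2 (remainder 5),
  // i = 1 gives idx = 5 / 2 = 2 (remainder 1), so coords = {1, 2, 2}, and
  // indeed 1*1 + 2*2 + 2*6 = 17.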