Eigen::TensorCostModel< Device > Class Template Reference

Static Public Member Functions

static int numThreads (double output_size, const TensorOpCost &cost_per_coeff, int max_threads)
 
static double taskSize (double output_size, const TensorOpCost &cost_per_coeff)
 
static double totalCost (double output_size, const TensorOpCost &cost_per_coeff)
 

Static Public Attributes

static const int kDeviceCyclesPerComputeCycle
 
static const int kPerThreadCycles
 
static const int kStartupCycles
 
static const int kTaskSize
 

Detailed Description

template<typename Device>
class Eigen::TensorCostModel< Device >

Definition at line 163 of file TensorCostModel.h.

Member Function Documentation

◆ numThreads()

template<typename Device >
static int Eigen::TensorCostModel< Device >::numThreads ( double  output_size,
const TensorOpCost cost_per_coeff,
int  max_threads 
)
inlinestatic

Definition at line 176 of file TensorCostModel.h.

177  {
178  double cost = totalCost(output_size, cost_per_coeff);
179  double threads = (cost - kStartupCycles) / kPerThreadCycles + 0.9;
180  // Make sure we don't invoke undefined behavior when we convert to an int.
181  threads = numext::mini<double>(threads, GenericNumTraits<int>::highest());
182  return numext::mini(max_threads,
183  numext::maxi<int>(1, static_cast<int>(threads)));
184  }
static double totalCost(double output_size, const TensorOpCost &cost_per_coeff)
static const int kPerThreadCycles
static const int kStartupCycles
EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)

◆ taskSize()

template<typename Device >
static double Eigen::TensorCostModel< Device >::taskSize ( double  output_size,
const TensorOpCost cost_per_coeff 
)
inlinestatic

Definition at line 189 of file TensorCostModel.h.

190  {
191  return totalCost(output_size, cost_per_coeff) / kTaskSize;
192  }
static const int kTaskSize

◆ totalCost()

template<typename Device >
static double Eigen::TensorCostModel< Device >::totalCost ( double  output_size,
const TensorOpCost cost_per_coeff 
)
inlinestatic

Definition at line 194 of file TensorCostModel.h.

195  {
196  // Cost of memory fetches from L2 cache. 64 is typical cache line size.
197  // 11 is L2 cache latency on Haswell.
198  // We don't know whether data is in L1, L2 or L3. But we are most interested
199  // in single-threaded computational time around 100us-10ms (smaller time
200  // is too small for parallelization, larger time is not interesting
201  // either because we are probably using all available threads already).
202  // And for the target time range, L2 seems to be what matters. Data set
203  // fitting into L1 is too small to take noticeable time. Data set fitting
204  // only into L3 presumably will take more than 10ms to load and process.
205  const double kLoadCycles = 1.0 / 64 * 11;
206  const double kStoreCycles = 1.0 / 64 * 11;
207  // Scaling from Eigen compute cost to device cycles.
208  return output_size *
209  cost_per_coeff.total_cost(kLoadCycles, kStoreCycles,
210  kDeviceCyclesPerComputeCycle);
211  }
static const int kDeviceCyclesPerComputeCycle

Member Data Documentation

◆ kDeviceCyclesPerComputeCycle

template<typename Device >
const int Eigen::TensorCostModel< Device >::kDeviceCyclesPerComputeCycle
static

Definition at line 166 of file TensorCostModel.h.

◆ kPerThreadCycles

template<typename Device >
const int Eigen::TensorCostModel< Device >::kPerThreadCycles
static

Definition at line 170 of file TensorCostModel.h.

◆ kStartupCycles

template<typename Device >
const int Eigen::TensorCostModel< Device >::kStartupCycles
static

Definition at line 169 of file TensorCostModel.h.

◆ kTaskSize

template<typename Device >
const int Eigen::TensorCostModel< Device >::kTaskSize
static

Definition at line 171 of file TensorCostModel.h.


The documentation for this class was generated from the following file: