Parallelizer.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H

#include "../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

/** \internal */
inline void manage_multi_threading(Action action, int* v)
{
  static int m_maxThreads = -1;
  EIGEN_UNUSED_VARIABLE(m_maxThreads)

  if(action==SetAction)
  {
    eigen_internal_assert(v!=0);
    m_maxThreads = *v;
  }
  else if(action==GetAction)
  {
    eigen_internal_assert(v!=0);
    #ifdef EIGEN_HAS_OPENMP
    if(m_maxThreads>0)
      *v = m_maxThreads;
    else
      *v = omp_get_max_threads();
    #else
    *v = 1;
    #endif
  }
  else
  {
    eigen_internal_assert(false);
  }
}

} // end namespace internal

/** Must be called first when calling Eigen from multiple threads. */
inline void initParallel()
{
  // touch the static state of the thread and cache managers so it is
  // initialized before any threads can race on it
  int nbt;
  internal::manage_multi_threading(GetAction, &nbt);
  std::ptrdiff_t l1, l2, l3;
  internal::manage_caching_sizes(GetAction, &l1, &l2, &l3);
}

/** \returns the max number of threads reserved for Eigen
  * \sa setNbThreads */
inline int nbThreads()
{
  int ret;
  internal::manage_multi_threading(GetAction, &ret);
  return ret;
}

/** Sets the max number of threads reserved for Eigen
  * \sa nbThreads */
inline void setNbThreads(int v)
{
  internal::manage_multi_threading(SetAction, &v);
}

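// A minimal usage sketch (added for illustration, not part of this header):
// how an application typically drives these knobs. It assumes Eigen was
// compiled with OpenMP enabled (e.g. -fopenmp); the matrix sizes are arbitrary.
//
//   #include <Eigen/Dense>
//
//   int main()
//   {
//     Eigen::initParallel();      // required if Eigen is called from multiple threads
//     Eigen::setNbThreads(4);     // cap Eigen's OpenMP threads at 4
//     int n = Eigen::nbThreads(); // query the current cap (here: 4)
//
//     Eigen::MatrixXd a = Eigen::MatrixXd::Random(1024, 1024);
//     Eigen::MatrixXd b = Eigen::MatrixXd::Random(1024, 1024);
//     Eigen::MatrixXd c = a * b;  // the GEMM below may now use up to n threads
//     return n > 0 ? 0 : 1;
//   }
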
namespace internal {

template<typename Index> struct GemmParallelInfo
{

#ifdef EIGEN_HAS_OPENMP
  GemmParallelInfo() : sync(-1), users(0), lhs_start(0), lhs_length(0) {}
  std::atomic<Index> sync;
  std::atomic<int> users;
#else
  GemmParallelInfo() : lhs_start(0), lhs_length(0) {}
#endif

  Index lhs_start;
  Index lhs_length;
};
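// Note (added commentary, not in the original source): each worker thread
// fills one GemmParallelInfo slot with the horizontal lhs panel it packs
// (lhs_start/lhs_length). Under OpenMP, the atomics let threads reuse each
// other's packed panels: roughly, 'sync' publishes packing progress and
// 'users' counts the threads still reading the shared buffer.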

template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
{
  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
  // Without C++11, we have to disable GEMM's parallelization on
  // non-x86 architectures because volatile is not enough for our purpose there.
  // See bug 1572.
#if (! defined(EIGEN_HAS_OPENMP)) || defined(EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(depth);
  EIGEN_UNUSED_VARIABLE(transpose);
  func(0,rows, 0,cols);
#else

  // Dynamically check whether we should enable or disable OpenMP.
  // The conditions are:
  // - the max number of threads we can create is greater than 1
  // - we are not already in a parallel code
  // - the sizes are large enough

  // compute the maximal number of threads from the size of the product:
  // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
  Index size = transpose ? rows : cols;
  Index pb_max_threads = std::max<Index>(1, size / Functor::Traits::nr);

  // compute the maximal number of threads from the total amount of work:
  double work = static_cast<double>(rows) * static_cast<double>(cols) *
                static_cast<double>(depth);
  double kMinTaskSize = 50000; // FIXME improve this heuristic.
  pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, static_cast<Index>( work / kMinTaskSize ) ));
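  // Worked example (added for illustration, not in the original source):
  // for a square 256 x 256 x 256 product with, say, Traits::nr == 4:
  //   size / nr           = 256 / 4 = 64     (at most 64 column panels)
  //   work / kMinTaskSize = 256^3 / 50000 ~= 335
  //   pb_max_threads      = max(1, min(64, 335)) = 64
  // so here the column-panel heuristic, not the total-work one, is the
  // binding limit, and the final count is further capped by nbThreads().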

  // compute the number of threads we are going to use
  Index threads = std::min<Index>(nbThreads(), pb_max_threads);

  // if multi-threading is explicitly disabled, not useful, or if we are already in a parallel session,
  // then abort multi-threading
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
  if((!Condition) || (threads==1) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  Eigen::initParallel();
  func.initParallelSession(threads);

  if(transpose)
    std::swap(rows,cols);

  ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);

  #pragma omp parallel num_threads(threads)
  {
    Index i = omp_get_thread_num();
    // Note that the actual number of threads might be lower than the requested number.
    Index actual_threads = omp_get_num_threads();

    // each thread gets one strip of rows (the lhs panel it packs) and one strip
    // of columns; blockCols is rounded down to a multiple of 4 and blockRows to
    // a multiple of the kernel's mr (a worked example follows this block)
    Index blockCols = (cols / actual_threads) & ~Index(0x3);
    Index blockRows = (rows / actual_threads);
    blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;

    Index r0 = i*blockRows;
    Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;

    Index c0 = i*blockCols;
    Index actualBlockCols = (i+1==actual_threads) ? cols-c0 : blockCols;

    info[i].lhs_start = r0;
    info[i].lhs_length = actualBlockRows;

    if(transpose) func(c0, actualBlockCols, 0, rows, info);
    else          func(0, rows, c0, actualBlockCols, info);
  }
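  // Worked splitting example (added for illustration, not in the original
  // source): rows == cols == 1000, actual_threads == 4, and, say, mr == 8:
  //   blockCols = (1000 / 4) & ~3      = 248
  //   blockRows = ((1000 / 4) / 8) * 8 = 248
  // threads 0..2 each take a strip of 248, and the last thread absorbs the
  // remainder: 1000 - 3*248 = 256 rows and columns.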
#endif
}

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_PARALLELIZER_H