// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include "main.h"
#include <iostream>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
class TestAllocator :
public Allocator {
public :
~TestAllocator() EIGEN_OVERRIDE {}
EIGEN_DEVICE_FUNC
void * allocate(size_t num_bytes)
const EIGEN_OVERRIDE {
const_cast <TestAllocator*>(
this )->alloc_count_++;
return internal::aligned_malloc(num_bytes);
}
EIGEN_DEVICE_FUNC
void deallocate(
void * buffer)
const EIGEN_OVERRIDE {
const_cast <TestAllocator*>(
this )->dealloc_count_++;
internal::aligned_free(buffer);
}
int alloc_count()
const {
return alloc_count_; }
int dealloc_count()
const {
return dealloc_count_; }
private :
int alloc_count_ =
0 ;
int dealloc_count_ =
0 ;
};
void test_multithread_elementwise()
{
Tensor<
float ,
3 > in1(
200 ,
30 ,
70 );
Tensor<
float ,
3 > in2(
200 ,
30 ,
70 );
Tensor<
double ,
3 > out(
200 ,
30 ,
70 );
in1.setRandom();
in2.setRandom();
Eigen::ThreadPool tp(internal::random<
int >(
3 ,
11 ));
Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<
int >(
3 ,
11 ));
out.device(thread_pool_device) = (in1 + in2 *
3 .
14 f).cast<
double >();
for (
int i =
0 ; i <
200 ; ++i) {
for (
int j =
0 ; j <
30 ; ++j) {
for (
int k =
0 ; k <
70 ; ++k) {
VERIFY_IS_APPROX(out(i, j, k),
static_cast <
double >(in1(i, j, k) + in2(i, j, k) *
3 .
14 f));
}
}
}
}
void test_async_multithread_elementwise()
{
Tensor<
float ,
3 > in1(
200 ,
30 ,
70 );
Tensor<
float ,
3 > in2(
200 ,
30 ,
70 );
Tensor<
double ,
3 > out(
200 ,
30 ,
70 );
in1.setRandom();
in2.setRandom();
Eigen::ThreadPool tp(internal::random<
int >(
3 ,
11 ));
Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<
int >(
3 ,
11 ));
Eigen::Barrier b(
1 );
out.device(thread_pool_device, [&b]() { b.Notify(); }) = (in1 + in2 *
3 .
14 f).cast<
double >();
b.Wait();
for (
int i =
0 ; i <
200 ; ++i) {
for (
int j =
0 ; j <
30 ; ++j) {
for (
int k =
0 ; k <
70 ; ++k) {
VERIFY_IS_APPROX(out(i, j, k),
static_cast <
double >(in1(i, j, k) + in2(i, j, k) *
3 .
14 f));
}
}
}
}
void test_multithread_compound_assignment()
{
Tensor<
float ,
3 > in1(
2 ,
3 ,
7 );
Tensor<
float ,
3 > in2(
2 ,
3 ,
7 );
Tensor<
float ,
3 > out(
2 ,
3 ,
7 );
in1.setRandom();
in2.setRandom();
Eigen::ThreadPool tp(internal::random<
int >(
3 ,
11 ));
Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<
int >(
3 ,
11 ));
out.device(thread_pool_device) = in1;
out.device(thread_pool_device) += in2 *
3 .
14 f;
for (
int i =
0 ; i <
2 ; ++i) {
for (
int j =
0 ; j <
3 ; ++j) {
for (
int k =
0 ; k <
7 ; ++k) {
VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) *
3 .
14 f);
}
}
}
}
template <
int DataLayout>
void test_multithread_contraction()
{
Tensor<
float ,
4 , DataLayout> t_left(
30 ,
50 ,
37 ,
31 );
Tensor<
float ,
5 , DataLayout> t_right(
37 ,
31 ,
70 ,
2 ,
10 );
Tensor<
float ,
5 , DataLayout> t_result(
30 ,
50 ,
70 ,
2 ,
10 );
t_left.setRandom();
t_right.setRandom();
// this contraction should be equivalent to a single matrix multiplication
typedef Tensor<
float ,
1 >::DimensionPair DimPair;
Eigen::array<DimPair,
2 > dims({{DimPair(
2 ,
0 ), DimPair(
3 ,
1 )}});
typedef Map<Matrix<
float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(),
1500 ,
1147 );
MapXf m_right(t_right.data(),
1147 ,
1400 );
Matrix<
float , Dynamic, Dynamic, DataLayout> m_result(
1500 ,
1400 );
Eigen::ThreadPool tp(
4 );
Eigen::ThreadPoolDevice thread_pool_device(&tp,
4 );
// compute results by separate methods
t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
m_result = m_left * m_right;
for (ptrdiff_t i =
0 ; i < t_result.size(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]);
if (fabsf(t_result(i) - m_result(i)) <
1 e-
4 f) {
continue ;
}
if (Eigen::internal::isApprox(t_result(i), m_result(i),
1 e-
4 f)) {
continue ;
}
std::cout <<
"mismatch detected at index " << i <<
": " << t_result(i)
<<
" vs " << m_result(i) << std::endl;
assert(
false );
}
}
template <
int DataLayout>
void test_contraction_corner_cases()
{
Tensor<
float ,
2 , DataLayout> t_left(
32 ,
500 );
Tensor<
float ,
2 , DataLayout> t_right(
32 ,
28 *
28 );
Tensor<
float ,
2 , DataLayout> t_result(
500 ,
28 *
28 );
t_left = (t_left.constant(-
0 .
5 f) + t_left.random()) *
2 .
0 f;
t_right = (t_right.constant(-
0 .
6 f) + t_right.random()) *
2 .
0 f;
t_result = t_result.constant(NAN);
// this contraction should be equivalent to a single matrix multiplication
typedef Tensor<
float ,
1 >::DimensionPair DimPair;
Eigen::array<DimPair,
1 > dims{{DimPair(
0 ,
0 )}};
typedef Map<Matrix<
float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(),
32 ,
500 );
MapXf m_right(t_right.data(),
32 ,
28 *
28 );
Matrix<
float , Dynamic, Dynamic, DataLayout> m_result(
500 ,
28 *
28 );
Eigen::ThreadPool tp(
12 );
Eigen::ThreadPoolDevice thread_pool_device(&tp,
12 );
// compute results by separate methods
t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i =
0 ; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
if (fabsf(t_result.data()[i] - m_result.data()[i]) >=
1 e-
4 f) {
std::cout <<
"mismatch detected at index " << i <<
" : " << t_result.data()[i] <<
" vs " << m_result.data()[i] << st
d::endl;
assert(false );
}
}
t_left.resize(32 , 1 );
t_left = (t_left.constant(-0 .5 f) + t_left.random()) * 2 .0 f;
t_result.resize (1 , 28 *28 );
t_result = t_result.constant(NAN);
t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
new (&m_left) MapXf(t_left.data(), 32 , 1 );
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i = 0 ; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1 e-4 f) {
std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false );
}
}
t_left.resize(32 , 500 );
t_right.resize(32 , 4 );
t_left = (t_left.constant(-0 .5 f) + t_left.random()) * 2 .0 f;
t_right = (t_right.constant(-0 .6 f) + t_right.random()) * 2 .0 f;
t_result.resize (500 , 4 );
t_result = t_result.constant(NAN);
t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
new (&m_left) MapXf(t_left.data(), 32 , 500 );
new (&m_right) MapXf(t_right.data(), 32 , 4 );
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i = 0 ; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1 e-4 f) {
std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false );
}
}
t_left.resize(32 , 1 );
t_right.resize(32 , 4 );
t_left = (t_left.constant(-0 .5 f) + t_left.random()) * 2 .0 f;
t_right = (t_right.constant(-0 .6 f) + t_right.random()) * 2 .0 f;
t_result.resize (1 , 4 );
t_result = t_result.constant(NAN);
t_result.device(thread_pool_device) = t_left.contract(t_right, dims);
new (&m_left) MapXf(t_left.data(), 32 , 1 );
new (&m_right) MapXf(t_right.data(), 32 , 4 );
m_result = m_left.transpose() * m_right;
for (ptrdiff_t i = 0 ; i < t_result.size(); i++) {
assert(!(numext::isnan)(t_result.data()[i]));
if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1 e-4 f) {
std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl;
assert(false );
}
}
}
template <int DataLayout>
void test_multithread_contraction_agrees_with_singlethread() {
int contract_size = internal::random<int >(1 , 5000 );
Tensor<float , 3 , DataLayout> left(internal::random<int >(1 , 80 ),
contract_size,
internal::random<int >(1 , 100 ));
Tensor<float , 4 , DataLayout> right(internal::random<int >(1 , 25 ),
internal::random<int >(1 , 37 ),
contract_size,
internal::random<int >(1 , 51 ));
left.setRandom();
right.setRandom();
// add constants to shift values away from 0 for more precision
left += left.constant(1 .5 f);
right += right.constant(1 .5 f);
typedef Tensor<float , 1 >::DimensionPair DimPair;
Eigen::array<DimPair, 1 > dims({{DimPair(1 , 2 )}});
Eigen::ThreadPool tp(internal::random<int >(2 , 11 ));
Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int >(2 , 11 ));
Tensor<float , 5 , DataLayout> st_result;
st_result = left.contract(right, dims);
Tensor<float , 5 , DataLayout> tp_result(st_result.dimensions());
tp_result.device(thread_pool_device) = left.contract(right, dims);
VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
for (ptrdiff_t i = 0 ; i < st_result.size(); i++) {
// if both of the values are very small, then do nothing (because the test will fail
// due to numerical precision issues when values are small)
if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1 e-4 f) {
VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
}
}
}
// Apply Sqrt to all output elements.
struct SqrtOutputKernel {
template <typename Index, typename Scalar>
EIGEN_ALWAYS_INLINE void operator ()(
const internal::blas_data_mapper<Scalar, Index, ColMajor>& output_mapper,
const TensorContractionParams&, Index, Index, Index num_rows,
Index num_cols) const {
for (int i = 0 ; i < num_rows; ++i) {
for (int j = 0 ; j < num_cols; ++j) {
output_mapper(i, j) = std::sqrt(output_mapper(i, j));
}
}
}
};
template <int DataLayout>
static void test_multithread_contraction_with_output_kernel() {
typedef Tensor<float , 1 >::DimensionPair DimPair;
const int num_threads = internal::random<int >(2 , 11 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads);
Tensor<float , 4 , DataLayout> t_left(30 , 50 , 8 , 31 );
Tensor<float , 5 , DataLayout> t_right(8 , 31 , 7 , 20 , 10 );
Tensor<float , 5 , DataLayout> t_result(30 , 50 , 7 , 20 , 10 );
t_left.setRandom();
t_right.setRandom();
// Put trash in mat4 to verify contraction clears output memory.
t_result.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1 .0 f);
t_right += t_right.constant(1 .0 f);
typedef Map<Eigen::Matrix<float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 1500 , 248 );
MapXf m_right(t_right.data(), 248 , 1400 );
Eigen::Matrix<float , Dynamic, Dynamic, DataLayout> m_result(1500 , 1400 );
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 2 > dims({{DimPair(2 , 0 ), DimPair(3 , 1 )}});
// compute results by separate methods
t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel());
m_result = m_left * m_right;
for (Index i = 0 ; i < t_result.dimensions().TotalSize(); i++) {
VERIFY(&t_result.data()[i] != &m_result.data()[i]);
VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
}
}
template <int DataLayout>
void test_async_multithread_contraction_agrees_with_singlethread()
{
int contract_size = internal::random<int >(100 , 500 );
Tensor<float , 3 , DataLayout> left(internal::random<int >(10 , 40 ),
contract_size,
internal::random<int >(10 , 40 ));
Tensor<float , 4 , DataLayout> right(
internal::random<int >(1 , 20 ), internal::random<int >(1 , 20 ), contract_size,
internal::random<int >(1 , 20 ));
left.setRandom();
right.setRandom();
// add constants to shift values away from 0 for more precision
left += left.constant(1 .5 f);
right += right.constant(1 .5 f);
typedef Tensor<float , 1 >::DimensionPair DimPair;
Eigen::array<DimPair, 1 > dims({{DimPair(1 , 2 )}});
Eigen::ThreadPool tp(internal::random<int >(2 , 11 ));
Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int >(8 , 32 ));
Tensor<float , 5 , DataLayout> st_result;
st_result = left.contract(right, dims);
Tensor<float , 5 , DataLayout> tp_result(st_result.dimensions());
Eigen::Barrier barrier(1 );
tp_result.device(thread_pool_device, [&barrier]() { barrier.Notify(); }) =
left.contract(right, dims);
barrier.Wait();
VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
for (ptrdiff_t i = 0 ; i < st_result.size(); i++) {
// if both of the values are very small, then do nothing (because the test
// will fail due to numerical precision issues when values are small)
if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1 e-4 f) {
VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]);
}
}
}
// We are triggering 'evalShardedByInnerDim' optimization.
template <int DataLayout>
static void test_sharded_by_inner_dim_contraction()
{
typedef Tensor<float , 1 >::DimensionPair DimPair;
const int num_threads = internal::random<int >(4 , 16 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads);
Tensor<float , 2 , DataLayout> t_left(2 , 10000 );
Tensor<float , 2 , DataLayout> t_right(10000 , 10 );
Tensor<float , 2 , DataLayout> t_result(2 , 10 );
t_left.setRandom();
t_right.setRandom();
// Put trash in t_result to verify contraction clears output memory.
t_result.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1 .0 f);
t_right += t_right.constant(1 .0 f);
typedef Map<Eigen::Matrix<float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 2 , 10000 );
MapXf m_right(t_right.data(), 10000 , 10 );
Eigen::Matrix<float , Dynamic, Dynamic, DataLayout> m_result(2 , 10 );
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 1 > dims({{DimPair(1 , 0 )}});
// compute results by separate methods
t_result.device(device) = t_left.contract(t_right, dims);
m_result = m_left * m_right;
for (Index i = 0 ; i < t_result.dimensions().TotalSize(); i++) {
VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
}
}
// We are triggering 'evalShardedByInnerDim' optimization with output kernel.
template <int DataLayout>
static void test_sharded_by_inner_dim_contraction_with_output_kernel()
{
typedef Tensor<float , 1 >::DimensionPair DimPair;
const int num_threads = internal::random<int >(4 , 16 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads);
Tensor<float , 2 , DataLayout> t_left(2 , 10000 );
Tensor<float , 2 , DataLayout> t_right(10000 , 10 );
Tensor<float , 2 , DataLayout> t_result(2 , 10 );
t_left.setRandom();
t_right.setRandom();
// Put trash in t_result to verify contraction clears output memory.
t_result.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1 .0 f);
t_right += t_right.constant(1 .0 f);
typedef Map<Eigen::Matrix<float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 2 , 10000 );
MapXf m_right(t_right.data(), 10000 , 10 );
Eigen::Matrix<float , Dynamic, Dynamic, DataLayout> m_result(2 , 10 );
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 1 > dims({{DimPair(1 , 0 )}});
// compute results by separate methods
t_result.device(device) = t_left.contract(t_right, dims, SqrtOutputKernel());
m_result = m_left * m_right;
for (Index i = 0 ; i < t_result.dimensions().TotalSize(); i++) {
VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
}
}
// We are triggering 'evalShardedByInnerDim' optimization.
template <int DataLayout>
static void test_async_sharded_by_inner_dim_contraction()
{
typedef Tensor<float , 1 >::DimensionPair DimPair;
const int num_threads = internal::random<int >(4 , 16 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads);
Tensor<float , 2 , DataLayout> t_left(2 , 10000 );
Tensor<float , 2 , DataLayout> t_right(10000 , 10 );
Tensor<float , 2 , DataLayout> t_result(2 , 10 );
t_left.setRandom();
t_right.setRandom();
// Put trash in t_result to verify contraction clears output memory.
t_result.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1 .0 f);
t_right += t_right.constant(1 .0 f);
typedef Map<Eigen::Matrix<float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 2 , 10000 );
MapXf m_right(t_right.data(), 10000 , 10 );
Eigen::Matrix<float , Dynamic, Dynamic, DataLayout> m_result(2 , 10 );
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 1 > dims({{DimPair(1 , 0 )}});
// compute results by separate methods
Eigen::Barrier barrier(1 );
t_result.device(device, [&barrier]() { barrier.Notify(); }) =
t_left.contract(t_right, dims);
barrier.Wait();
m_result = m_left * m_right;
for (Index i = 0 ; i < t_result.dimensions().TotalSize(); i++) {
VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]);
}
}
// We are triggering 'evalShardedByInnerDim' optimization with output kernel.
template <int DataLayout>
static void test_async_sharded_by_inner_dim_contraction_with_output_kernel()
{
typedef Tensor<float , 1 >::DimensionPair DimPair;
const int num_threads = internal::random<int >(4 , 16 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads);
Tensor<float , 2 , DataLayout> t_left(2 , 10000 );
Tensor<float , 2 , DataLayout> t_right(10000 , 10 );
Tensor<float , 2 , DataLayout> t_result(2 , 10 );
t_left.setRandom();
t_right.setRandom();
// Put trash in t_result to verify contraction clears output memory.
t_result.setRandom();
// Add a little offset so that the results won't be close to zero.
t_left += t_left.constant(1 .0 f);
t_right += t_right.constant(1 .0 f);
typedef Map<Eigen::Matrix<float , Dynamic, Dynamic, DataLayout>> MapXf;
MapXf m_left(t_left.data(), 2 , 10000 );
MapXf m_right(t_right.data(), 10000 , 10 );
Eigen::Matrix<float , Dynamic, Dynamic, DataLayout> m_result(2 , 10 );
// this contraction should be equivalent to a single matrix multiplication
Eigen::array<DimPair, 1 > dims({{DimPair(1 , 0 )}});
// compute results by separate methods
Eigen::Barrier barrier(1 );
t_result.device(device, [&barrier]() { barrier.Notify(); }) =
t_left.contract(t_right, dims, SqrtOutputKernel());
barrier.Wait();
m_result = m_left * m_right;
for (Index i = 0 ; i < t_result.dimensions().TotalSize(); i++) {
VERIFY_IS_APPROX(t_result.data()[i], std::sqrt(m_result.data()[i]));
}
}
template <int DataLayout>
void test_full_contraction() {
int contract_size1 = internal::random<int >(1 , 500 );
int contract_size2 = internal::random<int >(1 , 500 );
Tensor<float , 2 , DataLayout> left(contract_size1,
contract_size2);
Tensor<float , 2 , DataLayout> right(contract_size1,
contract_size2);
left.setRandom();
right.setRandom();
// add constants to shift values away from 0 for more precision
left += left.constant(1 .5 f);
right += right.constant(1 .5 f);
typedef Tensor<float , 2 >::DimensionPair DimPair;
Eigen::array<DimPair, 2 > dims({{DimPair(0 , 0 ), DimPair(1 , 1 )}});
Eigen::ThreadPool tp(internal::random<int >(2 , 11 ));
Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int >(2 , 11 ));
Tensor<float , 0 , DataLayout> st_result;
st_result = left.contract(right, dims);
Tensor<float , 0 , DataLayout> tp_result;
tp_result.device(thread_pool_device) = left.contract(right, dims);
VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions()));
// if both of the values are very small, then do nothing (because the test will fail
// due to numerical precision issues when values are small)
if (numext::abs(st_result() - tp_result()) >= 1 e-4 f) {
VERIFY_IS_APPROX(st_result(), tp_result());
}
}
template <int DataLayout>
void test_multithreaded_reductions() {
const int num_threads = internal::random<int >(3 , 11 );
ThreadPool thread_pool(num_threads);
Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads);
const int num_rows = internal::random<int >(13 , 732 );
const int num_cols = internal::random<int >(13 , 732 );
Tensor<float , 2 , DataLayout> t1(num_rows, num_cols);
t1.setRandom();
Tensor<float , 0 , DataLayout> full_redux;
full_redux = t1.sum();
Tensor<float , 0 , DataLayout> full_redux_tp;
full_redux_tp.device(thread_pool_device) = t1.sum();
// Check that the single threaded and the multi threaded reductions return
// the same result.
VERIFY_IS_APPROX(full_redux(), full_redux_tp());
}
void test_memcpy() {
for (int i = 0 ; i < 5 ; ++i) {
const int num_threads = internal::random<int >(3 , 11 );
Eigen::ThreadPool tp(num_threads);
Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads);
const int size = internal::random<int >(13 , 7632 );
Tensor<float , 1 > t1(size);
t1.setRandom();
std::vector<float > result(size);
thread_pool_device.memcpy(&result[0 ], t1.data(), size*sizeof (float ));
for (int j = 0 ; j < size; j++) {
VERIFY_IS_EQUAL(t1(j), result[j]);
}
}
}
void test_multithread_random()
{
Eigen::ThreadPool tp(2 );
Eigen::ThreadPoolDevice device(&tp, 2 );
Tensor<float , 1 > t(1 << 20 );
t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float >>();
}
template <int DataLayout>
void test_multithread_shuffle(Allocator* allocator)
{
Tensor<float , 4 , DataLayout> tensor(17 ,5 ,7 ,11 );
tensor.setRandom();
const int num_threads = internal::random<int >(2 , 11 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
Tensor<float , 4 , DataLayout> shuffle(7 ,5 ,11 ,17 );
array<ptrdiff_t, 4 > shuffles = {{2 ,1 ,3 ,0 }};
shuffle.device(device) = tensor.shuffle(shuffles);
for (int i = 0 ; i < 17 ; ++i) {
for (int j = 0 ; j < 5 ; ++j) {
for (int k = 0 ; k < 7 ; ++k) {
for (int l = 0 ; l < 11 ; ++l) {
VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i));
}
}
}
}
}
void test_threadpool_allocate(TestAllocator* allocator)
{
const int num_threads = internal::random<int >(2 , 11 );
const int num_allocs = internal::random<int >(2 , 11 );
ThreadPool threads(num_threads);
Eigen::ThreadPoolDevice device(&threads, num_threads, allocator);
for (int a = 0 ; a < num_allocs; ++a) {
void * ptr = device.allocate(512 );
device.deallocate(ptr);
}
VERIFY(allocator != NULL);
VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs);
VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs);
}
EIGEN_DECLARE_TEST(cxx11_tensor_thread_pool)
{
CALL_SUBTEST_1(test_multithread_elementwise());
CALL_SUBTEST_1(test_async_multithread_elementwise());
CALL_SUBTEST_1(test_multithread_compound_assignment());
CALL_SUBTEST_2(test_multithread_contraction<ColMajor>());
CALL_SUBTEST_2(test_multithread_contraction<RowMajor>());
CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>());
CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>());
CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST_3(test_multithread_contraction_with_output_kernel<RowMajor>());
CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread<ColMajor>());
CALL_SUBTEST_4(test_async_multithread_contraction_agrees_with_singlethread<RowMajor>());
// Test EvalShardedByInnerDimContext parallelization strategy.
CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction<ColMajor>());
CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction<RowMajor>());
CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST_5(test_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction<ColMajor>());
CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction<RowMajor>());
CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel<ColMajor>());
CALL_SUBTEST_6(test_async_sharded_by_inner_dim_contraction_with_output_kernel<RowMajor>());
// Exercise various cases that have been problematic in the past.
CALL_SUBTEST_7(test_contraction_corner_cases<ColMajor>());
CALL_SUBTEST_7(test_contraction_corner_cases<RowMajor>());
CALL_SUBTEST_8(test_full_contraction<ColMajor>());
CALL_SUBTEST_8(test_full_contraction<RowMajor>());
CALL_SUBTEST_9(test_multithreaded_reductions<ColMajor>());
CALL_SUBTEST_9(test_multithreaded_reductions<RowMajor>());
CALL_SUBTEST_10(test_memcpy());
CALL_SUBTEST_10(test_multithread_random());
TestAllocator test_allocator;
CALL_SUBTEST_11(test_multithread_shuffle<ColMajor>(NULL));
CALL_SUBTEST_11(test_multithread_shuffle<RowMajor>(&test_allocator));
CALL_SUBTEST_11(test_threadpool_allocate(&test_allocator));
// Force CMake to split this test.
// EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11
}
Messung V0.5 in Prozent C=91 H=93 G=91
¤ Dauer der Verarbeitung: 0.27 Sekunden
(vorverarbeitet am 2026-06-06)
¤
*© Formatika GbR, Deutschland