// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <stddef.h>
// HWY_TARGET_INCLUDE must expand to this file's path: the foreach_target.h
// include that follows uses it to re-include this translation unit. The path
// therefore has to be on the same line as the #define; otherwise the macro is
// empty and the string becomes a stray token at file scope.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/table_test.cc"  // NOLINT
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
// Checks TableLookupLanes against a table built with Iota(d, 1), using index
// sets produced both by IndicesFromVec and by SetTableIndices.
struct TestTableLookupLanes {
#if HWY_TARGET != HWY_SCALAR
  // Fills want[0, num_lanes) with the values that `lane_idx` should select
  // from `table`, then asserts that both ways of building the opaque indices
  // yield exactly those lanes. `table` is expected to be Iota(d, 1), so the
  // lane at index k holds k + 1.
  template <class D, typename TI, typename T>
  static HWY_INLINE void VerifyLookup(D d, VFromD<D> table, const TI* lane_idx,
                                      T* want, size_t num_lanes) {
    const RebindToSigned<D> di;
    for (size_t lane = 0; lane < num_lanes; ++lane) {
      want[lane] = ConvertScalarTo<T>(lane_idx[lane] + 1);
    }

    // Indices supplied as a vector.
    const auto indices_from_vec = IndicesFromVec(d, Load(di, lane_idx));
    const auto result_from_vec = TableLookupLanes(table, indices_from_vec);
    HWY_ASSERT_VEC_EQ(d, want, result_from_vec);

    // Indices supplied as an array.
    const auto indices_from_array = SetTableIndices(d, lane_idx);
    const auto result_from_array = TableLookupLanes(table, indices_from_array);
    HWY_ASSERT_VEC_EQ(d, want, result_from_array);
  }
#endif

  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    using TI = TFromD<RebindToSigned<D>>;
#if HWY_TARGET != HWY_SCALAR
    const size_t N = Lanes(d);
    auto lane_idx = AllocateAligned<TI>(N);
    auto want = AllocateAligned<T>(N);
    HWY_ASSERT(lane_idx && want);
    // Lanes that the loops below never write keep index 0.
    ZeroBytes(lane_idx.get(), N * sizeof(TI));
    const auto table = Iota(d, 1);

    if (N > 8) {
      // Too many index combinations to enumerate: use one fixed pattern that
      // contains repeated indices and indices >= 8. Lanes beyond the 16
      // pattern entries use index 0.
      HWY_ALIGN TI pattern[16] = {1,  3,  2,  2,  8, 1, 7, 6,
                                  15, 14, 14, 15, 4, 9, 8, 5};
      for (size_t lane = 0; lane < N; ++lane) {
        lane_idx[lane] = (lane < 16) ? pattern[lane] : 0;
        // Clamp so that no index refers to a lane outside the vector.
        if (lane_idx[lane] >= static_cast<TI>(N)) {
          lane_idx[lane] = static_cast<TI>(N - 1);
        }
      }
      VerifyLookup(d, table, lane_idx.get(), want.get(), N);
    } else {
      // Enumerate every combination of indices for the first (up to) four
      // lanes. The guards skip lanes that do not exist for small N.
      for (size_t i0 = 0; i0 < N; ++i0) {
        for (size_t i1 = 0; i1 < N; ++i1) {
          for (size_t i2 = 0; i2 < N; ++i2) {
            for (size_t i3 = 0; i3 < N; ++i3) {
              lane_idx[0] = static_cast<TI>(i0);
              if (N >= 2) {
                lane_idx[1] = static_cast<TI>(i1);
              }
              if (N >= 4) {
                lane_idx[2] = static_cast<TI>(i2);
                lane_idx[3] = static_cast<TI>(i3);
              }
              VerifyLookup(d, table, lane_idx.get(), want.get(), N);
            }
          }
        }
      }
    }
#else
    // Scalar target: only index 0 is exercised, so each lookup must return
    // the input unchanged.
    const RebindToSigned<D> di;
    const TI zero_index = 0;
    const auto input = Set(d, 1);

    const auto indices_from_array = SetTableIndices(d, &zero_index);
    HWY_ASSERT_VEC_EQ(d, input, TableLookupLanes(input, indices_from_array));

    const auto indices_from_vec = IndicesFromVec(d, Zero(di));
    HWY_ASSERT_VEC_EQ(d, input, TableLookupLanes(input, indices_from_vec));
#endif
  }
};
// Runs TestTableLookupLanes, wrapped in ForPartialVectors, for every lane type
// that ForAllTypes enumerates.
HWY_NOINLINE void TestAllTableLookupLanes() {
  const ForPartialVectors<TestTableLookupLanes> per_vector_test{};
  ForAllTypes(per_vector_test);
}
struct TestTwoTablesLookupLanes {
template <
class T,
class D>
HWY_NOINLINE
void operator()(T
/*unused*/, D d) {
const RebindToUnsigned<D> du;
using TU = TFromD<decltype(du)>;
const size_t N = Lanes(d);
const size_t twiceN = N *
2;
auto idx = AllocateAligned<TU>(twiceN);
auto expected = AllocateAligned<T>(twiceN);
HWY_ASSERT(idx && expected);
ZeroBytes(idx.get(), twiceN *
sizeof(TU));
const auto a = Iota(d,
1);
const auto b = Add(a, Set(d, ConvertScalarTo<T>(N)));
if (twiceN <=
8) {
// Test all permutations
for (size_t i0 =
0; i0 < twiceN; ++i0) {
idx[
0] =
static_cast<TU>(i0);
for (size_t i1 =
0; i1 < twiceN; ++i1) {
if (twiceN >=
2) idx[
1] =
static_cast<TU>(i1);
for (size_t i2 =
0; i2 < twiceN; ++i2) {
if (twiceN >=
4) idx[
2] =
static_cast<TU>(i2);
for (size_t i3 =
0; i3 < twiceN; ++i3) {
if (twiceN >=
4) idx[
3] =
static_cast<TU>(i3);
for (size_t i =
0; i < twiceN; ++i) {
expected[i] = ConvertScalarTo<T>(idx[i] +
1);
// == v[idx[i]]
}
const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get()));
const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N));
const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a);
const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b);
const auto opaque2_a = SetTableIndices(d, idx.get());
const auto opaque2_b = SetTableIndices(d, idx.get() + N);
const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a);
const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b);
}
}
}
}
}
else {
constexpr size_t kLanesPerBlock =
16 /
sizeof(T);
constexpr size_t kMaxBlockIdx =
static_cast<size_t>(LimitsMax<TU>()) >>
1;
static_assert(kMaxBlockIdx >
0,
"kMaxBlockIdx > 0 must be true");
const size_t num_of_blocks_per_vect = HWY_MAX(N / kLanesPerBlock,
1);
const size_t num_of_blocks_to_check =
HWY_MIN(num_of_blocks_per_vect *
2, kMaxBlockIdx);
for (size_t i =
0; i < num_of_blocks_to_check; i++) {
// Too many permutations to test exhaustively; choose one with repeated
// and cross-block indices and ensure indices do not exceed #lanes.
// For larger vectors, upper lanes will be zero.
HWY_ALIGN TU idx_source[
16] = {
1,
3,
2,
2,
8,
1,
7,
6,
15,
14,
14,
15,
4,
9,
8,
5};
for (size_t j =
0; j < twiceN; ++j) {
idx[j] =
static_cast<TU>((i * kLanesPerBlock + idx_source[j &
15] +
(j &
static_cast<size_t>(-
16))) &
(twiceN -
1));
expected[j] = ConvertScalarTo<T>(idx[j] +
1);
// == v[idx[j]]
}
const auto opaque1_a = IndicesFromVec(d, Load(du, idx.get()));
const auto opaque1_b = IndicesFromVec(d, Load(du, idx.get() + N));
const auto actual1_a = TwoTablesLookupLanes(d, a, b, opaque1_a);
const auto actual1_b = TwoTablesLookupLanes(d, a, b, opaque1_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual1_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual1_b);
const auto opaque2_a = SetTableIndices(d, idx.get());
const auto opaque2_b = SetTableIndices(d, idx.get() + N);
const auto actual2_a = TwoTablesLookupLanes(d, a, b, opaque2_a);
const auto actual2_b = TwoTablesLookupLanes(d, a, b, opaque2_b);
HWY_ASSERT_VEC_EQ(d, expected.get(), actual2_a);
HWY_ASSERT_VEC_EQ(d, expected.get() + N, actual2_b);
}
}
}
};
// Runs TestTwoTablesLookupLanes, wrapped in ForPartialVectors, for every lane
// type that ForAllTypes enumerates.
HWY_NOINLINE void TestAllTwoTablesLookupLanes() {
  const ForPartialVectors<TestTwoTablesLookupLanes> per_vector_test{};
  ForAllTypes(per_vector_test);
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
}
// namespace HWY_NAMESPACE
}
// namespace hwy
HWY_AFTER_NAMESPACE();
// Test-suite registration. This region is compiled only when HWY_ONCE is
// nonzero.
// NOTE(review): Highway documents HWY_ONCE as true for a single pass of the
// per-target re-inclusion, so these registrations should be emitted once;
// confirm against the Highway version in use.
#if HWY_ONCE
namespace hwy {
// Declares the HwyTableTest suite; must precede the per-test macros below.
HWY_BEFORE_TEST(HwyTableTest);
// Each line exposes one TestAll* function defined above to the HwyTableTest
// suite (presumably exporting it per target and registering a parameterized
// test; see the macro definitions).
HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTableLookupLanes);
HWY_EXPORT_AND_TEST_P(HwyTableTest, TestAllTwoTablesLookupLanes);
}
// namespace hwy
#endif  // HWY_ONCE