// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string.h>
// memset
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE
"tests/slide_up_down_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
// Checks SlideUpLanes, and Slide1Up for a slide of exactly one lane, against a
// scalar reference that is built lane by lane. Slide amounts are exercised as
// runtime values and, on the targets selected by the #if below, as literals.
class TestSlideUpLanes {
 private:
  // Writes into `expected` (N lanes) the reference result of sliding
  // Iota(d, 1) up by `slide_amt` lanes, then compares it with SlideUpLanes.
  // When `slide_amt` is 1, Slide1Up has to produce the same lanes.
  // NOTE(review): this stays HWY_INLINE, presumably so that the literal
  // amounts passed by the *WithConstAmt* helpers remain visible to
  // SlideUpLanes as constants (compare the rationale written above
  // HWY_SLIDE_DOWN_TEST_INLINE) - confirm.
  template <class D>
  static HWY_INLINE void DoTestSlideUpLanes(D d,
                                            TFromD<D>* HWY_RESTRICT expected,
                                            const size_t N,
                                            const size_t slide_amt) {
    using T = TFromD<D>;
    for (size_t lane = 0; lane < N; ++lane) {
      // Lanes below slide_amt are expected to be zero. Every other lane holds
      // the value of source lane (lane - slide_amt), which is that index + 1
      // because the input vector is Iota(d, 1).
      size_t value = 0;
      if (lane >= slide_amt) {
        value = lane - slide_amt + 1;
      }
      expected[lane] = ConvertScalarTo<T>(value);
    }

    const Vec<D> v = Iota(d, 1);
    HWY_ASSERT_VEC_EQ(d, expected, SlideUpLanes(d, v, slide_amt));
    if (slide_amt == 1) {
      HWY_ASSERT_VEC_EQ(d, expected, Slide1Up(d, v));
    }
  }

#if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \
    HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
  // Literal slide amounts 0..7. Amounts of N or more are never tested.
  template <class D>
  static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_0_7(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    DoTestSlideUpLanes(d, expected, N, 0);
    if (N > 1) {
      DoTestSlideUpLanes(d, expected, N, 1);
    }
    if (N > 2) {
      DoTestSlideUpLanes(d, expected, N, 2);
      DoTestSlideUpLanes(d, expected, N, 3);
    }
    if (N > 4) {
      DoTestSlideUpLanes(d, expected, N, 4);
      DoTestSlideUpLanes(d, expected, N, 5);
      DoTestSlideUpLanes(d, expected, N, 6);
      DoTestSlideUpLanes(d, expected, N, 7);
    }
  }

  // Overload chosen by HWY_IF_LANES_LE_D(D, 8): nothing to test.
  template <class D, HWY_IF_LANES_LE_D(D, 8)>
  static HWY_INLINE void DoTestSlideUpLanesWithConstAmt_8_15(
      D /*d*/, TFromD<D>* HWY_RESTRICT /*expected*/, const size_t /*N*/) {}

  // Literal slide amounts 8..15, run only when the vector has more than 8
  // lanes at runtime.
  template <class D, HWY_IF_LANES_GT_D(D, 8)>
  static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_8_15(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    if (N > 8) {
      DoTestSlideUpLanes(d, expected, N, 8);
      DoTestSlideUpLanes(d, expected, N, 9);
      DoTestSlideUpLanes(d, expected, N, 10);
      DoTestSlideUpLanes(d, expected, N, 11);
      DoTestSlideUpLanes(d, expected, N, 12);
      DoTestSlideUpLanes(d, expected, N, 13);
      DoTestSlideUpLanes(d, expected, N, 14);
      DoTestSlideUpLanes(d, expected, N, 15);
    }
  }

#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
  // Overload chosen by HWY_IF_LANES_LE_D(D, 16): nothing to test.
  template <class D, HWY_IF_LANES_LE_D(D, 16)>
  static HWY_INLINE void DoTestSlideUpLanesWithConstAmt_16_31(
      D /*d*/, TFromD<D>* HWY_RESTRICT /*expected*/, const size_t /*N*/) {}

  // Literal slide amounts 16..31, run only when the vector has more than 16
  // lanes at runtime.
  template <class D, HWY_IF_LANES_GT_D(D, 16)>
  static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_16_31(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    if (N > 16) {
      DoTestSlideUpLanes(d, expected, N, 16);
      DoTestSlideUpLanes(d, expected, N, 17);
      DoTestSlideUpLanes(d, expected, N, 18);
      DoTestSlideUpLanes(d, expected, N, 19);
      DoTestSlideUpLanes(d, expected, N, 20);
      DoTestSlideUpLanes(d, expected, N, 21);
      DoTestSlideUpLanes(d, expected, N, 22);
      DoTestSlideUpLanes(d, expected, N, 23);
      DoTestSlideUpLanes(d, expected, N, 24);
      DoTestSlideUpLanes(d, expected, N, 25);
      DoTestSlideUpLanes(d, expected, N, 26);
      DoTestSlideUpLanes(d, expected, N, 27);
      DoTestSlideUpLanes(d, expected, N, 28);
      DoTestSlideUpLanes(d, expected, N, 29);
      DoTestSlideUpLanes(d, expected, N, 30);
      DoTestSlideUpLanes(d, expected, N, 31);
    }
  }

#if HWY_TARGET <= HWY_AVX3
  // Overload chosen by HWY_IF_LANES_LE_D(D, 32): nothing to test.
  template <class D, HWY_IF_LANES_LE_D(D, 32)>
  static HWY_INLINE void DoTestSlideUpLanesWithConstAmt_32_63(
      D /*d*/, TFromD<D>* HWY_RESTRICT /*expected*/, const size_t /*N*/) {}

  // Literal slide amounts 32..63, run only when the vector has more than 32
  // lanes at runtime.
  template <class D, HWY_IF_LANES_GT_D(D, 32)>
  static HWY_NOINLINE void DoTestSlideUpLanesWithConstAmt_32_63(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    if (N > 32) {
      DoTestSlideUpLanes(d, expected, N, 32);
      DoTestSlideUpLanes(d, expected, N, 33);
      DoTestSlideUpLanes(d, expected, N, 34);
      DoTestSlideUpLanes(d, expected, N, 35);
      DoTestSlideUpLanes(d, expected, N, 36);
      DoTestSlideUpLanes(d, expected, N, 37);
      DoTestSlideUpLanes(d, expected, N, 38);
      DoTestSlideUpLanes(d, expected, N, 39);
      DoTestSlideUpLanes(d, expected, N, 40);
      DoTestSlideUpLanes(d, expected, N, 41);
      DoTestSlideUpLanes(d, expected, N, 42);
      DoTestSlideUpLanes(d, expected, N, 43);
      DoTestSlideUpLanes(d, expected, N, 44);
      DoTestSlideUpLanes(d, expected, N, 45);
      DoTestSlideUpLanes(d, expected, N, 46);
      DoTestSlideUpLanes(d, expected, N, 47);
      DoTestSlideUpLanes(d, expected, N, 48);
      DoTestSlideUpLanes(d, expected, N, 49);
      DoTestSlideUpLanes(d, expected, N, 50);
      DoTestSlideUpLanes(d, expected, N, 51);
      DoTestSlideUpLanes(d, expected, N, 52);
      DoTestSlideUpLanes(d, expected, N, 53);
      DoTestSlideUpLanes(d, expected, N, 54);
      DoTestSlideUpLanes(d, expected, N, 55);
      DoTestSlideUpLanes(d, expected, N, 56);
      DoTestSlideUpLanes(d, expected, N, 57);
      DoTestSlideUpLanes(d, expected, N, 58);
      DoTestSlideUpLanes(d, expected, N, 59);
      DoTestSlideUpLanes(d, expected, N, 60);
      DoTestSlideUpLanes(d, expected, N, 61);
      DoTestSlideUpLanes(d, expected, N, 62);
      DoTestSlideUpLanes(d, expected, N, 63);
    }
  }
#endif  // HWY_TARGET <= HWY_AVX3
#endif  // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
#endif  // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 &&
        // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256

 public:
  // Runs the check for every slide amount in [0, Lanes(d)) as a runtime value,
  // then repeats it with literal amounts on the targets selected below.
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t N = Lanes(d);
    auto expected = AllocateAligned<T>(N);
    HWY_ASSERT(expected);
    T* const expected_lanes = expected.get();

    for (size_t amt = 0; amt < N; ++amt) {
      // Work on a copy so that PreventElision never touches the loop counter.
      size_t slide_amt = amt;
#if !HWY_COMPILER_MSVC
      PreventElision(slide_amt);
#endif
      DoTestSlideUpLanes(d, expected_lanes, N, slide_amt);
    }

#if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \
    HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
    DoTestSlideUpLanesWithConstAmt_0_7(d, expected_lanes, N);
    DoTestSlideUpLanesWithConstAmt_8_15(d, expected_lanes, N);
#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
    DoTestSlideUpLanesWithConstAmt_16_31(d, expected_lanes, N);
#if HWY_TARGET <= HWY_AVX3
    DoTestSlideUpLanesWithConstAmt_32_63(d, expected_lanes, N);
#endif  // HWY_TARGET <= HWY_AVX3
#endif  // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
#endif  // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 &&
        // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
  }
};
// Runs TestSlideUpLanes through ForPartialVectors for all types visited by
// ForAllTypes.
HWY_NOINLINE void TestAllSlideUpLanes() {
  const auto test = ForPartialVectors<TestSlideUpLanes>();
  ForAllTypes(test);
}
// Selects the inlining attribute applied to
// TestSlideDownLanes::DoTestSlideDownLanes. The macro is #undef'd again after
// that class.
#if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \
HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
// DoTestSlideDownLanes needs to be inlined on targets where
// DoTestSlideDownLanesWithConstAmt_0_7, DoTestSlideDownLanesWithConstAmt_8_15,
// DoTestSlideDownLanesWithConstAmt_16_31, and
// DoTestSlideDownLanesWithConstAmt_32_63 are called, since the implementation
// of SlideDownLanes(d, v, N) for the SSE2/SSSE3/SSE4/AVX2/AVX3/NEON/WASM
// targets has an optimized path for the case where __builtin_constant_p(N) is
// true (in other words, when N is known to be a constant) when compiled with
// GCC or Clang and optimizations are enabled.
// If DoTestSlideDownLanes is not inlined on the
// SSE2/SSSE3/SSE4/AVX2/AVX3/NEON/WASM targets,
// DoTestSlideDownLanesWithConstAmt_0_7, DoTestSlideDownLanesWithConstAmt_8_15,
// DoTestSlideDownLanesWithConstAmt_16_31, and
// DoTestSlideDownLanesWithConstAmt_32_63 will fail to thoroughly test the
// implementations of SlideDownLanes(d, v, N) in optimized builds compiled with
// GCC or Clang for the case where N is known to be a constant.
#define HWY_SLIDE_DOWN_TEST_INLINE HWY_INLINE
#else
// DoTestSlideDownLanes should not be inlined on RVV targets, to work around an
// RVV miscompilation. Note that this branch is taken for every target that
// fails the condition above, not only for RVV; none of those targets call the
// DoTestSlideDownLanesWithConstAmt_* helpers.
#define HWY_SLIDE_DOWN_TEST_INLINE HWY_NOINLINE
#endif
// Checks SlideDownLanes, and Slide1Down for a slide of exactly one lane,
// against a scalar reference that is built lane by lane. Slide amounts are
// exercised as runtime values and, on the targets selected by the #if below,
// as literals.
class TestSlideDownLanes {
 private:
  // Writes into `expected` (N lanes) the reference result of sliding
  // Iota(d, 0) down by `slide_amt` lanes, then compares it with
  // SlideDownLanes. When `slide_amt` is 1, Slide1Down has to produce the same
  // lanes.
  // The inlining attribute comes from HWY_SLIDE_DOWN_TEST_INLINE, defined
  // before this class: it is needed to work around an RVV miscompilation.
  template <class D>
  static HWY_SLIDE_DOWN_TEST_INLINE void DoTestSlideDownLanes(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N,
      const size_t slide_amt) {
    using T = TFromD<D>;
    for (size_t lane = 0; lane < N; ++lane) {
      // Output lane `lane` takes its value from source lane `from`. The input
      // is Iota(d, 0), so that value equals the source index. Lanes whose
      // source lies past the end of the vector are expected to be zero.
      const size_t from = slide_amt + lane;
      size_t value = 0;
      if (from < N) {
        value = from;
      }
      expected[lane] = ConvertScalarTo<T>(value);
    }

    const auto v = Iota(d, 0);
    HWY_ASSERT_VEC_EQ(d, expected, SlideDownLanes(d, v, slide_amt));
    if (slide_amt == 1) {
      HWY_ASSERT_VEC_EQ(d, expected, Slide1Down(d, v));
    }
  }

#if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \
    HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
  // Literal slide amounts 0..7. Amounts of N or more are never tested.
  template <class D>
  static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_0_7(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    DoTestSlideDownLanes(d, expected, N, 0);
    if (N > 1) {
      DoTestSlideDownLanes(d, expected, N, 1);
    }
    if (N > 2) {
      DoTestSlideDownLanes(d, expected, N, 2);
      DoTestSlideDownLanes(d, expected, N, 3);
    }
    if (N > 4) {
      DoTestSlideDownLanes(d, expected, N, 4);
      DoTestSlideDownLanes(d, expected, N, 5);
      DoTestSlideDownLanes(d, expected, N, 6);
      DoTestSlideDownLanes(d, expected, N, 7);
    }
  }

  // Overload chosen by HWY_IF_LANES_LE_D(D, 8): nothing to test.
  template <class D, HWY_IF_LANES_LE_D(D, 8)>
  static HWY_INLINE void DoTestSlideDownLanesWithConstAmt_8_15(
      D /*d*/, TFromD<D>* HWY_RESTRICT /*expected*/, const size_t /*N*/) {}

  // Literal slide amounts 8..15, run only when the vector has more than 8
  // lanes at runtime.
  template <class D, HWY_IF_LANES_GT_D(D, 8)>
  static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_8_15(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    if (N > 8) {
      DoTestSlideDownLanes(d, expected, N, 8);
      DoTestSlideDownLanes(d, expected, N, 9);
      DoTestSlideDownLanes(d, expected, N, 10);
      DoTestSlideDownLanes(d, expected, N, 11);
      DoTestSlideDownLanes(d, expected, N, 12);
      DoTestSlideDownLanes(d, expected, N, 13);
      DoTestSlideDownLanes(d, expected, N, 14);
      DoTestSlideDownLanes(d, expected, N, 15);
    }
  }

#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
  // Overload chosen by HWY_IF_LANES_LE_D(D, 16): nothing to test.
  template <class D, HWY_IF_LANES_LE_D(D, 16)>
  static HWY_INLINE void DoTestSlideDownLanesWithConstAmt_16_31(
      D /*d*/, TFromD<D>* HWY_RESTRICT /*expected*/, const size_t /*N*/) {}

  // Literal slide amounts 16..31, run only when the vector has more than 16
  // lanes at runtime.
  template <class D, HWY_IF_LANES_GT_D(D, 16)>
  static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_16_31(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    if (N > 16) {
      DoTestSlideDownLanes(d, expected, N, 16);
      DoTestSlideDownLanes(d, expected, N, 17);
      DoTestSlideDownLanes(d, expected, N, 18);
      DoTestSlideDownLanes(d, expected, N, 19);
      DoTestSlideDownLanes(d, expected, N, 20);
      DoTestSlideDownLanes(d, expected, N, 21);
      DoTestSlideDownLanes(d, expected, N, 22);
      DoTestSlideDownLanes(d, expected, N, 23);
      DoTestSlideDownLanes(d, expected, N, 24);
      DoTestSlideDownLanes(d, expected, N, 25);
      DoTestSlideDownLanes(d, expected, N, 26);
      DoTestSlideDownLanes(d, expected, N, 27);
      DoTestSlideDownLanes(d, expected, N, 28);
      DoTestSlideDownLanes(d, expected, N, 29);
      DoTestSlideDownLanes(d, expected, N, 30);
      DoTestSlideDownLanes(d, expected, N, 31);
    }
  }

#if HWY_TARGET <= HWY_AVX3
  // Overload chosen by HWY_IF_LANES_LE_D(D, 32): nothing to test.
  template <class D, HWY_IF_LANES_LE_D(D, 32)>
  static HWY_INLINE void DoTestSlideDownLanesWithConstAmt_32_63(
      D /*d*/, TFromD<D>* HWY_RESTRICT /*expected*/, const size_t /*N*/) {}

  // Literal slide amounts 32..63, run only when the vector has more than 32
  // lanes at runtime.
  template <class D, HWY_IF_LANES_GT_D(D, 32)>
  static HWY_NOINLINE void DoTestSlideDownLanesWithConstAmt_32_63(
      D d, TFromD<D>* HWY_RESTRICT expected, const size_t N) {
    if (N > 32) {
      DoTestSlideDownLanes(d, expected, N, 32);
      DoTestSlideDownLanes(d, expected, N, 33);
      DoTestSlideDownLanes(d, expected, N, 34);
      DoTestSlideDownLanes(d, expected, N, 35);
      DoTestSlideDownLanes(d, expected, N, 36);
      DoTestSlideDownLanes(d, expected, N, 37);
      DoTestSlideDownLanes(d, expected, N, 38);
      DoTestSlideDownLanes(d, expected, N, 39);
      DoTestSlideDownLanes(d, expected, N, 40);
      DoTestSlideDownLanes(d, expected, N, 41);
      DoTestSlideDownLanes(d, expected, N, 42);
      DoTestSlideDownLanes(d, expected, N, 43);
      DoTestSlideDownLanes(d, expected, N, 44);
      DoTestSlideDownLanes(d, expected, N, 45);
      DoTestSlideDownLanes(d, expected, N, 46);
      DoTestSlideDownLanes(d, expected, N, 47);
      DoTestSlideDownLanes(d, expected, N, 48);
      DoTestSlideDownLanes(d, expected, N, 49);
      DoTestSlideDownLanes(d, expected, N, 50);
      DoTestSlideDownLanes(d, expected, N, 51);
      DoTestSlideDownLanes(d, expected, N, 52);
      DoTestSlideDownLanes(d, expected, N, 53);
      DoTestSlideDownLanes(d, expected, N, 54);
      DoTestSlideDownLanes(d, expected, N, 55);
      DoTestSlideDownLanes(d, expected, N, 56);
      DoTestSlideDownLanes(d, expected, N, 57);
      DoTestSlideDownLanes(d, expected, N, 58);
      DoTestSlideDownLanes(d, expected, N, 59);
      DoTestSlideDownLanes(d, expected, N, 60);
      DoTestSlideDownLanes(d, expected, N, 61);
      DoTestSlideDownLanes(d, expected, N, 62);
      DoTestSlideDownLanes(d, expected, N, 63);
    }
  }
#endif  // HWY_TARGET <= HWY_AVX3
#endif  // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
#endif  // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 &&
        // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256

 public:
  // Runs the check for every slide amount in [0, Lanes(d)) as a runtime value,
  // then repeats it with literal amounts on the targets selected below.
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t N = Lanes(d);
    auto expected = AllocateAligned<T>(N);
    HWY_ASSERT(expected);
    T* const expected_lanes = expected.get();

    for (size_t amt = 0; amt < N; ++amt) {
      // Work on a copy so that PreventElision never touches the loop counter.
      size_t slide_amt = amt;
#if !HWY_COMPILER_MSVC
      PreventElision(slide_amt);
#endif
      DoTestSlideDownLanes(d, expected_lanes, N, slide_amt);
    }

#if !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 && \
    HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
    DoTestSlideDownLanesWithConstAmt_0_7(d, expected_lanes, N);
    DoTestSlideDownLanesWithConstAmt_8_15(d, expected_lanes, N);
#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
    DoTestSlideDownLanesWithConstAmt_16_31(d, expected_lanes, N);
#if HWY_TARGET <= HWY_AVX3
    DoTestSlideDownLanesWithConstAmt_32_63(d, expected_lanes, N);
#endif  // HWY_TARGET <= HWY_AVX3
#endif  // HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_WASM_EMU256
#endif  // !HWY_HAVE_SCALABLE && HWY_TARGET < HWY_EMU128 &&
        // HWY_TARGET != HWY_SVE2_128 && HWY_TARGET != HWY_SVE_256
  }
};
#undef HWY_SLIDE_DOWN_TEST_INLINE
// Runs TestSlideDownLanes through ForPartialVectors for all types visited by
// ForAllTypes.
HWY_NOINLINE void TestAllSlideDownLanes() {
  const auto test = ForPartialVectors<TestSlideDownLanes>();
  ForAllTypes(test);
}
// Checks Slide1Up and Slide1Down against expected vectors that are built
// from Iota and FirstN rather than from a scalar loop.
struct TestSlide1 {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> from_zero = Iota(d, 0);
    const Vec<D> from_one = Iota(d, 1);

    // Up: sliding {1, 2, 3, ...} by one lane is expected to give
    // {0, 1, 2, ...}.
    HWY_ASSERT_VEC_EQ(d, from_zero, Slide1Up(d, from_one));

    // Down: sliding {0, 1, 2, ...} by one lane is expected to give
    // {1, 2, 3, ...} with the last lane zeroed, i.e. `from_one` restricted to
    // the first Lanes(d) - 1 lanes.
    const size_t num_kept = Lanes(d) - 1;
    const auto all_but_last = FirstN(d, num_kept);
    const Vec<D> expected_down = IfThenElseZero(all_but_last, from_one);
    HWY_ASSERT_VEC_EQ(d, expected_down, Slide1Down(d, from_zero));
  }
};
// Runs TestSlide1 through ForPartialVectors for all types visited by
// ForAllTypes.
HWY_NOINLINE void TestAllSlide1() {
  const auto test = ForPartialVectors<TestSlide1>();
  ForAllTypes(test);
}
// Checks SlideUpBlocks<kBlocks> and SlideDownBlocks<kBlocks>, where one block
// is 16 bytes. A slide by 0 blocks is always tested; slides by 1, 2 and 3
// blocks are tested only when HWY_MAX_BYTES and the vector size allow them.
class TestSlideBlocks {
 private:
  // Compares SlideUpBlocks<kBlocks> and SlideDownBlocks<kBlocks> with expected
  // vectors assembled from Iota, FirstN and Reverse.
  template <int kBlocks, class D>
  static HWY_INLINE void DoTestSlideByKBlocks(D d) {
    using T = TFromD<D>;
    // Number of T lanes in one 16-byte block, and in kBlocks such blocks.
    constexpr size_t kLanesPerBlock = 16 / sizeof(T);
    constexpr size_t kLanesToSlide =
        static_cast<size_t>(kBlocks) * kLanesPerBlock;

    const auto iota_0 = Iota(d, 0);
    const auto iota_k = Iota(d, kLanesToSlide);
    const auto first_k_lanes_mask = FirstN(d, kLanesToSlide);

    // Sliding iota_k up by kLanesToSlide lanes is expected to give iota_0
    // with its first kLanesToSlide lanes zeroed.
    const auto expected_slide_up_result =
        IfThenZeroElse(first_k_lanes_mask, iota_0);
    HWY_ASSERT_VEC_EQ(d, expected_slide_up_result,
                      SlideUpBlocks<kBlocks>(d, iota_k));

    // Build a vector that is zero in the first kLanesToSlide lanes and
    // all-ones elsewhere, then reverse it so the zeros end up in the last
    // kLanesToSlide lanes. ANDing that with iota_k gives the expected result
    // of sliding iota_0 down: lane i holds i + kLanesToSlide, and the lanes
    // whose source would lie past the end of the vector are zero.
    const RebindToUnsigned<decltype(d)> du;
    using TU = TFromD<decltype(du)>;
    const auto slide_down_result_mask = BitCast(
        d, Reverse(du, IfThenZeroElse(RebindMask(du, first_k_lanes_mask),
                                      Set(du, hwy::LimitsMax<TU>()))));
    const auto expected_slide_down_result =
        And(slide_down_result_mask, iota_k);
    HWY_ASSERT_VEC_EQ(d, expected_slide_down_result,
                      SlideDownBlocks<kBlocks>(d, iota_0));
  }

#if HWY_MAX_BYTES >= 32
  // Overload chosen by HWY_IF_V_SIZE_LE_D(D, 16): a slide by one block is not
  // tested for these D.
  template <class D, HWY_IF_V_SIZE_LE_D(D, 16)>
  static HWY_INLINE void DoTestSlideBy1Block(D /*d*/, size_t /*N*/) {}

  // Tests a slide by one block, provided the vector really holds at least
  // 32 bytes of lanes at runtime (N is the lane count).
  template <class D, HWY_IF_V_SIZE_GT_D(D, 16)>
  static HWY_INLINE void DoTestSlideBy1Block(D d, size_t N) {
    if (N < (32 / sizeof(TFromD<D>))) return;
    DoTestSlideByKBlocks<1>(d);
  }

#if HWY_MAX_BYTES >= 64
  // Overload chosen by HWY_IF_V_SIZE_LE_D(D, 32): slides by two and three
  // blocks are not tested for these D.
  template <class D, HWY_IF_V_SIZE_LE_D(D, 32)>
  static HWY_INLINE void DoTestSlideBy2And3Blocks(D /*d*/, size_t /*N*/) {}

  // Tests slides by two and three blocks, provided the vector really holds at
  // least 64 bytes of lanes at runtime (N is the lane count).
  template <class D, HWY_IF_V_SIZE_GT_D(D, 32)>
  static HWY_INLINE void DoTestSlideBy2And3Blocks(D d, size_t N) {
    if (N < (64 / sizeof(TFromD<D>))) return;
    DoTestSlideByKBlocks<2>(d);
    DoTestSlideByKBlocks<3>(d);
  }
#endif  // HWY_MAX_BYTES >= 64
#endif  // HWY_MAX_BYTES >= 32

 public:
  // Always tests a slide by 0 blocks, then the larger block counts that are
  // compiled in for this HWY_MAX_BYTES.
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    DoTestSlideByKBlocks<0>(d);
#if HWY_MAX_BYTES >= 32
    const size_t N = Lanes(d);
    DoTestSlideBy1Block(d, N);
#if HWY_MAX_BYTES >= 64
    DoTestSlideBy2And3Blocks(d, N);
#endif  // HWY_MAX_BYTES >= 64
#endif  // HWY_MAX_BYTES >= 32
  }
};
// Runs TestSlideBlocks through ForPartialVectors for all types visited by
// ForAllTypes.
HWY_NOINLINE void TestAllSlideBlocks() {
  const auto test = ForPartialVectors<TestSlideBlocks>();
  ForAllTypes(test);
}
// NOLINTNEXTLINE(google-readability-namespace-comments)
}
// namespace HWY_NAMESPACE
}
// namespace hwy
HWY_AFTER_NAMESPACE();
// Test registration for the HwySlideUpDownTest suite.
// NOTE(review): the HWY_ONCE guard presumably keeps this section from being
// repeated for every target that hwy/foreach_target.h re-includes this file
// for - confirm against the Highway headers.
#if HWY_ONCE
namespace hwy {
// Must precede the HWY_EXPORT_AND_TEST_P entries, which refer to the same
// suite name.
HWY_BEFORE_TEST(HwySlideUpDownTest);
// One entry per TestAll* function defined in this file.
HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideUpLanes);
HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideDownLanes);
HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlide1);
HWY_EXPORT_AND_TEST_P(HwySlideUpDownTest, TestAllSlideBlocks);
}
// namespace hwy
#endif
// HWY_ONCE