// Copyright 2023 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE
"tests/foreach_vec_test.cc"
#include "hwy/foreach_target.h" // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
struct ForeachVectorTestPerLaneSizeState {
size_t num_of_lanes_mask;
#if HWY_HAVE_SCALABLE
int pow2_mask;
#endif
};
struct ForeachVectorTestState {
ForeachVectorTestPerLaneSizeState per_lane_size_states[
16];
int lane_sizes_mask;
};
template <
class D>
static HWY_INLINE
void UpdateForeachVectorTestState(
ForeachVectorTestState &state, D d) {
using T = TFromD<D>;
static_assert(
sizeof(T) >=
1 &&
sizeof(T) <=
8,
"sizeof(T) must be between 1 and 8");
state.lane_sizes_mask |= (
1 <<
sizeof(T));
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&state.per_lane_size_states[
sizeof(T)];
const size_t lanes = Lanes(d);
HWY_ASSERT(lanes >
0 && (lanes & (lanes -
1)) ==
0);
per_lane_size_state->num_of_lanes_mask |= lanes;
#if HWY_HAVE_SCALABLE
constexpr
int kPow2 = D().Pow2();
#if HWY_TARGET == HWY_RVV
static_assert(kPow2 >= detail::MinPow2<T>(),
"kPow2 >= detail::MinPow2<T>() must be true");
#endif
static_assert(kPow2 <= detail::MaxPow2(),
"kPow2 <= detail::MaxPow2() must be true");
if (HWY_TARGET == HWY_RVV || kPow2 >= -
3) {
per_lane_size_state->pow2_mask |= (
1 << (kPow2 +
3));
}
#endif
}
static constexpr
int kMaxSupportedLaneSize = HWY_HAVE_INTEGER64 ?
8 :
4;
static constexpr
int kSupportedLaneSizesMask =
(
1 <<
1) | (
1 <<
2) | (
1 <<
4) | (HWY_HAVE_INTEGER64 ? (
1 <<
8) :
0);
#if HWY_HAVE_SCALABLE
static constexpr
int kSupportedU8Pow2Mask =
(HWY_TARGET == HWY_RVV) ?
0x7F :
0x0F;
#endif
static HWY_INLINE size_t LanesPerVectWithLaneSize(size_t lanes_per_u8_vect,
int lane_size) {
#if HWY_TARGET == HWY_SCALAR
(
void)lanes_per_u8_vect;
(
void)lane_size;
return 1;
#else
return lanes_per_u8_vect /
static_cast<size_t>(lane_size);
#endif
}
#define HWY_DECLARE_FOREACH_VECTOR_TEST(TestClass) \
static ForeachVectorTestState TestClass
##State; \
\
struct TestClass { \
template <
class T,
class D> \
HWY_INLINE
void operator()(T, D d) { \
UpdateForeachVectorTestState(TestClass
##State, d); \
} \
};
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForMaxPow2)
HWY_NOINLINE
void TestAllForMaxPow2() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForMaxPow2State);
ForUnsignedTypes(ForMaxPow2<TestForMaxPow2>());
HWY_ASSERT(TestForMaxPow2State.lane_sizes_mask == kSupportedLaneSizesMask);
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForMaxPow2State.per_lane_size_states[lane_size];
const size_t lanes = LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
((lanes << (HWY_TARGET == HWY_RVV ?
2 :
1)) -
1));
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
(kSupportedU8Pow2Mask &
0x1F) & (-((lane_size +
1) /
2));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForExtendableVectors)
#if HWY_TARGET == HWY_RVV
template <
int kPow2,
class Test,
class T,
hwy::EnableIf<(-kPow2 < detail::MinPow2<T>())> * = nullptr>
static HWY_INLINE
void ExecuteTestForExtendableVectors(
const Test &
/*test*/,
T
/*unused*/) {}
template <
int kPow2,
class Test,
class T,
hwy::EnableIf<(-kPow2 >= detail::MinPow2<T>())> * = nullptr>
static HWY_INLINE
void ExecuteTestForExtendableVectors(
const Test &test,
T
/*unused*/) {
test(T());
}
#endif
template <
int kPow2>
static HWY_NOINLINE
void DoTestAllForExtendableVectors() {
static_assert(kPow2 >=
0 && kPow2 <=
3,
"kPow2 must be between 0 and 3");
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForExtendableVectorsState);
const ForExtendableVectors<TestForExtendableVectors, kPow2> test;
#if HWY_TARGET == HWY_RVV
test(uint8_t());
ExecuteTestForExtendableVectors<kPow2>(test, uint16_t());
ExecuteTestForExtendableVectors<kPow2>(test, uint32_t());
ExecuteTestForExtendableVectors<kPow2>(test, uint64_t());
#else
ForUnsignedTypes(test);
#endif
#if HWY_TARGET == HWY_SCALAR
HWY_ASSERT(TestForExtendableVectorsState.lane_sizes_mask ==
0);
#else // HWY_TARGET != HWY_SCALAR
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t, -kPow2>());
#if HWY_TARGET == HWY_RVV
const int expected_lane_sizes_mask =
kSupportedLaneSizesMask &
((
1 <<
1) | (
1 <<
2) | ((kPow2 <=
2) ? (
1 <<
4) :
0) |
((kPow2 <=
1) ? (
1 <<
8) :
0));
#else
const int expected_lane_sizes_mask =
kSupportedLaneSizesMask & (((lanes_per_u8_vect >=
1) ? (
1 <<
1) :
0) |
((lanes_per_u8_vect >=
2) ? (
1 <<
2) :
0) |
((lanes_per_u8_vect >=
4) ? (
1 <<
4) :
0) |
((lanes_per_u8_vect >=
8) ? (
1 <<
8) :
0));
#endif
HWY_ASSERT(TestForExtendableVectorsState.lane_sizes_mask ==
expected_lane_sizes_mask);
#endif // HWY_TARGET == HWY_SCALAR
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForExtendableVectorsState.per_lane_size_states[lane_size];
#if HWY_TARGET == HWY_SCALAR
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#else
if ((expected_lane_sizes_mask & (
1 << lane_size)) !=
0) {
const size_t lanes =
LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
((lanes << (HWY_TARGET == HWY_RVV ?
4 :
1)) -
1));
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
((kSupportedU8Pow2Mask >> kPow2) & (-((lane_size +
1) /
2))) |
((HWY_TARGET == HWY_RVV) ?
0 : (
1 << (
3 - kPow2)));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
else {
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#if HWY_HAVE_SCALABLE
HWY_ASSERT(per_lane_size_state->pow2_mask ==
0);
#endif
}
#endif // HWY_TARGET == HWY_SCALAR
}
}
HWY_NOINLINE
void TestAllForExtendableVectors() {
DoTestAllForExtendableVectors<
1>();
DoTestAllForExtendableVectors<
2>();
DoTestAllForExtendableVectors<
3>();
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForShrinkableVectors)
HWY_NOINLINE
void TestAllForShrinkableVectors() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForShrinkableVectorsState);
ForUnsignedTypes(ForShrinkableVectors<TestForShrinkableVectors>());
#if HWY_TARGET == HWY_SCALAR
HWY_ASSERT(TestForShrinkableVectorsState.lane_sizes_mask ==
0);
#else // HWY_TARGET != HWY_SCALAR
HWY_ASSERT(TestForShrinkableVectorsState.lane_sizes_mask ==
kSupportedLaneSizesMask);
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
#endif // HWY_TARGET == HWY_SCALAR
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForShrinkableVectorsState.per_lane_size_states[lane_size];
#if HWY_TARGET == HWY_SCALAR
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#else // HWY_TARGET != HWY_SCALAR
const size_t lanes = LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
kSupportedU8Pow2Mask & (-
2 * ((lane_size +
1) /
2));
const size_t expected_lanes_mask =
(lanes *
static_cast<size_t>(expected_pow2_mask)) >>
3;
HWY_ASSERT((per_lane_size_state->num_of_lanes_mask & expected_lanes_mask) ==
expected_lanes_mask);
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#else // !HWY_HAVE_SCALABLE
const size_t expected_lanes_mask =
static_cast<size_t>(((lanes <<
1) -
1) & (~size_t{
1}));
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask == expected_lanes_mask);
#endif // HWY_HAVE_SCALABLE
#endif // HWY_TARGET == HWY_SCALAR
}
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForGEVectors)
template <size_t kMinBits,
class Test,
class T,
HWY_IF_LANES_LE(kMinBits,
sizeof(T) *
8 -
1)>
static HWY_INLINE
void ExecuteTestForGEVectors(
const Test &
/*test*/,
T
/*unused*/) {}
template <size_t kMinBits,
class Test,
class T,
HWY_IF_LANES_GT(kMinBits,
sizeof(T) *
8 -
1)>
static HWY_INLINE
void ExecuteTestForGEVectors(
const Test &test, T
/*unused*/) {
test(T());
}
template <size_t kMinBits,
class Test>
static HWY_NOINLINE
void DoTestAllForGEVectors(
const Test &test) {
static_assert(kMinBits >=
16,
"kMinBits >= 16 must be true");
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForGEVectorsState);
test(uint8_t());
test(uint16_t());
ExecuteTestForGEVectors<kMinBits>(test, uint32_t());
#if HWY_HAVE_INTEGER64
ExecuteTestForGEVectors<kMinBits>(test, uint64_t());
#endif
#if HWY_TARGET == HWY_SCALAR
HWY_ASSERT(TestForGEVectorsState.lane_sizes_mask ==
0);
#else // HWY_TARGET != HWY_SCALAR
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
#if HWY_TARGET == HWY_RVV
const size_t lanes_per_largest_u8_vect = lanes_per_u8_vect *
8;
#else
const size_t lanes_per_largest_u8_vect = lanes_per_u8_vect;
#endif // HWY_TARGET == HWY_RVV
constexpr
int kGEVectSupportedLaneSizesMask =
kSupportedLaneSizesMask &
((
1 <<
1) | (
1 <<
2) | ((kMinBits >=
32) ? (
1 <<
4) :
0) |
((kMinBits >=
64) ? (
1 <<
8) :
0));
const int expected_lane_sizes_mask =
(lanes_per_largest_u8_vect >= (kMinBits /
8))
? kGEVectSupportedLaneSizesMask
:
0;
constexpr size_t kSupportedU8VecSizesMask =
static_cast<size_t>(((
static_cast<size_t>(HWY_MAX_BYTES) <<
1) -
1) &
(~((kMinBits /
8) -
1)));
HWY_ASSERT(TestForGEVectorsState.lane_sizes_mask == expected_lane_sizes_mask);
#endif // HWY_TARGET == HWY_SCALAR
#if HWY_HAVE_SCALABLE
constexpr
int kMinVecPow2 =
static_cast<
int>(CeilLog2(HWY_MIN(kMinBits /
16,
8))) -
3;
static_assert(kMinVecPow2 >= -
3 && kMinVecPow2 <=
0,
"kMinVecPow2 must be between -3 and 0");
constexpr
int kGEVectSupportedU8Pow2Mask =
kSupportedU8Pow2Mask & (-(
1 << (kMinVecPow2 +
3)));
#if HWY_TARGET == HWY_RVV
const int ge_vect_supported_u8_pow2_mask =
kGEVectSupportedU8Pow2Mask &
((kMinBits <=
128)
? -
1
: ((lanes_per_u8_vect < (kMinBits /
64))
?
0
: (
0x40 |
((lanes_per_u8_vect >= (kMinBits /
32)) ?
0x20 :
0) |
((lanes_per_u8_vect >= (kMinBits /
16)) ?
0x10 :
0) |
((lanes_per_u8_vect >= (kMinBits /
8)) ?
0x08 :
0))));
#else
const int ge_vect_supported_u8_pow2_mask =
(lanes_per_u8_vect >= (kMinBits /
8)) ? kGEVectSupportedU8Pow2Mask :
0;
#endif
#endif
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForGEVectorsState.per_lane_size_states[lane_size];
#if HWY_TARGET == HWY_SCALAR
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#else // HWY_TARGET != HWY_SCALAR
if (kMinBits >=
static_cast<size_t>(lane_size *
8)) {
const size_t expected_lanes_mask =
(((lanes_per_largest_u8_vect <<
1) -
1) & kSupportedU8VecSizesMask) /
static_cast<size_t>(lane_size);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask == expected_lanes_mask);
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
ge_vect_supported_u8_pow2_mask & (-((lane_size +
1) /
2));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
else {
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#if HWY_HAVE_SCALABLE
HWY_ASSERT(per_lane_size_state->pow2_mask ==
0);
#endif
}
#endif // HWY_TARGET == HWY_SCALAR
}
}
HWY_NOINLINE
void TestAllForGEVectors() {
DoTestAllForGEVectors<
16>(ForGEVectors<
16, TestForGEVectors>());
DoTestAllForGEVectors<
32>(ForGEVectors<
32, TestForGEVectors>());
DoTestAllForGEVectors<
64>(ForGEVectors<
64, TestForGEVectors>());
DoTestAllForGEVectors<
128>(ForGEVectors<
128, TestForGEVectors>());
DoTestAllForGEVectors<
256>(ForGEVectors<
256, TestForGEVectors>());
DoTestAllForGEVectors<
512>(ForGEVectors<
512, TestForGEVectors>());
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForPromoteVectors)
template <
int kSrcLaneSizePow2,
int kPromotePow2,
class Test,
class T,
hwy::EnableIf<(kSrcLaneSizePow2 + kPromotePow2 <=
(HWY_HAVE_INTEGER64 ?
3 :
2))> * = nullptr>
static HWY_INLINE
void ExecuteTestForPromoteVectors(
const Test &test,
T
/*unused*/) {
test(T());
}
template <
int kSrcLaneSizePow2,
int kPromotePow2,
class Test,
class T,
hwy::EnableIf<(kSrcLaneSizePow2 + kPromotePow2 >
(HWY_HAVE_INTEGER64 ?
3 :
2))> * = nullptr>
static HWY_INLINE
void ExecuteTestForPromoteVectors(
const Test &
/*test*/,
T
/*unused*/) {}
template <
int kPow2>
static HWY_NOINLINE
void DoTestAllForPromoteVectors() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForPromoteVectorsState);
const ForPromoteVectors<TestForPromoteVectors, kPow2> test;
test(uint8_t());
ExecuteTestForPromoteVectors<
1, kPow2>(test, uint16_t());
ExecuteTestForPromoteVectors<
2, kPow2>(test, uint32_t());
constexpr
int kMaxSupportedPromoteLaneSize = kMaxSupportedLaneSize >> kPow2;
static_assert(kMaxSupportedPromoteLaneSize >
0,
"kMaxSupportedPromoteLaneSize > 0 must be true");
constexpr
int kSupportedPromoteLaneSizesMask =
kSupportedLaneSizesMask & ((
2 << kMaxSupportedPromoteLaneSize) -
1);
HWY_ASSERT(TestForPromoteVectorsState.lane_sizes_mask ==
kSupportedPromoteLaneSizesMask);
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForPromoteVectorsState.per_lane_size_states[lane_size];
if (lane_size <= kMaxSupportedPromoteLaneSize) {
const size_t lanes =
LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size) >>
(HWY_TARGET == HWY_SCALAR ?
0 : kPow2);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
((lanes << (HWY_TARGET == HWY_RVV ?
4 :
1)) -
1));
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
((kSupportedU8Pow2Mask >> kPow2) & (-((lane_size +
1) /
2))) |
((HWY_TARGET == HWY_RVV) ?
0 : (
1 << (
3 - kPow2)));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
else {
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#if HWY_HAVE_SCALABLE
HWY_ASSERT(per_lane_size_state->pow2_mask ==
0);
#endif
}
}
}
HWY_NOINLINE
void TestAllForPromoteVectors() {
DoTestAllForPromoteVectors<
1>();
DoTestAllForPromoteVectors<
2>();
#if HWY_HAVE_INTEGER64
DoTestAllForPromoteVectors<
3>();
#endif
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForDemoteVectors)
template <
int kSrcLaneSizePow2,
int kDemotePow2,
class Test,
class T,
hwy::EnableIf<(kSrcLaneSizePow2 >= kDemotePow2)> * = nullptr>
static HWY_INLINE
void ExecuteTestForDemoteVectors(
const Test &test,
T
/*unused*/) {
test(T());
}
template <
int kSrcLaneSizePow2,
int kDemotePow2,
class Test,
class T,
hwy::EnableIf<(kSrcLaneSizePow2 < kDemotePow2)> * = nullptr>
static HWY_INLINE
void ExecuteTestForDemoteVectors(
const Test &
/*test*/,
T
/*unused*/) {}
template <
int kPow2>
HWY_NOINLINE
void DoTestAllForDemoteVectors() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForDemoteVectorsState);
const ForDemoteVectors<TestForDemoteVectors, kPow2> test;
ExecuteTestForDemoteVectors<
1, kPow2>(test, uint16_t());
ExecuteTestForDemoteVectors<
2, kPow2>(test, uint32_t());
#if HWY_HAVE_INTEGER64
ExecuteTestForDemoteVectors<
3, kPow2>(test, uint64_t());
#endif
constexpr
int kMinDemotableLaneSize =
1 << kPow2;
constexpr
int kSupportedDemoteLaneSizesMask =
kSupportedLaneSizesMask & (-(
1 << kMinDemotableLaneSize));
HWY_ASSERT(TestForDemoteVectorsState.lane_sizes_mask ==
kSupportedDemoteLaneSizesMask);
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForDemoteVectorsState.per_lane_size_states[lane_size];
if (lane_size >= kMinDemotableLaneSize) {
const size_t lanes =
LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
((lanes << (HWY_TARGET == HWY_RVV ?
4 :
1)) -
1));
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
kSupportedU8Pow2Mask &
((-lane_size) | (((lane_size >> kPow2) >
1) ? (lane_size >>
1) :
0));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
else {
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
0);
#if HWY_HAVE_SCALABLE
HWY_ASSERT(per_lane_size_state->pow2_mask ==
0);
#endif
}
}
}
HWY_NOINLINE
void TestAllForDemoteVectors() {
DoTestAllForDemoteVectors<
1>();
DoTestAllForDemoteVectors<
2>();
#if HWY_HAVE_INTEGER64
DoTestAllForDemoteVectors<
3>();
#endif
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForHalfVectors)
template <
int kPow2>
static HWY_NOINLINE
void DoTestAllForHalfVectors() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForHalfVectorsState);
ForUnsignedTypes(ForHalfVectors<TestForHalfVectors, kPow2>());
#if HWY_TARGET == HWY_SCALAR
const size_t kMinSrcVectLanes =
1;
#else
const size_t kMinSrcVectLanes = size_t{
1} << kPow2;
#endif
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
#if HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_RVV
const int expected_lane_sizes_mask = kSupportedLaneSizesMask;
#else
const int expected_lane_sizes_mask =
kSupportedLaneSizesMask &
(((lanes_per_u8_vect >= kMinSrcVectLanes) ? (
1 <<
1) :
0) |
((lanes_per_u8_vect >=
2 * kMinSrcVectLanes) ? (
1 <<
2) :
0) |
((lanes_per_u8_vect >=
4 * kMinSrcVectLanes) ? (
1 <<
4) :
0) |
((lanes_per_u8_vect >=
8 * kMinSrcVectLanes) ? (
1 <<
8) :
0));
#endif
HWY_ASSERT(TestForHalfVectorsState.lane_sizes_mask ==
expected_lane_sizes_mask);
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForHalfVectorsState.per_lane_size_states[lane_size];
const size_t lanes = LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
(((lanes << (HWY_TARGET == HWY_RVV ?
4 :
1)) -
1) &
(size_t{
0} - kMinSrcVectLanes)));
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
kSupportedU8Pow2Mask & ((-(((lane_size +
1) /
2) << kPow2)) |
(lanes >= kMinSrcVectLanes ?
8 :
0));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
}
HWY_NOINLINE
void TestAllForHalfVectors() {
DoTestAllForHalfVectors<
1>();
DoTestAllForHalfVectors<
2>();
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForPartialVectors)
HWY_NOINLINE
void TestAllForPartialVectors() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(&TestForPartialVectorsState);
ForUnsignedTypes(ForPartialVectors<TestForPartialVectors>());
HWY_ASSERT(TestForPartialVectorsState.lane_sizes_mask ==
kSupportedLaneSizesMask);
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForPartialVectorsState.per_lane_size_states[lane_size];
const size_t lanes = LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask ==
((lanes << (HWY_TARGET == HWY_RVV ?
4 :
1)) -
1));
#if HWY_HAVE_SCALABLE
const int expected_pow2_mask =
kSupportedU8Pow2Mask & (-((lane_size +
1) /
2));
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif
}
}
HWY_DECLARE_FOREACH_VECTOR_TEST(TestForPartialFixedOrFullVectors)
HWY_NOINLINE
void TestAllForPartialFixedOrFullVectors() {
ZeroBytes<
sizeof(ForeachVectorTestState)>(
&TestForPartialFixedOrFullVectorsState);
ForUnsignedTypes(
ForPartialFixedOrFullScalableVectors<TestForPartialFixedOrFullVectors>());
HWY_ASSERT(TestForPartialFixedOrFullVectorsState.lane_sizes_mask ==
kSupportedLaneSizesMask);
const size_t lanes_per_u8_vect = Lanes(ScalableTag<uint8_t>());
for (
int lane_size =
1; lane_size <= kMaxSupportedLaneSize; lane_size <<=
1) {
ForeachVectorTestPerLaneSizeState *per_lane_size_state =
&TestForPartialFixedOrFullVectorsState.per_lane_size_states[lane_size];
const size_t lanes = LanesPerVectWithLaneSize(lanes_per_u8_vect, lane_size);
#if HWY_TARGET == HWY_RVV
const size_t expected_lanes_mask =
((lanes *
16) -
1) & (size_t{
0} - ((lanes_per_u8_vect +
7) /
8));
#elif HWY_HAVE_SCALABLE || HWY_TARGET == HWY_SVE_256 || \
HWY_TARGET == HWY_SVE2_128
const size_t expected_lanes_mask = lanes;
#else
const size_t expected_lanes_mask = (lanes <<
1) -
1;
#endif
HWY_ASSERT(per_lane_size_state->num_of_lanes_mask == expected_lanes_mask);
#if HWY_HAVE_SCALABLE
#if HWY_TARGET == HWY_RVV
const int expected_pow2_mask = kSupportedU8Pow2Mask & (-lane_size);
#else
const int expected_pow2_mask =
8;
#endif
HWY_ASSERT(per_lane_size_state->pow2_mask == expected_pow2_mask);
#endif // HWY_HAVE_SCALABLE
}
}
#undef HWY_DECLARE_FOREACH_VECTOR_TEST
// NOLINTNEXTLINE(google-readability-namespace-comments)
}
// namespace HWY_NAMESPACE
}
// namespace hwy
HWY_AFTER_NAMESPACE();
#if HWY_ONCE
namespace hwy {
HWY_BEFORE_TEST(HwyForeachVecTest);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForMaxPow2);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForExtendableVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForShrinkableVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForGEVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForPromoteVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForDemoteVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForHalfVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForPartialVectors);
HWY_EXPORT_AND_TEST_P(HwyForeachVecTest, TestAllForPartialFixedOrFullVectors);
}
// namespace hwy
#endif