Quelle float_test.cc Sprache: C

// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Tests some ops specific to floating-point types (Div, Round etc.)

#include <stdio.h>

#include <cmath>  // std::ceil, std::floor

#include "hwy/base.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/float_test.cc"
#include "hwy/foreach_target.h"  // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"

HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {

// Verifies hwy::F16FromF32 by comparing the raw 16-bit encoding of each
// result against a known bit pattern: signed zero, underflow, the
// subnormal/normal boundary, round-to-nearest-even, overflow, infinity, NaN.
HWY_NOINLINE void TestAllF16FromF32() {
  const FixedTag<float, 1> d1;
  const float inf = GetLane(Inf(d1));
  const float nan = GetLane(NaN(d1));

  // Converts `f` to float16 and returns its bit representation.
  const auto ToBits = [](float f) -> uint16_t {
    return BitCastScalar<uint16_t>(hwy::F16FromF32(f));
  };

  // Signed zeros keep their sign.
  HWY_ASSERT_EQ(0, ToBits(0.0f));
  HWY_ASSERT_EQ(0x8000, ToBits(-0.0f));

  // An f32 subnormal (2^-127): expected to become signed zero.
  HWY_ASSERT_EQ(0, ToBits(5.87747175411E-39f));
  HWY_ASSERT_EQ(0x8000, ToBits(-5.87747175411E-39f));

  // Largest f16 subnormal: all mantissa bits set, exponent zero.
  HWY_ASSERT_EQ(0x3FF, ToBits(6.0975552E-5f));
  HWY_ASSERT_EQ(0x83FF, ToBits(-6.0975552E-5f));

  // Smallest normalized f16: exponent field 1, mantissa zero.
  HWY_ASSERT_EQ(0x400, ToBits(6.103515625E-5f));
  HWY_ASSERT_EQ(0x8400, ToBits(-6.103515625E-5f));

  // Rounding to nearest even. 15 << 10 is the encoding of 1.0.
  // Round down to even: 0[10..0] => 0
  HWY_ASSERT_EQ((15 << 10) + 0, ToBits(1.00048828125f));
  // Round up: 0[1..1] => 1
  HWY_ASSERT_EQ((15 << 10) + 1, ToBits(1.00097644329f));
  // Round up to even: 1[10..0] => 10
  HWY_ASSERT_EQ((15 << 10) + 2, ToBits(1.00146484375f));

  // Magnitudes greater than the f16 maximum become infinity.
  HWY_ASSERT_EQ(0x7C00, ToBits(7E4f));
  HWY_ASSERT_EQ(0xFC00, ToBits(-7E4f));

  // Infinity maps to infinity of the same sign.
  HWY_ASSERT_EQ(0x7C00, ToBits(inf));
  HWY_ASSERT_EQ(0xFC00, ToBits(-inf));

  // NaN: expected encoding has every exponent and mantissa bit set.
  HWY_ASSERT_EQ(0x7FFF, ToBits(nan));
  HWY_ASSERT_EQ(0xFFFF, ToBits(-nan));
}

// Verifies hwy::F32FromF16 by decoding known float16 bit patterns and
// comparing the result against the expected float value.
HWY_NOINLINE void TestAllF32FromF16() {
  const FixedTag<float, 1> d1;
  const float inf = GetLane(Inf(d1));
  const float nan = GetLane(NaN(d1));

  // Reinterprets `bits` as a float16 and widens it to float.
  const auto FromBits = [](uint16_t bits) -> float {
    return hwy::F32FromF16(BitCastScalar<float16_t>(bits));
  };

  // Signed zeros.
  HWY_ASSERT_EQ(0.0f, FromBits(uint16_t{0}));
  HWY_ASSERT_EQ(-0.0f, FromBits(uint16_t{0x8000}));

  // Largest f16 subnormal.
  HWY_ASSERT_EQ(6.0975552E-5f, FromBits(uint16_t{0x3FF}));
  HWY_ASSERT_EQ(-6.0975552E-5f, FromBits(uint16_t{0x83FF}));

  // Smallest normalized f16.
  HWY_ASSERT_EQ(6.103515625E-5f, FromBits(uint16_t{0x400}));
  HWY_ASSERT_EQ(-6.103515625E-5f, FromBits(uint16_t{0x8400}));

  // Infinity.
  HWY_ASSERT_EQ(inf, FromBits(uint16_t{0x7C00}));
  HWY_ASSERT_EQ(-inf, FromBits(uint16_t{0xFC00}));

  // NaN.
  HWY_ASSERT_EQ(nan, FromBits(uint16_t{0x7FFF}));
  HWY_ASSERT_EQ(-nan, FromBits(uint16_t{0xFFFF}));
}

// Checks Div on the input Iota(d, -2): dividing by one must return the input
// unchanged, and dividing by two must match a per-lane scalar reference
// computed in double precision.
struct TestDiv {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const Vec<D> numerators = Iota(d, -2);

    // x / 1 == x.
    const Vec<D> one = Set(d, ConvertScalarTo<T>(1));
    HWY_ASSERT_VEC_EQ(d, numerators, Div(numerators, one));

    // x / 2: the reference for lane i is (i - 2) / 2.
    const size_t num_lanes = Lanes(d);
    auto halves = AllocateAligned<T>(num_lanes);
    HWY_ASSERT(halves);
    for (size_t lane = 0; lane < num_lanes; ++lane) {
      const double numerator = static_cast<double>(lane) - 2.0;
      halves[lane] = ConvertScalarTo<T>(numerator / 2.0);
    }
    const Vec<D> two = Set(d, ConvertScalarTo<T>(2));
    HWY_ASSERT_VEC_EQ(d, halves.get(), Div(numerators, two));
  }
};

// Instantiates TestDiv via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllDiv() {
  ForPartialVectors<TestDiv> test;
  ForFloatTypes(test);
}

// Checks the accuracy of ApproximateReciprocal. The input is Iota(d, -2) with
// any zero lane replaced by one. The lane with the largest absolute error is
// located, and its relative error must be below 0.004.
struct TestApproximateReciprocal {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t num_lanes = Lanes(d);
    auto input = AllocateAligned<T>(num_lanes);
    auto actual = AllocateAligned<T>(num_lanes);
    HWY_ASSERT(input && actual);

    // Replace zero lanes by one so that the exact reciprocal is finite.
    const auto iota = Iota(d, -2);
    const auto one = Set(d, ConvertScalarTo<T>(1));
    const auto nonzero = IfThenElse(Eq(iota, Zero(d)), one, iota);
    Store(nonzero, d, input.get());
    Store(ApproximateReciprocal(nonzero), d, actual.get());

    // Find the lane with the largest absolute (L1) error.
    double largest_l1 = 0.0;
    double expected_at_worst = 0.0;
    double actual_at_worst = 0.0;
    for (size_t lane = 0; lane < num_lanes; ++lane) {
      const double exact = 1.0 / input[lane];
      const double l1 = ScalarAbs(exact - actual[lane]);
      if (l1 > largest_l1) {
        largest_l1 = l1;
        expected_at_worst = exact;
        actual_at_worst = actual[lane];
      }
    }

    // The relative error is only checked when the reference is large enough
    // to serve as a divisor.
    const double divisor = ScalarAbs(expected_at_worst);
    if (!(divisor > 1E-5)) return;

    const double largest_rel = largest_l1 / divisor;
    fprintf(stderr, "max l1 %f rel %f (%f vs %f)\n", largest_l1, largest_rel,
            expected_at_worst, actual_at_worst);
    HWY_ASSERT(largest_rel < 0.004);
  }
};

// Instantiates TestApproximateReciprocal via ForPartialVectors and dispatches
// it through ForFloatTypes.
HWY_NOINLINE void TestAllApproximateReciprocal() {
  ForPartialVectors<TestApproximateReciprocal> test;
  ForFloatTypes(test);
}

// Checks that Sqrt undoes squaring: Sqrt(x * x) must equal x for the
// input Iota(d, 0).
struct TestSquareRoot {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const auto roots = Iota(d, 0);
    const auto squares = Mul(roots, roots);
    HWY_ASSERT_VEC_EQ(d, roots, Sqrt(squares));
  }
};

// Instantiates TestSquareRoot via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllSquareRoot() {
  ForPartialVectors<TestSquareRoot> test;
  ForFloatTypes(test);
}

// Checks ApproximateReciprocalSqrt for the input 123: every lane must be
// within 4E-4 of the reference value 0.090166.
struct TestReciprocalSquareRoot {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t num_lanes = Lanes(d);
    auto results = AllocateAligned<T>(num_lanes);
    HWY_ASSERT(results);

    const Vec<D> input = Set(d, ConvertScalarTo<T>(123.0f));
    Store(ApproximateReciprocalSqrt(input), d, results.get());

    for (size_t lane = 0; lane < num_lanes; ++lane) {
      // Absolute difference from the reference, computed in float and then
      // converted to T.
      const float delta = ConvertScalarTo<float>(results[lane]) - 0.090166f;
      T err = ConvertScalarTo<T>(delta);
      if (err < ConvertScalarTo<T>(0)) err = -err;

      const bool exceeds_tolerance = static_cast<double>(err) >= 4E-4;
      if (exceeds_tolerance) {
        HWY_ABORT("Lane %d (%d): actual %f err %f\n", static_cast<int>(lane),
                  static_cast<int>(num_lanes),
                  static_cast<double>(results[lane]),
                  static_cast<double>(err));
      }
    }
  }
};

// Instantiates TestReciprocalSquareRoot via ForPartialVectors and dispatches
// it through ForFloatTypes.
HWY_NOINLINE void TestAllReciprocalSquareRoot() {
  ForPartialVectors<TestReciprocalSquareRoot> test;
  ForFloatTypes(test);
}

// Returns an aligned array of inputs shared by the rounding tests in this
// file (Round, NearestInt, Trunc, Ceil, Floor).
//
// The first argument is only used to deduce T. `d` supplies the lane count
// and the infinity/NaN values. `padded` is an output: it receives the number
// of elements in the returned array, i.e. the number of test cases rounded up
// to a multiple of Lanes(d) so that callers can load whole vectors. Elements
// past the last test case are zero.
template <typename T, class D>
AlignedFreeUniquePtr<T[]> RoundTestCases(T /*unused*/, D d, size_t& padded) {
  const T eps = Epsilon<T>();
  // Smaller magnitude for types narrower than 32 bits.
  const T huge = ConvertScalarTo<T>(sizeof(T) >= 4 ? 1E34 : 3E4);
  const T test_cases[] = {
      // +/- 1
      ConvertScalarTo<T>(1), ConvertScalarTo<T>(-1),
      // +/- 0. The negative zero must be a floating-point literal: `-0` is
      // an integer expression equal to 0 and would yield positive zero.
      ConvertScalarTo<T>(0), ConvertScalarTo<T>(-0.0),
      // near 0
      ConvertScalarTo<T>(0.4), ConvertScalarTo<T>(-0.4),
      // +/- integer
      ConvertScalarTo<T>(4), ConvertScalarTo<T>(-32),
      // positive near limit
      ConvertScalarTo<T>(MantissaEnd<T>() - ConvertScalarTo<T>(1.5)),
      ConvertScalarTo<T>(MantissaEnd<T>() + ConvertScalarTo<T>(1.5)),
      // negative near limit
      ConvertScalarTo<T>(-MantissaEnd<T>() - ConvertScalarTo<T>(1.5)),
      ConvertScalarTo<T>(-MantissaEnd<T>() + ConvertScalarTo<T>(1.5)),
      // positive tiebreak
      ConvertScalarTo<T>(1.5), ConvertScalarTo<T>(2.5),
      // negative tiebreak
      ConvertScalarTo<T>(-1.5), ConvertScalarTo<T>(-2.5),
      // positive +/- delta
      ConvertScalarTo<T>(2.0001), ConvertScalarTo<T>(3.9999),
      // negative +/- delta
      ConvertScalarTo<T>(-999.9999), ConvertScalarTo<T>(-998.0001),
      // positive +/- epsilon
      ConvertScalarTo<T>(ConvertScalarTo<T>(1) + eps),
      ConvertScalarTo<T>(ConvertScalarTo<T>(1) - eps),
      // negative +/- epsilon
      ConvertScalarTo<T>(ConvertScalarTo<T>(-1) + eps),
      ConvertScalarTo<T>(ConvertScalarTo<T>(-1) - eps),
      // +/- huge (but still fits in float)
      huge, -huge,
      // +/- infinity
      GetLane(Inf(d)), GetLane(Neg(Inf(d))),
      // qNaN
      GetLane(NaN(d))};
  const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]);
  const size_t N = Lanes(d);
  padded = RoundUpTo(kNumTestCases, N);  // allow loading whole vectors
  auto in = AllocateAligned<T>(padded);
  HWY_ASSERT(in);
  CopyBytes(test_cases, in.get(), kNumTestCases * sizeof(T));
  // Zero the padding so that whole-vector loads read defined values.
  ZeroBytes(in.get() + kNumTestCases, (padded - kNumTestCases) * sizeof(T));
  return in;
}

// Checks Round against a scalar reference computed with nearbyint for every
// input returned by RoundTestCases.
struct TestRound {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
// The reference is computed in f32/f64 because nearbyint does not support
// _Float16.
#if HWY_HAVE_FLOAT64
    using TWide = double;
#else
    using TWide = float;
#endif

    size_t count = 0;
    auto inputs = RoundTestCases(t, d, count);
    auto reference = AllocateAligned<T>(count);
    HWY_ASSERT(reference);

    // Avoid [std::]round, which does not round to nearest *even*.
    // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see
    // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html
    for (size_t i = 0; i < count; ++i) {
      const TWide widened = ConvertScalarTo<TWide>(inputs[i]);
      reference[i] = ConvertScalarTo<T>(nearbyint(widened));
    }

    // Compare one whole vector at a time; `count` is a multiple of N.
    const size_t N = Lanes(d);
    for (size_t offset = 0; offset < count; offset += N) {
      const auto v = Load(d, inputs.get() + offset);
      HWY_ASSERT_VEC_EQ(d, reference.get() + offset, Round(v));
    }
  }
};

// Instantiates TestRound via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllRound() {
  ForPartialVectors<TestRound> test;
  ForFloatTypes(test);
}

// Checks NearestInt against a scalar reference for every input returned by
// RoundTestCases. NaN lanes are replaced by zero before the call, and inputs
// that are infinite or at least as large in magnitude as the integer maximum
// are expected to saturate to the integer limits.
struct TestNearestInt {
  template <typename TF, class DF>
  HWY_NOINLINE void operator()(TF tf, const DF df) {
    using TI = MakeSigned<TF>;
    const RebindToSigned<DF> di;

    size_t count = 0;
    auto inputs = RoundTestCases(tf, df, count);
    auto reference = AllocateAligned<TI>(count);
    HWY_ASSERT(reference);

    constexpr double kMax = static_cast<double>(LimitsMax<TI>());
    for (size_t i = 0; i < count; ++i) {
      const TF value = inputs[i];

      // NaN inputs are replaced with 0 before calling NearestInt (see
      // `no_nan` below), hence the expected result is 0.
      if (ScalarIsNaN(value)) {
        reference[i] = 0;
        continue;
      }

      // Avoid the undefined result of lrintf for out-of-range inputs:
      // expect saturation according to the sign instead.
      const bool out_of_range =
          ScalarIsInf(value) || static_cast<double>(ScalarAbs(value)) >= kMax;
      if (out_of_range) {
        reference[i] = std::signbit(value) ? LimitsMin<TI>() : LimitsMax<TI>();
        continue;
      }

      reference[i] = static_cast<TI>(lrintf(ConvertScalarTo<float>(value)));
    }

    // Compare one whole vector at a time; `count` is a multiple of N.
    const size_t N = Lanes(df);
    for (size_t offset = 0; offset < count; offset += N) {
      const auto v = Load(df, inputs.get() + offset);
      // Eq(v, v) is false only for NaN lanes; those become zero.
      const auto no_nan = IfThenElse(Eq(v, v), v, Zero(df));
      HWY_ASSERT_VEC_EQ(di, reference.get() + offset, NearestInt(no_nan));
    }
  }
};

// Instantiates TestNearestInt via ForPartialVectors and invokes it for the
// float lane type only.
HWY_NOINLINE void TestAllNearestInt() {
  ForPartialVectors<TestNearestInt> test;
  test(float());
}

// Checks Trunc against a scalar reference computed with trunc for every input
// returned by RoundTestCases.
struct TestTrunc {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
    size_t count = 0;
    auto inputs = RoundTestCases(t, d, count);
    auto reference = AllocateAligned<T>(count);
    HWY_ASSERT(reference);

    // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see
    // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html
    // The reference is computed in double because trunc does not support
    // _Float16.
    for (size_t i = 0; i < count; ++i) {
      const double widened = ConvertScalarTo<double>(inputs[i]);
      reference[i] = ConvertScalarTo<T>(trunc(widened));
    }

    // Compare one whole vector at a time; `count` is a multiple of N.
    const size_t N = Lanes(d);
    for (size_t offset = 0; offset < count; offset += N) {
      const auto v = Load(d, inputs.get() + offset);
      HWY_ASSERT_VEC_EQ(d, reference.get() + offset, Trunc(v));
    }
  }
};

// Instantiates TestTrunc via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllTrunc() {
  ForPartialVectors<TestTrunc> test;
  ForFloatTypes(test);
}

// Checks Ceil against a scalar reference computed with std::ceil for every
// input returned by RoundTestCases.
struct TestCeil {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
    size_t count = 0;
    auto inputs = RoundTestCases(t, d, count);
    auto reference = AllocateAligned<T>(count);
    HWY_ASSERT(reference);

    // The reference is computed in double because ceil does not support
    // _Float16.
    for (size_t i = 0; i < count; ++i) {
      const double widened = ConvertScalarTo<double>(inputs[i]);
      reference[i] = ConvertScalarTo<T>(std::ceil(widened));
    }

    // Compare one whole vector at a time; `count` is a multiple of N.
    const size_t N = Lanes(d);
    for (size_t offset = 0; offset < count; offset += N) {
      const auto v = Load(d, inputs.get() + offset);
      HWY_ASSERT_VEC_EQ(d, reference.get() + offset, Ceil(v));
    }
  }
};

// Instantiates TestCeil via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllCeil() {
  ForPartialVectors<TestCeil> test;
  ForFloatTypes(test);
}

// Checks Floor against a scalar reference computed with std::floor for every
// input returned by RoundTestCases.
struct TestFloor {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
    size_t count = 0;
    auto inputs = RoundTestCases(t, d, count);
    auto reference = AllocateAligned<T>(count);
    HWY_ASSERT(reference);

    // The reference is computed in double because floor does not support
    // _Float16.
    for (size_t i = 0; i < count; ++i) {
      const double widened = ConvertScalarTo<double>(inputs[i]);
      reference[i] = ConvertScalarTo<T>(std::floor(widened));
    }

    // Compare one whole vector at a time; `count` is a multiple of N.
    const size_t N = Lanes(d);
    for (size_t offset = 0; offset < count; offset += N) {
      const auto v = Load(d, inputs.get() + offset);
      HWY_ASSERT_VEC_EQ(d, reference.get() + offset, Floor(v));
    }
  }
};

// Instantiates TestFloor via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllFloor() {
  ForPartialVectors<TestFloor> test;
  ForFloatTypes(test);
}

// Checks AbsDiff against a per-lane scalar reference |a - b|, and that the
// result does not depend on the argument order.
struct TestAbsDiff {
  template <typename T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const size_t N = Lanes(d);
    auto in_lanes_a = AllocateAligned<T>(N);
    auto in_lanes_b = AllocateAligned<T>(N);
    auto out_lanes = AllocateAligned<T>(N);
    HWY_ASSERT(in_lanes_a && in_lanes_b && out_lanes);

    // Shifting a size_t by its bit width or more is undefined behavior, and
    // wide vectors may have more lanes than size_t has bits. Reducing the
    // shift count modulo the bit width keeps the inputs unchanged for all
    // lane indices below that width.
    constexpr size_t kSizeBits = sizeof(size_t) * 8;
    for (size_t i = 0; i < N; ++i) {
      const size_t shift = i % kSizeBits;
      in_lanes_a[i] = ConvertScalarTo<T>((i ^ 1u) << shift);
      in_lanes_b[i] = ConvertScalarTo<T>(i << shift);
      // Scalar reference: absolute value of the difference, computed in T.
      out_lanes[i] = ConvertScalarTo<T>(
          ScalarAbs(ConvertScalarTo<T>(in_lanes_a[i] - in_lanes_b[i])));
    }
    const auto a = Load(d, in_lanes_a.get());
    const auto b = Load(d, in_lanes_b.get());
    const auto expected = Load(d, out_lanes.get());
    // AbsDiff must be symmetric in its arguments.
    HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b));
    HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a));
  }
};

// Instantiates TestAbsDiff via ForPartialVectors and dispatches it through
// ForFloatTypes.
HWY_NOINLINE void TestAllAbsDiff() {
  ForPartialVectors<TestAbsDiff> test;
  ForFloatTypes(test);
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
}  // namespace HWY_NAMESPACE
}  // namespace hwy
HWY_AFTER_NAMESPACE();

// Test registration.
// NOTE(review): HWY_ONCE presumably holds for only one of the per-target
// compilation passes of this file, so the registrations below are emitted a
// single time - confirm against hwy/foreach_target.h.
#if HWY_ONCE

namespace hwy {
// Lists every TestAll* entry point defined above under the HwyFloatTest name,
// one HWY_EXPORT_AND_TEST_P invocation per function.
HWY_BEFORE_TEST(HwyFloatTest);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF16FromF32);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllF32FromF16);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllDiv);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllApproximateReciprocal);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllSquareRoot);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllReciprocalSquareRoot);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllRound);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllNearestInt);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllTrunc);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllCeil);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllFloor);
HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllAbsDiff);
}  // namespace hwy

#endif

Messung V0.5

¤ Dauer der Verarbeitung: 0.10 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.