/*
 * Copyright (c) 2022 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 *
 */
/*
 * Copyright (c) 2021 Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 *
 * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
 *                Xiwei Gu   <guxiwei-hf@loongson.cn>
 *                Lu Wang    <wanglu@loongson.cn>
 *
 * This file is a header file for loongarch builtin extension.
 *
 */
#ifdef __loongarch_sx
#include <lsxintrin.h>
/*
 * =============================================================================
 * Description : Dot product & addition of byte vector elements
 * Arguments   : Inputs  - in_c, in_h, in_l
 *               Outputs - out
 *               Return Type - halfword
 * Details     : Signed byte elements from in_h are multiplied by
 *               signed byte elements from in_l, and then added adjacent to
 *               each other to get a result twice the size of input. Then
 *               the results are added to signed half-word elements from in_c.
 * Example     : out = __lsx_vdp2add_h_b(in_c, in_h, in_l)
 *        in_c : 1,2,3,4, 1,2,3,4
 *        in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8
 *        in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1
 *         out : 23,40,41,26, 23,40,41,26
 * =============================================================================
 */
static inline __m128i __lsx_vdp2add_h_b(__m128i in_c, __m128i in_h,
                                        __m128i in_l) {
  __m128i out;

  /* Accumulate the even-indexed products onto in_c, then the odd-indexed. */
  out = __lsx_vmaddwev_h_b(in_c, in_h, in_l);
  out = __lsx_vmaddwod_h_b(out, in_h, in_l);
  return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_c, in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * unsigned byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * The results are added to signed half-word elements from in_c. * Example : out = __lsx_vdp2add_h_bu(in_c, in_h, in_l) * in_c : 1,2,3,4, 1,2,3,4 * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 23,40,41,26, 23,40,41,26 * =============================================================================
*/ staticinline __m128i __lsx_vdp2add_h_bu(__m128i in_c, __m128i in_h,
__m128i in_l) {
__m128i out;
out = __lsx_vmaddwev_h_bu(in_c, in_h, in_l);
out = __lsx_vmaddwod_h_bu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_c, in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * The results are added to signed half-word elements from in_c. * Example : out = __lsx_vdp2add_h_bu_b(in_c, in_h, in_l) * in_c : 1,1,1,1, 1,1,1,1 * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : -1,-2,-3,-4, -5,-6,-7,-8, 1,2,3,4, 5,6,7,8 * out : -4,-24,-60,-112, 6,26,62,114 * =============================================================================
*/ staticinline __m128i __lsx_vdp2add_h_bu_b(__m128i in_c, __m128i in_h,
__m128i in_l) {
__m128i out;
out = __lsx_vmaddwev_h_bu_b(in_c, in_h, in_l);
out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of half-word vector elements * Arguments : Inputs - in_c, in_h, in_l * Outputs - out * Return Type - __m128i * Details : Signed half-word elements from in_h are multiplied by * signed half-word elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Then the results are added to signed word elements from in_c. * Example : out = __lsx_vdp2add_h_b(in_c, in_h, in_l) * in_c : 1,2,3,4 * in_h : 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1 * out : 23,40,41,26 * =============================================================================
*/ staticinline __m128i __lsx_vdp2add_w_h(__m128i in_c, __m128i in_h,
__m128i in_l) {
__m128i out;
out = __lsx_vmaddwev_w_h(in_c, in_h, in_l);
out = __lsx_vmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Signed byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_h_b(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 22,38,38,22, 22,38,38,22 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_h_b(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_h_b(in_h, in_l);
out = __lsx_vmaddwod_h_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * unsigned byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_h_bu(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 22,38,38,22, 22,38,38,22 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_h_bu(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_h_bu(in_h, in_l);
out = __lsx_vmaddwod_h_bu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_h_bu_b(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,-1 * out : 22,38,38,22, 22,38,38,6 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_h_bu_b(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_h_bu_b(in_h, in_l);
out = __lsx_vmaddwod_h_bu_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - halfword * Details : Signed byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_w_h(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1 * out : 22,38,38,22 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_w_h(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_w_h(in_h, in_l);
out = __lsx_vmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Outputs - out * Return Type - double * Details : Signed byte elements from in_h are multiplied by * signed byte elements from in_l, and then added adjacent to * each other to get a result twice the size of input. * Example : out = __lsx_vdp2_d_w(in_h, in_l) * in_h : 1,2,3,4 * in_l : 8,7,6,5 * out : 22,38 * =============================================================================
*/ staticinline __m128i __lsx_vdp2_d_w(__m128i in_h, __m128i in_l) {
__m128i out;
out = __lsx_vmulwev_d_w(in_h, in_l);
out = __lsx_vmaddwod_d_w(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Clip all halfword elements of input vector between min & max * out = ((_in) < (min)) ? (min) : (((_in) > (max)) ? (max) : * (_in)) * Arguments : Inputs - _in (input vector) * - min (min threshold) * - max (max threshold) * Outputs - out (output vector with clipped elements) * Return Type - signed halfword * Example : out = __lsx_vclip_h(_in) * _in : -8,2,280,249, -8,255,280,249 * min : 1,1,1,1, 1,1,1,1 * max : 9,9,9,9, 9,9,9,9 * out : 1,2,9,9, 1,9,9,9 * =============================================================================
*/ staticinline __m128i __lsx_vclip_h(__m128i _in, __m128i min, __m128i max) {
__m128i out;
out = __lsx_vmax_h(min, _in);
out = __lsx_vmin_h(max, out); return out;
}
/* * ============================================================================= * Description : Set each element of vector between 0 and 255 * Arguments : Inputs - _in * Outputs - out * Return Type - halfword * Details : Signed byte elements from _in are clamped between 0 and 255. * Example : out = __lsx_vclip255_h(_in) * _in : -8,255,280,249, -8,255,280,249 * out : 0,255,255,249, 0,255,255,249 * =============================================================================
*/ staticinline __m128i __lsx_vclip255_h(__m128i _in) {
__m128i out;
out = __lsx_vmaxi_h(_in, 0);
out = __lsx_vsat_hu(out, 7); return out;
}
/* * ============================================================================= * Description : Set each element of vector between 0 and 255 * Arguments : Inputs - _in * Outputs - out * Return Type - word * Details : Signed byte elements from _in are clamped between 0 and 255. * Example : out = __lsx_vclip255_w(_in) * _in : -8,255,280,249 * out : 0,255,255,249 * =============================================================================
*/ staticinline __m128i __lsx_vclip255_w(__m128i _in) {
__m128i out;
out = __lsx_vmaxi_w(_in, 0);
out = __lsx_vsat_wu(out, 7); return out;
}
#ifdef __loongarch_asx
#include <lasxintrin.h>
/*
 * =============================================================================
 * Description : Dot product of byte vector elements
 * Arguments   : Inputs - in_h, in_l
 *               Output - out
 *               Return Type - signed halfword
 * Details     : Unsigned byte elements from in_h are multiplied with
 *               unsigned byte elements from in_l producing a result
 *               twice the size of input i.e. signed halfword.
 *               Then these multiplied results of adjacent odd-even elements
 *               are added to the out vector
 * Example     : See out = __lasx_xvdp2_w_h(in_h, in_l)
 * =============================================================================
 */
static inline __m256i __lasx_xvdp2_h_bu(__m256i in_h, __m256i in_l) {
  __m256i out;

  /* Even-indexed products first, then add the odd-indexed products. */
  out = __lasx_xvmulwev_h_bu(in_h, in_l);
  out = __lasx_xvmaddwod_h_bu(out, in_h, in_l);
  return out;
}
/* * ============================================================================= * Description : Dot product of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed halfword * Details : Signed byte elements from in_h are multiplied with * signed byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplication results of adjacent odd-even elements * are added to the out vector * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_h_b(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_h_b(in_h, in_l);
out = __lasx_xvmaddwod_h_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed word * Details : Signed halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Then these multiplied results of adjacent odd-even elements * are added to the out vector. * Example : out = __lasx_xvdp2_w_h(in_h, in_l) * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8 * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1 * out : 22,38,38,22, 22,38,38,22 * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_h(in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of word vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed double * Details : Signed word elements from in_h are multiplied with * signed word elements from in_l producing a result * twice the size of input i.e. signed double-word. * Then these multiplied results of adjacent odd-even elements * are added to the out vector. * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_d_w(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_d_w(in_h, in_l);
out = __lasx_xvmaddwod_d_w(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed word * Details : Unsigned halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. unsigned word. * Multiplication result of adjacent odd-even elements * are added to the out vector * Example : See out = __lasx_xvdp2_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2_w_hu_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_hu_h(in_h, in_l);
out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - halfword * Details : Signed byte elements from in_h are multiplied with * signed byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplied results of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_h_b(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_h_b(in_c, in_h, in_l);
out = __lasx_xvmaddwod_h_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied with * unsigned byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplied results of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_h_bu(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_h_bu(in_c, in_h, in_l);
out = __lasx_xvmaddwod_h_bu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product & addition of byte vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - halfword * Details : Unsigned byte elements from in_h are multiplied with * signed byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Then these multiplied results of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_h_bu_b(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_h_bu_b(in_c, in_h, in_l);
out = __lasx_xvmaddwod_h_bu_b(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - per RTYPE * Details : Signed halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added to the in_c vector. * Example : out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * in_c : 1,2,3,4, 1,2,3,4 * in_h : 1,2,3,4, 5,6,7,8, 1,2,3,4, 5,6,7,8, * in_l : 8,7,6,5, 4,3,2,1, 8,7,6,5, 4,3,2,1, * out : 23,40,41,26, 23,40,41,26 * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_w_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_w_h(in_c, in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed word * Details : Unsigned halfword elements from in_h are multiplied with * unsigned halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added to the in_c vector. * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_w_hu(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_w_hu(in_c, in_h, in_l);
out = __lasx_xvmaddwod_w_hu(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed word * Details : Unsigned halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added to the in_c vector * Example : See out = __lasx_xvdp2add_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2add_w_hu_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmaddwev_w_hu_h(in_c, in_h, in_l);
out = __lasx_xvmaddwod_w_hu_h(out, in_h, in_l); return out;
}
/* * ============================================================================= * Description : Vector Unsigned Dot Product and Subtract * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed halfword * Details : Unsigned byte elements from in_h are multiplied with * unsigned byte elements from in_l producing a result * twice the size of input i.e. signed halfword. * Multiplication result of adjacent odd-even elements * are added together and subtracted from double width elements * in_c vector. * Example : See out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvdp2sub_h_bu(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_h_bu(in_h, in_l);
out = __lasx_xvmaddwod_h_bu(out, in_h, in_l);
out = __lasx_xvsub_h(in_c, out); return out;
}
/* * ============================================================================= * Description : Vector Signed Dot Product and Subtract * Arguments : Inputs - in_c, in_h, in_l * Output - out * Return Type - signed word * Details : Signed halfword elements from in_h are multiplied with * Signed halfword elements from in_l producing a result * twice the size of input i.e. signed word. * Multiplication result of adjacent odd-even elements * are added together and subtracted from double width elements * in_c vector. * Example : out = __lasx_xvdp2sub_w_h(in_c, in_h, in_l) * in_c : 0,0,0,0, 0,0,0,0 * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1 * in_l : 2,1,1,0, 1,0,0,0, 0,0,1,0, 1,0,0,1 * out : -7,-3,0,0, 0,-1,0,-1 * =============================================================================
*/ staticinline __m256i __lasx_xvdp2sub_w_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_h(in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l);
out = __lasx_xvsub_w(in_c, out); return out;
}
/* * ============================================================================= * Description : Dot product of halfword vector elements * Arguments : Inputs - in_h, in_l * Output - out * Return Type - signed word * Details : Signed halfword elements from in_h are multiplied with * signed halfword elements from in_l producing a result * four times the size of input i.e. signed doubleword. * Then these multiplication results of four adjacent elements * are added together and stored to the out vector. * Example : out = __lasx_xvdp4_d_h(in_h, in_l) * in_h : 3,1,3,0, 0,0,0,1, 0,0,1,-1, 0,0,0,1 * in_l : -2,1,1,0, 1,0,0,0, 0,0,1, 0, 1,0,0,1 * out : -2,0,1,1 * =============================================================================
*/ staticinline __m256i __lasx_xvdp4_d_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvmulwev_w_h(in_h, in_l);
out = __lasx_xvmaddwod_w_h(out, in_h, in_l);
out = __lasx_xvhaddw_d_w(out, out); return out;
}
/* * ============================================================================= * Description : The high half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * higher half of the two-fold sign extension (signed byte * to signed halfword) and stored to the out vector. * Example : See out = __lasx_xvaddwh_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddwh_h_b(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvh_b(in_h, in_l);
out = __lasx_xvhaddw_h_b(out, out); return out;
}
/* * ============================================================================= * Description : The high half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * higher half of the two-fold sign extension (signed halfword * to signed word) and stored to the out vector. * Example : out = __lasx_xvaddwh_w_h(in_h, in_l) * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 * out : 1,0,0,-1, 1,0,0, 2 * =============================================================================
*/ staticinline __m256i __lasx_xvaddwh_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvh_h(in_h, in_l);
out = __lasx_xvhaddw_w_h(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * lower half of the two-fold sign extension (signed byte * to signed halfword) and stored to the out vector. * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddwl_h_b(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvl_b(in_h, in_l);
out = __lasx_xvhaddw_h_b(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are added after the * lower half of the two-fold sign extension (signed halfword * to signed word) and stored to the out vector. * Example : out = __lasx_xvaddwl_w_h(in_h, in_l) * in_h : 3, 0,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 * in_l : 2,-1,1,2, 1,0,0, 0, 1,0,1, 0, 1,0,0,1 * out : 5,-1,4,2, 1,0,2,-1 * =============================================================================
*/ staticinline __m256i __lasx_xvaddwl_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvl_h(in_h, in_l);
out = __lasx_xvhaddw_w_h(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The out vector and the out vector are added after the * lower half of the two-fold zero extension (unsigned byte * to unsigned halfword) and stored to the out vector. * Example : See out = __lasx_xvaddwl_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddwl_h_bu(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvilvl_b(in_h, in_l);
out = __lasx_xvhaddw_hu_bu(out, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_l vector after double zero extension (unsigned byte to * signed halfword),added to the in_h vector. * Example : See out = __lasx_xvaddw_w_w_h(in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvaddw_h_h_bu(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvsllwil_hu_bu(in_l, 0);
out = __lasx_xvadd_h(in_h, out); return out;
}
/* * ============================================================================= * Description : The low half of the vector elements are expanded and * added after being doubled. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_l vector after double sign extension (signed halfword to * signed word), added to the in_h vector. * Example : out = __lasx_xvaddw_w_w_h(in_h, in_l) * in_h : 0, 1,0,0, -1,0,0,1, * in_l : 2,-1,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1, * out : 2, 0,1,2, -1,0,1,1, * =============================================================================
*/ staticinline __m256i __lasx_xvaddw_w_w_h(__m256i in_h, __m256i in_l) {
__m256i out;
out = __lasx_xvsllwil_w_h(in_l, 0);
out = __lasx_xvadd_w(in_h, out); return out;
}
/* * ============================================================================= * Description : Multiplication and addition calculation after expansion * of the lower half of the vector. * Arguments : Inputs - in_c, in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are multiplied after * the lower half of the two-fold sign extension (signed halfword * to signed word), and the result is added to the vector in_c, * then stored to the out vector. * Example : out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) * in_c : 1,2,3,4, 5,6,7,8 * in_h : 1,2,3,4, 1,2,3,4, 5,6,7,8, 5,6,7,8 * in_l : 200, 300, 400, 500, 2000, 3000, 4000, 5000, * -200,-300,-400,-500, -2000,-3000,-4000,-5000 * out : 201, 602,1203,2004, -995, -1794,-2793,-3992 * =============================================================================
*/ staticinline __m256i __lasx_xvmaddwl_w_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i tmp0, tmp1, out;
/* * ============================================================================= * Description : Multiplication and addition calculation after expansion * of the higher half of the vector. * Arguments : Inputs - in_c, in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are multiplied after * the higher half of the two-fold sign extension (signed * halfword to signed word), and the result is added to * the vector in_c, then stored to the out vector. * Example : See out = __lasx_xvmaddwl_w_h(in_c, in_h, in_l) * =============================================================================
*/ staticinline __m256i __lasx_xvmaddwh_w_h(__m256i in_c, __m256i in_h,
__m256i in_l) {
__m256i tmp0, tmp1, out;
/* * ============================================================================= * Description : Multiplication calculation after expansion of the lower * half of the vector. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are multiplied after * the lower half of the two-fold sign extension (signed * halfword to signed word), then stored to the out vector. * Example : out = __lasx_xvmulwl_w_h(in_h, in_l) * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 * out : 6,1,3,0, 0,0,1,0 * =============================================================================
*/ staticinline __m256i __lasx_xvmulwl_w_h(__m256i in_h, __m256i in_l) {
__m256i tmp0, tmp1, out;
/* * ============================================================================= * Description : Multiplication calculation after expansion of the lower * half of the vector. * Arguments : Inputs - in_h, in_l * Output - out * Details : The in_h vector and the in_l vector are multiplied after * the lower half of the two-fold sign extension (signed * halfword to signed word), then stored to the out vector. * Example : out = __lasx_xvmulwh_w_h(in_h, in_l) * in_h : 3,-1,3,0, 0,0,0,-1, 0,0,1,-1, 0,0,0,1 * in_l : 2,-1,1,2, 1,0,0, 0, 0,0,1, 0, 1,0,0,1 * out : 0,0,0,0, 0,0,0,1 * =============================================================================
*/ staticinline __m256i __lasx_xvmulwh_w_h(__m256i in_h, __m256i in_l) {
__m256i tmp0, tmp1, out;
/*
 * =============================================================================
 * Description : The low half of the in_l vector elements is widened to twice
 *               its width and added to in_h, then saturated.
 * Arguments   : Inputs - in_h, in_l
 *               Output - out
 * Details     : The lower half of the in_l vector is zero-extended to twice
 *               its width (unsigned byte to unsigned halfword), added to the
 *               in_h vector, and the sums are saturated. The results are
 *               stored to the out vector.
 * Example     : out = __lasx_xvsaddw_hu_hu_bu(in_h, in_l)
 *        in_h : 2,65532,1,2, 1,0,0,0, 0,0,1,0, 1,0,0,1
 *        in_l : 3,6,3,0, 0,0,0,1, 0,0,1,1, 0,0,0,1, 3,18,3,0, 0,0,0,1, 0,0,1,1,
 *               0,0,0,1
 *         out : 5,65535,4,2, 1,0,0,1, 3,18,4,0, 1,0,0,2
 * =============================================================================
 */ staticinline __m256i __lasx_xvsaddw_hu_hu_bu(__m256i in_h, __m256i in_l) {
__m256i tmp1, out;
__m256i zero = { 0 };
/* NOTE(review): the statements and closing brace of this function are not
 * present at this point in the file; the body appears to have been truncated.
 * `staticinline` is presumably `static inline` with the space lost - confirm
 * against the upstream header. */
/* * ============================================================================= * Description : Clip all halfword elements of input vector between min & max * out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in)) * Arguments : Inputs - in (input vector) * - min (min threshold) * - max (max threshold) * Outputs - in (output vector with clipped elements) * Return Type - signed halfword * Example : out = __lasx_xvclip_h(in, min, max) * in : -8,2,280,249, -8,255,280,249, 4,4,4,4, 5,5,5,5 * min : 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1 * max : 9,9,9,9, 9,9,9,9, 9,9,9,9, 9,9,9,9 * out : 1,2,9,9, 1,9,9,9, 4,4,4,4, 5,5,5,5 * =============================================================================
*/ staticinline __m256i __lasx_xvclip_h(__m256i in, __m256i min, __m256i max) {
__m256i out;
out = __lasx_xvmax_h(min, in);
out = __lasx_xvmin_h(max, out); return out;
}
/* * ============================================================================= * Description : Clip all signed halfword elements of input vector * between 0 & 255 * Arguments : Inputs - in (input vector) * Outputs - out (output vector with clipped elements) * Return Type - signed halfword * Example : See out = __lasx_xvclip255_w(in) * =============================================================================
*/ staticinline __m256i __lasx_xvclip255_h(__m256i in) {
__m256i out;
out = __lasx_xvmaxi_h(in, 0);
out = __lasx_xvsat_hu(out, 7); return out;
}
/* * ============================================================================= * Description : Clip all signed word elements of input vector * between 0 & 255 * Arguments : Inputs - in (input vector) * Output - out (output vector with clipped elements) * Return Type - signed word * Example : out = __lasx_xvclip255_w(in) * in : -8,255,280,249, -8,255,280,249 * out : 0,255,255,249, 0,255,255,249 * =============================================================================
*/ staticinline __m256i __lasx_xvclip255_w(__m256i in) {
__m256i out;
out = __lasx_xvmaxi_w(in, 0);
out = __lasx_xvsat_wu(out, 7); return out;
}
/* * ============================================================================= * Description : Indexed halfword element values are replicated to all * elements in output vector. If 'idx < 8' use xvsplati_l_*, * if 'idx >= 8' use xvsplati_h_*. * Arguments : Inputs - in, idx * Output - out * Details : Idx element value from in vector is replicated to all * elements in out vector. * Valid index range for halfword operation is 0-7 * Example : out = __lasx_xvsplati_l_h(in, idx) * in : 20,10,11,12, 13,14,15,16, 0,0,2,0, 0,0,0,0 * idx : 0x02 * out : 11,11,11,11, 11,11,11,11, 11,11,11,11, 11,11,11,11 * =============================================================================
*/ staticinline __m256i __lasx_xvsplati_l_h(__m256i in, int idx) {
__m256i out;
out = __lasx_xvpermi_q(in, in, 0x02);
out = __lasx_xvreplve_h(out, idx); return out;
}
/* * ============================================================================= * Description : Indexed halfword element values are replicated to all * elements in output vector. If 'idx < 8' use xvsplati_l_*, * if 'idx >= 8' use xvsplati_h_*. * Arguments : Inputs - in, idx * Output - out * Details : Idx element value from in vector is replicated to all * elements in out vector. * Valid index range for halfword operation is 0-7 * Example : out = __lasx_xvsplati_h_h(in, idx) * in : 20,10,11,12, 13,14,15,16, 0,2,0,0, 0,0,0,0 * idx : 0x09 * out : 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2 * =============================================================================
*/ staticinline __m256i __lasx_xvsplati_h_h(__m256i in, int idx) {
__m256i out;
out = __lasx_xvpermi_q(in, in, 0x13);
out = __lasx_xvreplve_h(out, idx); return out;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.