/* * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. *
*/
// Note, inline_string_indexOf() generates checks: // if (pattern.count > src.count) return -1; // if (pattern.count == 0) return 0;
// We have two strings, a source string in haystack, haystack_len and a pattern string // in needle, needle_len. Find the first occurrence of pattern in source or return -1.
// For larger pattern and source we use a simplified Boyer Moore algorithm. // With a small pattern and source we use linear scan.
// needle_len >=8 && needle_len < 256 && needle_len < haystack_len/4, use bmh algorithm.
sub(result_tmp, haystack_len, needle_len); // needle_len < 8, use linear scan
sub(t0, needle_len, 8);
bltz(t0, LINEARSEARCH); // needle_len >= 256, use linear scan
sub(t0, needle_len, 256);
bgez(t0, LINEARSTUB); // needle_len >= haystack_len/4, use linear scan
srli(t0, haystack_len, 2);
bge(needle_len, t0, LINEARSTUB);
// Boyer-Moore-Horspool introduction: // The Boyer Moore alogorithm is based on the description here:- // // http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string_search_algorithm // // This describes and algorithm with 2 shift rules. The 'Bad Character' rule // and the 'Good Suffix' rule. // // These rules are essentially heuristics for how far we can shift the // pattern along the search string. // // The implementation here uses the 'Bad Character' rule only because of the // complexity of initialisation for the 'Good Suffix' rule. // // This is also known as the Boyer-Moore-Horspool algorithm: // // http://en.wikipedia.org/wiki/Boyer-Moore-Horspool_algorithm // // #define ASIZE 256 // // int bm(unsigned char *pattern, int m, unsigned char *src, int n) { // int i, j; // unsigned c; // unsigned char bc[ASIZE]; // // /* Preprocessing */ // for (i = 0; i < ASIZE; ++i) // bc[i] = m; // for (i = 0; i < m - 1; ) { // c = pattern[i]; // ++i; // // c < 256 for Latin1 string, so, no need for branch // #ifdef PATTERN_STRING_IS_LATIN1 // bc[c] = m - i; // #else // if (c < ASIZE) bc[c] = m - i; // #endif // } // // /* Searching */ // j = 0; // while (j <= n - m) { // c = src[i+j]; // if (pattern[m-1] == c) // int k; // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); // if (k < 0) return j; // // c < 256 for Latin1 string, so, no need for branch // #ifdef SOURCE_STRING_IS_LATIN1_AND_PATTERN_STRING_IS_LATIN1 // // LL case: (c< 256) always true. Remove branch // j += bc[pattern[j+m-1]]; // #endif // #ifdef SOURCE_STRING_IS_UTF_AND_PATTERN_STRING_IS_UTF // // UU case: need if (c<ASIZE) check. Skip 1 character if not. // if (c < ASIZE) // j += bc[pattern[j+m-1]]; // else // j += 1 // #endif // #ifdef SOURCE_IS_UTF_AND_PATTERN_IS_LATIN1 // // UL case: need if (c<ASIZE) check. Skip <pattern length> if not. // if (c < ASIZE) // j += bc[pattern[j+m-1]]; // else // j += m // #endif // } // return -1; // }
// pattern length is >=8, so, we can read at least 1 register for cases when // UTF->Latin1 conversion is not needed(8 LL or 4UU) and half register for // UL case. We'll re-read last character in inner pre-loop code to have // single outer pre-loop load constint firstStep = isLL ? 7 : 3;
bind(BM_INIT_LOOP); // for (i = 0; i < ASIZE; ++i) // bc[i] = m; for (int i = 0; i < 4; i++) {
sd(tmp5, Address(ch1, i * wordSize));
}
add(ch1, ch1, 32);
sub(tmp6, tmp6, 4);
bgtz(tmp6, BM_INIT_LOOP);
sub(nlen_tmp, needle_len, 1); // m - 1, index of the last element in pattern Register orig_haystack = tmp5;
mv(orig_haystack, haystack); // result_tmp = tmp4
shadd(haystack_end, result_tmp, haystack, haystack_end, haystack_chr_shift);
sub(ch2, needle_len, 1); // bc offset init value, ch2 is t1
mv(tmp3, needle);
// for (i = 0; i < m - 1; ) { // c = pattern[i]; // ++i; // // c < 256 for Latin1 string, so, no need for branch // #ifdef PATTERN_STRING_IS_LATIN1 // bc[c] = m - i; // #else // if (c < ASIZE) bc[c] = m - i; // #endif // }
bind(BCLOOP);
(this->*needle_load_1chr)(ch1, Address(tmp3), noreg);
add(tmp3, tmp3, needle_chr_size); if (!needle_isL) { // ae == StrIntrinsicNode::UU
mv(tmp6, ASIZE);
bgeu(ch1, tmp6, BCSKIP);
}
add(tmp4, sp, ch1);
sb(ch2, Address(tmp4)); // store skip offset to BC offset table
bind(BCSKIP);
sub(ch2, ch2, 1); // for next pattern element, skip distance -1
bgtz(ch2, BCLOOP);
// tmp6: pattern end, address after needle
shadd(tmp6, needle_len, needle, tmp6, needle_chr_shift); if (needle_isL == haystack_isL) { // load last 8 bytes (8LL/4UU symbols)
ld(tmp6, Address(tmp6, -wordSize));
} else { // UL: from UTF-16(source) search Latin1(pattern)
lwu(tmp6, Address(tmp6, -wordSize / 2)); // load last 4 bytes(4 symbols) // convert Latin1 to UTF. eg: 0x0000abcd -> 0x0a0b0c0d // We'll have to wait until load completed, but it's still faster than per-character loads+checks
srli(tmp3, tmp6, BitsPerByte * (wordSize / 2 - needle_chr_size)); // pattern[m-1], eg:0x0000000a
slli(ch2, tmp6, XLEN - 24);
srli(ch2, ch2, XLEN - 8); // pattern[m-2], 0x0000000b
slli(ch1, tmp6, XLEN - 16);
srli(ch1, ch1, XLEN - 8); // pattern[m-3], 0x0000000c
andi(tmp6, tmp6, 0xff); // pattern[m-4], 0x0000000d
slli(ch2, ch2, 16);
orr(ch2, ch2, ch1); // 0x00000b0c
slli(result, tmp3, 48); // use result as temp register
orr(tmp6, tmp6, result); // 0x0a00000d
slli(result, ch2, 16);
orr(tmp6, tmp6, result); // UTF-16:0x0a0b0c0d
}
// i = m - 1; // skipch = j + i; // if (skipch == pattern[m - 1] // for (k = m - 2; k >= 0 && pattern[k] == src[k + j]; --k); // else // move j with bad char offset table
bind(BMLOOPSTR2); // compare pattern to source string backward
shadd(result, nlen_tmp, haystack, result, haystack_chr_shift);
(this->*haystack_load_1chr)(skipch, Address(result), noreg);
sub(nlen_tmp, nlen_tmp, firstStep); // nlen_tmp is positive here, because needle_len >= 8 if (needle_isL == haystack_isL) { // re-init tmp3. It's for free because it's executed in parallel with // load above. Alternative is to initialize it before loop, but it'll // affect performance on in-order systems with 2 or more ld/st pipelines
srli(tmp3, tmp6, BitsPerByte * (wordSize - needle_chr_size)); // UU/LL: pattern[m-1]
} if (!isLL) { // UU/UL case
slli(ch2, nlen_tmp, 1); // offsets in bytes
}
bne(tmp3, skipch, BMSKIP); // if not equal, skipch is bad char
add(result, haystack, isLL ? nlen_tmp : ch2);
ld(ch2, Address(result)); // load 8 bytes from source string
mv(ch1, tmp6); if (isLL) {
j(BMLOOPSTR1_AFTER_LOAD);
} else {
sub(nlen_tmp, nlen_tmp, 1); // no need to branch for UU/UL case. cnt1 >= 8
j(BMLOOPSTR1_CMP);
}
bind(LINEARSTUB);
sub(t0, needle_len, 16); // small patterns still should be handled by simple algorithm
bltz(t0, LINEARSEARCH);
mv(result, zr);
RuntimeAddress stub = NULL; if (isLL) {
stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ll());
assert(stub.target() != NULL, "string_indexof_linear_ll stub has not been generated");
} elseif (needle_isL) {
stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_ul());
assert(stub.target() != NULL, "string_indexof_linear_ul stub has not been generated");
} else {
stub = RuntimeAddress(StubRoutines::riscv::string_indexof_linear_uu());
assert(stub.target() != NULL, "string_indexof_linear_uu stub has not been generated");
}
trampoline_call(stub);
j(DONE);
// for L strings, 1 byte for 1 character // for U strings, 2 bytes for 1 character int str1_chr_size = str1_isL ? 1 : 2; int str2_chr_size = str2_isL ? 1 : 2; int minCharsInWord = isLL ? wordSize : wordSize / 2;
// Bizzarely, the counts are passed in bytes, regardless of whether they // are L or U strings, however the result is always in characters. if (!str1_isL) {
sraiw(cnt1, cnt1, 1);
} if (!str2_isL) {
sraiw(cnt2, cnt2, 1);
}
// Compute the minimum of the string lengths and save the difference in result.
sub(result, cnt1, cnt2);
bgt(cnt1, cnt2, L);
mv(cnt2, cnt1);
bind(L);
// A very short string
mv(t0, minCharsInWord);
ble(cnt2, t0, SHORT_STRING);
xorr(tmp3, tmp1, tmp2);
beqz(tmp3, NEXT_WORD);
j(DIFFERENCE);
bind(TAIL);
xorr(tmp3, tmp1, tmp2);
bnez(tmp3, DIFFERENCE); // Last longword. In the case where length == 4 we compare the // same longword twice, but that's still faster than another // conditional branch. if (str1_isL == str2_isL) { // LL or UU
ld(tmp1, Address(str1));
ld(tmp2, Address(str2));
} elseif (isLU) { // LU case
lwu(tmp1, Address(str1));
ld(tmp2, Address(str2));
inflate_lo32(tmp3, tmp1);
mv(tmp1, tmp3);
} else { // UL case
lwu(tmp2, Address(str2));
ld(tmp1, Address(str1));
inflate_lo32(tmp3, tmp2);
mv(tmp2, tmp3);
}
bind(TAIL_CHECK);
xorr(tmp3, tmp1, tmp2);
beqz(tmp3, DONE);
// Find the first different characters in the longwords and // compute their difference.
bind(DIFFERENCE);
ctzc_bit(result, tmp3, isLL); // count zero from lsb to msb
srl(tmp1, tmp1, result);
srl(tmp2, tmp2, result); if (isLL) {
andi(tmp1, tmp1, 0xFF);
andi(tmp2, tmp2, 0xFF);
} else {
andi(tmp1, tmp1, 0xFFFF);
andi(tmp2, tmp2, 0xFFFF);
}
sub(result, tmp1, tmp2);
j(DONE);
}
bind(STUB);
RuntimeAddress stub = NULL; switch (ae) { case StrIntrinsicNode::LL:
stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LL()); break; case StrIntrinsicNode::UU:
stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UU()); break; case StrIntrinsicNode::LU:
stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_LU()); break; case StrIntrinsicNode::UL:
stub = RuntimeAddress(StubRoutines::riscv::compare_long_string_UL()); break; default:
ShouldNotReachHere();
}
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
trampoline_call(stub);
j(DONE);
bind(SAME);
mv(result, true); // That's it.
bind(DONE);
BLOCK_COMMENT("} array_equals");
}
// Compare Strings
// For Strings we're passed the address of the first characters in a1 // and a2 and the length in cnt1. // elem_size is the element size in bytes: either 1 or 2. // There are two implementations. For arrays >= 8 bytes, all // comparisons (including the final one, which may overlap) are // performed 8 bytes at a time. For strings < 8 bytes, we compare a // halfword, then a short, and then a byte.
// Last longword. In the case where length == 4 we compare the // same longword twice, but that's still faster than another // conditional branch. // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when // length == 4.
add(tmp1, a1, cnt1);
ld(tmp1, Address(tmp1, 0));
add(tmp2, a2, cnt1);
ld(tmp2, Address(tmp2, 0));
bne(tmp1, tmp2, DONE);
j(SAME);
// This is a function should only be used by C2. Flip the unordered when unordered-greater, C2 would use // unordered-lesser instead of unordered-greater. Finally, commute the result bits at function do_one_bytecode(). void C2_MacroAssembler::float_cmp_branch(int cmpFlag, FloatRegister op1, FloatRegister op2, Label& label, bool is_far) {
assert(cmpFlag >= 0 && cmpFlag < (int)(sizeof(float_conditional_branches) / sizeof(float_conditional_branches[0])), "invalid float conditional branch index"); int booltest_flag = cmpFlag & ~(C2_MacroAssembler::double_branch_mask);
(this->*float_conditional_branches[cmpFlag])(op1, op2, label, is_far,
(booltest_flag == (BoolTest::ge) || booltest_flag == (BoolTest::gt)) ? false : true);
}
void C2_MacroAssembler::enc_cmpUEqNeLeGt_imm0_branch(int cmpFlag, Register op1, Label& L, bool is_far) { switch (cmpFlag) { case BoolTest::eq: case BoolTest::le:
beqz(op1, L, is_far); break; case BoolTest::ne: case BoolTest::gt:
bnez(op1, L, is_far); break; default:
ShouldNotReachHere();
}
}
// Set dst to NaN if any NaN input. void C2_MacroAssembler::minmax_FD(FloatRegister dst, FloatRegister src1, FloatRegister src2, bool is_double, bool is_min) {
assert_different_registers(dst, src1, src2);
Label Done, Compare;
is_double ? fclass_d(t0, src1)
: fclass_s(t0, src1);
is_double ? fclass_d(t1, src2)
: fclass_s(t1, src2);
orr(t0, t0, t1);
andi(t0, t0, 0b1100000000); //if src1 or src2 is quiet or signaling NaN then return NaN
beqz(t0, Compare);
is_double ? fadd_d(dst, src1, src2)
: fadd_s(dst, src1, src2);
j(Done);
// used by C2 ClearArray patterns. // base: Address of a buffer to be zeroed // cnt: Count in HeapWords // // base, cnt, v0, v1 and t0 are clobbered. void C2_MacroAssembler::clear_array_v(Register base, Register cnt) {
Label loop;
// making zero words
vsetvli(t0, cnt, Assembler::e64, Assembler::m4);
vxor_vv(v0, v0, v0);
int minCharsInWord = encLL ? wordSize : wordSize / 2;
BLOCK_COMMENT("string_compare {");
// for Latin strings, 1 byte for 1 character // for UTF16 strings, 2 bytes for 1 character if (!str1_isL)
sraiw(cnt1, cnt1, 1); if (!str2_isL)
sraiw(cnt2, cnt2, 1);
// if str1 == str2, return the difference // save the minimum of the string lengths in cnt2.
sub(result, cnt1, cnt2);
bgt(cnt1, cnt2, L);
mv(cnt2, cnt1);
bind(L);
// Compress char[] array to byte[]. // result: the array length if every element in array can be encoded; 0, otherwise. void C2_MacroAssembler::char_array_compress_v(Register src, Register dst, Register len, Register result, Register tmp) {
Label done;
encode_iso_array_v(src, dst, len, result, tmp);
beqz(len, done);
mv(result, zr);
bind(done);
}
// result: the number of elements had been encoded. void C2_MacroAssembler::encode_iso_array_v(Register src, Register dst, Register len, Register result, Register tmp) {
Label loop, DIFFERENCE, DONE;
// Set dst to NaN if any NaN input. void C2_MacroAssembler::minmax_FD_v(VectorRegister dst, VectorRegister src1, VectorRegister src2, bool is_double, bool is_min) {
assert_different_registers(dst, src1, src2);
// Set dst to NaN if any NaN input. void C2_MacroAssembler::reduce_minmax_FD_v(FloatRegister dst,
FloatRegister src1, VectorRegister src2,
VectorRegister tmp1, VectorRegister tmp2, bool is_double, bool is_min) {
assert_different_registers(src2, tmp1, tmp2);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.