/* * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions.
*/ /* * @test * @bug 4221795 8032446 8174270 * @summary Confirm Normalizer's fundamental behavior. Imported from ICU4J 3.2's * src/com/ibm/icu/dev/test and modified. * @modules java.base/sun.text java.base/jdk.internal.icu.text * @library /java/text/testlib * @compile -XDignore.symbol.file ICUBasicTest.java * @run main/timeout=30 ICUBasicTest
*/
/* ******************************************************************************* * Copyright (C) 1996-2004, International Business Machines Corporation and * * others. All Rights Reserved. * *******************************************************************************
*/
/* * Special cases for UAX #15 bug * see Unicode Public Review Issue #29 * at http://www.unicode.org/review/resolved-pri.html#pri29 * * Note: * PRI #29 is supported in Unicode 4.1.0. Therefore, expected results are * different for earlier Unicode versions.
*/ publicvoid TestComposition() {
final TestCompositionCase cases[] = new TestCompositionCase[] { new TestCompositionCase(NFC, UNICODE_3_2_0, "\u1100\u0300\u1161\u0327", "\u1100\u0300\u1161\u0327"), new TestCompositionCase(NFC, UNICODE_LATEST, "\u1100\u0300\u1161\u0327", "\u1100\u0300\u1161\u0327"),
new TestCompositionCase(NFC, UNICODE_3_2_0, "\u1100\u0300\u1161\u0327\u11a8", "\u1100\u0300\u1161\u0327\u11a8"), new TestCompositionCase(NFC, UNICODE_LATEST, "\u1100\u0300\u1161\u0327\u11a8", "\u1100\u0300\u1161\u0327\u11a8"),
new TestCompositionCase(NFC, UNICODE_3_2_0, "\uac00\u0300\u0327\u11a8", "\uac00\u0327\u0300\u11a8"), new TestCompositionCase(NFC, UNICODE_LATEST, "\uac00\u0300\u0327\u11a8", "\uac00\u0327\u0300\u11a8"),
new TestCompositionCase(NFC, UNICODE_3_2_0, "\u0b47\u0300\u0b3e", "\u0b47\u0300\u0b3e"), new TestCompositionCase(NFC, UNICODE_LATEST, "\u0b47\u0300\u0b3e", "\u0b47\u0300\u0b3e"),
};
String output; int i, length;
for (i=0; i<cases.length; ++i) {
output = Normalizer.normalize(cases[i].input,
cases[i].form, cases[i].options); if (!output.equals(cases[i].expect)) {
errln("unexpected result for case " + i + ". Expected="
+ cases[i].expect + ", Actual=" + output);
} elseif (verbose) {
logln("expected result for case " + i + ". Expected="
+ cases[i].expect + ", Actual=" + output);
}
}
}
privatefinalstaticclass TestCompositionCase { public java.text.Normalizer.Form form; publicint options; public String input, expect;
/*
 * Added in order to detect a regression.
 *
 * Normalizes a five-code-point string (U+0F71 through U+0F75) to NFD and
 * compares the result with a fixed seven-code-point expected string. On a
 * mismatch both strings are reported in hexadecimal through errln().
 */
public void TestCombiningMarks() {
    String src      = "\u0f71\u0f72\u0f73\u0f74\u0f75";
    String expected = "\u0F71\u0F71\u0F71\u0F72\u0F72\u0F74\u0F74";
    String result   = NormalizerBase.normalize(src, NFD);

    if (!expected.equals(result)) {
        errln("Reordering of combining marks failed. Expected: " +
              toHexString(expected) + " Got: "+ toHexString(result));
    }
}
/*
 * Added in order to detect a regression.
 *
 * Checks that NFC normalization returns the four-code-point input string
 * unchanged; any difference is reported through errln().
 */
public void TestBengali() throws Exception {
    String input = "\u09bc\u09be\u09cd\u09be";
    String output = NormalizerBase.normalize(input, NFC);

    if (!input.equals(output)) {
        errln("ERROR in NFC of string");
    }
}
/* * Added in order to detect a regression.
*/ /** * Test for a problem found by Verisign. Problem is that * characters at the start of a string are not put in canonical * order correctly by compose() if there is no starter.
*/ publicvoid TestVerisign() throws Exception {
String[] inputs = { "\u05b8\u05b9\u05b1\u0591\u05c3\u05b0\u05ac\u059f", "\u0592\u05b7\u05bc\u05a5\u05b0\u05c0\u05c4\u05ad"
};
String[] outputs = { "\u05b1\u05b8\u05b9\u0591\u05c3\u05b0\u05ac\u059f", "\u05b0\u05b7\u05bc\u05a5\u0592\u05c0\u05ad\u05c4"
};
for (int i = 0; i < inputs.length; ++i) {
String input = inputs[i];
String output = outputs[i];
/** * Test for a problem that showed up just before ICU 1.6 release * having to do with combining characters with an index of zero. * Such characters do not participate in any canonical * decompositions. However, having an index of zero means that * they all share one typeMask[] entry, that is, they all have to * map to the same canonical class, which is not the case, in * reality.
*/ publicvoid TestZeroIndex() throws Exception {
String[] DATA = { // Expect col1 x COMPOSE_COMPAT => col2 // Expect col2 x DECOMP => col3 "A\u0316\u0300", "\u00C0\u0316", "A\u0316\u0300", "A\u0300\u0316", "\u00C0\u0316", "A\u0316\u0300", "A\u0327\u0300", "\u00C0\u0327", "A\u0327\u0300", "c\u0321\u0327", "c\u0321\u0327", "c\u0321\u0327", "c\u0327\u0321", "\u00E7\u0321", "c\u0327\u0321",
};
for (int i=0; i<DATA.length; i+=3) {
String a = DATA[i];
String b = NormalizerBase.normalize(a, NFKC);
String exp = DATA[i+1];
/** * Make sure characters in the CompositionExclusion.txt list do not get * composed to.
*/ publicvoid TestCompositionExclusion() throws Exception { // This list is generated from CompositionExclusion.txt. // Update whenever the normalizer tables are updated. Note // that we test all characters listed, even those that can be // derived from the Unicode DB and are therefore commented // out.
/* * kyuka's note: * Original data seemed to be based on Unicode 3.0.0(the initial * Composition Exclusions list) and seemed to have some mistakes. * Updated in order to correct mistakes and to support Unicode 4.0.0. * And, this table can be used also for Unicode 3.2.0.
*/
String[][] EXCLUDED_UNICODE_3_2_0 = {
{"\u0340"},
{"\u0341"},
{"\u0343"},
{"\u0344"},
{"\u0374"},
{"\u037E"},
{"\u0387"},
{"\u0958"},
{"\u0959", "\u095F"},
{"\u09DC"},
{"\u09DD"},
{"\u09DF"},
{"\u0A33"},
{"\u0A36"},
{"\u0A59", "\u0A5B"},
{"\u0A5E"},
{"\u0B5C"},
{"\u0B5D"},
{"\u0F43"},
{"\u0F4D"},
{"\u0F52"},
{"\u0F57"},
{"\u0F5C"},
{"\u0F69"},
{"\u0F73"},
{"\u0F75"},
{"\u0F76"},
{"\u0F78"},
{"\u0F81"},
{"\u0F93"},
{"\u0F9D"},
{"\u0FA2"},
{"\u0FA7"},
{"\u0FAC"},
{"\u0FB9"},
{"\u1F71"},
{"\u1F73"},
{"\u1F75"},
{"\u1F77"},
{"\u1F79"},
{"\u1F7B"},
{"\u1F7D"},
{"\u1FBB"},
{"\u1FBE"},
{"\u1FC9"},
{"\u1FCB"},
{"\u1FD3"},
{"\u1FDB"},
{"\u1FE3"},
{"\u1FEB"},
{"\u1FEE"},
{"\u1FEF"},
{"\u1FF9"},
{"\u1FFB"},
{"\u1FFD"},
{"\u2000"},
{"\u2001"},
{"\u2126"},
{"\u212A"},
{"\u212B"},
{"\u2329"},
{"\u232A"},
{"\u2ADC"},
{"\uF900", "\uFA0D"},
{"\uFA10"},
{"\uFA12"},
{"\uFA15", "\uFA1E"},
{"\uFA20"},
{"\uFA22"},
{"\uFA25"},
{"\uFA26"},
{"\uFA2A", "\uFA2D"},
{"\uFA30", "\uFA6A"},
{"\uFB1D"},
{"\uFB1F"},
{"\uFB2A", "\uFB36"},
{"\uFB38", "\uFB3C"},
{"\uFB3E"},
{"\uFB40"},
{"\uFB41"},
{"\uFB43"},
{"\uFB44"},
{"\uFB46", "\uFB4E"},
{"\uD834\uDD5E", "\uD834\uDD64"},
{"\uD834\uDDBB", "\uD834\uDDC0"},
{"\uD87E\uDC00", "\uD87E\uDE1D"}
};
String[][] EXCLUDED_LATEST = {
};
for (int i = 0; i < EXCLUDED_UNICODE_3_2_0.length; ++i) { if (EXCLUDED_UNICODE_3_2_0[i].length == 1) {
checkCompositionExclusion_320(EXCLUDED_UNICODE_3_2_0[i][0]);
} else { int from, to;
from = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][0], 0);
to = Character.codePointAt(EXCLUDED_UNICODE_3_2_0[i][1], 0);
publicvoid TestExplodingBase() throws Exception{ // \u017f - Latin small letter long s // \u0307 - combining dot above // \u1e61 - Latin small letter s with dot above // \u1e9b - Latin small letter long s with dot above
String[][] canon = { // Input Decomposed Composed
{ "Tschu\u017f", "Tschu\u017f", "Tschu\u017f" },
{ "Tschu\u1e9b", "Tschu\u017f\u0307", "Tschu\u1e9b" },
};
String[][] compat = { // Input Decomposed Composed
{ "\u017f", "s", "s" },
{ "\u1e9b", "s\u0307", "\u1e61" },
};
/* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later*/ // ka(Hankaku-Katakana) + ten(Hankaku)
{ "\uFF76\uFF9E", "\u30AB\u3099", "\u30AC" },
if (!output.equals(expect)) {
errln("FAIL: case " + i
+ " expected '" + expect + "' (" + toHexString(expect) + ")"
+ " but got '" + output + "' (" + toHexString(output) + ")"
);
}
}
}
// With Canonical decomposition, Hangul syllables should get decomposed // into Jamo, but Jamo characters should not be decomposed into // conjoining Jamo private String[][] hangulCanon = { // Input Decomposed Composed
{ "\ud4db", "\u1111\u1171\u11b6", "\ud4db" },
{ "\u1111\u1171\u11b6", "\u1111\u1171\u11b6", "\ud4db" },
};
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.