/* * Copyright (c) 1998, 2022, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions.
*/
/*
 * Determine the length this Standard UTF-8 string will have once it is
 * expressed as Modified UTF-8.
 *
 * Only the basic UTF encoding rules are validated: the lead byte must match
 * a 1, 2, 3 or 4 byte pattern and every following byte must look like
 * 10xxxxxx. When an error is detected the input length is returned
 * unchanged, so that no conversion takes place.
 *
 * Note: Accepts Modified UTF-8 also, no verification on the
 *       correctness of Standard UTF-8 is done. e.g. 0xC080 input is ok.
 *
 *   string - the bytes to scan (length is explicit, no terminator needed)
 *   length - number of bytes in string
 *
 * Returns the Modified UTF-8 length, or length itself on an encoding error.
 */
int JNICALL utf8sToUtf8mLength(jbyte *string, int length) {
    int newLength;
    int i;

    newLength = 0;
    for ( i = 0 ; i < length ; i++ ) {
        unsigned byte;

        byte = (unsigned char)string[i];
        if ( (byte & 0x80) == 0 ) { /* 1byte encoding */
            newLength++;
            if ( byte == 0 ) {
                newLength++; /* We gain one byte in length on NULL bytes */
            }
        } else if ( (byte & 0xE0) == 0xC0 ) { /* 2byte encoding */
            /* Check encoding of following bytes */
            if ( (i+1) >= length || (string[i+1] & 0xC0) != 0x80 ) {
                break; /* Error condition */
            }
            i++; /* Skip next byte */
            newLength += 2;
        } else if ( (byte & 0xF0) == 0xE0 ) { /* 3byte encoding */
            /* Check encoding of following bytes */
            if ( (i+2) >= length || (string[i+1] & 0xC0) != 0x80
                                 || (string[i+2] & 0xC0) != 0x80 ) {
                break; /* Error condition */
            }
            i += 2; /* Skip next two bytes */
            newLength += 3;
        } else if ( (byte & 0xF8) == 0xF0 ) { /* 4byte encoding */
            /* Check encoding of following bytes */
            if ( (i+3) >= length || (string[i+1] & 0xC0) != 0x80
                                 || (string[i+2] & 0xC0) != 0x80
                                 || (string[i+3] & 0xC0) != 0x80 ) {
                break; /* Error condition */
            }
            i += 3; /* Skip next 3 bytes */
            newLength += 6; /* 4byte encoding turns into 2 3byte ones */
        } else {
            break; /* Error condition */
        }
    }
    /* A break above leaves i short of length; a clean scan ends at length */
    if ( i != length ) {
        /* Error in finding new length, return old length so no conversion */
        /* FIXUP: ERROR_MESSAGE? */
        return length;
    }
    return newLength;
}
/*
 * Convert Standard UTF-8 to Modified UTF-8.
 *   Assumes the UTF-8 encoding was validated by utf8sToUtf8mLength() above.
 *
 * Note: Accepts Modified UTF-8 also, no verification on the
 *       correctness of Standard UTF-8 is done. e.g. 0xC080 input is ok.
*/ void JNICALL utf8sToUtf8m(jbyte *string, int length, jbyte *newString, int newLength) { int i; int j;
j = 0; for ( i = 0 ; i < length ; i++ ) { unsigned byte1;
/*
 * Given a Modified UTF-8 string, calculate the Standard UTF-8 length.
 *
 * Basic validation of the UTF encoding rules is done, and length is
 * returned (no change) when errors are detected.
 *
 * Two Modified UTF-8 forms change size:
 *   - the 2byte sequence 0xC0 0x80 counts as a single byte (it becomes 0)
 *   - a 3byte sequence 0xED 0xAx .. immediately followed by a 3byte
 *     sequence 0xED 0xBx .. counts as one 4byte encoding
 *
 * Note: No validation is made that this is indeed Modified UTF-8 coming in.
 *
 *   string - the bytes to scan (length is explicit, no terminator needed)
 *   length - number of bytes in string
 *
 * Returns the Standard UTF-8 length, or length itself on an encoding error.
 */
int JNICALL utf8mToUtf8sLength(jbyte *string, int length) {
    int newLength;
    int i;

    newLength = 0;
    for ( i = 0 ; i < length ; i++ ) {
        unsigned byte1, byte2;

        byte1 = (unsigned char)string[i];
        if ( (byte1 & 0x80) == 0 ) { /* 1byte encoding */
            newLength++;
        } else if ( (byte1 & 0xE0) == 0xC0 ) { /* 2byte encoding */
            /* Check encoding of following bytes */
            if ( (i+1) >= length || (string[i+1] & 0xC0) != 0x80 ) {
                break; /* Error condition */
            }
            byte2 = (unsigned char)string[++i];
            if ( byte1 != 0xC0 || byte2 != 0x80 ) {
                newLength += 2; /* Normal 2byte encoding, not 0xC080 */
            } else {
                newLength++;    /* We will turn 0xC080 into 0 */
            }
        } else if ( (byte1 & 0xF0) == 0xE0 ) { /* 3byte encoding */
            /* Check encoding of following bytes */
            if ( (i+2) >= length || (string[i+1] & 0xC0) != 0x80
                                 || (string[i+2] & 0xC0) != 0x80 ) {
                break; /* Error condition */
            }
            byte2 = (unsigned char)string[i+1];
            i += 2; /* Now on the last byte of this 3byte encoding */
            newLength += 3;
            /* Possibly process a second 3byte encoding */
            if ( (i+3) < length && byte1 == 0xED && (byte2 & 0xF0) == 0xA0 ) {
                unsigned byte4, byte5, byte6;

                /* See if this is a pair of 3byte encodings */
                byte4 = (unsigned char)string[i+1];
                byte5 = (unsigned char)string[i+2];
                byte6 = (unsigned char)string[i+3];
                if ( byte4 == 0xED && (byte5 & 0xF0) == 0xB0 ) {
                    /* Check encoding of 3rd byte */
                    if ( (byte6 & 0xC0) != 0x80 ) {
                        break; /* Error condition */
                    }
                    newLength++; /* New string will have 4byte encoding */
                    i += 3;      /* Skip next 3 bytes */
                }
            }
        } else {
            break; /* Error condition */
        }
    }
    /* A break above leaves i short of length; a clean scan ends at length */
    if ( i != length ) {
        /* Error in UTF encoding */
        /* FIXUP: ERROR_MESSAGE()? */
        return length;
    }
    return newLength;
}
/*
 * Convert a Modified UTF-8 string into a Standard UTF-8 string.
 *   It is assumed that this string has been validated in terms of the
 *   basic UTF encoding rules by utf8mToUtf8sLength() above.
 *
 * Note: No validation is made that this is indeed Modified UTF-8 coming in.
 *
*/ void JNICALL utf8mToUtf8s(jbyte *string, int length, jbyte *newString, int newLength) { int i; int j;
j = 0; for ( i = 0 ; i < length ; i++ ) { unsigned byte1, byte2, byte3, byte4, byte5, byte6;
if (intCodePage == -1) { // First call, get codepage from the os
langID = LANGIDFROMLCID(GetUserDefaultLCID());
localeID = MAKELCID(langID, SORT_DEFAULT); if (GetLocaleInfo(localeID, LOCALE_IDEFAULTANSICODEPAGE,
strCodePage, sizeof(strCodePage)/sizeof(TCHAR)) > 0 ) {
intCodePage = atoi(strCodePage);
} else {
intCodePage = GetACP();
}
}
return intCodePage;
}
/*
 * Get wide string  (assumes len>0)
 *
 * Converts len bytes of str, interpreted in code page codePage, into a
 * newly malloc()ed WCHAR buffer using MultiByteToWideChar().
 *
 *   codePage - code page handed to MultiByteToWideChar()
 *   str      - source bytes
 *   len      - number of bytes in str
 *   pwlen    - receives the wide character count reported by the sizing
 *              call; it is written even when that count signals failure
 *
 * Returns the buffer, which holds exactly *pwlen WCHARs (no extra slot for
 * a terminator is allocated), or NULL on failure. On the NULL path nothing
 * is left allocated.
 * NOTE(review): the caller presumably owns the buffer and must free() it;
 * confirm against the call sites.
 */
static WCHAR* getWideString(UINT codePage, char* str, int len, int *pwlen) {
    int    wlen;
    WCHAR* wstr;

    /* Sizing call: no output buffer, just ask how many WCHARs are needed */
    wlen = MultiByteToWideChar(codePage, 0, str, len, NULL, 0);
    *pwlen = wlen;
    if (wlen <= 0) {
        UTF_ERROR(("Can't get WIDE string length"));
        return NULL;
    }
    wstr = (WCHAR*)malloc(wlen * sizeof(WCHAR));
    if (wstr == NULL) {
        UTF_ERROR(("Can't malloc() any space"));
        return NULL;
    }
    /* Convert the string to WIDE string */
    if (MultiByteToWideChar(codePage, 0, str, len, wstr, wlen) == 0) {
        UTF_ERROR(("Can't get WIDE string"));
        /* Release the buffer so this failure path does not leak it */
        free(wstr);
        return NULL;
    }
    return wstr;
}
/* * Convert UTF-8 to a platform string * NOTE: outputBufSize includes the space for the trailing 0.
*/ int JNICALL utf8ToPlatform(jbyte *utf8, int len, char* output, int outputBufSize) { int wlen; int plen;
WCHAR* wstr;
UINT codepage; int outputMaxLen;
UTF_ASSERT(utf8);
UTF_ASSERT(output);
UTF_ASSERT(len >= 0);
UTF_ASSERT(outputBufSize > len);
outputMaxLen = outputBufSize - 1; // leave space for trailing 0
/* Zero length is ok, but we don't need to do much */ if ( len == 0 ) {
output[0] = 0; return 0;
}
/* Get WIDE string version (assumes len>0) */
wstr = getWideString(CP_UTF8, (char*)utf8, len, &wlen); if ( wstr == NULL ) { // Can't allocate WIDE string goto just_copy_bytes;
}
/* * Convert Platform Encoding to UTF-8. * NOTE: outputBufSize includes the space for the trailing 0.
*/ int JNICALL utf8FromPlatform(char *str, int len, jbyte *output, int outputBufSize) { int wlen; int plen;
WCHAR* wstr;
UINT codepage; int outputMaxLen;
UTF_ASSERT(str);
UTF_ASSERT(output);
UTF_ASSERT(len >= 0);
UTF_ASSERT(outputBufSize > len);
outputMaxLen = outputBufSize - 1; // leave space for trailing 0
/* Zero length is ok, but we don't need to do much */ if ( len == 0 ) {
output[0] = 0; return 0;
}
/* Get WIDE string version (assumes len>0) */
codepage = getCodepage();
wstr = getWideString(codepage, str, len, &wlen); if ( wstr == NULL ) { goto just_copy_bytes;
}
/* * Do iconv() conversion. * Returns length or -1 if output overflows. * NOTE: outputBufSize includes the space for the trailing 0.
*/ staticint iconvConvert(conv_direction drn, char *bytes, size_t len, char *output, size_t outputBufSize) {
UTF_ASSERT(bytes);
UTF_ASSERT(output);
UTF_ASSERT(outputBufSize > len);
outputMaxLen = outputBufSize - 1; // leave space for trailing 0
/* Zero length is ok, but we don't need to do much */ if ( len == 0 ) {
output[0] = 0; return 0;
}
if (codeset == NULL && codeset != (char *) -1) { // locale is not initialized, do it now if (setlocale(LC_ALL, "") != NULL) { // nl_langinfo returns ANSI_X3.4-1968 by default
codeset = (char*)nl_langinfo(CODESET);
}
if (codeset == NULL) { // Not able to initialize process locale from platform one.
codeset = (char *) -1;
}
}
if (codeset == (char *) -1) { // There was an error during initialization, so just bail out goto just_copy_bytes;
}
func = (drn == TO_UTF8) ? iconv_open(codeset, "UTF-8") : iconv_open("UTF-8", codeset); if (func == (iconv_t) -1) { // Requested charset combination is not supported, conversion couldn't be done. // make sure we will not try it again
codeset = (char *) -1; goto just_copy_bytes;
}
/*
 * Convert UTF-8 to Platform Encoding.
 *
 * Thin wrapper: hands the bytes to iconvConvert() with direction FROM_UTF8
 * and passes its result straight back.
 *
 * Returns length or -1 if output overflows.
 * NOTE: outputBufSize includes the space for the trailing 0.
 */
int JNICALL utf8ToPlatform(jbyte *utf8, int len, char *output, int outputBufSize) {
    char *utf8Bytes = (char*)utf8; /* iconvConvert() works on plain char */

    return iconvConvert(FROM_UTF8, utf8Bytes, len, output, outputBufSize);
}
/*
 * Convert Platform Encoding to UTF-8.
 *
 * Thin wrapper: hands the bytes to iconvConvert() with direction TO_UTF8
 * and passes its result straight back.
 *
 * Returns length or -1 if output overflows.
 * NOTE: outputBufSize includes the space for the trailing 0.
 */
int JNICALL utf8FromPlatform(char *str, int len, jbyte *output, int outputBufSize) {
    char *utf8Out = (char*)output; /* iconvConvert() works on plain char */

    return iconvConvert(TO_UTF8, str, len, utf8Out, outputBufSize);
}
#endif
Messung V0.5
¤ Dauer der Verarbeitung: 0.14 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.