/* * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions.
*/
/** * Simple utility to convert from native encoding file to ascii or reverse * including \udddd Unicode notation.
*/ publicclass Native2Ascii { final Charset cs; final CharsetEncoder encoder; public Native2Ascii(Charset cs) { this.cs = cs; this.encoder = cs.newEncoder();
}
/**
 * A copy of the native2ascii N2AFilter.
 *
 * <p>Writer filter that passes characters up to U+007F through unchanged and
 * replaces every other character with a six-character escape sequence: a
 * backslash, the letter {@code u}, and the character's code unit as four
 * lowercase hexadecimal digits.
 */
class N2AFilter extends FilterWriter {

    /** Number of hexadecimal digits in one escape sequence. */
    private static final int HEX_DIGITS = 4;

    /**
     * Creates a filter that sends its escaped output to {@code out}.
     *
     * @param out the underlying writer
     */
    public N2AFilter(Writer out) {
        super(out);
    }

    /**
     * Writes a single character, escaping it when it lies above U+007F.
     *
     * @param b the character to write
     * @throws IOException if the underlying writer fails
     */
    public void write(char b) throws IOException {
        writeEscaped(b);
    }

    /**
     * Writes the character held in the low 16 bits of {@code c}, escaping it
     * when it lies above U+007F. Without this override the inherited method
     * would hand the character to the underlying writer unescaped.
     *
     * @param c int whose low-order 16 bits are the character to write
     * @throws IOException if the underlying writer fails
     */
    @Override
    public void write(int c) throws IOException {
        writeEscaped((char) c);
    }

    /**
     * Writes {@code len} characters of {@code buf} starting at index
     * {@code off}, escaping each one that lies above U+007F.
     *
     * @param buf source characters
     * @param off index of the first character to write
     * @param len number of characters to write
     * @throws IOException if the underlying writer fails
     */
    @Override
    public void write(char[] buf, int off, int len) throws IOException {
        // Start at 'off': indexing from 0 would emit the wrong slice of buf.
        for (int i = off, end = off + len; i < end; i++) {
            writeEscaped(buf[i]);
        }
    }

    /**
     * Writes {@code len} characters of {@code str} starting at index
     * {@code off}, escaping each one that lies above U+007F. Without this
     * override the inherited method would bypass the escaping.
     *
     * @param str source string
     * @param off index of the first character to write
     * @param len number of characters to write
     * @throws IOException if the underlying writer fails
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        for (int i = off, end = off + len; i < end; i++) {
            writeEscaped(str.charAt(i));
        }
    }

    /** Emits one character, either verbatim or as a backslash-u escape. */
    private void writeEscaped(char ch) throws IOException {
        if (ch <= '\u007f') {
            out.write(ch);
            return;
        }
        out.write('\\');
        out.write('u');
        String hex = Integer.toHexString(ch);
        // A char needs at most four hex digits; left-pad shorter values.
        for (int pad = hex.length(); pad < HEX_DIGITS; pad++) {
            out.write('0');
        }
        out.write(hex);
    }
}
// A copy of native2ascii A2NFilter class A2NFilter extends FilterReader { // maintain a trailing buffer to hold any incompleted // unicode escaped sequences privatechar[] trailChars = null;
/** Creates a filter that takes its input from the given reader {@code in}. */
public A2NFilter(Reader in) { super(in);
}
publicint read(char[] buf, int off, int len) throws IOException { int numChars = 0; // how many characters have been read int retChars = 0; // how many characters we'll return
char[] cBuf = newchar[len]; int cOffset = 0; // offset at which we'll start reading boolean eof = false;
// copy trailing chars from previous invocation to input buffer if (trailChars != null) { for (int i = 0; i < trailChars.length; i++)
cBuf[i] = trailChars[i];
numChars = trailChars.length;
trailChars = null;
}
int n = in.read(cBuf, numChars, len - numChars); if (n < 0) {
eof = true; if (numChars == 0) return -1; // EOF;
} else {
numChars += n;
}
for (int i = 0; i < numChars; ) { char c = cBuf[i++];
if (c != '\\' || (eof && numChars <= 5)) { // Not a backslash, so copy and continue // Always pass non backslash chars straight thru // for regular encoding. If backslash occurs in // input stream at the final 5 chars then don't // attempt to read-ahead and de-escape since these // are literal occurrences of U+005C which need to // be encoded verbatim in the target encoding.
buf[retChars++] = c; continue;
}
int remaining = numChars - i; if (remaining < 5) { // Might be the first character of a unicode escape, but we // don't have enough characters to tell, so save it and finish
trailChars = newchar[1 + remaining];
trailChars[0] = c; for (int j = 0; j < remaining; j++)
trailChars[1 + j] = cBuf[i + j]; break;
} // At this point we have at least five characters remaining
c = cBuf[i++]; if (c != 'u') { // Not a unicode escape, so copy and continue
buf[retChars++] = '\\';
buf[retChars++] = c; continue;
}
// The next four characters are the hex part of a unicode escape char rc = 0; boolean isUE = true; try {
rc = (char) Integer.parseInt(new String(cBuf, i, 4), 16);
} catch (NumberFormatException x) {
isUE = false;
} if (isUE && encoder.canEncode(rc)) { // We'll be able to convert this
buf[retChars++] = rc;
i += 4; // Align beyond the current uXXXX sequence
} else { // We won't, so just retain the original sequence
buf[retChars++] = '\\';
buf[retChars++] = 'u'; continue;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.