/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tomcat.util.buf;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
/**
 * Encodes characters as bytes using UTF-8. Extracted from Apache Harmony with some minor bug fixes applied.
 * <p>
 * Two code paths are provided: a fast path that works directly on the backing arrays when both buffers expose one,
 * and a generic path that uses the relative {@code get}/{@code put} buffer operations otherwise.
 */
public class Utf8Encoder extends CharsetEncoder {

    /**
     * Creates a UTF-8 encoder that averages 1.1 bytes per character and never produces more than 4 bytes per
     * character.
     */
    public Utf8Encoder() {
        super(StandardCharsets.UTF_8, 1.1f, 4.0f);
    }


    /**
     * Encodes as many characters as possible from {@code in} into {@code out}, selecting the array based
     * implementation when both buffers are backed by an accessible array.
     *
     * @param in  the characters to encode
     * @param out the buffer that receives the UTF-8 bytes
     *
     * @return {@link CoderResult#UNDERFLOW} when more input is required (or all input was consumed),
     *         {@link CoderResult#OVERFLOW} when {@code out} is too small for the next character, or a malformed
     *         input result of length 1 for an invalid surrogate sequence
     */
    @Override
    protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
        if (in.hasArray() && out.hasArray()) {
            return encodeHasArray(in, out);
        }
        return encodeNotHasArray(in, out);
    }


    /**
     * Array based encoding. On return the positions of both buffers reflect exactly the characters consumed and the
     * bytes produced; a character that could not be encoded completely is left unconsumed.
     *
     * @param in  array backed source of characters
     * @param out array backed destination for the encoded bytes
     *
     * @return the result of the encoding operation as described for {@link #encodeLoop(CharBuffer, ByteBuffer)}
     */
    private CoderResult encodeHasArray(CharBuffer in, ByteBuffer out) {
        final char[] chars = in.array();
        final byte[] bytes = out.array();
        // Buffer positions are relative to arrayOffset(); a slice of a larger buffer has a non-zero offset, so
        // all array indexes must be shifted accordingly.
        final int inBase = in.arrayOffset();
        final int outBase = out.arrayOffset();
        final int inEnd = inBase + in.limit();
        int inIdx = inBase + in.position();
        int outIdx = outBase + out.position();
        int outRemaining = out.remaining();
        CoderResult result = CoderResult.UNDERFLOW;

        while (inIdx < inEnd) {
            final int c = chars[inIdx];

            if (c <= 0x7F) {
                // 1 byte: 0xxxxxxx
                if (outRemaining < 1) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bytes[outIdx++] = (byte) c;
                outRemaining--;
            } else if (c <= 0x7FF) {
                // 2 bytes: 110xxxxx 10xxxxxx
                if (outRemaining < 2) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bytes[outIdx++] = (byte) (0xC0 | (c >> 6));
                bytes[outIdx++] = (byte) (0x80 | (c & 0x3F));
                outRemaining -= 2;
            } else if (c >= 0xD800 && c <= 0xDFFF) {
                // Surrogate: the input has to contain one more char.
                if (inEnd <= inIdx + 1) {
                    result = CoderResult.UNDERFLOW;
                    break;
                }
                if (outRemaining < 4) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                // The surrogate pair starts with a low-surrogate.
                if (c >= 0xDC00) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                final int c2 = chars[inIdx + 1];
                // The high-surrogate must be followed by a low-surrogate (0xDC00-0xDFFF); anything else,
                // including ordinary characters above the surrogate range, is malformed.
                if (c2 < 0xDC00 || c2 > 0xDFFF) {
                    result = CoderResult.malformedForLength(1);
                    break;
                }
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                final int cp = Character.toCodePoint((char) c, (char) c2);
                bytes[outIdx++] = (byte) (0xF0 | ((cp >> 18) & 0x07));
                bytes[outIdx++] = (byte) (0x80 | ((cp >> 12) & 0x3F));
                bytes[outIdx++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
                bytes[outIdx++] = (byte) (0x80 | (cp & 0x3F));
                outRemaining -= 4;
                // Account for the low-surrogate that was consumed as well.
                inIdx++;
            } else {
                // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                if (outRemaining < 3) {
                    result = CoderResult.OVERFLOW;
                    break;
                }
                bytes[outIdx++] = (byte) (0xE0 | (c >> 12));
                bytes[outIdx++] = (byte) (0x80 | ((c >> 6) & 0x3F));
                bytes[outIdx++] = (byte) (0x80 | (c & 0x3F));
                outRemaining -= 3;
            }
            inIdx++;

            if (outRemaining == 0) {
                // If both input and output are exhausted, return UNDERFLOW
                result = (inIdx == inEnd) ? CoderResult.UNDERFLOW : CoderResult.OVERFLOW;
                break;
            }
        }

        // Translate the array indexes back to buffer relative positions.
        in.position(inIdx - inBase);
        out.position(outIdx - outBase);
        return result;
    }


    /**
     * Generic encoding for buffers without an accessible backing array (for example read-only or direct buffers).
     * Characters are read with relative gets; the input position is restored in the {@code finally} block so that
     * it only covers characters that were encoded completely.
     *
     * @param in  source of characters
     * @param out destination for the encoded bytes
     *
     * @return the result of the encoding operation as described for {@link #encodeLoop(CharBuffer, ByteBuffer)}
     */
    private CoderResult encodeNotHasArray(CharBuffer in, ByteBuffer out) {
        int outRemaining = out.remaining();
        // Index of the first character that has not been fully encoded yet.
        int pos = in.position();
        final int limit = in.limit();
        try {
            while (pos < limit) {
                if (outRemaining == 0) {
                    return CoderResult.OVERFLOW;
                }

                final int c = in.get();

                if (c <= 0x7F) {
                    // 1 byte; at least one byte of space is guaranteed by the check above.
                    out.put((byte) c);
                    outRemaining--;
                } else if (c <= 0x7FF) {
                    // 2 bytes: 110xxxxx 10xxxxxx
                    if (outRemaining < 2) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xC0 | (c >> 6)));
                    out.put((byte) (0x80 | (c & 0x3F)));
                    outRemaining -= 2;
                } else if (c >= 0xD800 && c <= 0xDFFF) {
                    // Surrogate: the input has to contain one more char.
                    if (limit <= pos + 1) {
                        return CoderResult.UNDERFLOW;
                    }
                    if (outRemaining < 4) {
                        return CoderResult.OVERFLOW;
                    }
                    // The surrogate pair starts with a low-surrogate.
                    if (c >= 0xDC00) {
                        return CoderResult.malformedForLength(1);
                    }
                    final int c2 = in.get();
                    // The high-surrogate must be followed by a low-surrogate (0xDC00-0xDFFF); anything else,
                    // including ordinary characters above the surrogate range, is malformed.
                    if (c2 < 0xDC00 || c2 > 0xDFFF) {
                        return CoderResult.malformedForLength(1);
                    }
                    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    final int cp = Character.toCodePoint((char) c, (char) c2);
                    out.put((byte) (0xF0 | ((cp >> 18) & 0x07)));
                    out.put((byte) (0x80 | ((cp >> 12) & 0x3F)));
                    out.put((byte) (0x80 | ((cp >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (cp & 0x3F)));
                    outRemaining -= 4;
                    // Account for the low-surrogate that was consumed as well.
                    pos++;
                } else {
                    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
                    if (outRemaining < 3) {
                        return CoderResult.OVERFLOW;
                    }
                    out.put((byte) (0xE0 | (c >> 12)));
                    out.put((byte) (0x80 | ((c >> 6) & 0x3F)));
                    out.put((byte) (0x80 | (c & 0x3F)));
                    outRemaining -= 3;
                }
                pos++;
            }
        } finally {
            // The relative gets above may have read ahead of what was actually encoded.
            in.position(pos);
        }
        return CoderResult.UNDERFLOW;
    }
}