Quelle URLEncoder.java Sprache: JAVA

/*
* Copyright (c) 1995, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/

package java.net;

import java.io.UnsupportedEncodingException;
import java.io.CharArrayWriter;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException ;
import java.util.BitSet;
import java.util.Objects;

import jdk.internal.util.StaticProperty;

/**
* Utility class for HTML form encoding. This class contains static methods
* for converting a String to the <CODE>application/x-www-form-urlencoded</CODE> MIME
* format. For more information about HTML form encoding, consult the HTML
* <A HREF="http://www.w3.org/TR/html4/">specification</A>.
*
* <p>
* When encoding a String, the following rules apply:
*
* <ul>
* <li>The alphanumeric characters "{@code a}" through
* "{@code z}", "{@code A}" through
* "{@code Z}" and "{@code 0}"
* through "{@code 9}" remain the same.
* <li>The special characters "{@code .}",
* "{@code -}", "{@code *}", and
* "{@code _}" remain the same.
* <li>The space character " " is
* converted into a plus sign "{@code +}".
* <li>All other characters are unsafe and are first converted into
* one or more bytes using some encoding scheme. Then each byte is
* represented by the 3-character string
* "{@code %xy}", where xy is the
* two-digit hexadecimal representation of the byte.
* The recommended encoding scheme to use is UTF-8. However,
* for compatibility reasons, if an encoding is not specified,
* then the default charset is used.
* </ul>
*
* <p>
* For example using UTF-8 as the encoding scheme the string "The
* string ü@foo-bar" would get converted to
* "The+string+%C3%BC%40foo-bar" because in UTF-8 the character
* ü is encoded as two bytes C3 (hex) and BC (hex), and the
* character @ is encoded as one byte 40 (hex).
*
* @see Charset#defaultCharset()
*
* @author Herb Jellinek
* @since 1.0
*/
public class URLEncoder {
    /**
     * Characters that are copied to the output without percent-escaping.
     * The space character is a member too, but {@code encode} rewrites it
     * to {@code '+'} rather than copying it verbatim.
     */
    static final BitSet dontNeedEncoding;

    /** Distance between a lower-case ASCII letter and its upper-case form. */
    static final int caseDiff = ('a' - 'A');

    /**
     * Canonical name of the default charset. {@link #encode(String)} no
     * longer goes through this name; the field is kept package-private, as
     * in the original, so any package-local readers keep compiling.
     */
    static final String dfltEncName;

    static {

        /* The list of characters that are not encoded has been
         * determined as follows:
         *
         * RFC 2396 states:
         * -----
         * Data characters that are allowed in a URI but do not have a
         * reserved purpose are called unreserved.  These include upper
         * and lower case letters, decimal digits, and a limited set of
         * punctuation marks and symbols.
         *
         * unreserved  = alphanum | mark
         *
         * mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
         *
         * Unreserved characters can be escaped without changing the
         * semantics of the URI, but this should not be done unless the
         * URI is being used in a context that does not allow the
         * unescaped character to appear.
         * -----
         *
         * It appears that both Netscape and Internet Explorer escape
         * all special characters from this list with the exception
         * of "-", "_", ".", "*". While it is not clear why they are
         * escaping the other characters, perhaps it is safest to
         * assume that there might be contexts in which the others
         * are unsafe if not escaped. Therefore, we will use the same
         * list. It is also noteworthy that this is consistent with
         * O'Reilly's "HTML: The Definitive Guide" (page 164).
         *
         * As a last note, Internet Explorer does not encode the "@"
         * character which is clearly not unreserved according to the
         * RFC. We are being consistent with the RFC in this matter,
         * as is Netscape.
         */
        BitSet safe = new BitSet(256);
        // BitSet.set(from, to) treats 'to' as exclusive, hence the + 1.
        safe.set('a', 'z' + 1);
        safe.set('A', 'Z' + 1);
        safe.set('0', '9' + 1);
        // The space is "safe" only in the sense that it is not
        // percent-escaped; encode() turns it into a '+'.
        safe.set(' ');
        safe.set('-');
        safe.set('_');
        safe.set('.');
        safe.set('*');
        dontNeedEncoding = safe;

        dfltEncName = Charset.defaultCharset().name();
    }

    /**
     * You can't call the constructor.
     */
    private URLEncoder() { }

    /**
     * Translates a string into {@code x-www-form-urlencoded}
     * format. This method uses the default charset
     * as the encoding scheme to obtain the bytes for unsafe characters.
     *
     * @param   s   {@code String} to be translated.
     * @deprecated The resulting string may vary depending on the
     *          default charset. Instead, use the encode(String,String)
     *          method to specify the encoding.
     * @return  the translated {@code String}.
     */
    @Deprecated
    public static String encode(String s) {
        // Passing the Charset object directly avoids a lookup by name, so
        // there is no failure path that could force a null result.
        return encode(s, Charset.defaultCharset());
    }

    /**
     * Translates a string into {@code application/x-www-form-urlencoded}
     * format using a specific encoding scheme.
     * <p>
     * This method behaves the same as {@linkplain #encode(String s, Charset charset)}
     * except that it will {@linkplain Charset#forName look up the charset}
     * using the given encoding name.
     *
     * @param   s   {@code String} to be translated.
     * @param   enc   The name of a supported
     *    <a href="../lang/package-summary.html#charenc">character
     *    encoding</a>.
     * @return  the translated {@code String}.
     * @throws  UnsupportedEncodingException
     *             If the named encoding is not supported or its name is
     *             illegal; the underlying charset exception is attached
     *             as the cause.
     * @throws  NullPointerException if {@code s} or {@code enc} is {@code null}.
     * @see URLDecoder#decode(java.lang.String, java.lang.String)
     * @since 1.4
     */
    public static String encode(String s, String enc)
        throws UnsupportedEncodingException {
        Objects.requireNonNull(enc, "charsetName");

        final Charset charset;
        try {
            charset = Charset.forName(enc);
        } catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
            // UnsupportedEncodingException has no (message, cause)
            // constructor, so the cause is attached separately.
            UnsupportedEncodingException unsupported =
                    new UnsupportedEncodingException(enc);
            unsupported.initCause(e);
            throw unsupported;
        }
        return encode(s, charset);
    }

    /**
     * Translates a string into {@code application/x-www-form-urlencoded}
     * format using a specific {@linkplain Charset Charset}.
     * This method uses the supplied charset to obtain the bytes for unsafe
     * characters.
     * <p>
     * <em><strong>Note:</strong> The <a href=
     * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
     * World Wide Web Consortium Recommendation</a> states that
     * UTF-8 should be used. Not doing so may introduce incompatibilities.</em>
     * <p>
     * If no character of {@code s} needs to be changed, {@code s} itself
     * is returned.
     *
     * @param   s   {@code String} to be translated.
     * @param charset the given charset
     * @return  the translated {@code String}.
     * @throws NullPointerException if {@code s} or {@code charset} is {@code null}.
     * @see URLDecoder#decode(java.lang.String, Charset)
     * @since 10
     */
    public static String encode(String s, Charset charset) {
        Objects.requireNonNull(charset, "charset");
        Objects.requireNonNull(s, "s");

        final int length = s.length();
        boolean needToChange = false;
        StringBuilder out = new StringBuilder(length);

        int i = 0;
        while (i < length) {
            char c = s.charAt(i);
            if (dontNeedEncoding.get(c)) {
                if (c == ' ') {
                    c = '+';
                    needToChange = true;
                }
                out.append(c);
                i++;
                continue;
            }

            // Collect the maximal run of unsafe characters and convert the
            // whole run to bytes in one go. Neither half of a surrogate
            // pair is in the safe set, so a pair always ends up inside the
            // same run and is handed to the charset encoder intact. An
            // unpaired surrogate is treated like any other unsafe
            // character.
            final int runStart = i;
            do {
                i++;
            } while (i < length && !dontNeedEncoding.get(s.charAt(i)));

            for (byte b : s.substring(runStart, i).getBytes(charset)) {
                appendEscaped(out, b);
            }
            needToChange = true;
        }

        return needToChange ? out.toString() : s;
    }

    /** Appends {@code b} as {@code %XY}, with XY in upper-case hex. */
    private static void appendEscaped(StringBuilder out, byte b) {
        out.append('%');
        out.append(upperHexDigit((b >> 4) & 0xF));
        out.append(upperHexDigit(b & 0xF));
    }

    /** Returns the upper-case hexadecimal digit for a value in 0..15. */
    private static char upperHexDigit(int nibble) {
        char ch = Character.forDigit(nibble, 16);
        // forDigit yields lower-case letters; shift them to upper case.
        return Character.isLetter(ch) ? (char) (ch - caseDiff) : ch;
    }
}

quality91%

¤ Dauer der Verarbeitung: 0.2 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.