From 40a9d99496e098562f090fb7ffce9e749011b131 Mon Sep 17 00:00:00 2001 From: Benjamin Culkin Date: Mon, 20 May 2024 17:58:16 -0400 Subject: Formatting pass --- .../src/main/java/net/wotonomy/web/URI.java | 6836 ++++++++++---------- 1 file changed, 3510 insertions(+), 3326 deletions(-) (limited to 'projects/net.wotonomy.web/src/main/java/net/wotonomy/web/URI.java') diff --git a/projects/net.wotonomy.web/src/main/java/net/wotonomy/web/URI.java b/projects/net.wotonomy.web/src/main/java/net/wotonomy/web/URI.java index 41f77f5..ba608a4 100644 --- a/projects/net.wotonomy.web/src/main/java/net/wotonomy/web/URI.java +++ b/projects/net.wotonomy.web/src/main/java/net/wotonomy/web/URI.java @@ -59,7 +59,7 @@ * * [Additional notices, if required by prior licensing conditions] * - */ + */ // excellent class borrowed from Apache Commons project: //package org.apache.commons.httpclient; @@ -80,30 +80,39 @@ import sun.security.action.GetPropertyAction; /** * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. * This class has the purpose of supportting of parsing a URI reference to - * extend any specific protocols, the character encoding of the protocol to - * be transported and the charset of the document. + * extend any specific protocols, the character encoding of the protocol to be + * transported and the charset of the document. *

* A URI is always in an "escaped" form, since escaping or unescaping a - * completed URI might change its semantics. + * completed URI might change its semantics. *

- * Implementers should be careful not to escape or unescape the same string - * more than once, since unescaping an already unescaped string might lead to - * misinterpreting a percent data character as another escaped character, - * or vice versa in the case of escaping an already escaped string. + * Implementers should be careful not to escape or unescape the same string more + * than once, since unescaping an already unescaped string might lead to + * misinterpreting a percent data character as another escaped character, or + * vice versa in the case of escaping an already escaped string. *

* In order to avoid these problems, data types used as follows: - *

+ * 

+ *

+ * + *
  *   URI character sequence: char
  *   octet sequence: byte
  *   original character sequence: String
- * 

+ *

+ * + *
+ *

* - * So, a URI is a sequence of characters as an array of a char type, which - * is not always represented as a sequence of octets as an array of byte. + * So, a URI is a sequence of characters as an array of a char type, which is + * not always represented as a sequence of octets as an array of byte. *

* * URI Syntactic Components - *

+ * 

+ *

+ * + *
  * - In general, written as follows:
  *   Absolute URI = <scheme>:<scheme-specific-part>
  *   Generic URI = <scheme>://<authority><path>?<query>
@@ -113,9 +122,13 @@ import sun.security.action.GetPropertyAction;
  *   hier_part     = ( net_path | abs_path ) [ "?" query ]
  *   net_path      = "//" authority [ abs_path ]
  *   abs_path      = "/"  path_segments
- * 

+ *

+ * + *
+ *

* * The following examples illustrate URI that are in common use. + * *

  * ftp://ftp.is.co.za/rfc/rfc1808.txt
  *    -- ftp scheme for File Transfer Protocol services
@@ -130,11 +143,14 @@ import sun.security.action.GetPropertyAction;
  * telnet://melvyl.ucop.edu/
  *    -- telnet scheme for interactive services via the TELNET Protocol
  * 
+ * * Please, notice that there are many modifications from URL(RFC 1738) and * relative URL(RFC 1808). *

* The expressions for a URI - *

+ * 

+ * + *

  * For escaped URI forms
  *  - URI(char[]) // constructor
  *  - char[] getRawXxx() // method
@@ -144,3321 +160,3489 @@ import sun.security.action.GetPropertyAction;
  * For unescaped URI forms
  *  - URI(String) // constructor
  *  - String getXXX() // method
- * 

+ *

+ *

* * @author Sung-Gu - * @version $Revision: 905 $ $Date: 2002/03/14 15:14:01 + * @version $Revision: 905 $ $Date: 2002/03/14 15:14:01 */ class URI implements Cloneable, Comparable, Serializable { - - // ----------------------------------------------------------- Constructors - - protected URI() { - } - - /** - * Construct a URI as an escaped form of a character array. - * An URI can be placed within double-quotes or angle brackets like - * "http://test.com/" and <http://test.com/> - * - * @param escaped the URI character sequence - * @exception IOException - * @throws NullPointerException if escaped is null - */ - public URI(char[] escaped) throws IOException { - parseUriReference(new String(escaped), true); - } - - - /** - * Construct a URI from the given string. - *

-     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
-     * 

- * An URI can be placed within double-quotes or angle brackets like - * "http://test.com/" and <http://test.com/> - * - * @param original the string to be represented to URI character sequence - * It is one of absoluteURI and relativeURI. - * @exception IOException - */ - public URI(String original) throws IOException { - parseUriReference(original, false); - } - - /** - * Construct a URI from a URL. - * - * @param url a valid URL. - * @throws IOException - * @since 2.0 - */ - public URI(URL url) throws IOException { - this(url.toString()); - } - - - /** - * Construct a general URI from the given components. - *

-     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
-     *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
-     *   opaque_part   = uric_no_slash *uric
-     * 

- * It's for absolute URI = <scheme>:<scheme-specific-part># - * <fragment>. - * - * @param scheme the scheme string - * @param scheme_specific_part scheme_specific_part - * @param fragment the fragment string - * @exception IOException - */ - public URI(String scheme, String scheme_specific_part, String fragment) - throws IOException { - - // validate and contruct the URI character sequence - if (scheme == null) { - throw new IOException(/*IOException.PARSING,*/ "URI: scheme required"); - } - char[] s = scheme.toLowerCase().toCharArray(); - if (validate(s, URI.scheme)) { - _scheme = s; // is_absoluteURI - } else { - throw new IOException(/*IOException.PARSING,*/ "URI: incorrect scheme"); - } - _opaque = encode(scheme_specific_part, allowed_opaque_part); - // Set flag - _is_opaque_part = true; - setUriReference(); - } - - - /** - * Construct a general URI from the given components. - *

-     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
-     *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
-     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
-     *   hier_part     = ( net_path | abs_path ) [ "?" query ]
-     * 

- * It's for absolute URI = <scheme>:<path>?<query>#< - * fragment> and relative URI = <path>?<query>#<fragment - * >. - * - * @param scheme the scheme string - * @param authority the authority string - * @param path the path string - * @param query the query string - * @param fragment the fragment string - * @exception IOException - */ - public URI(String scheme, String authority, String path, String query, - String fragment) throws IOException { - - // validate and contruct the URI character sequence - StringBuffer buff = new StringBuffer(); - if (scheme != null) { - buff.append(scheme); - buff.append(':'); - } - if (authority != null) { - buff.append("//"); - buff.append(authority); - } - if (path != null) { // accept empty path - if ((scheme != null || authority != null) - && !path.startsWith("/")) { - throw new IOException(/*IOException.PARSING*,*/ - "URI: abs_path requested"); - } - buff.append(path); - } - if (query != null) { - buff.append('?'); - buff.append(query); - } - if (fragment != null) { - buff.append('#'); - buff.append(fragment); - } - parseUriReference(buff.toString(), false); - } - - - /** - * Construct a general URI from the given components. - * - * @param scheme the scheme string - * @param userinfo the userinfo string - * @param host the host string - * @param port the port number - * @exception IOException - */ - public URI(String scheme, String userinfo, String host, int port) - throws IOException { - - this(scheme, userinfo, host, port, null, null, null); - } - - - /** - * Construct a general URI from the given components. 
- * - * @param scheme the scheme string - * @param userinfo the userinfo string - * @param host the host string - * @param port the port number - * @param path the path string - * @exception IOException - */ - public URI(String scheme, String userinfo, String host, int port, - String path) throws IOException { - - this(scheme, userinfo, host, port, path, null, null); - } - - - /** - * Construct a general URI from the given components. - * - * @param scheme the scheme string - * @param userinfo the userinfo string - * @param host the host string - * @param port the port number - * @param path the path string - * @param query the query string - * @exception IOException - */ - public URI(String scheme, String userinfo, String host, int port, - String path, String query) throws IOException { - - this(scheme, userinfo, host, port, path, query, null); - } - - - /** - * Construct a general URI from the given components. - * - * @param scheme the scheme string - * @param userinfo the userinfo string - * @param host the host string - * @param port the port number - * @param path the path string - * @param query the query string - * @param fragment the fragment string - * @exception IOException - */ - public URI(String scheme, String userinfo, String host, int port, - String path, String query, String fragment) throws IOException { - - this(scheme, (host == null) ? null : - ((userinfo != null) ? userinfo + '@' : "") + host + - ((port != -1) ? ":" + port : ""), path, query, fragment); - } - - - /** - * Construct a general URI from the given components. - * - * @param scheme the scheme string - * @param host the host string - * @param path the path string - * @param fragment the fragment string - * @exception IOException - */ - public URI(String scheme, String host, String path, String fragment) - throws IOException { - - this(scheme, host, path, null, fragment); - } - - - /** - * Construct a general URI with the given relative URI string. 
- * - * @param base the base URI - * @param relative the relative URI string - * @exception IOException - */ - public URI(URI base, String relative) throws IOException { - this(base, new URI(relative)); - } - - - /** - * Construct a general URI with the given relative URI. - *

-     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
-     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
-     * 

- * Resolving Relative References to Absolute Form. - * - * Examples of Resolving Relative URI References - * - * Within an object with a well-defined base URI of - *

-     *   http://a/b/c/d;p?q
-     * 

- * the relative URI would be resolved as follows: - * - * Normal Examples - * - *

-     *   g:h           =  g:h
-     *   g             =  http://a/b/c/g
-     *   ./g           =  http://a/b/c/g
-     *   g/            =  http://a/b/c/g/
-     *   /g            =  http://a/g
-     *   //g           =  http://g
-     *   ?y            =  http://a/b/c/?y
-     *   g?y           =  http://a/b/c/g?y
-     *   #s            =  (current document)#s
-     *   g#s           =  http://a/b/c/g#s
-     *   g?y#s         =  http://a/b/c/g?y#s
-     *   ;x            =  http://a/b/c/;x
-     *   g;x           =  http://a/b/c/g;x
-     *   g;x?y#s       =  http://a/b/c/g;x?y#s
-     *   .             =  http://a/b/c/
-     *   ./            =  http://a/b/c/
-     *   ..            =  http://a/b/
-     *   ../           =  http://a/b/
-     *   ../g          =  http://a/b/g
-     *   ../..         =  http://a/
-     *   ../../        =  http://a/ 
-     *   ../../g       =  http://a/g
-     * 

- * - * Some URI schemes do not allow a hierarchical syntax matching the - * syntax, and thus cannot use relative references. - * - * @param base the base URI - * @param relative the relative URI - * @exception IOException - */ - public URI(URI base, URI relative) throws IOException { - - if (base._scheme == null) { - throw new IOException(/* IOException.PARSING,*/ "URI: base URI required"); - } - if (base._scheme != null) { - this._scheme = base._scheme; - this._authority = base._authority; - } - if (base._is_opaque_part || relative._is_opaque_part) { - this._scheme = base._scheme; - this._is_opaque_part = relative._is_opaque_part; - this._opaque = relative._opaque; - this._fragment = relative._fragment; - this.setUriReference(); - return; - } - if (relative._scheme != null) { - this._scheme = relative._scheme; - this._is_net_path = relative._is_net_path; - this._authority = relative._authority; - if (relative._is_server) { - this._userinfo = relative._userinfo; - this._host = relative._host; - this._port = relative._port; - } else if (relative._is_reg_name) { - this._is_reg_name = relative._is_reg_name; - } - this._is_abs_path = relative._is_abs_path; - this._is_rel_path = relative._is_rel_path; - this._path = relative._path; - } else if (base._authority != null && relative._scheme == null) { - this._is_net_path = base._is_net_path; - this._authority = base._authority; - if (base._is_server) { - this._userinfo = base._userinfo; - this._host = base._host; - this._port = base._port; - } else if (base._is_reg_name) { - this._is_reg_name = base._is_reg_name; - } - } - if (relative._authority != null) { - this._is_net_path = relative._is_net_path; - this._authority = relative._authority; - if (relative._is_server) { - this._is_server = relative._is_server; - this._userinfo = relative._userinfo; - this._host = relative._host; - this._port = relative._port; - } else if (relative._is_reg_name) { - this._is_reg_name = relative._is_reg_name; - } - this._is_abs_path = 
relative._is_abs_path; - this._is_rel_path = relative._is_rel_path; - this._path = relative._path; - } - // resolve the path - if (relative._scheme == null && relative._authority == null || - equals(base._scheme, relative._scheme)) { - this._path = resolvePath(base._path, relative._path); - } - // base._query removed - if (relative._query != null) { - this._query = relative._query; - } - // base._fragment removed - if (relative._fragment != null) { - this._fragment = relative._fragment; - } - this.setUriReference(); - } - - // --------------------------------------------------- Instance Variables - - static final long serialVersionUID = 604752400577948726L; - - - /** - * This Uniform Resource Identifier (URI). - * The URI is always in an "escaped" form, since escaping or unescaping - * a completed URI might change its semantics. - */ - protected char[] _uri = null; - - - /** - * The default charset of the protocol. RFC 2277, 2396 - */ - protected static String _protocolCharset = "UTF-8"; - - - /** - * The default charset of the document. RFC 2277, 2396 - * The platform's charset is used for the document by default. - */ - protected static String _documentCharset = null; - // Static initializer for _documentCharset - static { - Locale locale = Locale.getDefault(); - if (locale != null) { - // in order to support backward compatiblity - _documentCharset = LocaleToCharsetMap.getCharset(locale); - } else { - _documentCharset = (String)AccessController.doPrivileged( - new GetPropertyAction("file.encoding")); - } - } - - /** - * The scheme. - */ - protected char[] _scheme = null; - - - /** - * The opaque. - */ - protected char[] _opaque = null; - - - /** - * The authority. - */ - protected char[] _authority = null; - - - /** - * The userinfo. - */ - protected char[] _userinfo = null; - - - /** - * The host. - */ - protected char[] _host = null; - - - /** - * The port. - */ - protected int _port = -1; - - - /** - * The path. 
- */ - protected char[] _path = null; - - - /** - * The query. - */ - protected char[] _query = null; - - - /** - * The fragment. - */ - protected char[] _fragment = null; - - - /** - * The root path. - */ - protected static char[] rootPath = { '/' }; - - // ---------------------- Generous characters for each component validation - - /** - * The percent "%" character always has the reserved purpose of being the - * escape indicator, it must be escaped as "%25" in order to be used as - * data within a URI. - */ - protected static final BitSet percent = new BitSet(256); - // Static initializer for percent - static { - percent.set('%'); - } - - - /** - * BitSet for digit. - *

-     * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
-     *            "8" | "9"
-     * 

- */ - protected static final BitSet digit = new BitSet(256); - // Static initializer for digit - static { - for(int i = '0'; i <= '9'; i++) { - digit.set(i); - } - } - - - /** - * BitSet for alpha. - *

-     * alpha         = lowalpha | upalpha
-     * 

- */ - protected static final BitSet alpha = new BitSet(256); - // Static initializer for alpha - static { - for (int i = 'a'; i <= 'z'; i++) { - alpha.set(i); - } - for (int i = 'A'; i <= 'Z'; i++) { - alpha.set(i); - } - } - - - /** - * BitSet for alphanum (join of alpha & digit). - *

-     *  alphanum      = alpha | digit
-     * 

- */ - protected static final BitSet alphanum = new BitSet(256); - // Static initializer for alphanum - static { - alphanum.or(alpha); - alphanum.or(digit); - } - - - /** - * BitSet for hex. - *

-     * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
-     *                         "a" | "b" | "c" | "d" | "e" | "f"
-     * 

- */ - protected static final BitSet hex = new BitSet(256); - // Static initializer for hex - static { - hex.or(digit); - for(int i = 'a'; i <= 'f'; i++) { - hex.set(i); - } - for(int i = 'A'; i <= 'F'; i++) { - hex.set(i); - } - } - - - /** - * BitSet for escaped. - *

-     * escaped       = "%" hex hex
-     * 

- */ - protected static final BitSet escaped = new BitSet(256); - // Static initializer for escaped - static { - escaped.or(percent); - escaped.or(hex); - } - - - /** - * BitSet for mark. - *

-     * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
-     *                 "(" | ")"
-     * 

- */ - protected static final BitSet mark = new BitSet(256); - // Static initializer for mark - static { - mark.set('-'); - mark.set('_'); - mark.set('.'); - mark.set('!'); - mark.set('~'); - mark.set('*'); - mark.set('\''); - mark.set('('); - mark.set(')'); - } - - - /** - * Data characters that are allowed in a URI but do not have a reserved - * purpose are called unreserved. - *

-     * unreserved    = alphanum | mark
-     * 

- */ - protected static final BitSet unreserved = new BitSet(256); - // Static initializer for unreserved - static { - unreserved.or(alphanum); - unreserved.or(mark); - } - - - /** - * BitSet for reserved. - *

-     * reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
-     *                 "$" | ","
-     * 

- */ - protected static final BitSet reserved = new BitSet(256); - // Static initializer for reserved - static { - reserved.set(';'); - reserved.set('/'); - reserved.set('?'); - reserved.set(':'); - reserved.set('@'); - reserved.set('&'); - reserved.set('='); - reserved.set('+'); - reserved.set('$'); - reserved.set(','); - } - - - /** - * BitSet for uric. - *

-     * uric          = reserved | unreserved | escaped
-     * 

- */ - protected static final BitSet uric = new BitSet(256); - // Static initializer for uric - static { - uric.or(reserved); - uric.or(unreserved); - uric.or(escaped); - } - - - /** - * BitSet for fragment (alias for uric). - *

-     * fragment      = *uric
-     * 

- */ - protected static final BitSet fragment = uric; - - - /** - * BitSet for query (alias for uric). - *

-     * query         = *uric
-     * 

- */ - protected static final BitSet query = uric; - - - /** - * BitSet for pchar. - *

-     * pchar         = unreserved | escaped |
-     *                 ":" | "@" | "&" | "=" | "+" | "$" | ","
-     * 

- */ - protected static final BitSet pchar = new BitSet(256); - // Static initializer for pchar - static { - pchar.or(unreserved); - pchar.or(escaped); - pchar.set(':'); - pchar.set('@'); - pchar.set('&'); - pchar.set('='); - pchar.set('+'); - pchar.set('$'); - pchar.set(','); - } - - - /** - * BitSet for param (alias for pchar). - *

-     * param         = *pchar
-     * 

- */ - protected static final BitSet param = pchar; - - - /** - * BitSet for segment. - *

-     * segment       = *pchar *( ";" param )
-     * 

- */ - protected static final BitSet segment = new BitSet(256); - // Static initializer for segment - static { - segment.or(pchar); - segment.set(';'); - segment.or(param); - } - - - /** - * BitSet for path segments. - *

-     * path_segments = segment *( "/" segment )
-     * 

- */ - protected static final BitSet path_segments = new BitSet(256); - // Static initializer for path_segments - static { - path_segments.set('/'); - path_segments.or(segment); - } - - - /** - * URI absolute path. - *

-     * abs_path      = "/"  path_segments
-     * 

- */ - protected static final BitSet abs_path = new BitSet(256); - // Static initializer for abs_path - static { - abs_path.set('/'); - abs_path.or(path_segments); - } - - - /** - * URI bitset for encoding typical non-slash characters. - *

-     * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
-     *                 "&" | "=" | "+" | "$" | ","
-     * 

- */ - protected static final BitSet uric_no_slash = new BitSet(256); - // Static initializer for uric_no_slash - static { - uric_no_slash.or(unreserved); - uric_no_slash.or(escaped); - uric_no_slash.set(';'); - uric_no_slash.set('?'); - uric_no_slash.set(';'); - uric_no_slash.set('@'); - uric_no_slash.set('&'); - uric_no_slash.set('='); - uric_no_slash.set('+'); - uric_no_slash.set('$'); - uric_no_slash.set(','); - } - - - /** - * URI bitset that combines uric_no_slash and uric. - *

-     * opaque_part   = uric_no_slash *uric
-     * 

- */ - protected static final BitSet opaque_part = new BitSet(256); - // Static initializer for opaque_part - static { - opaque_part.or(uric_no_slash); - opaque_part.or(uric); - } - - - /** - * URI bitset that combines absolute path and opaque part. - *

-     * path          = [ abs_path | opaque_part ]
-     * 

- */ - protected static final BitSet path = new BitSet(256); - // Static initializer for path - static { - path.or(abs_path); - path.or(opaque_part); - } - - - /** - * Port, a logical alias for digit. - */ - protected static final BitSet port = digit; - - - /** - * Bitset that combines digit and dot fo IPv$address. - *

-     * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
-     * 

- */ - protected static final BitSet IPv4address = new BitSet(256); - // Static initializer for IPv4address - static { - IPv4address.or(digit); - IPv4address.set('.'); - } - - - /** - * RFC 2373. - *

-     * IPv6address = hexpart [ ":" IPv4address ]
-     * 

- */ - protected static final BitSet IPv6address = new BitSet(256); - // Static initializer for IPv6address reference - static { - IPv6address.or(hex); // hexpart - IPv6address.set(':'); - IPv6address.or(IPv4address); - } - - - /** - * RFC 2732, 2373. - *

-     * IPv6reference   = "[" IPv6address "]"
-     * 

- */ - protected static final BitSet IPv6reference = new BitSet(256); - // Static initializer for IPv6reference - static { - IPv6reference.set('['); - IPv6reference.or(IPv6address); - IPv6reference.set(']'); - } - - - /** - * BitSet for toplabel. - *

-     * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
-     * 

- */ - protected static final BitSet toplabel = new BitSet(256); - // Static initializer for toplabel - static { - toplabel.or(alphanum); - toplabel.set('-'); - } - - - /** - * BitSet for domainlabel. - *

-     * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
-     * 

- */ - protected static final BitSet domainlabel = toplabel; - - - /** - * BitSet for hostname. - *

-     * hostname      = *( domainlabel "." ) toplabel [ "." ]
-     * 

- */ - protected static final BitSet hostname = new BitSet(256); - // Static initializer for hostname - static { - hostname.or(toplabel); - // hostname.or(domainlabel); - hostname.set('.'); - } - - - /** - * BitSet for host. - *

-     * host          = hostname | IPv4address | IPv6reference
-     * 

- */ - protected static final BitSet host = new BitSet(256); - // Static initializer for host - static { - host.or(hostname); - // host.or(IPv4address); - host.or(IPv6reference); // IPv4address - } - - - /** - * BitSet for hostport. - *

-     * hostport      = host [ ":" port ]
-     * 

- */ - protected static final BitSet hostport = new BitSet(256); - // Static initializer for hostport - static { - hostport.or(host); - hostport.set(':'); - hostport.or(port); - } - - - /** - * Bitset for userinfo. - *

-     * userinfo      = *( unreserved | escaped |
-     *                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
-     * 

- */ - protected static final BitSet userinfo = new BitSet(256); - // Static initializer for userinfo - static { - userinfo.or(unreserved); - userinfo.or(escaped); - userinfo.set(';'); - userinfo.set(':'); - userinfo.set('&'); - userinfo.set('='); - userinfo.set('+'); - userinfo.set('$'); - userinfo.set(','); - } - - - /** - * BitSet for within the userinfo component like user and password. - */ - public static final BitSet within_userinfo = new BitSet(256); - // Static initializer for within_userinfo - static { - within_userinfo.or(userinfo); - within_userinfo.clear(';'); // reserved within authority - within_userinfo.clear(':'); - within_userinfo.clear('@'); - within_userinfo.clear('?'); - within_userinfo.clear('/'); - } - - - /** - * Bitset for server. - *

-     * server        = [ [ userinfo "@" ] hostport ]
-     * 

- */ - protected static final BitSet server = new BitSet(256); - // Static initializer for server - static { - server.or(userinfo); - server.set('@'); - server.or(hostport); - } - - - /** - * BitSet for reg_name. - *

-     * reg_name      = 1*( unreserved | escaped | "$" | "," |
-     *                     ";" | ":" | "@" | "&" | "=" | "+" )
-     * 

- */ - protected static final BitSet reg_name = new BitSet(256); - // Static initializer for reg_name - static { - reg_name.or(unreserved); - reg_name.or(escaped); - reg_name.set('$'); - reg_name.set(','); - reg_name.set(';'); - reg_name.set(':'); - reg_name.set('@'); - reg_name.set('&'); - reg_name.set('='); - reg_name.set('+'); - } - - - /** - * BitSet for authority. - *

-     * authority     = server | reg_name
-     * 

- */ - protected static final BitSet authority = new BitSet(256); - // Static initializer for authority - static { - authority.or(server); - authority.or(reg_name); - } - - - /** - * BitSet for scheme. - *

-     * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
-     * 

- */ - protected static final BitSet scheme = new BitSet(256); - // Static initializer for scheme - static { - scheme.or(alpha); - scheme.or(digit); - scheme.set('+'); - scheme.set('-'); - scheme.set('.'); - } - - - /** - * BitSet for rel_segment. - *

-     * rel_segment   = 1*( unreserved | escaped |
-     *                     ";" | "@" | "&" | "=" | "+" | "$" | "," )
-     * 

- */ - protected static final BitSet rel_segment = new BitSet(256); - // Static initializer for rel_segment - static { - rel_segment.or(unreserved); - rel_segment.or(escaped); - rel_segment.set(';'); - rel_segment.set('@'); - rel_segment.set('&'); - rel_segment.set('='); - rel_segment.set('+'); - rel_segment.set('$'); - rel_segment.set(','); - } - - - /** - * BitSet for rel_path. - *

-     * rel_path      = rel_segment [ abs_path ]
-     * 

- */ - protected static final BitSet rel_path = new BitSet(256); - // Static initializer for rel_path - static { - rel_path.or(rel_segment); - rel_path.or(abs_path); - } - - - /** - * BitSet for net_path. - *

-     * net_path      = "//" authority [ abs_path ]
-     * 

- */ - protected static final BitSet net_path = new BitSet(256); - // Static initializer for net_path - static { - net_path.set('/'); - net_path.or(authority); - net_path.or(abs_path); - } - - - /** - * BitSet for hier_part. - *

-     * hier_part     = ( net_path | abs_path ) [ "?" query ]
-     * 

- */ - protected static final BitSet hier_part = new BitSet(256); - // Static initializer for hier_part - static { - hier_part.or(net_path); - hier_part.or(abs_path); - // hier_part.set('?'); aleady included - hier_part.or(query); - } - - - /** - * BitSet for relativeURI. - *

-     * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
-     * 

- */ - protected static final BitSet relativeURI = new BitSet(256); - // Static initializer for relativeURI - static { - relativeURI.or(net_path); - relativeURI.or(abs_path); - relativeURI.or(rel_path); - // relativeURI.set('?'); aleady included - relativeURI.or(query); - } - - - /** - * BitSet for absoluteURI. - *

-     * absoluteURI   = scheme ":" ( hier_part | opaque_part )
-     * 

- */ - protected static final BitSet absoluteURI = new BitSet(256); - // Static initializer for absoluteURI - static { - absoluteURI.or(scheme); - absoluteURI.set(':'); - absoluteURI.or(hier_part); - absoluteURI.or(opaque_part); - } - - - /** - * BitSet for URI-reference. - *

-     * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
-     * 

- */ - protected static final BitSet URI_reference = new BitSet(256); - // Static initializer for URI_reference - static { - URI_reference.or(absoluteURI); - URI_reference.or(relativeURI); - URI_reference.set('#'); - URI_reference.or(fragment); - } - - // ---------------------------- Characters disallowed within the URI syntax - // Excluded US-ASCII Characters are like control, space, delims and unwise - - /** - * BitSet for control. - */ - public static final BitSet control = new BitSet(256); - // Static initializer for control - static { - for (int i = 0; i <= 0x1F; i++) { - control.set(i); - } - control.set(0x7F); - } - - /** - * BitSet for space. - */ - public static final BitSet space = new BitSet(256); - // Static initializer for space - static { - space.set(0x20); - } - - - /** - * BitSet for delims. - */ - public static final BitSet delims = new BitSet(256); - // Static initializer for delims - static { - delims.set('<'); - delims.set('>'); - delims.set('#'); - delims.set('%'); - delims.set('"'); - } - - - /** - * BitSet for unwise. - */ - public static final BitSet unwise = new BitSet(256); - // Static initializer for unwise - static { - unwise.set('{'); - unwise.set('}'); - unwise.set('|'); - unwise.set('\\'); - unwise.set('^'); - unwise.set('['); - unwise.set(']'); - unwise.set('`'); - } - - - /** - * Disallowed rel_path before escaping. - */ - public static final BitSet disallowed_rel_path = new BitSet(256); - // Static initializer for disallowed_rel_path - static { - disallowed_rel_path.or(uric); - disallowed_rel_path.andNot(rel_path); - } - - - /** - * Disallowed opaque_part before escaping. 
- */ - public static final BitSet disallowed_opaque_part = new BitSet(256); - // Static initializer for disallowed_opaque_part - static { - disallowed_opaque_part.or(uric); - disallowed_opaque_part.andNot(opaque_part); - } - - // ----------------------- Characters allowed within and for each component - - /** - * Those characters that are allowed for the authority component. - */ - public static final BitSet allowed_authority = new BitSet(256); - // Static initializer for allowed_authority - static { - allowed_authority.or(authority); - allowed_authority.clear('%'); - } - - - /** - * Those characters that are allowed for the opaque_part. - */ - public static final BitSet allowed_opaque_part = new BitSet(256); - // Static initializer for allowed_opaque_part - static { - allowed_opaque_part.or(opaque_part); - allowed_opaque_part.clear('%'); - } - - - /** - * Those characters that are allowed for the reg_name. - */ - public static final BitSet allowed_reg_name = new BitSet(256); - // Static initializer for allowed_reg_name - static { - allowed_reg_name.or(reg_name); - // allowed_reg_name.andNot(percent); - allowed_reg_name.clear('%'); - } - - - /** - * Those characters that are allowed for the userinfo component. - */ - public static final BitSet allowed_userinfo = new BitSet(256); - // Static initializer for allowed_userinfo - static { - allowed_userinfo.or(userinfo); - // allowed_userinfo.andNot(percent); - allowed_userinfo.clear('%'); - } - - - /** - * Those characters that are allowed for within the userinfo component. - */ - public static final BitSet allowed_within_userinfo = new BitSet(256); - // Static initializer for allowed_within_userinfo - static { - allowed_within_userinfo.or(within_userinfo); - allowed_within_userinfo.clear('%'); - } - - - /** - * Those characters that are allowed for the IPv6reference component. - * The characters '[', ']' in IPv6reference should be excluded. 
- */ - public static final BitSet allowed_IPv6reference = new BitSet(256); - // Static initializer for allowed_IPv6reference - static { - allowed_IPv6reference.or(IPv6reference); - // allowed_IPv6reference.andNot(unwise); - allowed_IPv6reference.clear('['); - allowed_IPv6reference.clear(']'); - } - - - /** - * Those characters that are allowed for the host component. - * The characters '[', ']' in IPv6reference should be excluded. - */ - public static final BitSet allowed_host = new BitSet(256); - // Static initializer for allowed_host - static { - allowed_host.or(hostname); - allowed_host.or(allowed_IPv6reference); - } - - - /** - * Those characters that are allowed for the authority component. - */ - public static final BitSet allowed_within_authority = new BitSet(256); - // Static initializer for allowed_within_authority - static { - allowed_within_authority.or(server); - allowed_within_authority.or(reg_name); - allowed_within_authority.clear(';'); - allowed_within_authority.clear(':'); - allowed_within_authority.clear('@'); - allowed_within_authority.clear('?'); - allowed_within_authority.clear('/'); - } - - - /** - * Those characters that are allowed for the abs_path. - */ - public static final BitSet allowed_abs_path = new BitSet(256); - // Static initializer for allowed_abs_path - static { - allowed_abs_path.or(abs_path); - // allowed_abs_path.set('/'); // aleady included - allowed_abs_path.andNot(percent); - } - - - /** - * Those characters that are allowed for the rel_path. - */ - public static final BitSet allowed_rel_path = new BitSet(256); - // Static initializer for allowed_rel_path - static { - allowed_rel_path.or(rel_path); - allowed_rel_path.clear('%'); - } - - - /** - * Those characters that are allowed within the path. 
- */ - public static final BitSet allowed_within_path = new BitSet(256); - // Static initializer for allowed_within_path - static { - allowed_within_path.or(abs_path); - allowed_within_path.clear('/'); - allowed_within_path.clear(';'); - allowed_within_path.clear('='); - allowed_within_path.clear('?'); - } - - - /** - * Those characters that are allowed for the query component. - */ - public static final BitSet allowed_query = new BitSet(256); - // Static initializer for allowed_query - static { - allowed_query.or(uric); - allowed_query.clear('%'); - } - - - /** - * Those characters that are allowed within the query component. - */ - public static final BitSet allowed_within_query = new BitSet(256); - // Static initializer for allowed_within_query - static { - allowed_within_query.or(allowed_query); - allowed_within_query.andNot(reserved); // excluded 'reserved' - allowed_within_query.clear('#'); // avoid confict with the fragment - } - - - /** - * Those characters that are allowed for the fragment component. - */ - public static final BitSet allowed_fragment = new BitSet(256); - // Static initializer for allowed_fragment - static { - allowed_fragment.or(uric); - allowed_fragment.clear('%'); - } - - // ------------------------------------------- Flags for this URI-reference - - // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] - // absoluteURI = scheme ":" ( hier_part | opaque_part ) - protected boolean _is_hier_part; - protected boolean _is_opaque_part; - // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] - // hier_part = ( net_path | abs_path ) [ "?" 
query ] - protected boolean _is_net_path; - protected boolean _is_abs_path; - protected boolean _is_rel_path; - // net_path = "//" authority [ abs_path ] - // authority = server | reg_name - protected boolean _is_reg_name; - protected boolean _is_server; // = _has_server - // server = [ [ userinfo "@" ] hostport ] - // host = hostname | IPv4address | IPv6reference - protected boolean _is_hostname; - protected boolean _is_IPv4address; - protected boolean _is_IPv6reference; - - // ------------------------------------------ Character and escape encoding - - /** - * Encode with the default protocol charset. - * - * @param original the original character sequence - * @param allowed those characters that are allowed within a component - * @return URI character sequence - * @exception IOException null component or unsupported character encoding - */ - protected static char[] encode(String original, BitSet allowed) - throws IOException { - - return encode(original, allowed, _protocolCharset); - } - - - /** - * Encodes URI string. - * - * This is a two mapping, one from original characters to octets, and - * subsequently a second from octets to URI characters: - *

-     *   original character sequence->octet sequence->URI character sequence
-     * 

- * - * An escaped octet is encoded as a character triplet, consisting of the - * percent character "%" followed by the two hexadecimal digits - * representing the octet code. For example, "%20" is the escaped - * encoding for the US-ASCII space character. - *

- * Conversion from the local filesystem character set to UTF-8 will - * normally involve a two step process. First convert the local character - * set to the UCS; then convert the UCS to UTF-8. - * The first step in the process can be performed by maintaining a mapping - * table that includes the local character set code and the corresponding - * UCS code. - * The next step is to convert the UCS character code to the UTF-8 encoding. - *

- * Mapping between vendor codepages can be done in a very similar manner - * as described above. - *

- * The only time escape encodings can allowedly be made is when a URI is - * being created from its component parts. The escape and validate methods - * are internally performed within this method. - * - * @param original the original character sequence - * @param allowed those characters that are allowed within a component - * @param charset the protocol charset - * @return URI character sequence - * @exception IOException null component or unsupported character encoding - */ - protected static char[] encode(String original, BitSet allowed, - String charset) throws IOException { - - // encode original to uri characters. - if (original == null) { - throw new IOException(/*IOException.PARSING,*/ "URI: null"); - } - // escape octet to uri characters. - if (allowed == null) { - throw new IOException(/*IOException.PARSING,*/ - "URI: null allowed characters"); - } - byte[] octets; - try { - octets = original.getBytes(charset); - } catch (UnsupportedEncodingException error) { - throw new IOException(/*IOException.UNSUPPORTED_ENCODING,*/ "Unsupported Encoding: " + charset); - } - StringBuffer buf = new StringBuffer(octets.length); - for (int i = 0; i < octets.length; i++) { - char c = (char) octets[i]; - if (allowed.get(c)) { - buf.append(c); - } else { - buf.append('%'); - byte b = octets[i]; // use the original byte value - char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16); - buf.append(Character.toUpperCase(hexadecimal)); // high - hexadecimal = Character.forDigit(b & 0xF, 16); - buf.append(Character.toUpperCase(hexadecimal)); // low - } - } - - return buf.toString().toCharArray(); - } - - - /** - * Decode with the default protocol charset. 
- * - * @param component the URI character sequence - * @return original character sequence - * @exception IOException incomplete trailing escape pattern - * or unsupported character encoding - */ - protected static String decode(char[] component) throws IOException { - return decode(component, _protocolCharset); - } - - - /** - * Decodes URI encoded string. - * - * This is a two mapping, one from URI characters to octets, and - * subsequently a second from octets to original characters: - *

-     *   URI character sequence->octet sequence->original character sequence
-     * 

- * - * A URI must be separated into its components before the escaped - * characters within those components can be allowedly decoded. - *

- * Notice that there is a chance that URI characters that are non UTF-8 - * may be parsed as valid UTF-8. A recent non-scientific analysis found - * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a - * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% - * false reading. - *

- * The percent "%" character always has the reserved purpose of being - * the escape indicator, it must be escaped as "%25" in order to be used - * as data within a URI. - *

- * The unescape method is internally performed within this method. - * - * @param component the URI character sequence - * @param charset the protocol charset - * @return original character sequence - * @exception IOException incomplete trailing escape pattern - * or unsupported character encoding - */ - protected static String decode(char[] component, String charset) - throws IOException { - - // unescape uri characters to octets - if (component == null) return null; - - byte[] octets; - try { - octets = new String(component).getBytes(charset); - } catch (UnsupportedEncodingException error) { - throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ - "URI: not supported " + charset + " encoding"); - } - int length = octets.length; - int oi = 0; // output index - for (int ii = 0; ii < length; oi++) { - byte aByte = (byte) octets[ii++]; - if (aByte == '%' && ii+2 <= length) { - byte high = (byte) Character.digit((char) octets[ii++], 16); - byte low = (byte) Character.digit((char) octets[ii++], 16); - if (high == -1 || low == -1) { - throw new IOException(/* IOException.ESCAPING, */ - "URI: incomplete trailing escape pattern"); - - } - aByte = (byte) ((high << 4) + low); - } - octets[oi] = (byte) aByte; - } - - String result; - try { - result = new String(octets, 0, oi, charset); - } catch (UnsupportedEncodingException error) { - throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ - "URI: not supported " + charset + " encoding"); - } - - return result; - } - - - /** - * Pre-validate the unescaped URI string within a specific component. 
- * - * @param component the component string within the component - * @param disallowed those characters disallowed within the component - * @return if true, it doesn't have the disallowed characters - * if false, the component is undefined or an incorrect one - */ - protected boolean prevalidate(String component, BitSet disallowed) { - // prevalidate the given component by disallowed characters - if (component == null) { - return false; // undefined - } - char[] target = component.toCharArray(); - for (int i = 0; i < target.length; i++) { - if (disallowed.get(target[i])) { - return false; - } - } - return true; - } - - - /** - * Validate the URI characters within a specific component. - * The component must be performed after escape encoding. Or it doesn't - * include escaped characters. - * - * @param component the characters sequence within the component - * @param generous those characters that are allowed within a component - * @return if true, it's the correct URI character sequence - */ - protected boolean validate(char[] component, BitSet generous) { - // validate each component by generous characters - return validate(component, 0, -1, generous); - } - - - /** - * Validate the URI characters within a specific component. - * The component must be performed after escape encoding. Or it doesn't - * include escaped characters. - *

- * It's not that much strict, generous. The strict validation might be - * performed before being called this method. - * - * @param component the characters sequence within the component - * @param soffset the starting offset of the given component - * @param eoffset the ending offset of the given component - * if -1, it means the length of the component - * @param generous those characters that are allowed within a component - * @return if true, it's the correct URI character sequence - * @throws NullPointerException null component - */ - protected boolean validate(char[] component, int soffset, int eoffset, - BitSet generous) { - // validate each component by generous characters - if (eoffset == -1) { - eoffset = component.length -1; - } - for (int i = soffset; i <= eoffset; i++) { - if (!generous.get(component[i])) return false; - } - return true; - } - - - /** - * In order to avoid any possilbity of conflict with non-ASCII characters, - * Parse a URI reference as a String with the character - * encoding of the local system or the document. - *

- * The following line is the regular expression for breaking-down a URI - * reference into its components. - *

-     *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-     *    12            3  4          5       6  7        8 9
-     * 

- * For example, matching the above expression to - * http://jakarta.apache.org/ietf/uri/#Related - * results in the following subexpression matches: - *

-     *               $1 = http:
-     *  scheme    =  $2 = http
-     *               $3 = //jakarta.apache.org
-     *  authority =  $4 = jakarta.apache.org
-     *  path      =  $5 = /ietf/uri/
-     *               $6 = 
-     *  query     =  $7 = 
-     *               $8 = #Related
-     *  fragment  =  $9 = Related
-     * 

- * - * @param original the original character sequence - * @param escaped true if original is escaped - * @return the original character sequence - * @exception IOException - */ - protected void parseUriReference(String original, boolean escaped) - throws IOException { - - // validate and contruct the URI character sequence - if (original == null || original.length() == 0) { - throw new IOException("URI-Reference required"); - } - - /** @ - * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? - */ - String tmp = original.trim(); - - /** - * The length of the string sequence of characters. - * It may not be equal to the length of the byte array. - */ - int length = tmp.length(); - - /** - * Remove the delimiters like angle brackets around an URI. - */ - char[] firstDelimiter = { tmp.charAt(0) }; - if (validate(firstDelimiter, delims)) { - if (length >= 2) { - char[] lastDelimiter = { tmp.charAt(length - 1) }; - if (validate(lastDelimiter, delims)) { - tmp = tmp.substring(1, length - 1); - length = length - 2; - } - } - } - - /** - * The starting index - */ - int from = 0; - - /** - * The test flag whether the URI is started from the path component. - */ - boolean isStartedFromPath = false; - int atColon = tmp.indexOf(':'); - int atSlash = tmp.indexOf('/'); - if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) { - isStartedFromPath = true; - } - - /** - *

-         *     @@@@@@@@
-         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-         * 

- */ - int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); - if (at == -1) at = 0; - - /** - * Parse the scheme. - *

-         *  scheme    =  $2 = http
-         *              @
-         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-         * 

- */ - if (at < length && tmp.charAt(at) == ':') { - char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); - if (validate(target, scheme)) { - _scheme = target; - } else { - throw new IOException("incorrect scheme"); - } - from = ++at; - } - - /** - * Parse the authority component. - *

-         *  authority =  $4 = jakarta.apache.org
-         *                  @@
-         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-         * 

- */ - // Reset flags - _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; - if (0 <= at && at < length && tmp.charAt(at) == '/') { - // Set flag - _is_hier_part = true; - if (at + 2 < length && tmp.charAt(at + 1) == '/') { - // the temporary index to start the search from - int next = indexFirstOf(tmp, "/?#", at + 2); - if (next == -1) { - next = (tmp.substring(at + 2).length() == 0) ? at + 2 : - tmp.length(); - } - parseAuthority(tmp.substring(at + 2, next), escaped); - from = at = next; - // Set flag - _is_net_path = true; - } - if (from == at) { - // Set flag - _is_abs_path = true; - } - } - - /** - * Parse the path component. - *

-         *  path      =  $5 = /ietf/uri/
-         *                                @@@@@@
-         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-         * 

- */ - if (from < length) { - // rel_path = rel_segment [ abs_path ] - int next = indexFirstOf(tmp, "?#", from); - if (next == -1) { - next = tmp.length(); - } - if (!_is_abs_path) { - if (!escaped && prevalidate(tmp.substring(from, next), - disallowed_rel_path) || escaped && - validate(tmp.substring(from, next).toCharArray(), - rel_path)) { - // Set flag - _is_rel_path = true; - } else if (!escaped && prevalidate(tmp.substring(from, next), - disallowed_opaque_part) || escaped && - validate(tmp.substring(from, next).toCharArray(), - opaque_part)) { - // Set flag - _is_opaque_part = true; - } else { - // the path component may be empty - _path = null; - } - } - setPath(tmp.substring(from, next)); - at = next; - } - - /** - * Parse the query component. - *

-         *  query     =  $7 = 
-         *                                        @@@@@@@@@
-         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-         * 

- */ - if (0 <= at && at+1 < length && tmp.charAt(at) == '?') { - int next = tmp.indexOf('#', at + 1); - if (next == -1) { - next = tmp.length(); - } - _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() : - encode(tmp.substring(at + 1, next), allowed_query); - at = next; - } - - /** - * Parse the fragment component. - *

-         *  fragment  =  $9 = Related
-         *                                                   @@@@@@@@
-         *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
-         * 

- */ - if (0 <= at && at+1 < length && tmp.charAt(at) == '#') { - _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() : - encode(tmp.substring(at + 1), allowed_fragment); - } - - // set this URI. - setUriReference(); - } - - - /** - * Get the earlier index that to be searched for the first occurrance in - * one of any of the given string. - * - * @param s the string to be indexed - * @param delims the delimiters used to index - * @return the earlier index if there are delimiters - */ - protected int indexFirstOf(String s, String delims) { - return indexFirstOf(s, delims, -1); - } - - - /** - * Get the earlier index that to be searched for the first occurrance in - * one of any of the given string. - * - * @param s the string to be indexed - * @param delims the delimiters used to index - * @param offset the from index - * @return the earlier index if there are delimiters - */ - protected int indexFirstOf(String s, String delims, int offset) { - if (s == null || s.length() == 0) { - return -1; - } - if (delims == null || delims.length() == 0) { - return -1; - } - // check boundaries - if (offset < 0) { - offset = 0; - } else if (offset > s.length()) { - return -1; - } - // s is never null - int min = s.length(); - char[] delim = delims.toCharArray(); - for (int i = 0; i < delim.length; i++) { - int at = s.indexOf(delim[i], offset); - if (at >= 0 && at < min) { - min = at; - } - } - return (min == s.length()) ? -1 : min; - } - - - /** - * Get the earlier index that to be searched for the first occurrance in - * one of any of the given array. - * - * @param s the character array to be indexed - * @param delim the delimiter used to index - * @return the ealier index if there are a delimiter - */ - protected int indexFirstOf(char[] s, char delim) { - return indexFirstOf(s, delim, 0); - } - - - /** - * Get the earlier index that to be searched for the first occurrance in - * one of any of the given array. 
- * - * @param s the character array to be indexed - * @param delim the delimiter used to index - * @return the ealier index if there is a delimiter - */ - protected int indexFirstOf(char[] s, char delim, int offset) { - if (s == null || s.length == 0) { - return -1; - } - // check boundaries - if (offset < 0) { - offset = 0; - } else if (offset > s.length) { - return -1; - } - for (int i = offset; i < s.length; i++) { - if (s[i] == delim) { - return i; - } - } - return -1; - } - - - /** - * Parse the authority component. - * - * @param original the original character sequence of authority component - * @param escaped true if original is escaped - * @exception IOException - */ - protected void parseAuthority(String original, boolean escaped) - throws IOException { - - // Reset flags - _is_reg_name = _is_server = - _is_hostname = _is_IPv4address = _is_IPv6reference = false; - - boolean has_port = true; - int from = 0; - int next = original.indexOf('@'); - if (next != -1) { // neither -1 and 0 - // each protocol extented from URI supports the specific userinfo - _userinfo = (escaped) ? original.substring(0, next).toCharArray() : - encode(original.substring(0, next), allowed_userinfo); - from = next + 1; - } - next = original.indexOf('[', from); - if (next >= from) { - next = original.indexOf(']', from); - if (next == -1) { - throw new IOException(/* IOException.PARSING,*/ "URI: IPv6reference"); - } else { - next++; - } - // In IPv6reference, '[', ']' should be excluded - _host = (escaped) ? 
original.substring(from, next).toCharArray() : - encode(original.substring(from, next), allowed_IPv6reference); - // Set flag - _is_IPv6reference = true; - } else { // only for !_is_IPv6reference - next = original.indexOf(':', from); - if (next == -1) { - next = original.length(); - has_port = false; - } - // REMINDME: it doesn't need the pre-validation - _host = original.substring(from, next).toCharArray(); - if (validate(_host, IPv4address)) { - // Set flag - _is_IPv4address = true; - } else if (validate(_host, hostname)) { - // Set flag - _is_hostname = true; - } else { - // Set flag - _is_reg_name = true; - } - } - if (_is_reg_name) { - // Reset flags for a server-based naming authority - _is_server = _is_hostname = _is_IPv4address = - _is_IPv6reference = false; - // set a registry-based naming authority - _authority = (escaped) ? original.toString().toCharArray() : - encode(original.toString(), allowed_reg_name); - } else { - if (original.length()-1 > next && has_port && - original.charAt(next) == ':') { // not empty - from = next + 1; - try { - _port = Integer.parseInt(original.substring(from)); - } catch (NumberFormatException error) { - throw new IOException(/*IOException.PARSING, */ - "URI: invalid port number"); - } - } - // set a server-based naming authority - StringBuffer buf = new StringBuffer(); - if (_userinfo != null) { // has_userinfo - buf.append(_userinfo); - buf.append('@'); - } - if (_host != null) { - buf.append(_host); - if (_port != -1) { - buf.append(':'); - buf.append(_port); - } - } - _authority = buf.toString().toCharArray(); - // Set flag - _is_server = true; - } - } - - - /** - * Once it's parsed successfully, set this URI. - * - * @see #getRawURI - */ - protected void setUriReference() { - // set _uri - StringBuffer buf = new StringBuffer(); - // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
- if (_scheme != null) { - buf.append(_scheme); - buf.append(':'); - } - if (_is_net_path) { - buf.append("//"); - if (_authority != null) { // has_authority - if (_userinfo != null) { // by default, remove userinfo part - if (_host != null) { - buf.append(_host); - if (_port != -1) { - buf.append(':'); - buf.append(_port); - } - } - } else { - buf.append(_authority); - } - } - } - if (_opaque != null && _is_opaque_part) { - buf.append(_opaque); - } else if (_path != null) { - // _is_hier_part or _is_relativeURI - if (_path.length != 0) { - buf.append(_path); - } - } - if (_query != null) { // has_query - buf.append('?'); - buf.append(_query); - } - if (_fragment != null) { // has_fragment - buf.append('#'); - buf.append(_fragment); - } - - _uri = buf.toString().toCharArray(); - } - - // ----------------------------------------------------------- Test methods - - - /** - * Tell whether or not this URI is absolute. - * - * @return true iif this URI is absoluteURI - */ - public boolean isAbsoluteURI() { - return (_scheme != null); - } - - - /** - * Tell whether or not this URI is relative. - * - * @return true iif this URI is relativeURI - */ - public boolean isRelativeURI() { - return (_scheme == null); - } - - - /** - * Tell whether or not the absoluteURI of this URI is hier_part. - * - * @return true iif the absoluteURI is hier_part - */ - public boolean isHierPart() { - return _is_hier_part; - } - - - /** - * Tell whether or not the absoluteURI of this URI is opaque_part. - * - * @return true iif the absoluteURI is opaque_part - */ - public boolean isOpaquePart() { - return _is_opaque_part; - } - - - /** - * Tell whether or not the relativeURI or heir_part of this URI is net_path. - * It's the same function as the has_authority() method. 
- * - * @return true iif the relativeURI or heir_part is net_path - * @see #hasAuthority - */ - public boolean isNetPath() { - return _is_net_path || (_authority != null); - } - - - /** - * Tell whether or not the relativeURI or hier_part of this URI is abs_path. - * - * @return true iif the relativeURI or hier_part is abs_path - */ - public boolean isAbsPath() { - return _is_abs_path; - } - - - /** - * Tell whether or not the relativeURI of this URI is rel_path. - * - * @return true iif the relativeURI is rel_path - */ - public boolean isRelPath() { - return _is_rel_path; - } - - - /** - * Tell whether or not this URI has authority. - * It's the same function as the is_net_path() method. - * - * @return true iif this URI has authority - * @see #isNetPath - */ - public boolean hasAuthority() { - return (_authority != null) || _is_net_path; - } - - /** - * Tell whether or not the authority component of this URI is reg_name. - * - * @return true iif the authority component is reg_name - */ - public boolean isRegName() { - return _is_reg_name; - } - - - /** - * Tell whether or not the authority component of this URI is server. - * - * @return true iif the authority component is server - */ - public boolean isServer() { - return _is_server; - } - - - /** - * Tell whether or not this URI has userinfo. - * - * @return true iif this URI has userinfo - */ - public boolean hasUserinfo() { - return (_userinfo != null); - } - - - /** - * Tell whether or not the host part of this URI is hostname. - * - * @return true iif the host part is hostname - */ - public boolean isHostname() { - return _is_hostname; - } - - - /** - * Tell whether or not the host part of this URI is IPv4address. - * - * @return true iif the host part is IPv4address - */ - public boolean isIPv4address() { - return _is_IPv4address; - } - - - /** - * Tell whether or not the host part of this URI is IPv6reference. 
- * - * @return true iif the host part is IPv6reference - */ - public boolean isIPv6reference() { - return _is_IPv6reference; - } - - - /** - * Tell whether or not this URI has query. - * - * @return true iif this URI has query - */ - public boolean hasQuery() { - return (_query != null); - } - - - /** - * Tell whether or not this URI has fragment. - * - * @return true iif this URI has fragment - */ - public boolean hasFragment() { - return (_fragment != null); - } - - - // ---------------------------------------------------------------- Charset - - - /** - * Set the default charset of the protocol. - *

- * The character set used to store files SHALL remain a local decision and - * MAY depend on the capability of local operating systems. Prior to the - * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format - * and UTF-8 encoded. This approach, while allowing international exchange - * of URIs, will still allow backward compatibility with older systems - * because the code set positions for ASCII characters are identical to the - * one byte sequence in UTF-8. - *

- * An individual URI scheme may require a single charset, define a default - * charset, or provide a way to indicate the charset used. - * - * @param charset the default charset for each protocol - */ - public static void setProtocolCharset(String charset) { - _protocolCharset = charset; - } - - - /** - * Get the default charset of the protocol. - *

- * An individual URI scheme may require a single charset, define a default - * charset, or provide a way to indicate the charset used. - *

- * To work globally either requires support of a number of character sets - * and to be able to convert between them, or the use of a single preferred - * character set. - * For support of global compatibility it is STRONGLY RECOMMENDED that - * clients and servers use UTF-8 encoding when exchanging URIs. - * - * @return the charset string - */ - public static String getProtocolCharset() { - return _protocolCharset; - } - - - /** - * Set the default charset of the document. - *

- * Notice that it will be possible to contain mixed characters (e.g. - * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional - * display of these character sets, the protocol charset could be simply - * used again. Because it's not yet implemented that the insertion of BIDI - * control characters at different points during composition is extracted. - * - * @param charset the default charset for the document - */ - public static void setDocumentCharset(String charset) { - _documentCharset = charset; - } - - - /** - * Get the default charset of the document. - * - * @return the charset string - */ - public static String getDocumentCharset() { - return _documentCharset; - } - - // ------------------------------------------------------------- The scheme - - /** - * Get the scheme. - * - * @return the scheme - */ - public char[] getRawScheme() { - return _scheme; - } - - - /** - * Get the scheme. - * - * @return the scheme - * null if undefined scheme - */ - public String getScheme() { - return (_scheme == null) ? null : new String(_scheme); - } - - // ---------------------------------------------------------- The authority - - /** - * Set the authority. It can be one type of server, hostport, hostname, - * IPv4address, IPv6reference and reg_name. - *

-     *   authority     = server | reg_name
-     * 

- * - * @param escapedAuthority the raw escaped authority - * @exception IOException - * @throws NullPointerException null authority - */ - public void setRawAuthority(char[] escapedAuthority) throws IOException { - parseAuthority(new String(escapedAuthority), true); - setUriReference(); - } - - - /** - * Set the authority. It can be one type of server, hostport, hostname, - * IPv4address, IPv6reference and reg_name. - * Note that there is no setAuthority method by the escape encoding reason. - * - * @param escapedAuthority the escaped authority string - * @exception IOException - */ - public void setEscapedAuthority(String escapedAuthority) - throws IOException { - - parseAuthority(escapedAuthority, true); - setUriReference(); - } - - - /** - * Get the raw-escaped authority. - * - * @return the raw-escaped authority - */ - public char[] getRawAuthority() { - return _authority; - } - - - /** - * Get the escaped authority. - * - * @return the escaped authority - */ - public String getEscapedAuthority() { - return (_authority == null) ? null : new String(_authority); - } - - - /** - * Get the authority. - * - * @return the authority - * @exception IOException - * @see #decode - */ - public String getAuthority() throws IOException { - return (_authority == null) ? null : decode(_authority); - } - - // ----------------------------------------------------------- The userinfo - - /** - * Get the raw-escaped userinfo. - * - * @return the raw-escaped userinfo - * @see #getAuthority - */ - public char[] getRawUserinfo() { - return _userinfo; - } - - - /** - * Get the escaped userinfo. - * - * @return the escaped userinfo - * @see #getAuthority - */ - public String getEscapedUserinfo() { - return (_userinfo == null) ? null : new String(_userinfo); - } - - - /** - * Get the userinfo. - * - * @return the userinfo - * @exception IOException - * @see #decode - * @see #getAuthority - */ - public String getUserinfo() throws IOException { - return (_userinfo == null) ? 
null : decode(_userinfo); - } - - // --------------------------------------------------------------- The host - - /** - * Get the host. - *

-     *   host          = hostname | IPv4address | IPv6reference
-     * 

- * - * @return the host - * @see #getAuthority - */ - public char[] getRawHost() { - return _host; - } - - - /** - * Get the host. - *

-     *   host          = hostname | IPv4address | IPv6reference
-     * 

- * - * @return the host - * @exception IOException - * @see #decode - * @see #getAuthority - */ - public String getHost() throws IOException { - return decode(_host); - } - - // --------------------------------------------------------------- The port - - /** - * Get the port. In order to get the specfic default port, the specific - * protocol-supported class extended from the URI class should be used. - * It has the server-based naming authority. - * - * @return the port - * if -1, it has the default port for the scheme or the server-based - * naming authority is not supported in the specific URI. - */ - public int getPort() { - return _port; - } - - // --------------------------------------------------------------- The path - - /** - * Set the path. The method couldn't be used by API programmers. - * - * @param path the path string - * @exception IOException set incorrectly or fragment only - * @see #encode - */ - protected void setPath(String path) throws IOException { - - // set path - if (_is_net_path || _is_abs_path) { - _path = encode(path, allowed_abs_path); - } else if (_is_rel_path) { - StringBuffer buff = new StringBuffer(path.length()); - int at = path.indexOf('/'); - if (at > 0) { // never 0 - buff.append(encode(path.substring(0, at), allowed_rel_path)); - buff.append(encode(path.substring(at), allowed_abs_path)); - } else { - buff.append(encode(path, allowed_rel_path)); - } - _path = buff.toString().toCharArray(); - } else if (_is_opaque_part) { - _opaque = encode(path, allowed_opaque_part); - } else { - throw new IOException(/*IOException.PARSING, */"URI: incorrect path"); - } - } - - - /** - * Resolve the base and relative path. - * - * @param base_path a character array of the base_path - * @param rel_path a character array of the rel_path - * @return the resolved path - */ - protected char[] resolvePath(char[] base_path, char[] rel_path) { - - // REMINDME: paths are never null - String base = (base_path == null) ? 
"" : new String(base_path); - int at = base.lastIndexOf('/'); - if (at != -1) { - base_path = base.substring(0, at + 1).toCharArray(); - } - // _path could be empty - if (rel_path == null || rel_path.length == 0) { - return normalize(base_path); - } else if (rel_path[0] == '/') { - return rel_path; - } else { - StringBuffer buff = new StringBuffer(base.length() + - rel_path.length); - if (at != -1) { - buff.append(base.substring(0, at + 1)); - buff.append(rel_path); - } - return normalize(buff.toString().toCharArray()); - } - } - - - /** - * Get the raw-escaped current hierarchy level in the given path. - * If the last namespace is a collection, the slash mark ('/') should be - * ended with at the last character of the path string. - * - * @param path the path - * @return the current hierarchy level - * @exception IOException no hierarchy level - */ - protected char[] getRawCurrentHierPath(char[] path) throws IOException { - - if (_is_opaque_part) { - throw new IOException(/*IOException.PARSING,*/ "URI: no hierarchy level"); - } - if (path == null) { - throw new IOException(/*IOException.PARSING,*/ "URI: emtpy path"); - } - String buff = new String(path); - int first = buff.indexOf('/'); - int last = buff.lastIndexOf('/'); - if (last == 0) { - return rootPath; - } else if (first != last && last != -1) { - return buff.substring(0, last).toCharArray(); - } - // FIXME: it could be a document on the server side - return path; - } - - - /** - * Get the raw-escaped current hierarchy level. - * - * @return the raw-escaped current hierarchy level - * @exception IOException no hierarchy level - */ - public char[] getRawCurrentHierPath() throws IOException { - return (_path == null) ? null : getRawCurrentHierPath(_path); - } - - - /** - * Get the escaped current hierarchy level. 
- * - * @return the escaped current hierarchy level - * @exception IOException no hierarchy level - */ - public String getEscapedCurrentHierPath() throws IOException { - char[] path = getRawCurrentHierPath(); - return (path == null) ? null : new String(path); - } - - - /** - * Get the current hierarchy level. - * - * @return the current hierarchy level - * @exception IOException - * @see #decode - */ - public String getCurrentHierPath() throws IOException { - char[] path = getRawCurrentHierPath(); - return (path == null) ? null : decode(path); - } - - - /** - * Get the level above the this hierarchy level. - * - * @return the raw above hierarchy level - * @exception IOException - */ - public char[] getRawAboveHierPath() throws IOException { - char[] path = getRawCurrentHierPath(); - return (path == null) ? null : getRawCurrentHierPath(path); - } - - - /** - * Get the level above the this hierarchy level. - * - * @return the raw above hierarchy level - * @exception IOException - */ - public String getEscapedAboveHierPath() throws IOException { - char[] path = getRawAboveHierPath(); - return (path == null) ? null : new String(path); - } - - - /** - * Get the level above the this hierarchy level. - * - * @return the above hierarchy level - * @exception IOException - * @see #decode - */ - public String getAboveHierPath() throws IOException { - char[] path = getRawAboveHierPath(); - return (path == null) ? null : decode(path); - } - - - /** - * Get the raw-escaped path. - *

-     *   path          = [ abs_path | opaque_part ]
-     * 

- * - * @return the raw-escaped path - */ - public char[] getRawPath() { - return _is_opaque_part ? _opaque : _path; - } - - - /** - * Get the escaped path. - *

-     *   path          = [ abs_path | opaque_part ]
-     *   abs_path      = "/"  path_segments 
-     *   opaque_part   = uric_no_slash *uric
-     * 

- * - * @return the escaped path string - */ - public String getEscapedPath() { - char[] path = getRawPath(); - return (path == null) ? null : new String(path); - } - - - /** - * Get the path. - *

-     *   path          = [ abs_path | opaque_part ]
-     * 

- * @return the path string - * @exception IOException - * @see #decode - */ - public String getPath() throws IOException { - char[] path = getRawPath(); - return (path == null) ? null : decode(path); - } - - - /** - * Get the raw-escaped basename of the path. - * - * @return the raw-escaped basename - */ - public char[] getRawName() { - if (_path == null) return null; - - int at = 0; - for (int i = _path.length - 1; i >= 0; i--) { - if (_path[i] == '/') { - at = i + 1; - break; - } - } - int len = _path.length - at; - char[] basename = new char[len]; - System.arraycopy(_path, at, basename, 0, len); - return basename; - } - - - /** - * Get the escaped basename of the path. - * - * @return the escaped basename string - */ - public String getEscapedName() { - char[] basename = getRawName(); - return (basename == null) ? null : new String(basename); - } - - - /** - * Get the basename of the path. - * - * @return the basename string - * @exception IOException incomplete trailing escape pattern - * Or unsupported character encoding - * @see #decode - */ - public String getName() throws IOException { - char[] basename = getRawName(); - return (basename == null) ? null : decode(getRawName()); - } - - // ----------------------------------------------------- The path and query - - /** - * Get the raw-escaped path and query. - * - * @return the raw-escaped path and query - */ - public char[] getRawPathQuery() { - - if (_path == null && _query == null) { - return null; - } - StringBuffer buff = new StringBuffer(); - if (_path != null) { - buff.append(_path); - } - if (_query != null) { - buff.append('?'); - buff.append(_query); - } - return buff.toString().toCharArray(); - } - - - /** - * Get the escaped query. - * - * @return the escaped path and query string - */ - public String getEscapedPathQuery() { - char[] rawPathQuery = getRawPathQuery(); - return (rawPathQuery == null) ? null : new String(rawPathQuery); - } - - - /** - * Get the path and query. 
- * - * @return the path and query string. - * @exception IOException incomplete trailing escape pattern - * Or unsupported character encoding - * @see #decode - */ - public String getPathQuery() throws IOException { - char[] rawPathQuery = getRawPathQuery(); - return (rawPathQuery == null) ? null : decode(rawPathQuery); - } - - // -------------------------------------------------------------- The query - - /** - * Set the raw-escaped query. - * - * @param escapedQuery the raw-escaped query - * @exception IOException escaped query not valid - * @throws NullPointerException null query - */ - public void setRawQuery(char[] escapedQuery) throws IOException { - if (!validate(escapedQuery, query)) - throw new IOException(/*IOException.ESCAPING,*/ - "URI: escaped query not valid"); - _query = escapedQuery; - setUriReference(); - } - - - /** - * Set the escaped query string. - * - * @param escapedQuery the escaped query string - * @exception IOException escaped query not valid - * @throws NullPointerException null query - */ - public void setEscapedQuery(String escapedQuery) throws IOException { - setRawQuery(escapedQuery.toCharArray()); - } - - - /** - * Set the query. - * When a query string is not misunderstood the reserved special characters - * ("&", "=", "+", ",", and "$") within a query component, it is - * recommended to use in encoding the whole query with this method. - * - * @param query the query string. - * @exception IOException incomplete trailing escape pattern - * Or unsupported character encoding - * @throws NullPointerException null query - * @see #encode - */ - public void setQuery(String query) throws IOException { - setRawQuery(encode(query, allowed_query)); - } - - - /** - * Get the raw-escaped query. - * - * @return the raw-escaped query - */ - public char[] getRawQuery() { - return _query; - } - - - /** - * Get the escaped query. - * - * @return the escaped query string - */ - public String getEscapedQuery() { - return (_query == null) ? 
null : new String(_query); - } - - - /** - * Get the query. - * - * @return the query string. - * @exception IOException incomplete trailing escape pattern - * Or unsupported character encoding - * @see #decode - */ - public String getQuery() throws IOException { - return (_query == null) ? null : decode(_query); - } - - // ----------------------------------------------------------- The fragment - - /** - * Set the raw-escaped fragment. - * - * @param escapedFragment the raw-escaped fragment - * @exception IOException escaped fragment not valid - * @throws NullPointerException null fragment - */ - public void setRawFragment(char[] escapedFragment) throws IOException { - if (!validate(escapedFragment, fragment)) - throw new IOException(/*IOException.ESCAPING,*/ - "URI: escaped fragment not valid"); - _fragment = escapedFragment; - setUriReference(); - } - - - /** - * Set the escaped fragment string. - * - * @param escapedFragment the escaped fragment string - * @exception IOException escaped fragment not valid - * @throws NullPointerException null fragment - */ - public void setEscapedFragment(String escapedFragment) throws IOException { - char[] fragmentSequence = escapedFragment.toCharArray(); - if (!validate(fragmentSequence, fragment)) - throw new IOException(/*IOException.ESCAPING,*/ - "URI: escaped fragment not valid"); - _fragment = fragmentSequence; - setUriReference(); - } - - - /** - * Set the fragment. - * - * @param the fragment string. - * @exception IOException - * Or unsupported character encoding - * @throws NullPointerException null fragment - */ - public void setFragment(String fragment) throws IOException { - _fragment = encode(fragment, allowed_fragment); - setUriReference(); - } - - - /** - * Get the raw-escaped fragment. - *

- * The optional fragment identifier is not part of a URI, but is often used - * in conjunction with a URI. - *

- * The format and interpretation of fragment identifiers is dependent on - * the media type [RFC2046] of the retrieval result. - *

- * A fragment identifier is only meaningful when a URI reference is - * intended for retrieval and the result of that retrieval is a document - * for which the identified fragment is consistently defined. - * - * @return the raw-escaped fragment - */ - public char[] getRawFragment() { - return _fragment; - } - - - /** - * Get the escaped fragment. - * - * @return the escaped fragment string - */ - public String getEscapedFragment() { - return (_fragment == null) ? null : new String(_fragment); - } - - - /** - * Get the fragment. - * - * @return the fragment string - * @exception IOException incomplete trailing escape pattern - * Or unsupported character encoding - * @see #decode - */ - public String getFragment() throws IOException { - return (_fragment == null) ? null : decode(_fragment); - } - - // ------------------------------------------------------------- Utilities - - /** - * Normalize the given hier path part. - * - * @param path the path to normalize - * @return the normalized path - */ - protected char[] normalize(char[] path) { - - if (path == null) return null; - - String normalized = new String(path); - boolean endsWithSlash = true; - // precondition - if (!normalized.endsWith("/")) { - normalized += '/'; - endsWithSlash = false; - } - if (normalized.endsWith("/./") || normalized.endsWith("/../")) { - endsWithSlash = true; - } - // Resolve occurrences of "/./" in the normalized path - while (true) { - int at = normalized.indexOf("/./"); - if (at == -1) { - break; - } - normalized = normalized.substring(0, at) + - normalized.substring(at + 2); - } - // Resolve occurrences of "/../" in the normalized path - while (true) { - int at = normalized.indexOf("/../"); - if (at == -1) { - break; - } - if (at == 0) { - normalized = "/"; - break; - } - int backward = normalized.lastIndexOf('/', at - 1); - if (backward == -1) { - // consider the rel_path - normalized = normalized.substring(at + 4); - } else { - normalized = normalized.substring(0, backward) + - 
normalized.substring(at + 3); - } - } - // Resolve occurrences of "//" in the normalized path - while (true) { - int at = normalized.indexOf("//"); - if (at == -1) { - break; - } - normalized = normalized.substring(0, at) + - normalized.substring(at + 1); - } - if (!endsWithSlash && normalized.endsWith("/")) { - normalized = normalized.substring(0, normalized.length()-1); - } else if (endsWithSlash && !normalized.endsWith("/")) { - normalized = normalized + "/"; - } - // Set the normalized path that we have completed - return normalized.toCharArray(); - } - - - /** - * Normalize the path part of this URI. - */ - public void normalize() { - _path = normalize(_path); - } - - - /** - * Test if the first array is equal to the second array. - * - * @param first the first character array - * @param second the second character array - * @return true if they're equal - */ - protected boolean equals(char[] first, char[] second) { - - if (first == null && second == null) { - return true; - } - if (first == null || second == null) { - return false; - } - if (first.length != second.length) { - return false; - } - for (int i = 0; i < first.length; i++) { - if (first[i] != second[i]) { - return false; - } - } - return true; - } - - - /** - * Test an object if this URI is equal to another. - * - * @param obj an object to compare - * @return true if two URI objects are equal - */ - public boolean equals(Object obj) { - - // normalize and test each components - if (obj == this) { - return true; - } - if (!(obj instanceof URI)) { - return false; - } - URI another = (URI) obj; - // scheme - if (!equals(_scheme, another._scheme)) { - return false; - } - // is_opaque_part or is_hier_part? 
and opaque - if (!equals(_opaque, another._opaque)) { - return false; - } - // is_hier_part - // has_authority - if (!equals(_authority, another._authority)) { - return false; - } - // path - if (!equals(_path, another._path)) { - return false; - } - // has_query - if (!equals(_query, another._query)) { - return false; - } - // has_fragment? should be careful of the only fragment case. - if (!equals(_fragment, another._fragment)) { - return false; - } - return true; - } - - // ---------------------------------------------------------- Serialization - - /** - * Write the content of this URI. - * - * @param oos the object-output stream - */ - protected void writeObject(java.io.ObjectOutputStream oos) - throws IOException { - - oos.defaultWriteObject(); - } - - - /** - * Read a URI. - * - * @param ois the object-input stream - */ - protected void readObject(java.io.ObjectInputStream ois) - throws ClassNotFoundException, IOException { - - ois.defaultReadObject(); - } - - // ------------------------------------------------------------- Comparison - - /** - * Compare this URI to another object. - * - * @param obj the object to be compared. - * @return 0, if it's same, - * -1, if failed, first being compared with in the authority component - * @exception ClassCastException not URI argument - * @throws NullPointerException null object - */ - public int compareTo(Object obj) { - - URI another = (URI) obj; - if (!equals(_authority, another.getRawAuthority())) return -1; - return toString().compareTo(another.toString()); - } - - // ------------------------------------------------------------------ Clone - - /** - * Create and return a copy of this object, the URI-reference containing - * the userinfo component. Notice that the whole URI-reference including - * the userinfo component counld not be gotten as a String. - *

- * To copy the identical URI object including the userinfo - * component, it should be used. - * - * @return a clone of this instance - */ - public synchronized Object clone() { - - URI instance = new URI(); - - instance._uri = _uri; - instance._scheme = _scheme; - instance._opaque = _opaque; - instance._authority = _authority; - instance._userinfo = _userinfo; - instance._host = _host; - instance._port = _port; - instance._path = _path; - instance._query = _query; - instance._fragment = _fragment; - // flags - instance._is_hier_part = _is_hier_part; - instance._is_opaque_part = _is_opaque_part; - instance._is_net_path = _is_net_path; - instance._is_abs_path = _is_abs_path; - instance._is_rel_path = _is_rel_path; - instance._is_reg_name = _is_reg_name; - instance._is_server = _is_server; - instance._is_hostname = _is_hostname; - instance._is_IPv4address = _is_IPv4address; - instance._is_IPv6reference = _is_IPv6reference; - - return instance; - } - - // ------------------------------------------------------------ Get the URI - - /** - * It can be gotten the URI character sequence. It's raw-escaped. - * For the purpose of the protocol to be transported, it will be useful. - *

- * It is clearly unwise to use a URL that contains a password which is - * intended to be secret. In particular, the use of a password within - * the 'userinfo' component of a URL is strongly disrecommended except - * in those rare cases where the 'password' parameter is intended to be - * public. - *

- * When you want to get each part of the userinfo, you need to use the - * specific methods in the specific URL. It depends on the specific URL. - * - * @return URI character sequence - */ - public char[] getRawURI() { - return _uri; - } - - - /** - * It can be gotten the URI character sequence. It's escaped. - * For the purpose of the protocol to be transported, it will be useful. - * - * @return the URI string - */ - public String getEscapedURI() { - return (_uri == null) ? null : new String(_uri); - } - - - /** - * It can be gotten the URI character sequence. - * - * @return the URI string - * @exception IOException incomplete trailing escape pattern - * Or unsupported character encoding - * @see #decode - */ - public String getURI() throws IOException { - return (_uri == null) ? null : decode(_uri); - } - - - /** - * Get the escaped URI string. - *

- * On the document, the URI-reference form is only used without the userinfo - * component like http://jakarta.apache.org/ by the security reason. - * But the URI-reference form with the userinfo component could be parsed. - *

- * In other words, this URI and any its subclasses must not expose the - * URI-reference expression with the userinfo component like - * http://user:password@hostport/restricted_zone.
- * It means that the API client programmer should extract each user and - * password to access manually. Probably it will be supported in the each - * subclass, however, not a whole URI-reference expression. - * - * @return the URI string - * @see #clone() - */ - public String toString() { - return getEscapedURI(); - } - - - // ------------------------------------------------------------ Inner class - - /** - * A mapping to determine the (somewhat arbitrarily) preferred charset for - * a given locale. Supports all locales recognized in JDK 1.1. - *

- * The distribution of this class is Servlets.com. It was originally - * written by Jason Hunter [jhunter at acm.org] and used by with permission. - */ - public static class LocaleToCharsetMap { - - private static Hashtable map; - static { - map = new Hashtable(); - map.put("ar", "ISO-8859-6"); - map.put("be", "ISO-8859-5"); - map.put("bg", "ISO-8859-5"); - map.put("ca", "ISO-8859-1"); - map.put("cs", "ISO-8859-2"); - map.put("da", "ISO-8859-1"); - map.put("de", "ISO-8859-1"); - map.put("el", "ISO-8859-7"); - map.put("en", "ISO-8859-1"); - map.put("es", "ISO-8859-1"); - map.put("et", "ISO-8859-1"); - map.put("fi", "ISO-8859-1"); - map.put("fr", "ISO-8859-1"); - map.put("hr", "ISO-8859-2"); - map.put("hu", "ISO-8859-2"); - map.put("is", "ISO-8859-1"); - map.put("it", "ISO-8859-1"); - map.put("iw", "ISO-8859-8"); - map.put("ja", "Shift_JIS"); - map.put("ko", "EUC-KR"); - map.put("lt", "ISO-8859-2"); - map.put("lv", "ISO-8859-2"); - map.put("mk", "ISO-8859-5"); - map.put("nl", "ISO-8859-1"); - map.put("no", "ISO-8859-1"); - map.put("pl", "ISO-8859-2"); - map.put("pt", "ISO-8859-1"); - map.put("ro", "ISO-8859-2"); - map.put("ru", "ISO-8859-5"); - map.put("sh", "ISO-8859-5"); - map.put("sk", "ISO-8859-2"); - map.put("sl", "ISO-8859-2"); - map.put("sq", "ISO-8859-2"); - map.put("sr", "ISO-8859-5"); - map.put("sv", "ISO-8859-1"); - map.put("tr", "ISO-8859-9"); - map.put("uk", "ISO-8859-5"); - map.put("zh", "GB2312"); - map.put("zh_TW", "Big5"); - } - - /** - * Get the preferred charset for the given locale. 
- * - * @param locale the locale - * @return the preferred charset - * or null if the locale is not recognized - */ - public static String getCharset(Locale locale) { - // try for an full name match (may include country) - String charset = (String) map.get(locale.toString()); - if (charset != null) return charset; - - // if a full name didn't match, try just the language - charset = (String) map.get(locale.getLanguage()); - return charset; // may be null - } - - } + // ----------------------------------------------------------- Constructors + + protected URI() { + } + + /** + * Construct a URI as an escaped form of a character array. An URI can be placed + * within double-quotes or angle brackets like "http://test.com/" and + * <http://test.com/> + * + * @param escaped the URI character sequence + * @exception IOException + * @throws NullPointerException if escaped is null + */ + public URI(char[] escaped) throws IOException { + parseUriReference(new String(escaped), true); + } + + /** + * Construct a URI from the given string. + *

+ *

+ * + *
+	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+	 * 
+ * + *
+ *

+ * An URI can be placed within double-quotes or angle brackets like + * "http://test.com/" and <http://test.com/> + * + * @param original the string to be represented to URI character sequence It is + * one of absoluteURI and relativeURI. + * @exception IOException + */ + public URI(String original) throws IOException { + parseUriReference(original, false); + } + + /** + * Construct a URI from a URL. + * + * @param url a valid URL. + * @throws IOException + * @since 2.0 + */ + public URI(URL url) throws IOException { + this(url.toString()); + } + + /** + * Construct a general URI from the given components. + *

+ *

+ * + *
+	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+	 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
+	 *   opaque_part   = uric_no_slash *uric
+	 * 
+ * + *
+ *

+ * It's for absolute URI = <scheme>:<scheme-specific-part># + * <fragment>. + * + * @param scheme the scheme string + * @param scheme_specific_part scheme_specific_part + * @param fragment the fragment string + * @exception IOException + */ + public URI(String scheme, String scheme_specific_part, String fragment) throws IOException { + + // validate and contruct the URI character sequence + if (scheme == null) { + throw new IOException(/* IOException.PARSING, */ "URI: scheme required"); + } + char[] s = scheme.toLowerCase().toCharArray(); + if (validate(s, URI.scheme)) { + _scheme = s; // is_absoluteURI + } else { + throw new IOException(/* IOException.PARSING, */ "URI: incorrect scheme"); + } + _opaque = encode(scheme_specific_part, allowed_opaque_part); + // Set flag + _is_opaque_part = true; + setUriReference(); + } + + /** + * Construct a general URI from the given components. + *

+ *

+ * + *
+	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+	 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
+	 *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
+	 *   hier_part     = ( net_path | abs_path ) [ "?" query ]
+	 * 
+ * + *
+ *

+ * It's for absolute URI = <scheme>:<path>?<query>#< + * fragment> and relative URI = <path>?<query>#<fragment >. + * + * @param scheme the scheme string + * @param authority the authority string + * @param path the path string + * @param query the query string + * @param fragment the fragment string + * @exception IOException + */ + public URI(String scheme, String authority, String path, String query, String fragment) throws IOException { + + // validate and contruct the URI character sequence + StringBuffer buff = new StringBuffer(); + if (scheme != null) { + buff.append(scheme); + buff.append(':'); + } + if (authority != null) { + buff.append("//"); + buff.append(authority); + } + if (path != null) { // accept empty path + if ((scheme != null || authority != null) && !path.startsWith("/")) { + throw new IOException(/* IOException.PARSING*, */ + "URI: abs_path requested"); + } + buff.append(path); + } + if (query != null) { + buff.append('?'); + buff.append(query); + } + if (fragment != null) { + buff.append('#'); + buff.append(fragment); + } + parseUriReference(buff.toString(), false); + } + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @exception IOException + */ + public URI(String scheme, String userinfo, String host, int port) throws IOException { + + this(scheme, userinfo, host, port, null, null, null); + } + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @exception IOException + */ + public URI(String scheme, String userinfo, String host, int port, String path) throws IOException { + + this(scheme, userinfo, host, port, path, null, null); + } + + /** + * Construct a general URI from the given components. 
+ * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @param query the query string + * @exception IOException + */ + public URI(String scheme, String userinfo, String host, int port, String path, String query) throws IOException { + + this(scheme, userinfo, host, port, path, query, null); + } + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param userinfo the userinfo string + * @param host the host string + * @param port the port number + * @param path the path string + * @param query the query string + * @param fragment the fragment string + * @exception IOException + */ + public URI(String scheme, String userinfo, String host, int port, String path, String query, String fragment) + throws IOException { + + this(scheme, + (host == null) ? null + : ((userinfo != null) ? userinfo + '@' : "") + host + ((port != -1) ? ":" + port : ""), + path, query, fragment); + } + + /** + * Construct a general URI from the given components. + * + * @param scheme the scheme string + * @param host the host string + * @param path the path string + * @param fragment the fragment string + * @exception IOException + */ + public URI(String scheme, String host, String path, String fragment) throws IOException { + + this(scheme, host, path, null, fragment); + } + + /** + * Construct a general URI with the given relative URI string. + * + * @param base the base URI + * @param relative the relative URI string + * @exception IOException + */ + public URI(URI base, String relative) throws IOException { + this(base, new URI(relative)); + } + + /** + * Construct a general URI with the given relative URI. + *

+ *

+ * + *
+	 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+	 *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
+	 * 
+ * + *
+ *

+ * Resolving Relative References to Absolute Form. + * + * Examples of Resolving Relative URI References + * + * Within an object with a well-defined base URI of + *

+ *

+ * + *
+	 *   http://a/b/c/d;p?q
+	 * 
+ * + *
+ *

+ * the relative URI would be resolved as follows: + * + * Normal Examples + * + *

+ *

+ * + *
+	 *   g:h           =  g:h
+	 *   g             =  http://a/b/c/g
+	 *   ./g           =  http://a/b/c/g
+	 *   g/            =  http://a/b/c/g/
+	 *   /g            =  http://a/g
+	 *   //g           =  http://g
+	 *   ?y            =  http://a/b/c/?y
+	 *   g?y           =  http://a/b/c/g?y
+	 *   #s            =  (current document)#s
+	 *   g#s           =  http://a/b/c/g#s
+	 *   g?y#s         =  http://a/b/c/g?y#s
+	 *   ;x            =  http://a/b/c/;x
+	 *   g;x           =  http://a/b/c/g;x
+	 *   g;x?y#s       =  http://a/b/c/g;x?y#s
+	 *   .             =  http://a/b/c/
+	 *   ./            =  http://a/b/c/
+	 *   ..            =  http://a/b/
+	 *   ../           =  http://a/b/
+	 *   ../g          =  http://a/b/g
+	 *   ../..         =  http://a/
+	 *   ../../        =  http://a/ 
+	 *   ../../g       =  http://a/g
+	 * 
+ * + *
+ *

+ * + * Some URI schemes do not allow a hierarchical syntax matching the + * syntax, and thus cannot use relative references. + * + * @param base the base URI + * @param relative the relative URI + * @exception IOException + */ + public URI(URI base, URI relative) throws IOException { + + if (base._scheme == null) { + throw new IOException(/* IOException.PARSING, */ "URI: base URI required"); + } + if (base._scheme != null) { + this._scheme = base._scheme; + this._authority = base._authority; + } + if (base._is_opaque_part || relative._is_opaque_part) { + this._scheme = base._scheme; + this._is_opaque_part = relative._is_opaque_part; + this._opaque = relative._opaque; + this._fragment = relative._fragment; + this.setUriReference(); + return; + } + if (relative._scheme != null) { + this._scheme = relative._scheme; + this._is_net_path = relative._is_net_path; + this._authority = relative._authority; + if (relative._is_server) { + this._userinfo = relative._userinfo; + this._host = relative._host; + this._port = relative._port; + } else if (relative._is_reg_name) { + this._is_reg_name = relative._is_reg_name; + } + this._is_abs_path = relative._is_abs_path; + this._is_rel_path = relative._is_rel_path; + this._path = relative._path; + } else if (base._authority != null && relative._scheme == null) { + this._is_net_path = base._is_net_path; + this._authority = base._authority; + if (base._is_server) { + this._userinfo = base._userinfo; + this._host = base._host; + this._port = base._port; + } else if (base._is_reg_name) { + this._is_reg_name = base._is_reg_name; + } + } + if (relative._authority != null) { + this._is_net_path = relative._is_net_path; + this._authority = relative._authority; + if (relative._is_server) { + this._is_server = relative._is_server; + this._userinfo = relative._userinfo; + this._host = relative._host; + this._port = relative._port; + } else if (relative._is_reg_name) { + this._is_reg_name = relative._is_reg_name; + } + this._is_abs_path = 
relative._is_abs_path; + this._is_rel_path = relative._is_rel_path; + this._path = relative._path; + } + // resolve the path + if (relative._scheme == null && relative._authority == null || equals(base._scheme, relative._scheme)) { + this._path = resolvePath(base._path, relative._path); + } + // base._query removed + if (relative._query != null) { + this._query = relative._query; + } + // base._fragment removed + if (relative._fragment != null) { + this._fragment = relative._fragment; + } + this.setUriReference(); + } + + // --------------------------------------------------- Instance Variables + + static final long serialVersionUID = 604752400577948726L; + + /** + * This Uniform Resource Identifier (URI). The URI is always in an "escaped" + * form, since escaping or unescaping a completed URI might change its + * semantics. + */ + protected char[] _uri = null; + + /** + * The default charset of the protocol. RFC 2277, 2396 + */ + protected static String _protocolCharset = "UTF-8"; + + /** + * The default charset of the document. RFC 2277, 2396 The platform's charset is + * used for the document by default. + */ + protected static String _documentCharset = null; + // Static initializer for _documentCharset + static { + Locale locale = Locale.getDefault(); + if (locale != null) { + // in order to support backward compatiblity + _documentCharset = LocaleToCharsetMap.getCharset(locale); + } else { + _documentCharset = (String) AccessController.doPrivileged(new GetPropertyAction("file.encoding")); + } + } + + /** + * The scheme. + */ + protected char[] _scheme = null; + + /** + * The opaque. + */ + protected char[] _opaque = null; + + /** + * The authority. + */ + protected char[] _authority = null; + + /** + * The userinfo. + */ + protected char[] _userinfo = null; + + /** + * The host. + */ + protected char[] _host = null; + + /** + * The port. + */ + protected int _port = -1; + + /** + * The path. + */ + protected char[] _path = null; + + /** + * The query. 
+ */ + protected char[] _query = null; + + /** + * The fragment. + */ + protected char[] _fragment = null; + + /** + * The root path. + */ + protected static char[] rootPath = { '/' }; + + // ---------------------- Generous characters for each component validation + + /** + * The percent "%" character always has the reserved purpose of being the escape + * indicator, it must be escaped as "%25" in order to be used as data within a + * URI. + */ + protected static final BitSet percent = new BitSet(256); + // Static initializer for percent + static { + percent.set('%'); + } + + /** + * BitSet for digit. + *

+ *

+ * + *
+	 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet digit = new BitSet(256); + // Static initializer for digit + static { + for (int i = '0'; i <= '9'; i++) { + digit.set(i); + } + } + + /** + * BitSet for alpha. + *

+ *

+ * + *
+	 * alpha = lowalpha | upalpha
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet alpha = new BitSet(256); + // Static initializer for alpha + static { + for (int i = 'a'; i <= 'z'; i++) { + alpha.set(i); + } + for (int i = 'A'; i <= 'Z'; i++) { + alpha.set(i); + } + } + + /** + * BitSet for alphanum (join of alpha & digit). + *

+ *

+ * + *
+	 * alphanum = alpha | digit
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet alphanum = new BitSet(256); + // Static initializer for alphanum + static { + alphanum.or(alpha); + alphanum.or(digit); + } + + /** + * BitSet for hex. + *

+ *

+ * + *
+	 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f"
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet hex = new BitSet(256); + // Static initializer for hex + static { + hex.or(digit); + for (int i = 'a'; i <= 'f'; i++) { + hex.set(i); + } + for (int i = 'A'; i <= 'F'; i++) { + hex.set(i); + } + } + + /** + * BitSet for escaped. + *

+ *

+ * + *
+	 * escaped       = "%" hex hex
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet escaped = new BitSet(256); + // Static initializer for escaped + static { + escaped.or(percent); + escaped.or(hex); + } + + /** + * BitSet for mark. + *

+ *

+ * + *
+	 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet mark = new BitSet(256); + // Static initializer for mark + static { + mark.set('-'); + mark.set('_'); + mark.set('.'); + mark.set('!'); + mark.set('~'); + mark.set('*'); + mark.set('\''); + mark.set('('); + mark.set(')'); + } + + /** + * Data characters that are allowed in a URI but do not have a reserved purpose + * are called unreserved. + *

+ *

+ * + *
+	 * unreserved = alphanum | mark
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet unreserved = new BitSet(256); + // Static initializer for unreserved + static { + unreserved.or(alphanum); + unreserved.or(mark); + } + + /** + * BitSet for reserved. + *

+ *

+ * + *
+	 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet reserved = new BitSet(256); + // Static initializer for reserved + static { + reserved.set(';'); + reserved.set('/'); + reserved.set('?'); + reserved.set(':'); + reserved.set('@'); + reserved.set('&'); + reserved.set('='); + reserved.set('+'); + reserved.set('$'); + reserved.set(','); + } + + /** + * BitSet for uric. + *

+ *

+ * + *
+	 * uric = reserved | unreserved | escaped
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet uric = new BitSet(256); + // Static initializer for uric + static { + uric.or(reserved); + uric.or(unreserved); + uric.or(escaped); + } + + /** + * BitSet for fragment (alias for uric). + *

+ *

+ * + *
+	 * fragment      = *uric
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet fragment = uric; + + /** + * BitSet for query (alias for uric). + *

+ *

+ * + *
+	 * query         = *uric
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet query = uric; + + /** + * BitSet for pchar. + *

+ *

+ * + *
+	 * pchar = unreserved | escaped | ":" | "@" | "&" | "=" | "+" | "$" | ","
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet pchar = new BitSet(256); + // Static initializer for pchar + static { + pchar.or(unreserved); + pchar.or(escaped); + pchar.set(':'); + pchar.set('@'); + pchar.set('&'); + pchar.set('='); + pchar.set('+'); + pchar.set('$'); + pchar.set(','); + } + + /** + * BitSet for param (alias for pchar). + *

+ *

+ * + *
+	 * param         = *pchar
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet param = pchar; + + /** + * BitSet for segment. + *

+ *

+ * + *
+	 * segment       = *pchar *( ";" param )
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet segment = new BitSet(256); + // Static initializer for segment + static { + segment.or(pchar); + segment.set(';'); + segment.or(param); + } + + /** + * BitSet for path segments. + *

+ *

+ * + *
+	 * path_segments = segment *( "/" segment )
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet path_segments = new BitSet(256); + // Static initializer for path_segments + static { + path_segments.set('/'); + path_segments.or(segment); + } + + /** + * URI absolute path. + *

+ *

+ * + *
+	 * abs_path      = "/"  path_segments
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet abs_path = new BitSet(256); + // Static initializer for abs_path + static { + abs_path.set('/'); + abs_path.or(path_segments); + } + + /** + * URI bitset for encoding typical non-slash characters. + *

+ *

+ * + *
+	 * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet uric_no_slash = new BitSet(256); + // Static initializer for uric_no_slash + static { + uric_no_slash.or(unreserved); + uric_no_slash.or(escaped); + uric_no_slash.set(';'); + uric_no_slash.set('?'); + uric_no_slash.set(':'); // was a duplicated set(';'), which left ':' out of the set + uric_no_slash.set('@'); + uric_no_slash.set('&'); + uric_no_slash.set('='); + uric_no_slash.set('+'); + uric_no_slash.set('$'); + uric_no_slash.set(','); + } + + /** + * URI bitset that combines uric_no_slash and uric. + *

+ *

+ * + *
+	 * opaque_part = uric_no_slash * uric
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet opaque_part = new BitSet(256); + // Static initializer for opaque_part + static { + opaque_part.or(uric_no_slash); + opaque_part.or(uric); + } + + /** + * URI bitset that combines absolute path and opaque part. + *

+ *

+ * + *
+	 * path          = [ abs_path | opaque_part ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet path = new BitSet(256); + // Static initializer for path + static { + path.or(abs_path); + path.or(opaque_part); + } + + /** + * Port, a logical alias for digit. + */ + protected static final BitSet port = digit; + + /** + * Bitset that combines digit and dot for IPv4address. + *

+ *

+ * + *
+	 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet IPv4address = new BitSet(256); + // Static initializer for IPv4address + static { + IPv4address.or(digit); + IPv4address.set('.'); + } + + /** + * RFC 2373. + *

+ *

+ * + *
+	 * IPv6address = hexpart [ ":" IPv4address ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet IPv6address = new BitSet(256); + // Static initializer for IPv6address reference + static { + IPv6address.or(hex); // hexpart + IPv6address.set(':'); + IPv6address.or(IPv4address); + } + + /** + * RFC 2732, 2373. + *

+ *

+ * + *
+	 * IPv6reference   = "[" IPv6address "]"
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet IPv6reference = new BitSet(256); + // Static initializer for IPv6reference + static { + IPv6reference.set('['); + IPv6reference.or(IPv6address); + IPv6reference.set(']'); + } + + /** + * BitSet for toplabel. + *

+ *

+ * + *
+	 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet toplabel = new BitSet(256); + // Static initializer for toplabel + static { + toplabel.or(alphanum); + toplabel.set('-'); + } + + /** + * BitSet for domainlabel. + *

+ *

+ * + *
+	 * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet domainlabel = toplabel; + + /** + * BitSet for hostname. + *

+ *

+ * + *
+	 * hostname      = *( domainlabel "." ) toplabel [ "." ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet hostname = new BitSet(256); + // Static initializer for hostname + static { + hostname.or(toplabel); + // hostname.or(domainlabel); + hostname.set('.'); + } + + /** + * BitSet for host. + *

+ *

+ * + *
+	 * host = hostname | IPv4address | IPv6reference
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet host = new BitSet(256); + // Static initializer for host + static { + host.or(hostname); + // host.or(IPv4address); + host.or(IPv6reference); // IPv4address + } + + /** + * BitSet for hostport. + *

+ *

+ * + *
+	 * hostport      = host [ ":" port ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet hostport = new BitSet(256); + // Static initializer for hostport + static { + hostport.or(host); + hostport.set(':'); + hostport.or(port); + } + + /** + * Bitset for userinfo. + *

+ *

+ * + *
+	 * userinfo      = *( unreserved | escaped |
+	 *                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet userinfo = new BitSet(256); + // Static initializer for userinfo + static { + userinfo.or(unreserved); + userinfo.or(escaped); + userinfo.set(';'); + userinfo.set(':'); + userinfo.set('&'); + userinfo.set('='); + userinfo.set('+'); + userinfo.set('$'); + userinfo.set(','); + } + + /** + * BitSet for within the userinfo component like user and password. + */ + public static final BitSet within_userinfo = new BitSet(256); + // Static initializer for within_userinfo + static { + within_userinfo.or(userinfo); + within_userinfo.clear(';'); // reserved within authority + within_userinfo.clear(':'); + within_userinfo.clear('@'); + within_userinfo.clear('?'); + within_userinfo.clear('/'); + } + + /** + * Bitset for server. + *

+ *

+ * + *
+	 * server        = [ [ userinfo "@" ] hostport ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet server = new BitSet(256); + // Static initializer for server + static { + server.or(userinfo); + server.set('@'); + server.or(hostport); + } + + /** + * BitSet for reg_name. + *

+ *

+ * + *
+	 * reg_name = 1 * (unreserved | escaped | "$" | "," | ";" | ":" | "@" | "&" | "=" | "+")
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet reg_name = new BitSet(256); + // Static initializer for reg_name + static { + reg_name.or(unreserved); + reg_name.or(escaped); + reg_name.set('$'); + reg_name.set(','); + reg_name.set(';'); + reg_name.set(':'); + reg_name.set('@'); + reg_name.set('&'); + reg_name.set('='); + reg_name.set('+'); + } + + /** + * BitSet for authority. + *

+ *

+ * + *
+	 * authority = server | reg_name
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet authority = new BitSet(256); + // Static initializer for authority + static { + authority.or(server); + authority.or(reg_name); + } + + /** + * BitSet for scheme. + *

+ *

+ * + *
+	 * scheme = alpha * (alpha | digit | "+" | "-" | ".")
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet scheme = new BitSet(256); + // Static initializer for scheme + static { + scheme.or(alpha); + scheme.or(digit); + scheme.set('+'); + scheme.set('-'); + scheme.set('.'); + } + + /** + * BitSet for rel_segment. + *

+ *

+ * + *
+	 * rel_segment = 1 * (unreserved | escaped | ";" | "@" | "&" | "=" | "+" | "$" | ",")
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet rel_segment = new BitSet(256); + // Static initializer for rel_segment + static { + rel_segment.or(unreserved); + rel_segment.or(escaped); + rel_segment.set(';'); + rel_segment.set('@'); + rel_segment.set('&'); + rel_segment.set('='); + rel_segment.set('+'); + rel_segment.set('$'); + rel_segment.set(','); + } + + /** + * BitSet for rel_path. + *

+ *

+ * + *
+	 * rel_path = rel_segment[abs_path]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet rel_path = new BitSet(256); + // Static initializer for rel_path + static { + rel_path.or(rel_segment); + rel_path.or(abs_path); + } + + /** + * BitSet for net_path. + *

+ *

+ * + *
+	 * net_path      = "//" authority [ abs_path ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet net_path = new BitSet(256); + // Static initializer for net_path + static { + net_path.set('/'); + net_path.or(authority); + net_path.or(abs_path); + } + + /** + * BitSet for hier_part. + *

+ *

+ * + *
+	 * hier_part     = ( net_path | abs_path ) [ "?" query ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet hier_part = new BitSet(256); + // Static initializer for hier_part + static { + hier_part.or(net_path); + hier_part.or(abs_path); + // hier_part.set('?'); already included + hier_part.or(query); + } + + /** + * BitSet for relativeURI. + *

+ *

+ * + *
+	 * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet relativeURI = new BitSet(256); + // Static initializer for relativeURI + static { + relativeURI.or(net_path); + relativeURI.or(abs_path); + relativeURI.or(rel_path); + // relativeURI.set('?'); already included + relativeURI.or(query); + } + + /** + * BitSet for absoluteURI. + *

+ *

+ * + *
+	 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet absoluteURI = new BitSet(256); + // Static initializer for absoluteURI + static { + absoluteURI.or(scheme); + absoluteURI.set(':'); + absoluteURI.or(hier_part); + absoluteURI.or(opaque_part); + } + + /** + * BitSet for URI-reference. + *

+ *

+ * + *
+	 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+	 * 
+ * + *
+ *

+ */ + protected static final BitSet URI_reference = new BitSet(256); + // Static initializer for URI_reference + static { + URI_reference.or(absoluteURI); + URI_reference.or(relativeURI); + URI_reference.set('#'); + URI_reference.or(fragment); + } + + // ---------------------------- Characters disallowed within the URI syntax + // Excluded US-ASCII Characters are like control, space, delims and unwise + + /** + * BitSet for control. + */ + public static final BitSet control = new BitSet(256); + // Static initializer for control + static { + for (int i = 0; i <= 0x1F; i++) { + control.set(i); + } + control.set(0x7F); + } + + /** + * BitSet for space. + */ + public static final BitSet space = new BitSet(256); + // Static initializer for space + static { + space.set(0x20); + } + + /** + * BitSet for delims. + */ + public static final BitSet delims = new BitSet(256); + // Static initializer for delims + static { + delims.set('<'); + delims.set('>'); + delims.set('#'); + delims.set('%'); + delims.set('"'); + } + + /** + * BitSet for unwise. + */ + public static final BitSet unwise = new BitSet(256); + // Static initializer for unwise + static { + unwise.set('{'); + unwise.set('}'); + unwise.set('|'); + unwise.set('\\'); + unwise.set('^'); + unwise.set('['); + unwise.set(']'); + unwise.set('`'); + } + + /** + * Disallowed rel_path before escaping. + */ + public static final BitSet disallowed_rel_path = new BitSet(256); + // Static initializer for disallowed_rel_path + static { + disallowed_rel_path.or(uric); + disallowed_rel_path.andNot(rel_path); + } + + /** + * Disallowed opaque_part before escaping. 
+ */ + public static final BitSet disallowed_opaque_part = new BitSet(256); + // Static initializer for disallowed_opaque_part + static { + disallowed_opaque_part.or(uric); + disallowed_opaque_part.andNot(opaque_part); + } + + // ----------------------- Characters allowed within and for each component + + /** + * Those characters that are allowed for the authority component. + */ + public static final BitSet allowed_authority = new BitSet(256); + // Static initializer for allowed_authority + static { + allowed_authority.or(authority); + allowed_authority.clear('%'); + } + + /** + * Those characters that are allowed for the opaque_part. + */ + public static final BitSet allowed_opaque_part = new BitSet(256); + // Static initializer for allowed_opaque_part + static { + allowed_opaque_part.or(opaque_part); + allowed_opaque_part.clear('%'); + } + + /** + * Those characters that are allowed for the reg_name. + */ + public static final BitSet allowed_reg_name = new BitSet(256); + // Static initializer for allowed_reg_name + static { + allowed_reg_name.or(reg_name); + // allowed_reg_name.andNot(percent); + allowed_reg_name.clear('%'); + } + + /** + * Those characters that are allowed for the userinfo component. + */ + public static final BitSet allowed_userinfo = new BitSet(256); + // Static initializer for allowed_userinfo + static { + allowed_userinfo.or(userinfo); + // allowed_userinfo.andNot(percent); + allowed_userinfo.clear('%'); + } + + /** + * Those characters that are allowed for within the userinfo component. + */ + public static final BitSet allowed_within_userinfo = new BitSet(256); + // Static initializer for allowed_within_userinfo + static { + allowed_within_userinfo.or(within_userinfo); + allowed_within_userinfo.clear('%'); + } + + /** + * Those characters that are allowed for the IPv6reference component. The + * characters '[', ']' in IPv6reference should be excluded. 
+ */ + public static final BitSet allowed_IPv6reference = new BitSet(256); + // Static initializer for allowed_IPv6reference + static { + allowed_IPv6reference.or(IPv6reference); + // allowed_IPv6reference.andNot(unwise); + allowed_IPv6reference.clear('['); + allowed_IPv6reference.clear(']'); + } + + /** + * Those characters that are allowed for the host component. The characters '[', + * ']' in IPv6reference should be excluded. + */ + public static final BitSet allowed_host = new BitSet(256); + // Static initializer for allowed_host + static { + allowed_host.or(hostname); + allowed_host.or(allowed_IPv6reference); + } + + /** + * Those characters that are allowed for the authority component. + */ + public static final BitSet allowed_within_authority = new BitSet(256); + // Static initializer for allowed_within_authority + static { + allowed_within_authority.or(server); + allowed_within_authority.or(reg_name); + allowed_within_authority.clear(';'); + allowed_within_authority.clear(':'); + allowed_within_authority.clear('@'); + allowed_within_authority.clear('?'); + allowed_within_authority.clear('/'); + } + + /** + * Those characters that are allowed for the abs_path. + */ + public static final BitSet allowed_abs_path = new BitSet(256); + // Static initializer for allowed_abs_path + static { + allowed_abs_path.or(abs_path); + // allowed_abs_path.set('/'); // aleady included + allowed_abs_path.andNot(percent); + } + + /** + * Those characters that are allowed for the rel_path. + */ + public static final BitSet allowed_rel_path = new BitSet(256); + // Static initializer for allowed_rel_path + static { + allowed_rel_path.or(rel_path); + allowed_rel_path.clear('%'); + } + + /** + * Those characters that are allowed within the path. 
+ */ + public static final BitSet allowed_within_path = new BitSet(256); + // Static initializer for allowed_within_path + static { + allowed_within_path.or(abs_path); + allowed_within_path.clear('/'); + allowed_within_path.clear(';'); + allowed_within_path.clear('='); + allowed_within_path.clear('?'); + } + + /** + * Those characters that are allowed for the query component. + */ + public static final BitSet allowed_query = new BitSet(256); + // Static initializer for allowed_query + static { + allowed_query.or(uric); + allowed_query.clear('%'); + } + + /** + * Those characters that are allowed within the query component. + */ + public static final BitSet allowed_within_query = new BitSet(256); + // Static initializer for allowed_within_query + static { + allowed_within_query.or(allowed_query); + allowed_within_query.andNot(reserved); // excluded 'reserved' + allowed_within_query.clear('#'); // avoid confict with the fragment + } + + /** + * Those characters that are allowed for the fragment component. + */ + public static final BitSet allowed_fragment = new BitSet(256); + // Static initializer for allowed_fragment + static { + allowed_fragment.or(uric); + allowed_fragment.clear('%'); + } + + // ------------------------------------------- Flags for this URI-reference + + // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + // absoluteURI = scheme ":" ( hier_part | opaque_part ) + protected boolean _is_hier_part; + protected boolean _is_opaque_part; + // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] + // hier_part = ( net_path | abs_path ) [ "?" 
query ] + protected boolean _is_net_path; + protected boolean _is_abs_path; + protected boolean _is_rel_path; + // net_path = "//" authority [ abs_path ] + // authority = server | reg_name + protected boolean _is_reg_name; + protected boolean _is_server; // = _has_server + // server = [ [ userinfo "@" ] hostport ] + // host = hostname | IPv4address | IPv6reference + protected boolean _is_hostname; + protected boolean _is_IPv4address; + protected boolean _is_IPv6reference; + + // ------------------------------------------ Character and escape encoding + + /** + * Encode with the default protocol charset. + * + * @param original the original character sequence + * @param allowed those characters that are allowed within a component + * @return URI character sequence + * @exception IOException null component or unsupported character encoding + */ + protected static char[] encode(String original, BitSet allowed) throws IOException { + + return encode(original, allowed, _protocolCharset); + } + + /** + * Encodes URI string. + * + * This is a two mapping, one from original characters to octets, and + * subsequently a second from octets to URI characters: + *

+ *

+ * + *
+	 *   original character sequence->octet sequence->URI character sequence
+	 * 
+ * + *
+ *

+ * + * An escaped octet is encoded as a character triplet, consisting of the percent + * character "%" followed by the two hexadecimal digits representing the octet + * code. For example, "%20" is the escaped encoding for the US-ASCII space + * character. + *

+ * Conversion from the local filesystem character set to UTF-8 will normally + * involve a two step process. First convert the local character set to the UCS; + * then convert the UCS to UTF-8. The first step in the process can be performed + * by maintaining a mapping table that includes the local character set code and + * the corresponding UCS code. The next step is to convert the UCS character + * code to the UTF-8 encoding. + *

+ * Mapping between vendor codepages can be done in a very similar manner as + * described above. + *

+ * The only time escape encodings can safely be made is when a URI is being + * created from its component parts. The escape and validate methods are + * internally performed within this method. + * + * @param original the original character sequence + * @param allowed those characters that are allowed within a component + * @param charset the protocol charset + * @return URI character sequence + * @exception IOException if original or allowed is null, or if charset is an + * unsupported character encoding + */ + protected static char[] encode(String original, BitSet allowed, String charset) throws IOException { + + // encode original to uri characters. + if (original == null) { + throw new IOException(/* IOException.PARSING, */ "URI: null"); + } + // escape octet to uri characters. + if (allowed == null) { + throw new IOException(/* IOException.PARSING, */ + "URI: null allowed characters"); + } + byte[] octets; + try { + octets = original.getBytes(charset); + } catch (UnsupportedEncodingException error) { + throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ "Unsupported Encoding: " + charset); + } + StringBuffer buf = new StringBuffer(octets.length); + for (int i = 0; i < octets.length; i++) { + char c = (char) octets[i]; + if (allowed.get(c)) { + buf.append(c); + } else { + buf.append('%'); + byte b = octets[i]; // use the original byte value + char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16); + buf.append(Character.toUpperCase(hexadecimal)); // high + hexadecimal = Character.forDigit(b & 0xF, 16); + buf.append(Character.toUpperCase(hexadecimal)); // low + } + } + + return buf.toString().toCharArray(); + } + + /** + * Decode with the default protocol charset. 
+ * + * @param component the URI character sequence + * @return original character sequence + * @exception IOException incomplete trailing escape pattern or unsupported + * character encoding + */ + protected static String decode(char[] component) throws IOException { + return decode(component, _protocolCharset); + } + + /** + * Decodes URI encoded string. + * + * This is a two mapping, one from URI characters to octets, and subsequently a + * second from octets to original characters: + *

+ *

+ * + *
+	 *   URI character sequence->octet sequence->original character sequence
+	 * 
+ * + *
+ *

+ * + * A URI must be separated into its components before the escaped characters + * within those components can be safely decoded. + *

+ * Notice that there is a chance that URI characters that are non UTF-8 may be + * parsed as valid UTF-8. A recent non-scientific analysis found that EUC + * encoded Japanese words had a 2.7% false reading; SJIS had a 0.0005% false + * reading; other encoding such as ASCII or KOI-8 have a 0% false reading. + *

+ * The percent "%" character always has the reserved purpose of being the escape + * indicator, it must be escaped as "%25" in order to be used as data within a + * URI. + *

+ * The unescape method is internally performed within this method. + * + * @param component the URI character sequence + * @param charset the protocol charset + * @return original character sequence + * @exception IOException incomplete trailing escape pattern or unsupported + * character encoding + */ + protected static String decode(char[] component, String charset) throws IOException { + + // unescape uri characters to octets + if (component == null) + return null; + + byte[] octets; + try { + octets = new String(component).getBytes(charset); + } catch (UnsupportedEncodingException error) { + throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ + "URI: not supported " + charset + " encoding"); + } + int length = octets.length; + int oi = 0; // output index + for (int ii = 0; ii < length; oi++) { + byte aByte = (byte) octets[ii++]; + if (aByte == '%' && ii + 2 <= length) { + byte high = (byte) Character.digit((char) octets[ii++], 16); + byte low = (byte) Character.digit((char) octets[ii++], 16); + if (high == -1 || low == -1) { + throw new IOException(/* IOException.ESCAPING, */ + "URI: incomplete trailing escape pattern"); + + } + aByte = (byte) ((high << 4) + low); + } + octets[oi] = (byte) aByte; + } + + String result; + try { + result = new String(octets, 0, oi, charset); + } catch (UnsupportedEncodingException error) { + throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ + "URI: not supported " + charset + " encoding"); + } + + return result; + } + + /** + * Pre-validate the unescaped URI string within a specific component. 
+ * + * @param component the component string within the component + * @param disallowed those characters disallowed within the component + * @return if true, it doesn't have the disallowed characters if false, the + * component is undefined or an incorrect one + */ + protected boolean prevalidate(String component, BitSet disallowed) { + // prevalidate the given component by disallowed characters + if (component == null) { + return false; // undefined + } + char[] target = component.toCharArray(); + for (int i = 0; i < target.length; i++) { + if (disallowed.get(target[i])) { + return false; + } + } + return true; + } + + /** + * Validate the URI characters within a specific component. The component must + * be performed after escape encoding. Or it doesn't include escaped characters. + * + * @param component the characters sequence within the component + * @param generous those characters that are allowed within a component + * @return if true, it's the correct URI character sequence + */ + protected boolean validate(char[] component, BitSet generous) { + // validate each component by generous characters + return validate(component, 0, -1, generous); + } + + /** + * Validate the URI characters within a specific component. The component must + * be performed after escape encoding. Or it doesn't include escaped characters. + *

+ * This check is generous rather than strict. Any strict validation should be + * performed before calling this method. + * + * @param component the characters sequence within the component + * @param soffset the starting offset of the given component + * @param eoffset the ending offset (inclusive) of the given component; if -1, it + * means the last index of the component + * @param generous those characters that are allowed within a component + * @return true if every character in the given range is set in generous + * @throws NullPointerException null component + */ + protected boolean validate(char[] component, int soffset, int eoffset, BitSet generous) { + // validate each component by generous characters + if (eoffset == -1) { + eoffset = component.length - 1; + } + for (int i = soffset; i <= eoffset; i++) { + if (!generous.get(component[i])) + return false; + } + return true; + } + + /** + * In order to avoid any possibility of conflict with non-ASCII characters, parse + * a URI reference as a String with the character encoding of the + * local system or the document. + *

+ * The following line is the regular expression for breaking-down a URI + * reference into its components. + *

+ *

+ * + *
+	 *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+	 *    12            3  4          5       6  7        8 9
+	 * 
+ * + *
+ *

+ * For example, matching the above expression to + * http://jakarta.apache.org/ietf/uri/#Related results in the following + * subexpression matches: + *

+ *

+ * + *
+	 *               $1 = http:
+	 *  scheme    =  $2 = http
+	 *               $3 = //jakarta.apache.org
+	 *  authority =  $4 = jakarta.apache.org
+	 *  path      =  $5 = /ietf/uri/
+	 *               $6 = 
+	 *  query     =  $7 = 
+	 *               $8 = #Related
+	 *  fragment  =  $9 = Related
+	 * 
+ * + *
+ *

+ * + * @param original the original character sequence + * @param escaped true if original is escaped + * @return the original character sequence + * @exception IOException + */ + protected void parseUriReference(String original, boolean escaped) throws IOException { + + // validate and contruct the URI character sequence + if (original == null || original.length() == 0) { + throw new IOException("URI-Reference required"); + } + + /** + * @ ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? + */ + String tmp = original.trim(); + + /** + * The length of the string sequence of characters. It may not be equal to the + * length of the byte array. + */ + int length = tmp.length(); + + /** + * Remove the delimiters like angle brackets around an URI. + */ + char[] firstDelimiter = { tmp.charAt(0) }; + if (validate(firstDelimiter, delims)) { + if (length >= 2) { + char[] lastDelimiter = { tmp.charAt(length - 1) }; + if (validate(lastDelimiter, delims)) { + tmp = tmp.substring(1, length - 1); + length = length - 2; + } + } + } + + /** + * The starting index + */ + int from = 0; + + /** + * The test flag whether the URI is started from the path component. + */ + boolean isStartedFromPath = false; + int atColon = tmp.indexOf(':'); + int atSlash = tmp.indexOf('/'); + if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) { + isStartedFromPath = true; + } + + /** + *

+ *

+ * + *
+		 *     @@@@@@@@
+		 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+		 * 
+ * + *
+ *

+ */ + int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); + if (at == -1) + at = 0; + + /** + * Parse the scheme. + *

+ *

+ * + *
+		 *  scheme    =  $2 = http
+		 *              @
+		 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+		 * 
+ * + *
+ *

+ */ + if (at < length && tmp.charAt(at) == ':') { + char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); + if (validate(target, scheme)) { + _scheme = target; + } else { + throw new IOException("incorrect scheme"); + } + from = ++at; + } + + /** + * Parse the authority component. + *

+ *

+ * + *
+		 *  authority =  $4 = jakarta.apache.org
+		 *                  @@
+		 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+		 * 
+ * + *
+ *

+ */ + // Reset flags + _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; + if (0 <= at && at < length && tmp.charAt(at) == '/') { + // Set flag + _is_hier_part = true; + if (at + 2 < length && tmp.charAt(at + 1) == '/') { + // the temporary index to start the search from + int next = indexFirstOf(tmp, "/?#", at + 2); + if (next == -1) { + next = (tmp.substring(at + 2).length() == 0) ? at + 2 : tmp.length(); + } + parseAuthority(tmp.substring(at + 2, next), escaped); + from = at = next; + // Set flag + _is_net_path = true; + } + if (from == at) { + // Set flag + _is_abs_path = true; + } + } + + /** + * Parse the path component. + *

+ *

+ * + *
+		 *  path      =  $5 = /ietf/uri/
+		 *                                @@@@@@
+		 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+		 * 
+ * + *
+ *

+ */ + if (from < length) { + // rel_path = rel_segment [ abs_path ] + int next = indexFirstOf(tmp, "?#", from); + if (next == -1) { + next = tmp.length(); + } + if (!_is_abs_path) { + if (!escaped && prevalidate(tmp.substring(from, next), disallowed_rel_path) + || escaped && validate(tmp.substring(from, next).toCharArray(), rel_path)) { + // Set flag + _is_rel_path = true; + } else if (!escaped && prevalidate(tmp.substring(from, next), disallowed_opaque_part) + || escaped && validate(tmp.substring(from, next).toCharArray(), opaque_part)) { + // Set flag + _is_opaque_part = true; + } else { + // the path component may be empty + _path = null; + } + } + setPath(tmp.substring(from, next)); + at = next; + } + + /** + * Parse the query component. + *

+ *

+ * + *
+		 *  query     =  $7 = 
+		 *                                        @@@@@@@@@
+		 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+		 * 
+ * + *
+ *

+ */ + if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') { + int next = tmp.indexOf('#', at + 1); + if (next == -1) { + next = tmp.length(); + } + _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() + : encode(tmp.substring(at + 1, next), allowed_query); + at = next; + } + + /** + * Parse the fragment component. + *

+ *

+ * + *
+		 *  fragment  =  $9 = Related
+		 *                                                   @@@@@@@@
+		 *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
+		 * 
+ * + *
+ *

+ */ + if (0 <= at && at + 1 < length && tmp.charAt(at) == '#') { + _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() + : encode(tmp.substring(at + 1), allowed_fragment); + } + + // set this URI. + setUriReference(); + } + + /** + * Get the earlier index that to be searched for the first occurrance in one of + * any of the given string. + * + * @param s the string to be indexed + * @param delims the delimiters used to index + * @return the earlier index if there are delimiters + */ + protected int indexFirstOf(String s, String delims) { + return indexFirstOf(s, delims, -1); + } + + /** + * Get the earlier index that to be searched for the first occurrance in one of + * any of the given string. + * + * @param s the string to be indexed + * @param delims the delimiters used to index + * @param offset the from index + * @return the earlier index if there are delimiters + */ + protected int indexFirstOf(String s, String delims, int offset) { + if (s == null || s.length() == 0) { + return -1; + } + if (delims == null || delims.length() == 0) { + return -1; + } + // check boundaries + if (offset < 0) { + offset = 0; + } else if (offset > s.length()) { + return -1; + } + // s is never null + int min = s.length(); + char[] delim = delims.toCharArray(); + for (int i = 0; i < delim.length; i++) { + int at = s.indexOf(delim[i], offset); + if (at >= 0 && at < min) { + min = at; + } + } + return (min == s.length()) ? -1 : min; + } + + /** + * Get the earlier index that to be searched for the first occurrance in one of + * any of the given array. + * + * @param s the character array to be indexed + * @param delim the delimiter used to index + * @return the ealier index if there are a delimiter + */ + protected int indexFirstOf(char[] s, char delim) { + return indexFirstOf(s, delim, 0); + } + + /** + * Get the earlier index that to be searched for the first occurrance in one of + * any of the given array. 
+ * + * @param s the character array to be indexed + * @param delim the delimiter used to index + * @return the ealier index if there is a delimiter + */ + protected int indexFirstOf(char[] s, char delim, int offset) { + if (s == null || s.length == 0) { + return -1; + } + // check boundaries + if (offset < 0) { + offset = 0; + } else if (offset > s.length) { + return -1; + } + for (int i = offset; i < s.length; i++) { + if (s[i] == delim) { + return i; + } + } + return -1; + } + + /** + * Parse the authority component. + * + * @param original the original character sequence of authority component + * @param escaped true if original is escaped + * @exception IOException + */ + protected void parseAuthority(String original, boolean escaped) throws IOException { + + // Reset flags + _is_reg_name = _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false; + + boolean has_port = true; + int from = 0; + int next = original.indexOf('@'); + if (next != -1) { // neither -1 and 0 + // each protocol extented from URI supports the specific userinfo + _userinfo = (escaped) ? original.substring(0, next).toCharArray() + : encode(original.substring(0, next), allowed_userinfo); + from = next + 1; + } + next = original.indexOf('[', from); + if (next >= from) { + next = original.indexOf(']', from); + if (next == -1) { + throw new IOException(/* IOException.PARSING, */ "URI: IPv6reference"); + } else { + next++; + } + // In IPv6reference, '[', ']' should be excluded + _host = (escaped) ? 
original.substring(from, next).toCharArray() + : encode(original.substring(from, next), allowed_IPv6reference); + // Set flag + _is_IPv6reference = true; + } else { // only for !_is_IPv6reference + next = original.indexOf(':', from); + if (next == -1) { + next = original.length(); + has_port = false; + } + // REMINDME: it doesn't need the pre-validation + _host = original.substring(from, next).toCharArray(); + if (validate(_host, IPv4address)) { + // Set flag + _is_IPv4address = true; + } else if (validate(_host, hostname)) { + // Set flag + _is_hostname = true; + } else { + // Set flag + _is_reg_name = true; + } + } + if (_is_reg_name) { + // Reset flags for a server-based naming authority + _is_server = _is_hostname = _is_IPv4address = _is_IPv6reference = false; + // set a registry-based naming authority + _authority = (escaped) ? original.toString().toCharArray() : encode(original.toString(), allowed_reg_name); + } else { + if (original.length() - 1 > next && has_port && original.charAt(next) == ':') { // not empty + from = next + 1; + try { + _port = Integer.parseInt(original.substring(from)); + } catch (NumberFormatException error) { + throw new IOException(/* IOException.PARSING, */ + "URI: invalid port number"); + } + } + // set a server-based naming authority + StringBuffer buf = new StringBuffer(); + if (_userinfo != null) { // has_userinfo + buf.append(_userinfo); + buf.append('@'); + } + if (_host != null) { + buf.append(_host); + if (_port != -1) { + buf.append(':'); + buf.append(_port); + } + } + _authority = buf.toString().toCharArray(); + // Set flag + _is_server = true; + } + } + + /** + * Once it's parsed successfully, set this URI. + * + * @see #getRawURI + */ + protected void setUriReference() { + // set _uri + StringBuffer buf = new StringBuffer(); + // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
+ if (_scheme != null) { + buf.append(_scheme); + buf.append(':'); + } + if (_is_net_path) { + buf.append("//"); + if (_authority != null) { // has_authority + if (_userinfo != null) { // by default, remove userinfo part + if (_host != null) { + buf.append(_host); + if (_port != -1) { + buf.append(':'); + buf.append(_port); + } + } + } else { + buf.append(_authority); + } + } + } + if (_opaque != null && _is_opaque_part) { + buf.append(_opaque); + } else if (_path != null) { + // _is_hier_part or _is_relativeURI + if (_path.length != 0) { + buf.append(_path); + } + } + if (_query != null) { // has_query + buf.append('?'); + buf.append(_query); + } + if (_fragment != null) { // has_fragment + buf.append('#'); + buf.append(_fragment); + } + + _uri = buf.toString().toCharArray(); + } + + // ----------------------------------------------------------- Test methods + + /** + * Tell whether or not this URI is absolute. + * + * @return true iif this URI is absoluteURI + */ + public boolean isAbsoluteURI() { + return (_scheme != null); + } + + /** + * Tell whether or not this URI is relative. + * + * @return true iif this URI is relativeURI + */ + public boolean isRelativeURI() { + return (_scheme == null); + } + + /** + * Tell whether or not the absoluteURI of this URI is hier_part. + * + * @return true iif the absoluteURI is hier_part + */ + public boolean isHierPart() { + return _is_hier_part; + } + + /** + * Tell whether or not the absoluteURI of this URI is opaque_part. + * + * @return true iif the absoluteURI is opaque_part + */ + public boolean isOpaquePart() { + return _is_opaque_part; + } + + /** + * Tell whether or not the relativeURI or heir_part of this URI is net_path. + * It's the same function as the has_authority() method. 
+ * + * @return true iif the relativeURI or heir_part is net_path + * @see #hasAuthority + */ + public boolean isNetPath() { + return _is_net_path || (_authority != null); + } + + /** + * Tell whether or not the relativeURI or hier_part of this URI is abs_path. + * + * @return true iif the relativeURI or hier_part is abs_path + */ + public boolean isAbsPath() { + return _is_abs_path; + } + + /** + * Tell whether or not the relativeURI of this URI is rel_path. + * + * @return true iif the relativeURI is rel_path + */ + public boolean isRelPath() { + return _is_rel_path; + } + + /** + * Tell whether or not this URI has authority. It's the same function as the + * is_net_path() method. + * + * @return true iif this URI has authority + * @see #isNetPath + */ + public boolean hasAuthority() { + return (_authority != null) || _is_net_path; + } + + /** + * Tell whether or not the authority component of this URI is reg_name. + * + * @return true iif the authority component is reg_name + */ + public boolean isRegName() { + return _is_reg_name; + } + + /** + * Tell whether or not the authority component of this URI is server. + * + * @return true iif the authority component is server + */ + public boolean isServer() { + return _is_server; + } + + /** + * Tell whether or not this URI has userinfo. + * + * @return true iif this URI has userinfo + */ + public boolean hasUserinfo() { + return (_userinfo != null); + } + + /** + * Tell whether or not the host part of this URI is hostname. + * + * @return true iif the host part is hostname + */ + public boolean isHostname() { + return _is_hostname; + } + + /** + * Tell whether or not the host part of this URI is IPv4address. + * + * @return true iif the host part is IPv4address + */ + public boolean isIPv4address() { + return _is_IPv4address; + } + + /** + * Tell whether or not the host part of this URI is IPv6reference. 
+ * + * @return true iif the host part is IPv6reference + */ + public boolean isIPv6reference() { + return _is_IPv6reference; + } + + /** + * Tell whether or not this URI has query. + * + * @return true iif this URI has query + */ + public boolean hasQuery() { + return (_query != null); + } + + /** + * Tell whether or not this URI has fragment. + * + * @return true iif this URI has fragment + */ + public boolean hasFragment() { + return (_fragment != null); + } + + // ---------------------------------------------------------------- Charset + + /** + * Set the default charset of the protocol. + *

+ * The character set used to store files SHALL remain a local decision and MAY + * depend on the capability of local operating systems. Prior to the exchange of + * URIs they SHOULD be converted into a ISO/IEC 10646 format and UTF-8 encoded. + * This approach, while allowing international exchange of URIs, will still + * allow backward compatibility with older systems because the code set + * positions for ASCII characters are identical to the one byte sequence in + * UTF-8. + *

+ * An individual URI scheme may require a single charset, define a default + * charset, or provide a way to indicate the charset used. + * + * @param charset the default charset for each protocol + */ + public static void setProtocolCharset(String charset) { + _protocolCharset = charset; + } + + /** + * Get the default charset of the protocol. + *

+ * An individual URI scheme may require a single charset, define a default + * charset, or provide a way to indicate the charset used. + *

+ * To work globally either requires support of a number of character sets and to + * be able to convert between them, or the use of a single preferred character + * set. For support of global compatibility it is STRONGLY RECOMMENDED that + * clients and servers use UTF-8 encoding when exchanging URIs. + * + * @return the charset string + */ + public static String getProtocolCharset() { + return _protocolCharset; + } + + /** + * Set the default charset of the document. + *

+ * Notice that it will be possible to contain mixed characters (e.g. + * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional + * display of these character sets, the protocol charset could be simply used + * again. Because it's not yet implemented that the insertion of BIDI control + * characters at different points during composition is extracted. + * + * @param charset the default charset for the document + */ + public static void setDocumentCharset(String charset) { + _documentCharset = charset; + } + + /** + * Get the default charset of the document. + * + * @return the charset string + */ + public static String getDocumentCharset() { + return _documentCharset; + } + + // ------------------------------------------------------------- The scheme + + /** + * Get the scheme. + * + * @return the scheme + */ + public char[] getRawScheme() { + return _scheme; + } + + /** + * Get the scheme. + * + * @return the scheme null if undefined scheme + */ + public String getScheme() { + return (_scheme == null) ? null : new String(_scheme); + } + + // ---------------------------------------------------------- The authority + + /** + * Set the authority. It can be one type of server, hostport, hostname, + * IPv4address, IPv6reference and reg_name. + *

+ *

+ * + *
+	 * authority = server | reg_name
+	 * 
+ * + *
+ *

+ * + * @param escapedAuthority the raw escaped authority + * @exception IOException + * @throws NullPointerException null authority + */ + public void setRawAuthority(char[] escapedAuthority) throws IOException { + parseAuthority(new String(escapedAuthority), true); + setUriReference(); + } + + /** + * Set the authority. It can be one type of server, hostport, hostname, + * IPv4address, IPv6reference and reg_name. Note that there is no setAuthority + * method by the escape encoding reason. + * + * @param escapedAuthority the escaped authority string + * @exception IOException + */ + public void setEscapedAuthority(String escapedAuthority) throws IOException { + + parseAuthority(escapedAuthority, true); + setUriReference(); + } + + /** + * Get the raw-escaped authority. + * + * @return the raw-escaped authority + */ + public char[] getRawAuthority() { + return _authority; + } + + /** + * Get the escaped authority. + * + * @return the escaped authority + */ + public String getEscapedAuthority() { + return (_authority == null) ? null : new String(_authority); + } + + /** + * Get the authority. + * + * @return the authority + * @exception IOException + * @see #decode + */ + public String getAuthority() throws IOException { + return (_authority == null) ? null : decode(_authority); + } + + // ----------------------------------------------------------- The userinfo + + /** + * Get the raw-escaped userinfo. + * + * @return the raw-escaped userinfo + * @see #getAuthority + */ + public char[] getRawUserinfo() { + return _userinfo; + } + + /** + * Get the escaped userinfo. + * + * @return the escaped userinfo + * @see #getAuthority + */ + public String getEscapedUserinfo() { + return (_userinfo == null) ? null : new String(_userinfo); + } + + /** + * Get the userinfo. + * + * @return the userinfo + * @exception IOException + * @see #decode + * @see #getAuthority + */ + public String getUserinfo() throws IOException { + return (_userinfo == null) ? 
null : decode(_userinfo); + } + + // --------------------------------------------------------------- The host + + /** + * Get the host. + *

+ *

+ * + *
+	 * host = hostname | IPv4address | IPv6reference
+	 * 
+ * + *
+ *

+ * + * @return the host + * @see #getAuthority + */ + public char[] getRawHost() { + return _host; + } + + /** + * Get the host. + *

+ *

+ * + *
+	 * host = hostname | IPv4address | IPv6reference
+	 * 
+ * + *
+ *

+ * + * @return the host + * @exception IOException + * @see #decode + * @see #getAuthority + */ + public String getHost() throws IOException { + return decode(_host); + } + + // --------------------------------------------------------------- The port + + /** + * Get the port. In order to get the specfic default port, the specific + * protocol-supported class extended from the URI class should be used. It has + * the server-based naming authority. + * + * @return the port if -1, it has the default port for the scheme or the + * server-based naming authority is not supported in the specific URI. + */ + public int getPort() { + return _port; + } + + // --------------------------------------------------------------- The path + + /** + * Set the path. The method couldn't be used by API programmers. + * + * @param path the path string + * @exception IOException set incorrectly or fragment only + * @see #encode + */ + protected void setPath(String path) throws IOException { + + // set path + if (_is_net_path || _is_abs_path) { + _path = encode(path, allowed_abs_path); + } else if (_is_rel_path) { + StringBuffer buff = new StringBuffer(path.length()); + int at = path.indexOf('/'); + if (at > 0) { // never 0 + buff.append(encode(path.substring(0, at), allowed_rel_path)); + buff.append(encode(path.substring(at), allowed_abs_path)); + } else { + buff.append(encode(path, allowed_rel_path)); + } + _path = buff.toString().toCharArray(); + } else if (_is_opaque_part) { + _opaque = encode(path, allowed_opaque_part); + } else { + throw new IOException(/* IOException.PARSING, */"URI: incorrect path"); + } + } + + /** + * Resolve the base and relative path. + * + * @param base_path a character array of the base_path + * @param rel_path a character array of the rel_path + * @return the resolved path + */ + protected char[] resolvePath(char[] base_path, char[] rel_path) { + + // REMINDME: paths are never null + String base = (base_path == null) ? 
"" : new String(base_path); + int at = base.lastIndexOf('/'); + if (at != -1) { + base_path = base.substring(0, at + 1).toCharArray(); + } + // _path could be empty + if (rel_path == null || rel_path.length == 0) { + return normalize(base_path); + } else if (rel_path[0] == '/') { + return rel_path; + } else { + StringBuffer buff = new StringBuffer(base.length() + rel_path.length); + if (at != -1) { + buff.append(base.substring(0, at + 1)); + buff.append(rel_path); + } + return normalize(buff.toString().toCharArray()); + } + } + + /** + * Get the raw-escaped current hierarchy level in the given path. If the last + * namespace is a collection, the slash mark ('/') should be ended with at the + * last character of the path string. + * + * @param path the path + * @return the current hierarchy level + * @exception IOException no hierarchy level + */ + protected char[] getRawCurrentHierPath(char[] path) throws IOException { + + if (_is_opaque_part) { + throw new IOException(/* IOException.PARSING, */ "URI: no hierarchy level"); + } + if (path == null) { + throw new IOException(/* IOException.PARSING, */ "URI: emtpy path"); + } + String buff = new String(path); + int first = buff.indexOf('/'); + int last = buff.lastIndexOf('/'); + if (last == 0) { + return rootPath; + } else if (first != last && last != -1) { + return buff.substring(0, last).toCharArray(); + } + // FIXME: it could be a document on the server side + return path; + } + + /** + * Get the raw-escaped current hierarchy level. + * + * @return the raw-escaped current hierarchy level + * @exception IOException no hierarchy level + */ + public char[] getRawCurrentHierPath() throws IOException { + return (_path == null) ? null : getRawCurrentHierPath(_path); + } + + /** + * Get the escaped current hierarchy level. 
+ * + * @return the escaped current hierarchy level + * @exception IOException no hierarchy level + */ + public String getEscapedCurrentHierPath() throws IOException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : new String(path); + } + + /** + * Get the current hierarchy level. + * + * @return the current hierarchy level + * @exception IOException + * @see #decode + */ + public String getCurrentHierPath() throws IOException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : decode(path); + } + + /** + * Get the level above the this hierarchy level. + * + * @return the raw above hierarchy level + * @exception IOException + */ + public char[] getRawAboveHierPath() throws IOException { + char[] path = getRawCurrentHierPath(); + return (path == null) ? null : getRawCurrentHierPath(path); + } + + /** + * Get the level above the this hierarchy level. + * + * @return the raw above hierarchy level + * @exception IOException + */ + public String getEscapedAboveHierPath() throws IOException { + char[] path = getRawAboveHierPath(); + return (path == null) ? null : new String(path); + } + + /** + * Get the level above the this hierarchy level. + * + * @return the above hierarchy level + * @exception IOException + * @see #decode + */ + public String getAboveHierPath() throws IOException { + char[] path = getRawAboveHierPath(); + return (path == null) ? null : decode(path); + } + + /** + * Get the raw-escaped path. + *

+ *

+ * + *
+	 *   path          = [ abs_path | opaque_part ]
+	 * 
+ * + *
+ *

+ * + * @return the raw-escaped path + */ + public char[] getRawPath() { + return _is_opaque_part ? _opaque : _path; + } + + /** + * Get the escaped path. + *

+ *

+ * + *
+	 *   path          = [ abs_path | opaque_part ]
+	 *   abs_path      = "/"  path_segments 
+	 *   opaque_part   = uric_no_slash *uric
+	 * 
+ * + *
+ *

+ * + * @return the escaped path string + */ + public String getEscapedPath() { + char[] path = getRawPath(); + return (path == null) ? null : new String(path); + } + + /** + * Get the path. + *

+ *

+ * + *
+	 *   path          = [ abs_path | opaque_part ]
+	 * 
+ * + *
+ *

+ * + * @return the path string + * @exception IOException + * @see #decode + */ + public String getPath() throws IOException { + char[] path = getRawPath(); + return (path == null) ? null : decode(path); + } + + /** + * Get the raw-escaped basename of the path. + * + * @return the raw-escaped basename + */ + public char[] getRawName() { + if (_path == null) + return null; + + int at = 0; + for (int i = _path.length - 1; i >= 0; i--) { + if (_path[i] == '/') { + at = i + 1; + break; + } + } + int len = _path.length - at; + char[] basename = new char[len]; + System.arraycopy(_path, at, basename, 0, len); + return basename; + } + + /** + * Get the escaped basename of the path. + * + * @return the escaped basename string + */ + public String getEscapedName() { + char[] basename = getRawName(); + return (basename == null) ? null : new String(basename); + } + + /** + * Get the basename of the path. + * + * @return the basename string + * @exception IOException incomplete trailing escape pattern Or unsupported + * character encoding + * @see #decode + */ + public String getName() throws IOException { + char[] basename = getRawName(); + return (basename == null) ? null : decode(getRawName()); + } + + // ----------------------------------------------------- The path and query + + /** + * Get the raw-escaped path and query. + * + * @return the raw-escaped path and query + */ + public char[] getRawPathQuery() { + + if (_path == null && _query == null) { + return null; + } + StringBuffer buff = new StringBuffer(); + if (_path != null) { + buff.append(_path); + } + if (_query != null) { + buff.append('?'); + buff.append(_query); + } + return buff.toString().toCharArray(); + } + + /** + * Get the escaped query. + * + * @return the escaped path and query string + */ + public String getEscapedPathQuery() { + char[] rawPathQuery = getRawPathQuery(); + return (rawPathQuery == null) ? null : new String(rawPathQuery); + } + + /** + * Get the path and query. 
+ * + * @return the path and query string. + * @exception IOException incomplete trailing escape pattern Or unsupported + * character encoding + * @see #decode + */ + public String getPathQuery() throws IOException { + char[] rawPathQuery = getRawPathQuery(); + return (rawPathQuery == null) ? null : decode(rawPathQuery); + } + + // -------------------------------------------------------------- The query + + /** + * Set the raw-escaped query. + * + * @param escapedQuery the raw-escaped query + * @exception IOException escaped query not valid + * @throws NullPointerException null query + */ + public void setRawQuery(char[] escapedQuery) throws IOException { + if (!validate(escapedQuery, query)) + throw new IOException(/* IOException.ESCAPING, */ + "URI: escaped query not valid"); + _query = escapedQuery; + setUriReference(); + } + + /** + * Set the escaped query string. + * + * @param escapedQuery the escaped query string + * @exception IOException escaped query not valid + * @throws NullPointerException null query + */ + public void setEscapedQuery(String escapedQuery) throws IOException { + setRawQuery(escapedQuery.toCharArray()); + } + + /** + * Set the query. When a query string is not misunderstood the reserved special + * characters ("&", "=", "+", ",", and "$") within a query component, it is + * recommended to use in encoding the whole query with this method. + * + * @param query the query string. + * @exception IOException incomplete trailing escape pattern Or unsupported + * character encoding + * @throws NullPointerException null query + * @see #encode + */ + public void setQuery(String query) throws IOException { + setRawQuery(encode(query, allowed_query)); + } + + /** + * Get the raw-escaped query. + * + * @return the raw-escaped query + */ + public char[] getRawQuery() { + return _query; + } + + /** + * Get the escaped query. + * + * @return the escaped query string + */ + public String getEscapedQuery() { + return (_query == null) ? 
null : new String(_query); + } + + /** + * Get the query. + * + * @return the query string. + * @exception IOException incomplete trailing escape pattern Or unsupported + * character encoding + * @see #decode + */ + public String getQuery() throws IOException { + return (_query == null) ? null : decode(_query); + } + + // ----------------------------------------------------------- The fragment + + /** + * Set the raw-escaped fragment. + * + * @param escapedFragment the raw-escaped fragment + * @exception IOException escaped fragment not valid + * @throws NullPointerException null fragment + */ + public void setRawFragment(char[] escapedFragment) throws IOException { + if (!validate(escapedFragment, fragment)) + throw new IOException(/* IOException.ESCAPING, */ + "URI: escaped fragment not valid"); + _fragment = escapedFragment; + setUriReference(); + } + + /** + * Set the escaped fragment string. + * + * @param escapedFragment the escaped fragment string + * @exception IOException escaped fragment not valid + * @throws NullPointerException null fragment + */ + public void setEscapedFragment(String escapedFragment) throws IOException { + char[] fragmentSequence = escapedFragment.toCharArray(); + if (!validate(fragmentSequence, fragment)) + throw new IOException(/* IOException.ESCAPING, */ + "URI: escaped fragment not valid"); + _fragment = fragmentSequence; + setUriReference(); + } + + /** + * Set the fragment. + * + * @param the fragment string. + * @exception IOException Or unsupported character encoding + * @throws NullPointerException null fragment + */ + public void setFragment(String fragment) throws IOException { + _fragment = encode(fragment, allowed_fragment); + setUriReference(); + } + + /** + * Get the raw-escaped fragment. + *

+ * The optional fragment identifier is not part of a URI, but is often used in + * conjunction with a URI. + *

+ * The format and interpretation of fragment identifiers is dependent on the + * media type [RFC2046] of the retrieval result. + *

+ * A fragment identifier is only meaningful when a URI reference is intended for + * retrieval and the result of that retrieval is a document for which the + * identified fragment is consistently defined. + * + * @return the raw-escaped fragment + */ + public char[] getRawFragment() { + return _fragment; + } + + /** + * Get the escaped fragment. + * + * @return the escaped fragment string + */ + public String getEscapedFragment() { + return (_fragment == null) ? null : new String(_fragment); + } + + /** + * Get the fragment. + * + * @return the fragment string + * @exception IOException incomplete trailing escape pattern Or unsupported + * character encoding + * @see #decode + */ + public String getFragment() throws IOException { + return (_fragment == null) ? null : decode(_fragment); + } + + // ------------------------------------------------------------- Utilities + + /** + * Normalize the given hier path part. + * + * @param path the path to normalize + * @return the normalized path + */ + protected char[] normalize(char[] path) { + + if (path == null) + return null; + + String normalized = new String(path); + boolean endsWithSlash = true; + // precondition + if (!normalized.endsWith("/")) { + normalized += '/'; + endsWithSlash = false; + } + if (normalized.endsWith("/./") || normalized.endsWith("/../")) { + endsWithSlash = true; + } + // Resolve occurrences of "/./" in the normalized path + while (true) { + int at = normalized.indexOf("/./"); + if (at == -1) { + break; + } + normalized = normalized.substring(0, at) + normalized.substring(at + 2); + } + // Resolve occurrences of "/../" in the normalized path + while (true) { + int at = normalized.indexOf("/../"); + if (at == -1) { + break; + } + if (at == 0) { + normalized = "/"; + break; + } + int backward = normalized.lastIndexOf('/', at - 1); + if (backward == -1) { + // consider the rel_path + normalized = normalized.substring(at + 4); + } else { + normalized = normalized.substring(0, backward) + 
normalized.substring(at + 3); + } + } + // Resolve occurrences of "//" in the normalized path + while (true) { + int at = normalized.indexOf("//"); + if (at == -1) { + break; + } + normalized = normalized.substring(0, at) + normalized.substring(at + 1); + } + if (!endsWithSlash && normalized.endsWith("/")) { + normalized = normalized.substring(0, normalized.length() - 1); + } else if (endsWithSlash && !normalized.endsWith("/")) { + normalized = normalized + "/"; + } + // Set the normalized path that we have completed + return normalized.toCharArray(); + } + + /** + * Normalize the path part of this URI. + */ + public void normalize() { + _path = normalize(_path); + } + + /** + * Test if the first array is equal to the second array. + * + * @param first the first character array + * @param second the second character array + * @return true if they're equal + */ + protected boolean equals(char[] first, char[] second) { + + if (first == null && second == null) { + return true; + } + if (first == null || second == null) { + return false; + } + if (first.length != second.length) { + return false; + } + for (int i = 0; i < first.length; i++) { + if (first[i] != second[i]) { + return false; + } + } + return true; + } + + /** + * Test an object if this URI is equal to another. + * + * @param obj an object to compare + * @return true if two URI objects are equal + */ + public boolean equals(Object obj) { + + // normalize and test each components + if (obj == this) { + return true; + } + if (!(obj instanceof URI)) { + return false; + } + URI another = (URI) obj; + // scheme + if (!equals(_scheme, another._scheme)) { + return false; + } + // is_opaque_part or is_hier_part? 
and opaque + if (!equals(_opaque, another._opaque)) { + return false; + } + // is_hier_part + // has_authority + if (!equals(_authority, another._authority)) { + return false; + } + // path + if (!equals(_path, another._path)) { + return false; + } + // has_query + if (!equals(_query, another._query)) { + return false; + } + // has_fragment? should be careful of the only fragment case. + if (!equals(_fragment, another._fragment)) { + return false; + } + return true; + } + + // ---------------------------------------------------------- Serialization + + /** + * Write the content of this URI. + * + * @param oos the object-output stream + */ + protected void writeObject(java.io.ObjectOutputStream oos) throws IOException { + + oos.defaultWriteObject(); + } + + /** + * Read a URI. + * + * @param ois the object-input stream + */ + protected void readObject(java.io.ObjectInputStream ois) throws ClassNotFoundException, IOException { + + ois.defaultReadObject(); + } + + // ------------------------------------------------------------- Comparison + + /** + * Compare this URI to another object. + * + * @param obj the object to be compared. + * @return 0, if it's same, -1, if failed, first being compared with in the + * authority component + * @exception ClassCastException not URI argument + * @throws NullPointerException null object + */ + public int compareTo(Object obj) { + + URI another = (URI) obj; + if (!equals(_authority, another.getRawAuthority())) + return -1; + return toString().compareTo(another.toString()); + } + + // ------------------------------------------------------------------ Clone + + /** + * Create and return a copy of this object, the URI-reference containing the + * userinfo component. Notice that the whole URI-reference including the + * userinfo component counld not be gotten as a String. + *

+ * To copy the identical URI object including the userinfo + * component, it should be used. + * + * @return a clone of this instance + */ + public synchronized Object clone() { + + URI instance = new URI(); + + instance._uri = _uri; + instance._scheme = _scheme; + instance._opaque = _opaque; + instance._authority = _authority; + instance._userinfo = _userinfo; + instance._host = _host; + instance._port = _port; + instance._path = _path; + instance._query = _query; + instance._fragment = _fragment; + // flags + instance._is_hier_part = _is_hier_part; + instance._is_opaque_part = _is_opaque_part; + instance._is_net_path = _is_net_path; + instance._is_abs_path = _is_abs_path; + instance._is_rel_path = _is_rel_path; + instance._is_reg_name = _is_reg_name; + instance._is_server = _is_server; + instance._is_hostname = _is_hostname; + instance._is_IPv4address = _is_IPv4address; + instance._is_IPv6reference = _is_IPv6reference; + + return instance; + } + + // ------------------------------------------------------------ Get the URI + + /** + * It can be gotten the URI character sequence. It's raw-escaped. For the + * purpose of the protocol to be transported, it will be useful. + *

+ * It is clearly unwise to use a URL that contains a password which is intended + * to be secret. In particular, the use of a password within the 'userinfo' + * component of a URL is strongly disrecommended except in those rare cases + * where the 'password' parameter is intended to be public. + *

+ * When you want to get each part of the userinfo, you need to use the specific + * methods in the specific URL. It depends on the specific URL. + * + * @return URI character sequence + */ + public char[] getRawURI() { + return _uri; + } + + /** + * It can be gotten the URI character sequence. It's escaped. For the purpose of + * the protocol to be transported, it will be useful. + * + * @return the URI string + */ + public String getEscapedURI() { + return (_uri == null) ? null : new String(_uri); + } + + /** + * It can be gotten the URI character sequence. + * + * @return the URI string + * @exception IOException incomplete trailing escape pattern Or unsupported + * character encoding + * @see #decode + */ + public String getURI() throws IOException { + return (_uri == null) ? null : decode(_uri); + } + + /** + * Get the escaped URI string. + *

+ * In documents, the URI-reference form is used only without the userinfo + * component, like http://jakarta.apache.org/, for security reasons. However, a + * URI-reference that contains the userinfo component can still be parsed. + *

+ * In other words, this URI and any of its subclasses must not expose the + * URI-reference expression with the userinfo component, such as + * http://user:password@hostport/restricted_zone.
+ * It means that the API client programmer should extract each user and password + * to access manually. Probably it will be supported in the each subclass, + * however, not a whole URI-reference expression. + * + * @return the URI string + * @see #clone() + */ + public String toString() { + return getEscapedURI(); + } + + // ------------------------------------------------------------ Inner class + + /** + * A mapping to determine the (somewhat arbitrarily) preferred charset for a + * given locale. Supports all locales recognized in JDK 1.1. + *

+ * The distribution of this class is Servlets.com. It was originally written by + * Jason Hunter [jhunter at acm.org] and used by with permission. + */ + public static class LocaleToCharsetMap { + + private static Hashtable map; + static { + map = new Hashtable(); + map.put("ar", "ISO-8859-6"); + map.put("be", "ISO-8859-5"); + map.put("bg", "ISO-8859-5"); + map.put("ca", "ISO-8859-1"); + map.put("cs", "ISO-8859-2"); + map.put("da", "ISO-8859-1"); + map.put("de", "ISO-8859-1"); + map.put("el", "ISO-8859-7"); + map.put("en", "ISO-8859-1"); + map.put("es", "ISO-8859-1"); + map.put("et", "ISO-8859-1"); + map.put("fi", "ISO-8859-1"); + map.put("fr", "ISO-8859-1"); + map.put("hr", "ISO-8859-2"); + map.put("hu", "ISO-8859-2"); + map.put("is", "ISO-8859-1"); + map.put("it", "ISO-8859-1"); + map.put("iw", "ISO-8859-8"); + map.put("ja", "Shift_JIS"); + map.put("ko", "EUC-KR"); + map.put("lt", "ISO-8859-2"); + map.put("lv", "ISO-8859-2"); + map.put("mk", "ISO-8859-5"); + map.put("nl", "ISO-8859-1"); + map.put("no", "ISO-8859-1"); + map.put("pl", "ISO-8859-2"); + map.put("pt", "ISO-8859-1"); + map.put("ro", "ISO-8859-2"); + map.put("ru", "ISO-8859-5"); + map.put("sh", "ISO-8859-5"); + map.put("sk", "ISO-8859-2"); + map.put("sl", "ISO-8859-2"); + map.put("sq", "ISO-8859-2"); + map.put("sr", "ISO-8859-5"); + map.put("sv", "ISO-8859-1"); + map.put("tr", "ISO-8859-9"); + map.put("uk", "ISO-8859-5"); + map.put("zh", "GB2312"); + map.put("zh_TW", "Big5"); + } + + /** + * Get the preferred charset for the given locale. 
+ * + * @param locale the locale + * @return the preferred charset or null if the locale is not recognized + */ + public static String getCharset(Locale locale) { + // try for an full name match (may include country) + String charset = (String) map.get(locale.toString()); + if (charset != null) + return charset; + + // if a full name didn't match, try just the language + charset = (String) map.get(locale.getLanguage()); + return charset; // may be null + } + + } } - -- cgit v1.2.3