001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.net; 019 020import java.io.ByteArrayOutputStream; 021import java.io.UnsupportedEncodingException; 022import java.util.BitSet; 023 024import org.apache.commons.codec.BinaryDecoder; 025import org.apache.commons.codec.BinaryEncoder; 026import org.apache.commons.codec.CharEncoding; 027import org.apache.commons.codec.DecoderException; 028import org.apache.commons.codec.EncoderException; 029import org.apache.commons.codec.StringDecoder; 030import org.apache.commons.codec.StringEncoder; 031import org.apache.commons.codec.binary.StringUtils; 032 033/** 034 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. 035 * <p> 036 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and 037 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below 038 * 1.4 rely on the platform's default charset encoding. 039 * </p> 040 * <p> 041 * This class is thread-safe as of 1.11 042 * </p> 043 * 044 * @see <a href="https://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> 045 * of the <a href="https://www.w3.org/TR/html4/">HTML 4.01 Specification</a> 046 * 047 * @since 1.2 048 */ 049public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 050 051 /** 052 * Release 1.5 made this field final. 053 */ 054 protected static final byte ESCAPE_CHAR = '%'; 055 056 /** 057 * BitSet of www-form-url safe characters. 058 * This is a copy of the internal BitSet which is now used for the conversion. 059 * Changes to this field are ignored. 060 * 061 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230) 062 */ 063 @Deprecated 064 protected static final BitSet WWW_FORM_URL; 065 066 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); 067 068 // Static initializer for www_form_url 069 static { 070 // alpha characters 071 for (int i = 'a'; i <= 'z'; i++) { 072 WWW_FORM_URL_SAFE.set(i); 073 } 074 for (int i = 'A'; i <= 'Z'; i++) { 075 WWW_FORM_URL_SAFE.set(i); 076 } 077 // numeric characters 078 for (int i = '0'; i <= '9'; i++) { 079 WWW_FORM_URL_SAFE.set(i); 080 } 081 // special chars 082 WWW_FORM_URL_SAFE.set('-'); 083 WWW_FORM_URL_SAFE.set('_'); 084 WWW_FORM_URL_SAFE.set('.'); 085 WWW_FORM_URL_SAFE.set('*'); 086 // blank to be replaced with + 087 WWW_FORM_URL_SAFE.set(' '); 088 089 // Create a copy in case anyone (ab)uses it 090 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone(); 091 } 092 093 /** 094 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 095 * back to their original representation. 096 * 097 * @param bytes 098 * array of URL safe characters. 099 * @return array of original bytes. 100 * @throws DecoderException 101 * Thrown if URL decoding is unsuccessful. 102 */ 103 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { 104 if (bytes == null) { 105 return null; 106 } 107 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 108 for (int i = 0; i < bytes.length; i++) { 109 final int b = bytes[i]; 110 if (b == '+') { 111 buffer.write(' '); 112 } else if (b == ESCAPE_CHAR) { 113 try { 114 final int u = Utils.digit16(bytes[++i]); 115 final int l = Utils.digit16(bytes[++i]); 116 buffer.write((char) ((u << 4) + l)); 117 } catch (final ArrayIndexOutOfBoundsException e) { 118 throw new DecoderException("Invalid URL encoding: ", e); 119 } 120 } else { 121 buffer.write(b); 122 } 123 } 124 return buffer.toByteArray(); 125 } 126 127 /** 128 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 129 * 130 * @param urlsafe 131 * bitset of characters deemed URL safe. 132 * @param bytes 133 * array of bytes to convert to URL safe characters. 134 * @return array of bytes containing URL safe characters. 135 */ 136 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { 137 if (bytes == null) { 138 return null; 139 } 140 if (urlsafe == null) { 141 urlsafe = WWW_FORM_URL_SAFE; 142 } 143 144 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 145 for (final byte c : bytes) { 146 int b = c; 147 if (b < 0) { 148 b = 256 + b; 149 } 150 if (urlsafe.get(b)) { 151 if (b == ' ') { 152 b = '+'; 153 } 154 buffer.write(b); 155 } else { 156 buffer.write(ESCAPE_CHAR); 157 final char hex1 = Utils.hexChar(b >> 4); 158 final char hex2 = Utils.hexChar(b); 159 buffer.write(hex1); 160 buffer.write(hex2); 161 } 162 } 163 return buffer.toByteArray(); 164 } 165 166 /** 167 * The default charset used for string decoding and encoding. 168 * 169 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126) 170 */ 171 @Deprecated 172 protected volatile String charset; // added volatile: see CODEC-232 173 174 /** 175 * Default constructor. 176 */ 177 public URLCodec() { 178 this(CharEncoding.UTF_8); 179 } 180 181 /** 182 * Constructor which allows for the selection of a default charset. 183 * 184 * @param charset the default string charset to use. 185 */ 186 public URLCodec(final String charset) { 187 this.charset = charset; 188 } 189 190 /** 191 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted 192 * back to their original representation. 193 * 194 * @param bytes 195 * array of URL safe characters. 196 * @return array of original bytes. 197 * @throws DecoderException 198 * Thrown if URL decoding is unsuccessful. 199 */ 200 @Override 201 public byte[] decode(final byte[] bytes) throws DecoderException { 202 return decodeUrl(bytes); 203 } 204 205 /** 206 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original 207 * representation. 208 * 209 * @param obj 210 * URL safe object to convert into its original form. 211 * @return original object. 212 * @throws DecoderException 213 * Thrown if the argument is not a {@code String} or {@code byte[]}. Thrown if a failure 214 * condition is encountered during the decode process. 215 */ 216 @Override 217 public Object decode(final Object obj) throws DecoderException { 218 if (obj == null) { 219 return null; 220 } 221 if (obj instanceof byte[]) { 222 return decode((byte[]) obj); 223 } 224 if (obj instanceof String) { 225 return decode((String) obj); 226 } 227 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); 228 } 229 230 /** 231 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are 232 * converted back to their original representation. 233 * 234 * @param str 235 * URL safe string to convert into its original form. 236 * @return original string. 237 * @throws DecoderException 238 * Thrown if URL decoding is unsuccessful. 239 * @see #getDefaultCharset() 240 */ 241 @Override 242 public String decode(final String str) throws DecoderException { 243 if (str == null) { 244 return null; 245 } 246 try { 247 return decode(str, getDefaultCharset()); 248 } catch (final UnsupportedEncodingException e) { 249 throw new DecoderException(e.getMessage(), e); 250 } 251 } 252 253 /** 254 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted 255 * back to their original representation. 256 * 257 * @param str 258 * URL safe string to convert into its original form. 259 * @param charsetName 260 * the original string charset. 261 * @return original string. 262 * @throws DecoderException 263 * Thrown if URL decoding is unsuccessful. 264 * @throws UnsupportedEncodingException 265 * Thrown if charset is not supported. 266 */ 267 public String decode(final String str, final String charsetName) 268 throws DecoderException, UnsupportedEncodingException { 269 if (str == null) { 270 return null; 271 } 272 return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName); 273 } 274 275 /** 276 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. 277 * 278 * @param bytes 279 * array of bytes to convert to URL safe characters. 280 * @return array of bytes containing URL safe characters. 281 */ 282 @Override 283 public byte[] encode(final byte[] bytes) { 284 return encodeUrl(WWW_FORM_URL_SAFE, bytes); 285 } 286 287 /** 288 * Encodes an object into its URL safe form. Unsafe characters are escaped. 289 * 290 * @param obj 291 * string to convert to a URL safe form. 292 * @return URL safe object. 293 * @throws EncoderException 294 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful. 295 */ 296 @Override 297 public Object encode(final Object obj) throws EncoderException { 298 if (obj == null) { 299 return null; 300 } 301 if (obj instanceof byte[]) { 302 return encode((byte[]) obj); 303 } 304 if (obj instanceof String) { 305 return encode((String) obj); 306 } 307 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); 308 } 309 310 /** 311 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. 312 * 313 * @param str 314 * string to convert to a URL safe form. 315 * @return URL safe string. 316 * @throws EncoderException 317 * Thrown if URL encoding is unsuccessful. 318 * @see #getDefaultCharset() 319 */ 320 @Override 321 public String encode(final String str) throws EncoderException { 322 if (str == null) { 323 return null; 324 } 325 try { 326 return encode(str, getDefaultCharset()); 327 } catch (final UnsupportedEncodingException e) { 328 throw new EncoderException(e.getMessage(), e); 329 } 330 } 331 332 /** 333 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 334 * 335 * @param str 336 * string to convert to a URL safe form. 337 * @param charsetName 338 * the charset for str. 339 * @return URL safe string. 340 * @throws UnsupportedEncodingException 341 * Thrown if charset is not supported. 342 */ 343 public String encode(final String str, final String charsetName) throws UnsupportedEncodingException { 344 if (str == null) { 345 return null; 346 } 347 return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName))); 348 } 349 350 /** 351 * The default charset used for string decoding and encoding. 352 * 353 * @return the default string charset. 354 */ 355 public String getDefaultCharset() { 356 return this.charset; 357 } 358 359 /** 360 * The {@code String} encoding used for decoding and encoding. 361 * 362 * @return the encoding. 363 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. 364 */ 365 @Deprecated 366 public String getEncoding() { 367 return this.charset; 368 } 369 370}