View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  import java.nio.charset.UnsupportedCharsetException;
23  import java.util.Objects;
24  
25  import org.apache.commons.codec.DecoderException;
26  import org.apache.commons.codec.EncoderException;
27  import org.apache.commons.codec.binary.StringUtils;
28  
29  /**
30   * Implements methods common to all codecs defined in RFC 1522.
31   * <p>
32   * <a href="https://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII text in various portions of a RFC 822 [2]
33   * message header, in a manner which is unlikely to confuse existing message handling software.
34   * </p>
35   * <p>
36   * This class is immutable and thread-safe.
37   * </p>
38   *
39   * @see <a href="https://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message Header Extensions for Non-ASCII Text</a>
40   * @since 1.3
41   */
42  abstract class RFC1522Codec {
43  
44      /** Separator. */
45      protected static final char SEP = '?';
46  
47      /** Prefix. */
48      protected static final String POSTFIX = "?=";
49  
50      /** Postfix. */
51      protected static final String PREFIX = "=?";
52  
53      /**
54       * The default Charset used for string decoding and encoding.
55       */
56      protected final Charset charset;
57  
58      RFC1522Codec(final Charset charset) {
59          this.charset = Objects.requireNonNull(charset, "charset");
60      }
61  
62      /**
63       * Applies an RFC 1522 compliant decoding scheme to the given string of text.
64       * <p>
65       * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes {@link #doDecoding(byte[])} method of a concrete class
66       * to perform the specific decoding.
67       * </p>
68       *
69       * @param text a string to decode.
70       * @return A new decoded String or {@code null} if the input is {@code null}.
71       * @throws DecoderException             thrown if there is an error condition during the decoding process.
72       * @throws UnsupportedEncodingException thrown if charset specified in the "encoded-word" header is not supported.
73       */
74      protected String decodeText(final String text) throws DecoderException, UnsupportedEncodingException {
75          if (text == null) {
76              return null;
77          }
78          if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
79              throw new DecoderException("RFC 1522 violation: malformed encoded content");
80          }
81          final int terminator = text.length() - 2;
82          int from = 2;
83          int to = text.indexOf(SEP, from);
84          if (to == terminator) {
85              throw new DecoderException("RFC 1522 violation: charset token not found");
86          }
87          final String charset = text.substring(from, to);
88          if (charset.isEmpty()) {
89              throw new DecoderException("RFC 1522 violation: charset not specified");
90          }
91          from = to + 1;
92          to = text.indexOf(SEP, from);
93          if (to == terminator) {
94              throw new DecoderException("RFC 1522 violation: encoding token not found");
95          }
96          final String encoding = text.substring(from, to);
97          if (!getEncoding().equalsIgnoreCase(encoding)) {
98              throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
99          }
100         from = to + 1;
101         to = text.indexOf(SEP, from);
102         byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
103         data = doDecoding(data);
104         return new String(data, charset);
105     }
106 
107     /**
108      * Decodes an array of bytes using the defined encoding scheme.
109      *
110      * @param bytes Data to be decoded.
111      * @return a byte array that contains decoded data.
112      * @throws DecoderException A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
113      */
114     protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
115 
116     /**
117      * Encodes an array of bytes using the defined encoding scheme.
118      *
119      * @param bytes Data to be encoded.
120      * @return A byte array containing the encoded data.
121      * @throws EncoderException thrown if the Encoder encounters a failure condition during the encoding process.
122      */
123     protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
124 
125     /**
126      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
127      * <p>
128      * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes {@link #doEncoding(byte[])} method of a concrete
129      * class to perform the specific encoding.
130      * </p>
131      *
132      * @param text    a string to encode.
133      * @param charset a charset to be used.
134      * @return RFC 1522 compliant "encoded-word".
135      * @throws EncoderException thrown if there is an error condition during the Encoding process.
136      * @see Charset
137      */
138     protected String encodeText(final String text, final Charset charset) throws EncoderException {
139         if (text == null) {
140             return null;
141         }
142         final StringBuilder buffer = new StringBuilder();
143         buffer.append(PREFIX);
144         buffer.append(charset);
145         buffer.append(SEP);
146         buffer.append(getEncoding());
147         buffer.append(SEP);
148         buffer.append(StringUtils.newStringUsAscii(doEncoding(text.getBytes(charset))));
149         buffer.append(POSTFIX);
150         return buffer.toString();
151     }
152 
153     /**
154      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
155      * <p>
156      * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes {@link #doEncoding(byte[])} method of a concrete
157      * class to perform the specific encoding.
158      * </p>
159      *
160      * @param text        a string to encode.
161      * @param charsetName the charset to use.
162      * @return RFC 1522 compliant "encoded-word".
163      * @throws EncoderException            thrown if there is an error condition during the Encoding process.
164      * @throws UnsupportedCharsetException if charset is not available.
165      * @see Charset
166      */
167     protected String encodeText(final String text, final String charsetName) throws EncoderException {
168         if (text == null) {
169             // Don't attempt charsetName conversion.
170             return null;
171         }
172         return encodeText(text, Charset.forName(charsetName));
173     }
174 
175     /**
176      * Gets the default Charset name used for string decoding and encoding.
177      *
178      * @return the default Charset name.
179      * @since 1.7
180      */
181     public Charset getCharset() {
182         return charset;
183     }
184 
185     /**
186      * Gets the default Charset name used for string decoding and encoding.
187      *
188      * @return the default Charset name.
189      */
190     public String getDefaultCharset() {
191         return charset.name();
192     }
193 
194     /**
195      * Returns the codec name (referred to as encoding in the RFC 1522).
196      *
197      * @return name of the codec.
198      */
199     protected abstract String getEncoding();
200 }