1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22 import java.nio.charset.UnsupportedCharsetException;
23 import java.util.Objects;
24
25 import org.apache.commons.codec.DecoderException;
26 import org.apache.commons.codec.EncoderException;
27 import org.apache.commons.codec.binary.StringUtils;
28
29 /**
30 * Implements methods common to all codecs defined in RFC 1522.
31 * <p>
 * <a href="https://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII text in various portions of an RFC 822
 * message header, in a manner which is unlikely to confuse existing message handling software.
34 * </p>
35 * <p>
36 * This class is immutable and thread-safe.
37 * </p>
38 *
39 * @see <a href="https://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message Header Extensions for Non-ASCII Text</a>
40 * @since 1.3
41 */
42 abstract class RFC1522Codec {
43
44 /** Separator. */
45 protected static final char SEP = '?';
46
47 /** Prefix. */
48 protected static final String POSTFIX = "?=";
49
50 /** Postfix. */
51 protected static final String PREFIX = "=?";
52
53 /**
54 * The default Charset used for string decoding and encoding.
55 */
56 protected final Charset charset;
57
58 RFC1522Codec(final Charset charset) {
59 this.charset = Objects.requireNonNull(charset, "charset");
60 }
61
62 /**
63 * Applies an RFC 1522 compliant decoding scheme to the given string of text.
64 * <p>
65 * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes {@link #doDecoding(byte[])} method of a concrete class
66 * to perform the specific decoding.
67 * </p>
68 *
69 * @param text a string to decode.
70 * @return A new decoded String or {@code null} if the input is {@code null}.
71 * @throws DecoderException thrown if there is an error condition during the decoding process.
72 * @throws UnsupportedEncodingException thrown if charset specified in the "encoded-word" header is not supported.
73 */
74 protected String decodeText(final String text) throws DecoderException, UnsupportedEncodingException {
75 if (text == null) {
76 return null;
77 }
78 if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
79 throw new DecoderException("RFC 1522 violation: malformed encoded content");
80 }
81 final int terminator = text.length() - 2;
82 int from = 2;
83 int to = text.indexOf(SEP, from);
84 if (to == terminator) {
85 throw new DecoderException("RFC 1522 violation: charset token not found");
86 }
87 final String charset = text.substring(from, to);
88 if (charset.isEmpty()) {
89 throw new DecoderException("RFC 1522 violation: charset not specified");
90 }
91 from = to + 1;
92 to = text.indexOf(SEP, from);
93 if (to == terminator) {
94 throw new DecoderException("RFC 1522 violation: encoding token not found");
95 }
96 final String encoding = text.substring(from, to);
97 if (!getEncoding().equalsIgnoreCase(encoding)) {
98 throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
99 }
100 from = to + 1;
101 to = text.indexOf(SEP, from);
102 byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
103 data = doDecoding(data);
104 return new String(data, charset);
105 }
106
107 /**
108 * Decodes an array of bytes using the defined encoding scheme.
109 *
110 * @param bytes Data to be decoded.
111 * @return a byte array that contains decoded data.
112 * @throws DecoderException A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
113 */
114 protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
115
116 /**
117 * Encodes an array of bytes using the defined encoding scheme.
118 *
119 * @param bytes Data to be encoded.
120 * @return A byte array containing the encoded data.
121 * @throws EncoderException thrown if the Encoder encounters a failure condition during the encoding process.
122 */
123 protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
124
125 /**
126 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
127 * <p>
128 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes {@link #doEncoding(byte[])} method of a concrete
129 * class to perform the specific encoding.
130 * </p>
131 *
132 * @param text a string to encode.
133 * @param charset a charset to be used.
134 * @return RFC 1522 compliant "encoded-word".
135 * @throws EncoderException thrown if there is an error condition during the Encoding process.
136 * @see Charset
137 */
138 protected String encodeText(final String text, final Charset charset) throws EncoderException {
139 if (text == null) {
140 return null;
141 }
142 final StringBuilder buffer = new StringBuilder();
143 buffer.append(PREFIX);
144 buffer.append(charset);
145 buffer.append(SEP);
146 buffer.append(getEncoding());
147 buffer.append(SEP);
148 buffer.append(StringUtils.newStringUsAscii(doEncoding(text.getBytes(charset))));
149 buffer.append(POSTFIX);
150 return buffer.toString();
151 }
152
153 /**
154 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
155 * <p>
156 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes {@link #doEncoding(byte[])} method of a concrete
157 * class to perform the specific encoding.
158 * </p>
159 *
160 * @param text a string to encode.
161 * @param charsetName the charset to use.
162 * @return RFC 1522 compliant "encoded-word".
163 * @throws EncoderException thrown if there is an error condition during the Encoding process.
164 * @throws UnsupportedCharsetException if charset is not available.
165 * @see Charset
166 */
167 protected String encodeText(final String text, final String charsetName) throws EncoderException {
168 if (text == null) {
169 // Don't attempt charsetName conversion.
170 return null;
171 }
172 return encodeText(text, Charset.forName(charsetName));
173 }
174
175 /**
176 * Gets the default Charset name used for string decoding and encoding.
177 *
178 * @return the default Charset name.
179 * @since 1.7
180 */
181 public Charset getCharset() {
182 return charset;
183 }
184
185 /**
186 * Gets the default Charset name used for string decoding and encoding.
187 *
188 * @return the default Charset name.
189 */
190 public String getDefaultCharset() {
191 return charset.name();
192 }
193
194 /**
195 * Returns the codec name (referred to as encoding in the RFC 1522).
196 *
197 * @return name of the codec.
198 */
199 protected abstract String getEncoding();
200 }