1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.math.BigInteger;
21 import java.util.Arrays;
22 import java.util.Objects;
23
24 import org.apache.commons.codec.CodecPolicy;
25
26 /**
27 * Provides Base64 encoding and decoding as defined by <a href="https://www.ietf.org/rfc/rfc2045">RFC 2045 Multipurpose Internet Mail Extensions (MIME) Part
28 * One: Format of Internet Message Bodies</a> and portions of <a href="https://datatracker.ietf.org/doc/html/rfc4648">RFC 4648 The Base16, Base32, and Base64
29 * Data Encodings</a>
30 *
31 * <p>
32 * This class implements <a href="https://www.ietf.org/rfc/rfc2045#section-6.8">RFC 2045 6.8. Base64 Content-Transfer-Encoding</a>.
33 * </p>
34 * <p>
35 * The class can be parameterized in the following manner with its {@link Builder}:
36 * </p>
37 * <ul>
38 * <li>URL-safe mode: Default off.</li>
39 * <li>Line length: Default 76. Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.</li>
40 * <li>Line separator: Default is CRLF ({@code "\r\n"})</li>
41 * <li>Strict or lenient decoding policy; default is {@link CodecPolicy#LENIENT}.</li>
42 * <li>Custom decoding table.</li>
43 * <li>Custom encoding table.</li>
44 * <li>Padding; default is {@code '='}.</li>
45 * </ul>
46 * <p>
47 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes, see also
48 * {@code Builder#setDecodeTableFormat(DecodeTableFormat)}.
49 * </p>
50 * <p>
51 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode character encodings which are
52 * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
53 * </p>
54 * <p>
55 * This class is thread-safe.
56 * </p>
57 * <p>
58 * To configure a new instance, use a {@link Builder}. For example:
59 * </p>
60 *
61 * <pre>
62 * Base64 base64 = Base64.builder()
63 * .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient, null resets to default
64 * .setEncodeTable(customEncodeTable) // default is built in, null resets to default
65 * .setLineLength(0) // default is none
66 * .setLineSeparator('\r', '\n') // default is CR LF, null resets to default
67 * .setPadding('=') // default is '='
68 * .setUrlSafe(false) // default is false
69 * .get()
70 * </pre>
71 *
72 * @see Base64InputStream
73 * @see Base64OutputStream
74 * @see <a href="https://www.ietf.org/rfc/rfc2045">RFC 2045 Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</a>
75 * @see <a href="https://datatracker.ietf.org/doc/html/rfc4648">RFC 4648 The Base16, Base32, and Base64 Data Encodings</a>
76 * @since 1.0
77 */
78 public class Base64 extends BaseNCodec {
79
80 /**
81 * Builds {@link Base64} instances.
82 *
83 * <p>
84 * To configure a new instance, use a {@link Builder}. For example:
85 * </p>
86 *
87 * <pre>
88 * Base64 base64 = Base64.builder()
89 * .setCodecPolicy(CodecPolicy.LENIENT) // default is lenient, null resets to default
90 * .setEncodeTable(customEncodeTable) // default is built in, null resets to default
91 * .setLineLength(0) // default is none
92 * .setLineSeparator('\r', '\n') // default is CR LF, null resets to default
93 * .setPadding('=') // default is '='
94 * .setUrlSafe(false) // default is false
95 * .get()
96 * </pre>
97 *
98 * @since 1.17.0
99 */
100 public static class Builder extends AbstractBuilder<Base64, Builder> {
101
102 /**
103 * Constructs a new instance.
104 */
105 public Builder() {
106 super(STANDARD_ENCODE_TABLE);
107 setDecodeTableRaw(DECODE_TABLE);
108 setEncodeTableRaw(STANDARD_ENCODE_TABLE);
109 setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
110 setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
111 }
112
113 @Override
114 public Base64 get() {
115 return new Base64(this);
116 }
117
118 /**
119 * Sets the format of the decoding table. This method allows to explicitly state whether a standard or URL-safe Base64 decoding is expected. This method
120 * does not modify behavior on encoding operations. For configuration of the encoding behavior, please use {@link #setUrlSafe(boolean)} method.
121 * <p>
122 * By default, the implementation uses the {@link DecodeTableFormat#MIXED} approach, allowing a seamless handling of both
123 * {@link DecodeTableFormat#URL_SAFE} and {@link DecodeTableFormat#STANDARD} base64.
124 * </p>
125 *
126 * @param format table format to be used on Base64 decoding. Use {@link DecodeTableFormat#MIXED} or null to reset to the default behavior.
127 * @return {@code this} instance.
128 * @since 1.21
129 */
130 public Builder setDecodeTableFormat(final DecodeTableFormat format) {
131 if (format == null) {
132 return setDecodeTableRaw(DECODE_TABLE);
133 }
134 switch (format) {
135 case STANDARD:
136 return setDecodeTableRaw(STANDARD_DECODE_TABLE);
137 case URL_SAFE:
138 return setDecodeTableRaw(URL_SAFE_DECODE_TABLE);
139 case MIXED:
140 default:
141 return setDecodeTableRaw(DECODE_TABLE);
142 }
143 }
144
145 @Override
146 public Builder setEncodeTable(final byte... encodeTable) {
147 final boolean isStandardEncodeTable = Arrays.equals(encodeTable, STANDARD_ENCODE_TABLE);
148 final boolean isUrlSafe = Arrays.equals(encodeTable, URL_SAFE_ENCODE_TABLE);
149 setDecodeTableRaw(isStandardEncodeTable || isUrlSafe ? DECODE_TABLE : calculateDecodeTable(encodeTable));
150 return super.setEncodeTable(encodeTable);
151 }
152
153 /**
154 * Sets the URL-safe encoding policy.
155 * <p>
156 * This method does not modify behavior on decoding operations. For configuration of the decoding behavior, please use
157 * {@code Builder.setDecodeTableFormat(DecodeTableFormat)} method.
158 * </p>
159 *
160 * @param urlSafe URL-safe encoding policy, null resets to the default.
161 * @return {@code this} instance.
162 */
163 public Builder setUrlSafe(final boolean urlSafe) {
164 // Javadoc 8 can't find {@link #setDecodeTableFormat(DecodeTableFormat)}
165 return setEncodeTable(toUrlSafeEncodeTable(urlSafe));
166 }
167
168 }
169
/**
 * Selects which Base64 alphabet(s) a decoder accepts.
 * <p>
 * {@link DecodeTableFormat#MIXED} is the default: it decodes input written with either the {@link DecodeTableFormat#STANDARD} or the
 * {@link DecodeTableFormat#URL_SAFE} alphabet.
 * </p>
 *
 * @since 1.21
 */
public enum DecodeTableFormat {

    /**
     * The standard Base64 alphabet of
     * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>.
     */
    STANDARD,

    /**
     * The URL-safe Base64 alphabet of
     * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC
     * 4648 Table 2: The "URL and Filename safe" Base 64 Alphabet</a>.
     */
    URL_SAFE,

    /**
     * Both alphabets at once: input may use the characters of
     * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a> or of
     * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC
     * 4648 Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. This is the decoding table used by default.
     */
    MIXED
}
202
/**
 * BASE64 characters are 6 bits in length.
 * They are formed by taking a block of 3 octets to form a 24-bit string,
 * which is converted into 4 BASE64 characters.
 */
private static final int BITS_PER_ENCODED_BYTE = 6;

/** Number of octets in one unencoded block (3 octets form the 24-bit string). */
private static final int BYTES_PER_UNENCODED_BLOCK = 3;

/** Number of BASE64 characters one unencoded block is converted into. */
private static final int BYTES_PER_ENCODED_BLOCK = 4;

/** Length of the decode table allocated by {@code calculateDecodeTable(byte[])} for a custom encode table. */
private static final int DECODING_TABLE_LENGTH = 256;
212
/**
 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" equivalents as specified in
 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>.
 * <p>
 * The index into the array is the 6-bit value; the element is the character emitted for it. Indices 62 and 63 map to '+' and '/'.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for this code. https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 */
// @formatter:off
private static final byte[] STANDARD_ENCODE_TABLE = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};

/**
 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / changed to - and _ to make the encoded Base64 results more URL-SAFE. This table is
 * only used when the Base64's mode is set to URL-SAFE.
 * <p>
 * Only the last two entries (indices 62 and 63) differ from the standard table.
 * </p>
 */
// @formatter:off
private static final byte[] URL_SAFE_ENCODE_TABLE = {
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
};
// @formatter:on
242
/**
 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in
 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>) into their 6-bit
 * positive integer equivalents. Characters that are not in the Base64 or Base64 URL-safe alphabets but fall within the bounds of the array are translated
 * to -1.
 * <p>
 * The characters '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both URL_SAFE and STANDARD base64.
 * (The encoder, on the other hand, needs to know ahead of time what to emit).
 * </p>
 * <p>
 * The table stops at 0x7a ('z'), so callers must bounds-check a byte against the table length before indexing.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for this code. https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 */
private static final byte[] DECODE_TABLE = {
        // 0 1 2 3 4 5 6 7 8 9 A B C D E F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - /
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
};

/**
 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified in
 * <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The Base64 Alphabet</a>) into their 6-bit
 * positive integer equivalents. Characters that are not in the Base64 alphabet but fall within the bounds of the array are translated to -1. This decoding
 * table handles only the standard base64 characters, such as '+' and '/'. The "url-safe" characters such as '-' and '_' are not supported by the table.
 * <p>
 * Compared with the mixed table, the entries for '-' (0x2d) and '_' (0x5f) are -1 here.
 * </p>
 */
private static final byte[] STANDARD_DECODE_TABLE = {
        // 0 1 2 3 4 5 6 7 8 9 A B C D E F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, // 20-2f + /
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, // 50-5f P-Z
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
};

/**
 * This array is a lookup table that translates Unicode characters drawn from the "Base64 URL-safe Alphabet" (as specified in
 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648
 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>) into their 6-bit positive integer equivalents. Characters that are not in the Base64 URL-safe
 * alphabet but fall within the bounds of the array are translated to -1. This decoding table handles only the URL-safe base64 characters, such as '-' and
 * '_'. The standard characters such as '+' and '/' are not supported by the table.
 * <p>
 * Compared with the mixed table, the entries for '+' (0x2b) and '/' (0x2f) are -1 here.
 * </p>
 */
private static final byte[] URL_SAFE_DECODE_TABLE = {
        // 0 1 2 3 4 5 6 7 8 9 A B C D E F
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, // 20-2f -
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z
};
304
/** Mask used to extract 6 bits, used when encoding. Base64 uses 6-bit fields. */
private static final int MASK_6_BITS = 0x3f;

/** Mask used to extract 4 bits, used when decoding final trailing character. */
private static final int MASK_4_BITS = 0xf;

/** Mask used to extract 2 bits, used when decoding final trailing character. */
private static final int MASK_2_BITS = 0x3;

// The static final fields above are used for the original static byte[] methods on Base64.
// The private member fields below are used with the new streaming approach, which requires
// some state be preserved between calls of encode() and decode().
321
322 /**
323 * Creates a new Builder.
324 *
325 * <p>
326 * To configure a new instance, use a {@link Builder}. For example:
327 * </p>
328 *
329 * <pre>
330 * Base64 base64 = Base64.builder()
331 * .setDecodingPolicy(CodecPolicy.LENIENT) // default is lenient, null resets to default
332 * .setEncodeTable(customEncodeTable) // default is built in, null resets to default
333 * .setLineLength(0) // default is none
334 * .setLineSeparator('\r', '\n') // default is CR LF, null resets to default
335 * .setPadding('=') // default is '='
336 * .setUrlSafe(false) // default is false
337 * .get()
338 * </pre>
339 *
340 * @return a new Builder.
341 * @since 1.17.0
342 */
343 public static Builder builder() {
344 return new Builder();
345 }
346
347 /**
348 * Calculates a decode table for a given encode table.
349 *
350 * @param encodeTable that is used to determine decode lookup table.
351 * @return A new decode table.
352 */
353 private static byte[] calculateDecodeTable(final byte[] encodeTable) {
354 final byte[] decodeTable = new byte[DECODING_TABLE_LENGTH];
355 Arrays.fill(decodeTable, (byte) -1);
356 for (int i = 0; i < encodeTable.length; i++) {
357 decodeTable[encodeTable[i]] = (byte) i;
358 }
359 return decodeTable;
360 }
361
362 /**
363 * Decodes Base64 data into octets.
364 * <p>
365 * This method seamlessly handles data encoded in URL-safe or normal mode. For enforcing verification against strict standard Base64 or Base64 URL-safe
366 * tables, please use {@link #decodeBase64Standard(byte[])} or {@link #decodeBase64UrlSafe(byte[])} methods respectively. This method skips unknown or
367 * unsupported bytes.
368 * </p>
369 *
370 * @param base64Data Byte array containing Base64 data.
371 * @return New array containing decoded data.
372 */
373 public static byte[] decodeBase64(final byte[] base64Data) {
374 return new Base64().decode(base64Data);
375 }
376
377 /**
378 * Decodes a Base64 String into octets.
379 * <p>
380 * This method seamlessly handles data encoded in URL-safe or normal mode. For enforcing verification against strict standard Base64 or Base64 URL-safe
381 * tables, please use {@link #decodeBase64Standard(String)} or {@link #decodeBase64UrlSafe(String)} methods respectively. This method skips unknown or
382 * unsupported bytes.
383 * </p>
384 *
385 * @param base64String String containing Base64 data.
386 * @return New array containing decoded data.
387 * @since 1.4
388 */
389 public static byte[] decodeBase64(final String base64String) {
390 return new Base64().decode(base64String);
391 }
392
393 /**
394 * Decodes standard Base64 data into octets.
395 * <p>
396 * This implementation is aligned with the <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The
397 * Base64 Alphabet</a>. This method skips unknown or unsupported bytes.
398 * </p>
399 *
400 * @param base64Data Byte array containing Base64 data.
401 * @return New array containing decoded data.
402 * @since 1.21
403 */
404 public static byte[] decodeBase64Standard(final byte[] base64Data) {
405 return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64Data);
406 }
407
408 /**
409 * Decodes a standard Base64 String into octets.
410 * <p>
411 * This implementation is aligned with the <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The
412 * Base64 Alphabet</a>. This method skips unknown or unsupported characters.
413 * </p>
414 *
415 * @param base64String String containing Base64 data.
416 * @return New array containing decoded data.
417 * @since 1.21
418 */
419 public static byte[] decodeBase64Standard(final String base64String) {
420 return builder().setDecodeTableFormat(DecodeTableFormat.STANDARD).get().decode(base64String);
421 }
422
423 /**
424 * Decodes URL-safe Base64 data into octets.
425 * <p>
426 * This implementation is aligned with
427 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648
428 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. This method skips unknown or unsupported characters.
429 * </p>
430 *
431 * @param base64Data Byte array containing Base64 data.
432 * @return New array containing decoded data.
433 * @since 1.21
434 */
435 public static byte[] decodeBase64UrlSafe(final byte[] base64Data) {
436 return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64Data);
437 }
438
439 /**
440 * Decodes a URL-safe Base64 String into octets.
441 * <p>
442 * This implementation is aligned with
443 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648
444 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>. This method skips unknown or unsupported characters.
445 * </p>
446 *
447 * @param base64String String containing Base64 data.
448 * @return New array containing decoded data.
449 * @since 1.21
450 */
451 public static byte[] decodeBase64UrlSafe(final String base64String) {
452 return builder().setDecodeTableFormat(DecodeTableFormat.URL_SAFE).get().decode(base64String);
453 }
454
455 /**
456 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
457 *
458 * @param array a byte array containing base64 character data.
459 * @return A BigInteger.
460 * @since 1.4
461 */
462 public static BigInteger decodeInteger(final byte[] array) {
463 return new BigInteger(1, decodeBase64(array));
464 }
465
466 /**
467 * Encodes binary data using the base64 algorithm but does not chunk the output.
468 *
469 * @param binaryData binary data to encode.
470 * @return byte[] containing Base64 characters in their UTF-8 representation.
471 */
472 public static byte[] encodeBase64(final byte[] binaryData) {
473 return encodeBase64(binaryData, false);
474 }
475
476 /**
477 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
478 *
479 * @param binaryData Array containing binary data to encode.
480 * @param isChunked if {@code true} this encoder will chunk the base64 output into 76 character blocks.
481 * @return Base64-encoded data.
482 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}.
483 */
484 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) {
485 return encodeBase64(binaryData, isChunked, false);
486 }
487
488 /**
489 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
490 *
491 * @param binaryData Array containing binary data to encode.
492 * @param isChunked if {@code true} this encoder will chunk the base64 output into 76 character blocks.
493 * @param urlSafe if {@code true} this encoder will emit - and _ instead of the usual + and / characters. <strong>No padding is added when encoding using
494 * the URL-safe alphabet.</strong>
495 * @return Base64-encoded data.
496 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE}.
497 * @since 1.4
498 */
499 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) {
500 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE);
501 }
502
503 /**
504 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
505 *
506 * @param binaryData Array containing binary data to encode.
507 * @param isChunked if {@code true} this encoder will chunk the base64 output into 76 character blocks.
508 * @param urlSafe if {@code true} this encoder will emit - and _ instead of the usual + and / characters. <strong>No padding is added when encoding
509 * using the URL-safe alphabet.</strong>
510 * @param maxResultSize The maximum result size to accept.
511 * @return Base64-encoded data.
512 * @throws IllegalArgumentException Thrown when the input array needs an output array bigger than maxResultSize.
513 * @since 1.4
514 */
515 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe, final int maxResultSize) {
516 if (BinaryCodec.isEmpty(binaryData)) {
517 return binaryData;
518 }
519 // Create this so can use the super-class method
520 // Also ensures that the same roundings are performed by the ctor and the code
521 final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe);
522 final long len = b64.getEncodedLength(binaryData);
523 if (len > maxResultSize) {
524 throw new IllegalArgumentException(
525 "Input array too big, the output array would be bigger (" + len + ") than the specified maximum size of " + maxResultSize);
526 }
527 return b64.encode(binaryData);
528 }
529
530 /**
531 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
532 *
533 * @param binaryData binary data to encode.
534 * @return Base64 characters chunked in 76 character blocks.
535 */
536 public static byte[] encodeBase64Chunked(final byte[] binaryData) {
537 return encodeBase64(binaryData, true);
538 }
539
540 /**
541 * Encodes binary data using the base64 algorithm but does not chunk the output.
542 * <p>
543 * <strong> We changed the behavior of this method from multi-line chunking (1.4) to single-line non-chunking (1.5).</strong>
544 * </p>
545 *
546 * @param binaryData binary data to encode.
547 * @return String containing Base64 characters.
548 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not).
549 */
550 public static String encodeBase64String(final byte[] binaryData) {
551 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false));
552 }
553
554 /**
555 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The url-safe variation emits - and _ instead of +
556 * and / characters. <strong>No padding is added.</strong>
557 *
558 * @param binaryData binary data to encode.
559 * @return byte[] containing Base64 characters in their UTF-8 representation.
560 * @since 1.4
561 */
562 public static byte[] encodeBase64URLSafe(final byte[] binaryData) {
563 return encodeBase64(binaryData, false, true);
564 }
565
566 /**
567 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The url-safe variation emits - and _ instead of +
568 * and / characters. <strong>No padding is added.</strong>
569 *
570 * @param binaryData binary data to encode.
571 * @return String containing Base64 characters.
572 * @since 1.4
573 */
574 public static String encodeBase64URLSafeString(final byte[] binaryData) {
575 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true));
576 }
577
578 /**
579 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature.
580 *
581 * @param bigInteger a BigInteger.
582 * @return A byte array containing base64 character data.
583 * @throws NullPointerException if null is passed in.
584 * @since 1.4
585 */
586 public static byte[] encodeInteger(final BigInteger bigInteger) {
587 Objects.requireNonNull(bigInteger, "bigInteger");
588 return encodeBase64(toIntegerBytes(bigInteger), false);
589 }
590
591 /**
592 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the method treats whitespace as valid.
593 *
594 * @param arrayOctet byte array to test.
595 * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; {@code false}, otherwise.
596 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0.
597 */
598 @Deprecated
599 public static boolean isArrayByteBase64(final byte[] arrayOctet) {
600 return isBase64(arrayOctet);
601 }
602
603 /**
604 * Tests whether or not the {@code octet} is in the Base64 alphabet.
605 * <p>
606 * This method threats all characters included within standard base64 and base64url encodings as valid base64 characters. This includes the '+' and '/'
607 * (standard base64), as well as '-' and '_' (URL-safe base64) characters. For enforcing verification against strict standard Base64 or Base64 URL-safe
608 * tables, please use {@link #isBase64Standard(byte)} or {@link #isBase64Url(byte)} methods respectively.
609 * </p>
610 *
611 * @param octet The value to test.
612 * @return {@code true} if the value is defined in the Base64 alphabet, {@code false} otherwise.
613 * @since 1.4
614 */
615 public static boolean isBase64(final byte octet) {
616 return octet == PAD_DEFAULT || octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1;
617 }
618
619 /**
620 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the method treats whitespace as valid.
621 * <p>
622 * This method treats all characters included within standard base64 and base64url encodings as valid base64 characters. This includes the '+' and '/'
623 * (standard base64), as well as '-' and '_' (URL-safe base64) characters. For enforcing verification against strict standard Base64 or Base64 URL-safe
624 * tables, please use {@link #isBase64Standard(byte[])} or {@link #isBase64Url(byte[])} methods respectively.
625 * </p>
626 *
627 * @param arrayOctet byte array to test.
628 * @return {@code true} if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; {@code false}, otherwise.
629 * @since 1.5
630 */
631 public static boolean isBase64(final byte[] arrayOctet) {
632 for (final byte element : arrayOctet) {
633 if (!isBase64(element) && !Character.isWhitespace(element)) {
634 return false;
635 }
636 }
637 return true;
638 }
639
640 /**
641 * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the method treats whitespace as valid.
642 * <p>
643 * This method threats all characters included within standard base64 and base64url encodings as valid base64 characters. This includes the '+' and '/'
644 * (standard base64), as well as '-' and '_' (URL-safe base64) characters. For enforcing verification against strict standard Base64 or Base64 URL-safe
645 * tables, please use {@link #isBase64Standard(String)} or {@link #isBase64Url(String)} methods respectively.
646 * </p>
647 *
648 * @param base64 String to test.
649 * @return {@code true} if all characters in the String are valid characters in the Base64 alphabet or if the String is empty; {@code false}, otherwise.
650 * @since 1.5
651 */
652 public static boolean isBase64(final String base64) {
653 return isBase64(StringUtils.getBytesUtf8(base64));
654 }
655
656 /**
657 * Tests whether or not the {@code octet} is in the standard Base64 alphabet.
658 * <p>
659 * This implementation is aligned with <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The
660 * Base64 Alphabet</a>.
661 * </p>
662 *
663 * @param octet The value to test.
664 * @return {@code true} if the value is defined in the standard Base64 alphabet, {@code false} otherwise.
665 * @since 1.21
666 */
667 public static boolean isBase64Standard(final byte octet) {
668 return octet == PAD_DEFAULT || octet >= 0 && octet < STANDARD_DECODE_TABLE.length && STANDARD_DECODE_TABLE[octet] != -1;
669 }
670
671 /**
672 * Tests a given byte array to see if it contains only valid characters within the standard Base64 alphabet. The method treats whitespace as valid.
673 * <p>
674 * This implementation is aligned with <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The
675 * Base64 Alphabet</a>.
676 * </p>
677 *
678 * @param arrayOctet byte array to test.
679 * @return {@code true} if all bytes are valid characters in the standard Base64 alphabet. {@code false}, otherwise.
680 * @since 1.21
681 */
682 public static boolean isBase64Standard(final byte[] arrayOctet) {
683 for (final byte element : arrayOctet) {
684 if (!isBase64Standard(element) && !Character.isWhitespace(element)) {
685 return false;
686 }
687 }
688 return true;
689 }
690
691 /**
692 * Tests a given String to see if it contains only valid characters within the standard Base64 alphabet. The method treats whitespace as valid.
693 * <p>
694 * This implementation is aligned with <a href="https://www.ietf.org/rfc/rfc2045#:~:text=Table%201%3A%20The%20Base64%20Alphabet">RFC 2045 Table 1: The
695 * Base64 Alphabet</a>.
696 * </p>
697 *
698 * @param base64 String to test.
699 * @return {@code true} if all characters in the String are valid characters in the standard Base64 alphabet or if the String is empty; {@code false},
700 * otherwise.
701 * @since 1.21
702 */
703 public static boolean isBase64Standard(final String base64) {
704 return isBase64Standard(StringUtils.getBytesUtf8(base64));
705 }
706
707 /**
708 * Tests whether or not the {@code octet} is in the URL-safe Base64 alphabet.
709 * <p>
710 * This implementation is aligned with
711 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648
712 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>.
713 * </p>
714 *
715 * @param octet The value to test.
716 * @return {@code true} if the value is defined in the URL-safe Base64 alphabet, {@code false} otherwise.
717 * @since 1.21
718 */
719 public static boolean isBase64Url(final byte octet) {
720 return octet == PAD_DEFAULT || octet >= 0 && octet < URL_SAFE_DECODE_TABLE.length && URL_SAFE_DECODE_TABLE[octet] != -1;
721 }
722
723 /**
724 * Tests a given byte array to see if it contains only valid characters within the URL-safe Base64 alphabet. The method treats whitespace as valid.
725 * <p>
726 * This implementation is aligned with
727 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648
728 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>.
729 * </p>
730 *
731 * @param arrayOctet byte array to test.
732 * @return {@code true} if all bytes are valid characters in the URL-safe Base64 alphabet, {@code false}, otherwise.
733 * @since 1.21
734 */
735 public static boolean isBase64Url(final byte[] arrayOctet) {
736 for (final byte element : arrayOctet) {
737 if (!isBase64Url(element) && !Character.isWhitespace(element)) {
738 return false;
739 }
740 }
741 return true;
742 }
743
744 /**
745 * Tests a given String to see if it contains only valid characters within the URL-safe Base64 alphabet. The method treats whitespace as valid.
746 * <p>
747 * This implementation is aligned with
748 * <a href="https://datatracker.ietf.org/doc/html/rfc4648#:~:text=Table%202%3A%20The%20%22URL%20and%20Filename%20safe%22%20Base%2064%20Alphabet">RFC 4648
749 * Table 2: The "URL and Filename safe" Base 64 Alphabet</a>.
750 * </p>
751 *
752 * @param base64 String to test.
753 * @return {@code true} if all characters in the String are valid characters in the URL-safe Base64 alphabet or if the String is empty; {@code false},
754 * otherwise.
755 * @since 1.21
756 */
757 public static boolean isBase64Url(final String base64) {
758 return isBase64Url(StringUtils.getBytesUtf8(base64));
759 }
760
761 /**
762 * Returns a byte-array representation of a {@code BigInteger} without sign bit.
763 *
764 * @param bigInt {@code BigInteger} to be converted.
765 * @return a byte array representation of the BigInteger parameter.
766 */
767 static byte[] toIntegerBytes(final BigInteger bigInt) {
768 int bitlen = bigInt.bitLength();
769 // round bitlen
770 bitlen = bitlen + 7 >> 3 << 3;
771 final byte[] bigBytes = bigInt.toByteArray();
772 if (bigInt.bitLength() % 8 != 0 && bigInt.bitLength() / 8 + 1 == bitlen / 8) {
773 return bigBytes;
774 }
775 // set up params for copying everything but sign bit
776 int startSrc = 0;
777 int len = bigBytes.length;
778 // if bigInt is exactly byte-aligned, just skip signbit in copy
779 if (bigInt.bitLength() % 8 == 0) {
780 startSrc = 1;
781 len--;
782 }
783 final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec
784 final byte[] resizedBytes = new byte[bitlen / 8];
785 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len);
786 return resizedBytes;
787 }
788
789 static byte[] toUrlSafeEncodeTable(final boolean urlSafe) {
790 return urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE;
791 }
792
/**
 * Line separator for encoding. Not used when decoding. Only used if {@code lineLength > 0}.
 * <p>
 * The constructor sets this to {@code null} when the configured separator is empty or the configured line length is not positive.
 * </p>
 */
private final byte[] lineSeparator;

/**
 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing.
 * {@code encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length} when a line separator is in use, otherwise {@code BYTES_PER_ENCODED_BLOCK}.
 */
private final int encodeSize;

/**
 * {@code true} when the configured encode table has the same content as {@code URL_SAFE_ENCODE_TABLE}.
 */
private final boolean isUrlSafe;

/**
 * {@code true} when the configured encode table has the same content as {@code STANDARD_ENCODE_TABLE}; trailing padding is only written by
 * the encoder when this flag is set.
 */
private final boolean isStandardEncodeTable;
804
/**
 * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 * <p>
 * Equivalent to calling {@link #Base64(int)} with a line length of {@code 0}.
 * </p>
 */
public Base64() {
    this(0);
}
817
/**
 * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length is {@code MIME_CHUNK_SIZE} (76), the line separator is {@code CHUNK_SEPARATOR} (CRLF), and the encoding table is
 * URL_SAFE_ENCODE_TABLE if {@code urlSafe} is {@code true}, otherwise STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param urlSafe if {@code true}, URL-safe encoding is used. In most cases this should be set to {@code false}.
 * @since 1.4
 * @deprecated Use {@link #builder()} and {@link Builder}.
 */
@Deprecated
public Base64(final boolean urlSafe) {
    this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe);
}
835
/**
 * Constructs a Base64 codec from the given builder.
 * <p>
 * A non-empty line separator is always validated, even when the line length disables its use. The separator is only retained for encoding when it
 * is non-empty and the builder's line length is positive.
 * </p>
 *
 * @param builder the builder providing the encode table, line length and line separator.
 * @throws IllegalArgumentException if the encode table does not have the same length as {@code STANDARD_ENCODE_TABLE}, or if a non-empty line
 *                                  separator contains Base64 alphabet or padding bytes.
 */
private Base64(final Builder builder) {
    super(builder);
    final byte[] table = builder.getEncodeTable();
    if (table.length != STANDARD_ENCODE_TABLE.length) {
        throw new IllegalArgumentException("encodeTable must have exactly 64 entries.");
    }
    this.isStandardEncodeTable = Arrays.equals(table, STANDARD_ENCODE_TABLE);
    this.isUrlSafe = Arrays.equals(table, URL_SAFE_ENCODE_TABLE);
    // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0
    // @see test case Base64Test.testConstructors()
    final byte[] separator = builder.getLineSeparator();
    final boolean hasSeparator = separator.length > 0;
    if (hasSeparator && containsAlphabetOrPad(separator)) {
        throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + StringUtils.newStringUtf8(separator) + "]");
    }
    // An empty separator or a non-positive line length forces no chunking rather than throwing IAE.
    final boolean chunking = hasSeparator && builder.getLineLength() > 0;
    this.encodeSize = chunking ? BYTES_PER_ENCODED_BLOCK + separator.length : BYTES_PER_ENCODED_BLOCK;
    this.lineSeparator = chunking ? separator : null;
}
864
/**
 * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
 *                   {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
 * @since 1.4
 * @deprecated Use {@link #builder()} and {@link Builder}.
 */
@Deprecated
public Base64(final int lineLength) {
    this(lineLength, CHUNK_SEPARATOR);
}
886
/**
 * Constructs a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
 *                      {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
 * @param lineSeparator Each line of encoded data will end with this sequence of bytes.
 * @throws IllegalArgumentException Thrown when the provided {@code lineSeparator} contains Base64 characters.
 * @since 1.4
 * @deprecated Use {@link #builder()} and {@link Builder}.
 */
@Deprecated
public Base64(final int lineLength, final byte[] lineSeparator) {
    this(lineLength, lineSeparator, false);
}
910
/**
 * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is URL_SAFE_ENCODE_TABLE if {@code urlSafe}
 * is {@code true}, otherwise STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength    Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
 *                      {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
 * @param lineSeparator Each line of encoded data will end with this sequence of bytes. A {@code null} value is replaced by an empty array before
 *                      it is handed to the builder.
 * @param urlSafe       Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode operations. Decoding
 *                      seamlessly handles both modes. <strong>No padding is added when using the URL-safe alphabet.</strong>
 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base64 characters.
 * @since 1.4
 * @deprecated Use {@link #builder()} and {@link Builder}.
 */
@Deprecated
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) {
    this(builder().setLineLength(lineLength).setLineSeparator(lineSeparator != null ? lineSeparator : EMPTY_BYTE_ARRAY).setPadding(PAD_DEFAULT)
            .setEncodeTableRaw(toUrlSafeEncodeTable(urlSafe)).setDecodingPolicy(DECODING_POLICY_DEFAULT));
}
937
/**
 * Constructs a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
 * <p>
 * When encoding the line length and line separator are given in the constructor, and the encoding table is URL_SAFE_ENCODE_TABLE if {@code urlSafe}
 * is {@code true}, otherwise STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data.
 * </p>
 * <p>
 * When decoding all variants are supported.
 * </p>
 *
 * @param lineLength     Each line of encoded data will be at most of the given length (rounded down to the nearest multiple of 4). If
 *                       {@code lineLength <= 0}, then the output will not be divided into lines (chunks). Ignored when decoding.
 * @param lineSeparator  Each line of encoded data will end with this sequence of bytes. A {@code null} value is replaced by an empty array before
 *                       it is handed to the builder, exactly as {@link #Base64(int, byte[], boolean)} does, so that no line separator is
 *                       configured for encoding.
 * @param urlSafe        Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode operations. Decoding
 *                       seamlessly handles both modes. <strong>No padding is added when using the URL-safe alphabet.</strong>
 * @param decodingPolicy The decoding policy.
 * @throws IllegalArgumentException Thrown when the {@code lineSeparator} contains Base64 characters.
 * @since 1.15
 * @deprecated Use {@link #builder()} and {@link Builder}.
 */
@Deprecated
public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe, final CodecPolicy decodingPolicy) {
    // Guard against a null separator the same way the three-argument constructor does; the builder must never see null here.
    this(builder().setLineLength(lineLength).setLineSeparator(lineSeparator != null ? lineSeparator : EMPTY_BYTE_ARRAY).setPadding(PAD_DEFAULT)
            .setEncodeTableRaw(toUrlSafeEncodeTable(urlSafe)).setDecodingPolicy(decodingPolicy));
}
965
/**
 * <p>
 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once with the data to decode, and once with
 * inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" call is only required to flush a trailing partial group when the input is
 * not terminated by a padding byte; otherwise it does no harm.
 * </p>
 * <p>
 * Ignores all non-base64 characters. This is how chunked (for example 76 character) data is handled, since CR and LF are silently ignored, but has
 * implications for other bytes, too. This method subscribes to the garbage-in, garbage-out philosophy: it will not check the provided data for validity,
 * apart from the trailing-character checks performed when strict decoding is enabled.
 * </p>
 * <p>
 * Once the context is marked as EOF (by a negative {@code inAvail} or by the first padding byte), further calls return immediately.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 *
 * @param input   byte[] array of ASCII data to base64 decode.
 * @param inPos   Position to start reading data from.
 * @param inAvail Amount of bytes available from input for decoding.
 * @param context the context to be used.
 * @throws IllegalArgumentException if strict decoding is enabled and the trailing characters are not a possible encoding.
 */
@Override
void decode(final byte[] input, int inPos, final int inAvail, final Context context) {
    if (context.eof) {
        return;
    }
    if (inAvail < 0) {
        context.eof = true;
    }
    final int decodeSize = this.encodeSize - 1;
    for (int i = 0; i < inAvail; i++) {
        final byte[] buffer = ensureBufferSize(decodeSize, context);
        final byte b = input[inPos++];
        if (b == pad) {
            // We're done.
            context.eof = true;
            break;
        }
        // Bytes outside the decode table, or mapped to a negative entry, are skipped silently.
        if (b >= 0 && b < decodeTable.length) {
            final int result = decodeTable[b];
            if (result >= 0) {
                context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK;
                context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result;
                // A full encoded block has been accumulated: emit its three bytes, most significant first.
                if (context.modulus == 0) {
                    buffer[context.pos++] = (byte) (context.ibitWorkArea >> 16 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
                    buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
                }
            }
        }
    }

    // Two forms of EOF as far as base64 decoder is concerned: actual
    // EOF (-1) and first time '=' character is encountered in stream.
    // This approach makes the '=' padding characters completely optional.
    if (context.eof && context.modulus != 0) {
        final byte[] buffer = ensureBufferSize(decodeSize, context);

        // We have some spare bits remaining
        // Output all whole multiples of 8 bits and ignore the rest
        switch (context.modulus) {
            // case 0 : // impossible, as excluded above
            case 1 : // 6 bits - either ignore entirely, or raise an exception
                validateTrailingCharacter();
                break;
            case 2 : // 12 bits = 8 + 4
                validateCharacter(MASK_4_BITS, context);
                context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits
                buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
                break;
            case 3 : // 18 bits = 8 + 8 + 2
                validateCharacter(MASK_2_BITS, context);
                context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits
                buffer[context.pos++] = (byte) (context.ibitWorkArea >> 8 & MASK_8BITS);
                buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS);
                break;
            default:
                throw new IllegalStateException("Impossible modulus " + context.modulus);
        }
    }
}
1045
/**
 * <p>
 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with the data to encode, and once with
 * inAvail set to "-1" to alert encoder that EOF has been reached, to flush last remaining bytes (if not multiple of 3).
 * </p>
 * <p>
 * <strong>No padding is added when encoding using the URL-safe alphabet.</strong> More precisely, padding is only written when the configured encode
 * table equals STANDARD_ENCODE_TABLE.
 * </p>
 * <p>
 * Once the context is marked as EOF, further calls return immediately.
 * </p>
 * <p>
 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach.
 * https://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/
 * </p>
 *
 * @param in      byte[] array of binary data to base64 encode.
 * @param inPos   Position to start reading data from.
 * @param inAvail Amount of bytes available from input for encoding.
 * @param context the context to be used.
 */
@Override
void encode(final byte[] in, int inPos, final int inAvail, final Context context) {
    if (context.eof) {
        return;
    }
    // inAvail < 0 is how we're informed of EOF in the underlying data we're
    // encoding.
    if (inAvail < 0) {
        context.eof = true;
        if (0 == context.modulus && lineLength == 0) {
            return; // no leftovers to process and not using chunking
        }
        final byte[] buffer = ensureBufferSize(encodeSize, context);
        // Remember where this flush started so the number of bytes written can be added to the line position.
        final int savedPos = context.pos;
        switch (context.modulus) { // 0-2
            case 0 : // nothing to do here
                break;
            case 1 : // 8 bits = 6 + 2
                // top 6 bits:
                buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 2 & MASK_6_BITS];
                // remaining 2:
                buffer[context.pos++] = encodeTable[context.ibitWorkArea << 4 & MASK_6_BITS];
                // URL-SAFE skips the padding to further reduce size.
                if (isStandardEncodeTable) {
                    buffer[context.pos++] = pad;
                    buffer[context.pos++] = pad;
                }
                break;

            case 2 : // 16 bits = 6 + 6 + 4
                buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 10 & MASK_6_BITS];
                buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 4 & MASK_6_BITS];
                buffer[context.pos++] = encodeTable[context.ibitWorkArea << 2 & MASK_6_BITS];
                // URL-SAFE skips the padding to further reduce size.
                if (isStandardEncodeTable) {
                    buffer[context.pos++] = pad;
                }
                break;
            default:
                throw new IllegalStateException("Impossible modulus " + context.modulus);
        }
        context.currentLinePos += context.pos - savedPos; // keep track of current line position
        // if currentPos == 0 we are at the start of a line, so don't add CRLF
        if (lineLength > 0 && context.currentLinePos > 0) {
            System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
            context.pos += lineSeparator.length;
        }
    } else {
        for (int i = 0; i < inAvail; i++) {
            final byte[] buffer = ensureBufferSize(encodeSize, context);
            context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK;
            // Treat the input byte as unsigned (0-255) before folding it into the work area.
            int b = in[inPos++];
            if (b < 0) {
                b += 256;
            }
            context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE
            if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract
                buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 18 & MASK_6_BITS];
                buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 12 & MASK_6_BITS];
                buffer[context.pos++] = encodeTable[context.ibitWorkArea >> 6 & MASK_6_BITS];
                buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6_BITS];
                context.currentLinePos += BYTES_PER_ENCODED_BLOCK;
                // The current line is full: write the separator and start a new line.
                if (lineLength > 0 && lineLength <= context.currentLinePos) {
                    System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length);
                    context.pos += lineSeparator.length;
                    context.currentLinePos = 0;
                }
            }
        }
    }
}
1135
1136 /**
1137 * Gets the line separator (for testing only).
1138 *
1139 * @return the line separator.
1140 */
1141 byte[] getLineSeparator() {
1142 return lineSeparator;
1143 }
1144
1145 /**
1146 * Returns whether or not the {@code octet} is in the Base64 alphabet.
1147 *
1148 * @param octet The value to test.
1149 * @return {@code true} if the value is defined in the Base64 alphabet {@code false} otherwise.
1150 */
1151 @Override
1152 protected boolean isInAlphabet(final byte octet) {
1153 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1;
1154 }
1155
1156 /**
1157 * Returns our current encode mode. True if we're URL-safe, false otherwise.
1158 *
1159 * @return true if we're in URL-safe mode, false otherwise.
1160 * @since 1.4
1161 */
1162 public boolean isUrlSafe() {
1163 return isUrlSafe;
1164 }
1165
1166 /**
1167 * Validates whether decoding the final trailing character is possible in the context of the set of possible Base64 values.
1168 * <p>
1169 * The character is valid if the lower bits within the provided mask are zero. This is used to test the final trailing base-64 digit is zero in the bits
1170 * that will be discarded.
1171 * </p>
1172 *
1173 * @param emptyBitsMask The mask of the lower bits that should be empty.
1174 * @param context the context to be used.
1175 * @throws IllegalArgumentException if the bits being checked contain any non-zero value.
1176 */
1177 private void validateCharacter(final int emptyBitsMask, final Context context) {
1178 if (isStrictDecoding() && (context.ibitWorkArea & emptyBitsMask) != 0) {
1179 throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
1180 "Base64 alphabet but not a possible encoding. Expected the discarded bits from the character to be zero.");
1181 }
1182 }
1183
1184 /**
1185 * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
1186 *
1187 * @throws IllegalArgumentException if strict decoding is enabled.
1188 */
1189 private void validateTrailingCharacter() {
1190 if (isStrictDecoding()) {
1191 throw new IllegalArgumentException("Strict decoding: Last encoded character (before the paddings if any) is a valid " +
1192 "Base64 alphabet but not a possible encoding. Decoding requires at least two trailing 6-bit characters to create bytes.");
1193 }
1194 }
1195
1196 }