1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.util.Arrays;
21
22 import org.apache.commons.codec.CodecPolicy;
23
24 /**
25 * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
26 *
27 * <p>
28 * This class is thread-safe.
29 * </p>
30 * <p>
31 * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
32 * characters or whitespace, neither does it offer chunking or padding characters.
33 * </p>
34 * <p>
35 * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36 * alphabet.
37 * </p>
38 *
39 * @see Base16InputStream
40 * @see Base16OutputStream
41 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
42 * @since 1.15
43 */
44 public class Base16 extends BaseNCodec {
45
46 /**
47 * Builds {@link Base16} instances.
48 *
49 * <p>
50 * To configure a new instance, use a {@link Builder}. For example:
51 * </p>
52 *
53 * <pre>
54 * Base16 Base16 = Base16.builder()
55 * .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient
56 * .get()
57 * </pre>
58 *
59 * @since 1.20.0
60 */
61 public static class Builder extends AbstractBuilder<Base16, Builder> {
62
63 /**
64 * Constructs a new instance.
65 */
66 public Builder() {
67 super(null);
68 setDecodeTable(UPPER_CASE_DECODE_TABLE);
69 setEncodeTable(UPPER_CASE_ENCODE_TABLE);
70 setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
71 setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
72 setLineLength(0);
73 setLineSeparator(EMPTY_BYTE_ARRAY);
74 }
75
76 @Override
77 public Base16 get() {
78 return new Base16(this);
79 }
80
81 @Override
82 public Builder setEncodeTable(final byte... encodeTable) {
83 super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
84 return super.setEncodeTable(encodeTable);
85 }
86
87 /**
88 * Sets whether to use the lower-case Base16 alphabet.
89 *
90 * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
91 * @return {@code this} instance.
92 */
93 public Builder setLowerCase(final boolean lowerCase) {
94 setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
95 return asThis();
96 }
97
98 }
99
100 /**
101 * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
102 */
103 private static final int BITS_PER_ENCODED_BYTE = 4;
104
105 private static final int BYTES_PER_ENCODED_BLOCK = 2;
106
107 private static final int BYTES_PER_UNENCODED_BLOCK = 1;
108
109 /**
110 * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
111 * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
112 */
113 // @formatter:off
114 private static final byte[] UPPER_CASE_DECODE_TABLE = {
115 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
116 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
118 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
119 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
120 -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F
121 };
122 // @formatter:on
123
124 /**
125 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
126 * 4648.
127 */
128 private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
129
130 /**
131 * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
132 * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
133 */
134 // @formatter:off
135 private static final byte[] LOWER_CASE_DECODE_TABLE = {
136 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
137 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
138 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
139 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
140 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
141 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
142 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
143 -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f
144 };
145 // @formatter:on
146
147 /**
148 * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
149 */
150 private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
151
152 /** Mask used to extract 4 bits, used when decoding character. */
153 private static final int MASK_4_BITS = 0x0f;
154
155 /**
156 * Constructs a new builder.
157 *
158 * @return a new builder.
159 * @since 1.20.0
160 */
161 public static Builder builder() {
162 return new Builder();
163 }
164
165 /**
166 * Constructs a Base16 codec used for decoding and encoding.
167 */
168 public Base16() {
169 this(false);
170 }
171
172 /**
173 * Constructs a Base16 codec used for decoding and encoding.
174 *
175 * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
176 * @deprecated Use {@link #builder()} and {@link Builder}.
177 */
178 @Deprecated
179 public Base16(final boolean lowerCase) {
180 this(lowerCase, DECODING_POLICY_DEFAULT);
181 }
182
183 /**
184 * Constructs a Base16 codec used for decoding and encoding.
185 *
186 * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
187 * @param decodingPolicy Decoding policy.
188 * @deprecated Use {@link #builder()} and {@link Builder}.
189 */
190 @Deprecated
191 public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
192 this(builder().setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE).setDecodingPolicy(decodingPolicy));
193 }
194
195 private Base16(final Builder builder) {
196 super(builder);
197 }
198
199 @Override
200 void decode(final byte[] data, int offset, final int length, final Context context) {
201 if (context.eof || length < 0) {
202 context.eof = true;
203 if (context.ibitWorkArea != 0) {
204 validateTrailingCharacter();
205 }
206 return;
207 }
208 final int dataLen = Math.min(data.length - offset, length);
209 final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
210 // small optimization to short-cut the rest of this method when it is fed byte-by-byte
211 if (availableChars == 1 && availableChars == dataLen) {
212 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
213 context.ibitWorkArea = decodeOctet(data[offset]) + 1;
214 return;
215 }
216 // we must have an even number of chars to decode
217 final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
218 final int end = offset + dataLen;
219 final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
220 int result;
221 if (dataLen < availableChars) {
222 // we have 1/2 byte from previous invocation to decode
223 result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
224 result |= decodeOctet(data[offset++]);
225 buffer[context.pos++] = (byte) result;
226 // reset to empty-value for next invocation!
227 context.ibitWorkArea = 0;
228 }
229 final int loopEnd = end - 1;
230 while (offset < loopEnd) {
231 result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
232 result |= decodeOctet(data[offset++]);
233 buffer[context.pos++] = (byte) result;
234 }
235 // we have one char of a hex-pair left over
236 if (offset < end) {
237 // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
238 context.ibitWorkArea = decodeOctet(data[offset]) + 1;
239 }
240 }
241
242 private int decodeOctet(final byte octet) {
243 int decoded = -1;
244 if ((octet & 0xff) < decodeTable.length) {
245 decoded = decodeTable[octet];
246 }
247 if (decoded == -1) {
248 throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
249 }
250 return decoded;
251 }
252
253 @Override
254 void encode(final byte[] data, final int offset, final int length, final Context context) {
255 if (context.eof) {
256 return;
257 }
258 if (length < 0) {
259 context.eof = true;
260 return;
261 }
262 final int size = length * BYTES_PER_ENCODED_BLOCK;
263 if (size < 0) {
264 throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
265 }
266 final byte[] buffer = ensureBufferSize(size, context);
267 final int end = offset + length;
268 for (int i = offset; i < end; i++) {
269 final int value = data[i];
270 final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4_BITS;
271 final int low = value & MASK_4_BITS;
272 buffer[context.pos++] = encodeTable[high];
273 buffer[context.pos++] = encodeTable[low];
274 }
275 }
276
277 /**
278 * Returns whether or not the {@code octet} is in the Base16 alphabet.
279 *
280 * @param octet The value to test.
281 * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
282 */
283 @Override
284 public boolean isInAlphabet(final byte octet) {
285 return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
286 }
287
288 /**
289 * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
290 *
291 * @throws IllegalArgumentException if strict decoding is enabled.
292 */
293 private void validateTrailingCharacter() {
294 if (isStrictDecoding()) {
295 throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid Base 16 alphabet character but not a possible encoding. " +
296 "Decoding requires at least two characters to create one byte.");
297 }
298 }
299 }