View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.util.Arrays;
21  
22  import org.apache.commons.codec.CodecPolicy;
23  
24  /**
25   * Provides Base16 encoding and decoding as defined by <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>.
26   *
27   * <p>
28   * This class is thread-safe.
29   * </p>
30   * <p>
31   * This implementation strictly follows RFC 4648, and as such unlike the {@link Base32} and {@link Base64} implementations, it does not ignore invalid alphabet
32   * characters or whitespace, neither does it offer chunking or padding characters.
33   * </p>
34   * <p>
35   * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
36   * alphabet.
37   * </p>
38   *
39   * @see Base16InputStream
40   * @see Base16OutputStream
41   * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
42   * @since 1.15
43   */
44  public class Base16 extends BaseNCodec {
45  
46      /**
47       * Builds {@link Base16} instances.
48       *
49       * <p>
50       * To configure a new instance, use a {@link Builder}. For example:
51       * </p>
52       *
53       * <pre>
54       * Base16 Base16 = Base16.builder()
55       *   .setDecodingPolicy(DecodingPolicy.LENIENT) // default is lenient
56       *   .get()
57       * </pre>
58       *
59       * @since 1.20.0
60       */
61      public static class Builder extends AbstractBuilder<Base16, Builder> {
62  
63          /**
64           * Constructs a new instance.
65           */
66          public Builder() {
67              super(null);
68              setDecodeTable(UPPER_CASE_DECODE_TABLE);
69              setEncodeTable(UPPER_CASE_ENCODE_TABLE);
70              setEncodedBlockSize(BYTES_PER_ENCODED_BLOCK);
71              setUnencodedBlockSize(BYTES_PER_UNENCODED_BLOCK);
72              setLineLength(0);
73              setLineSeparator(EMPTY_BYTE_ARRAY);
74          }
75  
76          @Override
77          public Base16 get() {
78              return new Base16(this);
79          }
80  
81          @Override
82          public Builder setEncodeTable(final byte... encodeTable) {
83              super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
84              return super.setEncodeTable(encodeTable);
85          }
86  
87          /**
88           * Sets whether to use the lower-case Base16 alphabet.
89           *
90           * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
91           * @return {@code this} instance.
92           */
93          public Builder setLowerCase(final boolean lowerCase) {
94              setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
95              return asThis();
96          }
97  
98      }
99  
100     /**
101      * BASE16 characters are 4 bits in length. They are formed by taking an 8-bit group, which is converted into two BASE16 characters.
102      */
103     private static final int BITS_PER_ENCODED_BYTE = 4;
104 
105     private static final int BYTES_PER_ENCODED_BLOCK = 2;
106 
107     private static final int BYTES_PER_UNENCODED_BLOCK = 1;
108 
109     /**
110      * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified in Table 5 of RFC 4648) into their 4-bit
111      * positive integer equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
112      */
113     // @formatter:off
114     private static final byte[] UPPER_CASE_DECODE_TABLE = {
115             //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
116             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
117             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
118             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
119              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
120             -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
121     };
122     // @formatter:on
123 
124     /**
125      * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" equivalents as specified in Table 5 of RFC
126      * 4648.
127      */
128     private static final byte[] UPPER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
129 
130     /**
131      * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" into their 4-bit positive integer
132      * equivalents. Characters that are not in the Base16 alphabet but fall within the bounds of the array are translated to -1.
133      */
134     // @formatter:off
135     private static final byte[] LOWER_CASE_DECODE_TABLE = {
136             //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
137             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
138             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
139             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
140              0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
141             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
142             -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
143             -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
144     };
145     // @formatter:on
146 
147     /**
148      * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" lower-case equivalents.
149      */
150     private static final byte[] LOWER_CASE_ENCODE_TABLE = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
151 
152     /** Mask used to extract 4 bits, used when decoding character. */
153     private static final int MASK_4_BITS = 0x0f;
154 
155     /**
156      * Constructs a new builder.
157      *
158      * @return a new builder.
159      * @since 1.20.0
160      */
161     public static Builder builder() {
162         return new Builder();
163     }
164 
165     /**
166      * Constructs a Base16 codec used for decoding and encoding.
167      */
168     public Base16() {
169         this(false);
170     }
171 
172     /**
173      * Constructs a Base16 codec used for decoding and encoding.
174      *
175      * @param lowerCase {@code true} to use the lower-case Base16 alphabet.
176      * @deprecated Use {@link #builder()} and {@link Builder}.
177      */
178     @Deprecated
179     public Base16(final boolean lowerCase) {
180         this(lowerCase, DECODING_POLICY_DEFAULT);
181     }
182 
183     /**
184      * Constructs a Base16 codec used for decoding and encoding.
185      *
186      * @param lowerCase      {@code true} to use the lower-case Base16 alphabet.
187      * @param decodingPolicy Decoding policy.
188      * @deprecated Use {@link #builder()} and {@link Builder}.
189      */
190     @Deprecated
191     public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
192         this(builder().setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE).setDecodingPolicy(decodingPolicy));
193     }
194 
195     private Base16(final Builder builder) {
196         super(builder);
197     }
198 
199     @Override
200     void decode(final byte[] data, int offset, final int length, final Context context) {
201         if (context.eof || length < 0) {
202             context.eof = true;
203             if (context.ibitWorkArea != 0) {
204                 validateTrailingCharacter();
205             }
206             return;
207         }
208         final int dataLen = Math.min(data.length - offset, length);
209         final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
210         // small optimization to short-cut the rest of this method when it is fed byte-by-byte
211         if (availableChars == 1 && availableChars == dataLen) {
212             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
213             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
214             return;
215         }
216         // we must have an even number of chars to decode
217         final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
218         final int end = offset + dataLen;
219         final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
220         int result;
221         if (dataLen < availableChars) {
222             // we have 1/2 byte from previous invocation to decode
223             result = context.ibitWorkArea - 1 << BITS_PER_ENCODED_BYTE;
224             result |= decodeOctet(data[offset++]);
225             buffer[context.pos++] = (byte) result;
226             // reset to empty-value for next invocation!
227             context.ibitWorkArea = 0;
228         }
229         final int loopEnd = end - 1;
230         while (offset < loopEnd) {
231             result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
232             result |= decodeOctet(data[offset++]);
233             buffer[context.pos++] = (byte) result;
234         }
235         // we have one char of a hex-pair left over
236         if (offset < end) {
237             // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
238             context.ibitWorkArea = decodeOctet(data[offset]) + 1;
239         }
240     }
241 
242     private int decodeOctet(final byte octet) {
243         int decoded = -1;
244         if ((octet & 0xff) < decodeTable.length) {
245             decoded = decodeTable[octet];
246         }
247         if (decoded == -1) {
248             throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
249         }
250         return decoded;
251     }
252 
253     @Override
254     void encode(final byte[] data, final int offset, final int length, final Context context) {
255         if (context.eof) {
256             return;
257         }
258         if (length < 0) {
259             context.eof = true;
260             return;
261         }
262         final int size = length * BYTES_PER_ENCODED_BLOCK;
263         if (size < 0) {
264             throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
265         }
266         final byte[] buffer = ensureBufferSize(size, context);
267         final int end = offset + length;
268         for (int i = offset; i < end; i++) {
269             final int value = data[i];
270             final int high = value >> BITS_PER_ENCODED_BYTE & MASK_4_BITS;
271             final int low = value & MASK_4_BITS;
272             buffer[context.pos++] = encodeTable[high];
273             buffer[context.pos++] = encodeTable[low];
274         }
275     }
276 
277     /**
278      * Returns whether or not the {@code octet} is in the Base16 alphabet.
279      *
280      * @param octet The value to test.
281      * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
282      */
283     @Override
284     public boolean isInAlphabet(final byte octet) {
285         return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
286     }
287 
288     /**
289      * Validates whether decoding allows an entire final trailing character that cannot be used for a complete byte.
290      *
291      * @throws IllegalArgumentException if strict decoding is enabled.
292      */
293     private void validateTrailingCharacter() {
294         if (isStrictDecoding()) {
295             throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid Base 16 alphabet character but not a possible encoding. " +
296                     "Decoding requires at least two characters to create one byte.");
297         }
298     }
299 }