/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.imaging.common;
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.ByteArrayOutputStream;
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.PushbackInputStream;
24  import java.util.Map;
25  
26  import org.apache.commons.imaging.ImagingException;
27  import org.apache.commons.lang3.StringUtils;
28  
29  /**
30   * A rudimentary preprocessor and parser for the C programming language.
31   *
32   * FIXME replace this by a parser generated via ANTLR (if we really need it?!)
33   */
34  public class BasicCParser {
35      /**
36       * Parses the hexadecimal-base escape-sequence found at index {@code i} of {@code string}.
37       *
38       * <p>
39       * Helper-function for {@code unescapeString()}.
40       * </p>
41       *
42       * @param i             the index of the escape-sequence in the string
43       * @param stringBuilder the stringBuilder to append the escape-char to
44       * @param string        the string whose chars are parsed
45       * @return the new index i
46       * @since 1.0-alpha3
47       */
48      private static int appendHex(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
49          if (i + 2 >= string.length()) {
50              throw new ImagingException("Parsing XPM file failed, " + "hex constant in string too short");
51          }
52          final char hex1 = string.charAt(i + 1);
53          final char hex2 = string.charAt(i + 2);
54          i += 2;
55          final int constant;
56          try {
57              constant = Integer.parseInt(hex1 + Character.toString(hex2), 16);
58          } catch (final NumberFormatException nfe) {
59              throw new ImagingException("Parsing XPM file failed, " + "hex constant invalid", nfe);
60          }
61          stringBuilder.append((char) constant);
62          return i;
63      }
64  
65      /**
66       * Parses the octal-base escape-sequence found at index {@code i} of {@code string}.
67       *
68       * <p>
69       * Helper-function for {@code unescapeString()}.
70       * </p>
71       *
72       * @param i             the index of the escape-sequence in the string
73       * @param stringBuilder the stringBuilder to append the escape-char to
74       * @param string        the string whose chars are parsed
75       * @return the new index i
76       * @since 1.0-alpha3
77       */
78      private static int appendOct(int i, final StringBuilder stringBuilder, final String string) {
79          int length = 1;
80          if (i + 1 < string.length() && '0' <= string.charAt(i + 1) && string.charAt(i + 1) <= '7') {
81              ++length;
82          }
83          if (i + 2 < string.length() && '0' <= string.charAt(i + 2) && string.charAt(i + 2) <= '7') {
84              ++length;
85          }
86          int constant = 0;
87          for (int j = 0; j < length; j++) {
88              constant *= 8;
89              constant += string.charAt(i + j) - '0';
90          }
91          i += length - 1;
92          stringBuilder.append((char) constant);
93          return i;
94      }
95  
96      /**
97       * Parses the {@code i:th} escape-char in the input {@code string} and appends it to {@code stringBuilder}.
98       *
99       * <p>
100      * Helper-function for {@code unescapeString()}.
101      * </p>
102      *
103      * @param i             the index of the escape-char in the string
104      * @param stringBuilder the stringBuilder to append the escape-char to
105      * @param string        the string whose chars are parsed
106      * @return the new index i
107      * @since 1.0-alpha3
108      */
109     private static int parseEscape(int i, final StringBuilder stringBuilder, final String string) throws ImagingException {
110         final char c = string.charAt(i);
111         switch (c) {
112         case '\\':
113             stringBuilder.append('\\');
114             break;
115         case '"':
116             stringBuilder.append('"');
117             break;
118         case '\'':
119             stringBuilder.append('\'');
120             break;
121         case 'x':
122             i = appendHex(i, stringBuilder, string);
123             break;
124         case '0':
125         case '1':
126         case '2':
127         case '3':
128         case '4':
129         case '5':
130         case '6':
131         case '7':
132             i = appendOct(i, stringBuilder, string);
133             break;
134         case 'a':
135             stringBuilder.append((char) 0x07);
136             break;
137         case 'b':
138             stringBuilder.append((char) 0x08);
139             break;
140         case 'f':
141             stringBuilder.append((char) 0x0c);
142             break;
143         case 'n':
144             stringBuilder.append((char) 0x0a);
145             break;
146         case 'r':
147             stringBuilder.append((char) 0x0d);
148             break;
149         case 't':
150             stringBuilder.append((char) 0x09);
151             break;
152         case 'v':
153             stringBuilder.append((char) 0x0b);
154             break;
155         default:
156             throw new ImagingException("Parsing XPM file failed, " + "invalid escape sequence");
157         }
158         return i;
159 
160     }
161 
162     public static ByteArrayOutputStream preprocess(final InputStream is, final StringBuilder firstComment, final Map<String, String> defines)
163             throws IOException, ImagingException {
164         boolean inSingleQuotes = false;
165         boolean inString = false;
166         boolean inComment = false;
167         boolean inDirective = false;
168         boolean hadSlash = false;
169         boolean hadStar = false;
170         boolean hadBackSlash = false;
171         final ByteArrayOutputStream out = new ByteArrayOutputStream();
172         boolean seenFirstComment = firstComment == null;
173         final StringBuilder directiveBuffer = new StringBuilder();
174         for (int c = is.read(); c != -1; c = is.read()) {
175             if (inComment) {
176                 if (c == '*') {
177                     if (hadStar && !seenFirstComment) {
178                         firstComment.append('*');
179                     }
180                     hadStar = true;
181                 } else if (c == '/') {
182                     if (hadStar) {
183                         hadStar = false;
184                         inComment = false;
185                         seenFirstComment = true;
186                     } else if (!seenFirstComment) {
187                         firstComment.append((char) c);
188                     }
189                 } else {
190                     if (hadStar && !seenFirstComment) {
191                         firstComment.append('*');
192                     }
193                     hadStar = false;
194                     if (!seenFirstComment) {
195                         firstComment.append((char) c);
196                     }
197                 }
198             } else if (inSingleQuotes) {
199                 switch (c) {
200                 case '\\':
201                     if (hadBackSlash) {
202                         out.write('\\');
203                         out.write('\\');
204                         hadBackSlash = false;
205                     } else {
206                         hadBackSlash = true;
207                     }
208                     break;
209                 case '\'':
210                     if (hadBackSlash) {
211                         out.write('\\');
212                         hadBackSlash = false;
213                     } else {
214                         inSingleQuotes = false;
215                     }
216                     out.write('\'');
217                     break;
218                 case '\r':
219                 case '\n':
220                     throw new ImagingException("Unterminated single quote in file");
221                 default:
222                     if (hadBackSlash) {
223                         out.write('\\');
224                         hadBackSlash = false;
225                     }
226                     out.write(c);
227                     break;
228                 }
229             } else if (inString) {
230                 switch (c) {
231                 case '\\':
232                     if (hadBackSlash) {
233                         out.write('\\');
234                         out.write('\\');
235                         hadBackSlash = false;
236                     } else {
237                         hadBackSlash = true;
238                     }
239                     break;
240                 case '"':
241                     if (hadBackSlash) {
242                         out.write('\\');
243                         hadBackSlash = false;
244                     } else {
245                         inString = false;
246                     }
247                     out.write('"');
248                     break;
249                 case '\r':
250                 case '\n':
251                     throw new ImagingException("Unterminated string in file");
252                 default:
253                     if (hadBackSlash) {
254                         out.write('\\');
255                         hadBackSlash = false;
256                     }
257                     out.write(c);
258                     break;
259                 }
260             } else if (inDirective) {
261                 if (c == '\r' || c == '\n') {
262                     inDirective = false;
263                     final String[] tokens = tokenizeRow(directiveBuffer.toString());
264                     if (tokens.length < 2 || tokens.length > 3) {
265                         throw new ImagingException("Bad preprocessor directive");
266                     }
267                     if (!tokens[0].equals("define")) {
268                         throw new ImagingException("Invalid/unsupported " + "preprocessor directive '" + tokens[0] + "'");
269                     }
270                     defines.put(tokens[1], tokens.length == 3 ? tokens[2] : null);
271                     directiveBuffer.setLength(0);
272                 } else {
273                     directiveBuffer.append((char) c);
274                 }
275             } else {
276                 switch (c) {
277                 case '/':
278                     if (hadSlash) {
279                         out.write('/');
280                     }
281                     hadSlash = true;
282                     break;
283                 case '*':
284                     if (hadSlash) {
285                         inComment = true;
286                         hadSlash = false;
287                     } else {
288                         out.write(c);
289                     }
290                     break;
291                 case '\'':
292                     if (hadSlash) {
293                         out.write('/');
294                     }
295                     hadSlash = false;
296                     out.write(c);
297                     inSingleQuotes = true;
298                     break;
299                 case '"':
300                     if (hadSlash) {
301                         out.write('/');
302                     }
303                     hadSlash = false;
304                     out.write(c);
305                     inString = true;
306                     break;
307                 case '#':
308                     if (defines == null) {
309                         throw new ImagingException("Unexpected preprocessor directive");
310                     }
311                     inDirective = true;
312                     break;
313                 default:
314                     if (hadSlash) {
315                         out.write('/');
316                     }
317                     hadSlash = false;
318                     out.write(c);
319                     // Only whitespace allowed before first comment:
320                     if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
321                         seenFirstComment = true;
322                     }
323                     break;
324                 }
325             }
326         }
327         if (hadSlash) {
328             out.write('/');
329         }
330         if (hadStar) {
331             out.write('*');
332         }
333         if (inString) {
334             throw new ImagingException("Unterminated string at the end of file");
335         }
336         if (inComment) {
337             throw new ImagingException("Unterminated comment at the end of file");
338         }
339         return out;
340     }
341 
342     public static String[] tokenizeRow(final String row) {
343         final String[] tokens = row.split("[ \t]");
344         int numLiveTokens = 0;
345         for (final String token : tokens) {
346             if (StringUtils.isNotEmpty(token)) {
347                 ++numLiveTokens;
348             }
349         }
350         final String[] liveTokens = Allocator.array(numLiveTokens, String[]::new, 24);
351         int next = 0;
352         for (final String token : tokens) {
353             if (StringUtils.isNotEmpty(token)) {
354                 liveTokens[next++] = token;
355             }
356         }
357         return liveTokens;
358     }
359 
360     public static void unescapeString(final StringBuilder stringBuilder, final String string) throws ImagingException {
361         if (string.length() < 2) {
362             throw new ImagingException("Parsing XPM file failed, " + "string is too short");
363         }
364         if (string.charAt(0) != '"' || string.charAt(string.length() - 1) != '"') {
365             throw new ImagingException("Parsing XPM file failed, " + "string not surrounded by '\"'");
366         }
367         boolean hadBackSlash = false;
368         for (int i = 1; i < string.length() - 1; i++) {
369             final char c = string.charAt(i);
370             if (hadBackSlash) {
371                 i = parseEscape(i, stringBuilder, string);
372                 hadBackSlash = false;
373             } else if (c == '\\') {
374                 hadBackSlash = true;
375             } else if (c == '"') {
376                 throw new ImagingException("Parsing XPM file failed, " + "extra '\"' found in string");
377             } else {
378                 stringBuilder.append(c);
379             }
380         }
381         if (hadBackSlash) {
382             throw new ImagingException("Parsing XPM file failed, " + "unterminated escape sequence found in string");
383         }
384     }
385 
386     private final PushbackInputStream is;
387 
388     public BasicCParser(final ByteArrayInputStream is) {
389         this.is = new PushbackInputStream(is);
390     }
391 
392     public String nextToken() throws IOException, ImagingException {
393         // I don't know how complete the C parsing in an XPM file
394         // is meant to be, this is just the very basics...
395 
396         boolean inString = false;
397         boolean inIdentifier = false;
398         boolean hadBackSlash = false;
399         final StringBuilder token = new StringBuilder();
400         for (int c = is.read(); c != -1; c = is.read()) {
401             if (inString) {
402                 switch (c) {
403                 case '\\':
404                     token.append('\\');
405                     hadBackSlash = !hadBackSlash;
406                     break;
407                 case '"':
408                     token.append('"');
409                     if (!hadBackSlash) {
410                         return token.toString();
411                     }
412                     hadBackSlash = false;
413                     break;
414                 case '\r':
415                 case '\n':
416                     throw new ImagingException("Unterminated string in XPM file");
417                 default:
418                     token.append((char) c);
419                     hadBackSlash = false;
420                     break;
421                 }
422             } else if (inIdentifier) {
423                 if (!Character.isLetterOrDigit(c) && c != '_') {
424                     is.unread(c);
425                     return token.toString();
426                 }
427                 token.append((char) c);
428             } else if (c == '"') {
429                 token.append('"');
430                 inString = true;
431             } else if (Character.isLetterOrDigit(c) || c == '_') {
432                 token.append((char) c);
433                 inIdentifier = true;
434             } else if (c == '{' || c == '}' || c == '[' || c == ']' || c == '*' || c == ';' || c == '=' || c == ',') {
435                 token.append((char) c);
436                 return token.toString();
437             } else if (c == ' ' || c == '\t' || c == '\r' || c == '\n') { // NOPMD
438                 // ignore
439             } else {
440                 throw new ImagingException("Unhandled/invalid character '" + (char) c + "' found in XPM file");
441             }
442         }
443 
444         if (inIdentifier) {
445             return token.toString();
446         }
447         if (inString) {
448             throw new ImagingException("Unterminated string ends XMP file");
449         }
450         return null;
451     }
452 
453 }