View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.ArraySorter;
22  import org.apache.commons.lang3.ArrayUtils;
23  import org.apache.commons.lang3.StringUtils;
24  
25  /**
26   * A matcher class that can be queried to determine if a character array
27   * portion matches.
28   * <p>
29   * This class comes complete with various factory methods.
30   * If these do not suffice, you can subclass and implement your own matcher.
31   * </p>
32   *
33   * @since 2.2
34   * @deprecated As of <a href="https://commons.apache.org/proper/commons-lang/changes-report.html#a3.6">3.6</a>, use Apache Commons Text
35   * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
36   * StringMatcherFactory</a>.
37   */
38  @Deprecated
39  public abstract class StrMatcher {
40  
41      /**
42       * Class used to define a character for matching purposes.
43       */
44      static final class CharMatcher extends StrMatcher {
45  
46          /** The character to match. */
47          private final char ch;
48  
49          /**
50           * Constructor that creates a matcher that matches a single character.
51           *
52           * @param ch  the character to match.
53           */
54          CharMatcher(final char ch) {
55              this.ch = ch;
56          }
57  
58          /**
59           * Tests whether or not the given character matches.
60           *
61           * @param buffer  the text content to match against, do not change.
62           * @param pos  the starting position for the match, valid for buffer.
63           * @param bufferStart  the first active index in the buffer, valid for buffer.
64           * @param bufferEnd  the end index of the active buffer, valid for buffer.
65           * @return the number of matching characters, zero for no match.
66           */
67          @Override
68          public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
69              return ch == buffer[pos] ? 1 : 0;
70          }
71      }
72  
73      /**
74       * Class used to define a set of characters for matching purposes.
75       */
76      static final class CharSetMatcher extends StrMatcher {
77  
78          /** The set of characters to match. */
79          private final char[] chars;
80  
81          /**
82           * Constructor that creates a matcher from a character array.
83           *
84           * @param chars  the characters to match, must not be null.
85           */
86          CharSetMatcher(final char[] chars) {
87              this.chars = ArraySorter.sort(chars.clone());
88          }
89  
90          /**
91           * Returns whether or not the given character matches.
92           *
93           * @param buffer  the text content to match against, do not change.
94           * @param pos  the starting position for the match, valid for buffer.
95           * @param bufferStart  the first active index in the buffer, valid for buffer.
96           * @param bufferEnd  the end index of the active buffer, valid for buffer.
97           * @return the number of matching characters, zero for no match.
98           */
99          @Override
100         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
101             return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
102         }
103     }
104 
105     /**
106      * Class used to match no characters.
107      */
108     static final class NoMatcher extends StrMatcher {
109 
110         /**
111          * Constructs a new instance of {@link NoMatcher}.
112          */
113         NoMatcher() {
114         }
115 
116         /**
117          * Always returns {@code false}.
118          *
119          * @param buffer  the text content to match against, do not change.
120          * @param pos  the starting position for the match, valid for buffer.
121          * @param bufferStart  the first active index in the buffer, valid for buffer.
122          * @param bufferEnd  the end index of the active buffer, valid for buffer.
123          * @return the number of matching characters, zero for no match.
124          */
125         @Override
126         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
127             return 0;
128         }
129     }
130 
131     /**
132      * Class used to define a set of characters for matching purposes.
133      */
134     static final class StringMatcher extends StrMatcher {
135 
136         /** The string to match, as a character array. */
137         private final char[] chars;
138 
139         /**
140          * Constructor that creates a matcher from a String.
141          *
142          * @param str  the string to match, must not be null
143          */
144         StringMatcher(final String str) {
145             chars = str.toCharArray();
146         }
147 
148         /**
149          * Tests whether or not the given text matches the stored string.
150          *
151          * @param buffer  the text content to match against, do not change.
152          * @param pos  the starting position for the match, valid for buffer.
153          * @param bufferStart  the first active index in the buffer, valid for buffer.
154          * @param bufferEnd  the end index of the active buffer, valid for buffer.
155          * @return the number of matching characters, zero for no match.
156          */
157         @Override
158         public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
159             final int len = chars.length;
160             if (pos + len > bufferEnd) {
161                 return 0;
162             }
163             for (int i = 0; i < chars.length; i++, pos++) {
164                 if (chars[i] != buffer[pos]) {
165                     return 0;
166                 }
167             }
168             return len;
169         }
170 
171         @Override
172         public String toString() {
173             return super.toString() + ' ' + Arrays.toString(chars);
174         }
175 
176     }
177 
178     /**
179      * Class used to match whitespace as per trim().
180      */
181     static final class TrimMatcher extends StrMatcher {
182 
183         /**
184          * Constructs a new instance of {@link TrimMatcher}.
185          */
186         TrimMatcher() {
187         }
188 
189         /**
190          * Tests whether or not the given character matches.
191          *
192          * @param buffer  the text content to match against, do not change.
193          * @param pos  the starting position for the match, valid for buffer.
194          * @param bufferStart  the first active index in the buffer, valid for buffer.
195          * @param bufferEnd  the end index of the active buffer, valid for buffer.
196          * @return the number of matching characters, zero for no match.
197          */
198         @Override
199         public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
200             return buffer[pos] <= 32 ? 1 : 0;
201         }
202     }
203 
204     /**
205      * Matches the comma character.
206      */
207     private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
208 
209     /**
210      * Matches the tab character.
211      */
212     private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
213 
214     /**
215      * Matches the space character.
216      */
217     private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
218 
219     /**
220      * Matches the same characters as StringTokenizer,
221      * namely space, tab, newline, formfeed.
222      */
223     private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
224 
225     /**
226      * Matches the String trim() whitespace characters.
227      */
228     private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
229 
230     /**
231      * Matches the double quote character.
232      */
233     private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
234 
235     /**
236      * Matches the double quote character.
237      */
238     private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
239 
240     /**
241      * Matches the single or double quote character.
242      */
243     private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
244 
245     /**
246      * Matches no characters.
247      */
248     private static final StrMatcher NONE_MATCHER = new NoMatcher();
249 
250     /**
251      * Creates a matcher from a character.
252      *
253      * @param ch  the character to match, must not be null.
254      * @return a new Matcher for the given char.
255      */
256     public static StrMatcher charMatcher(final char ch) {
257         return new CharMatcher(ch);
258     }
259 
260     /**
261      * Creates a matcher from a set of characters.
262      *
263      * @param chars  the characters to match, null or empty matches nothing.
264      * @return a new matcher for the given char[].
265      */
266     public static StrMatcher charSetMatcher(final char... chars) {
267         if (ArrayUtils.isEmpty(chars)) {
268             return NONE_MATCHER;
269         }
270         if (chars.length == 1) {
271             return new CharMatcher(chars[0]);
272         }
273         return new CharSetMatcher(chars);
274     }
275 
276     /**
277      * Creates a matcher from a string representing a set of characters.
278      *
279      * @param chars  the characters to match, null or empty matches nothing.
280      * @return a new Matcher for the given characters.
281      */
282     public static StrMatcher charSetMatcher(final String chars) {
283         if (StringUtils.isEmpty(chars)) {
284             return NONE_MATCHER;
285         }
286         if (chars.length() == 1) {
287             return new CharMatcher(chars.charAt(0));
288         }
289         return new CharSetMatcher(chars.toCharArray());
290     }
291 
292     /**
293      * Gets the matcher for the comma character.
294      *
295      * @return the matcher for a comma.
296      */
297     public static StrMatcher commaMatcher() {
298         return COMMA_MATCHER;
299     }
300 
301     /**
302      * Gets the matcher for the double quote character.
303      *
304      * @return the matcher for a double quote.
305      */
306     public static StrMatcher doubleQuoteMatcher() {
307         return DOUBLE_QUOTE_MATCHER;
308     }
309 
310     /**
311      * Gets the matcher for no characters.
312      *
313      * @return the matcher that matches nothing.
314      */
315     public static StrMatcher noneMatcher() {
316         return NONE_MATCHER;
317     }
318 
319     /**
320      * Gets the matcher for the single or double quote character.
321      *
322      * @return the matcher for a single or double quote.
323      */
324     public static StrMatcher quoteMatcher() {
325         return QUOTE_MATCHER;
326     }
327 
328     /**
329      * Gets the matcher for the single quote character.
330      *
331      * @return the matcher for a single quote.
332      */
333     public static StrMatcher singleQuoteMatcher() {
334         return SINGLE_QUOTE_MATCHER;
335     }
336 
337     /**
338      * Gets the matcher for the space character.
339      *
340      * @return the matcher for a space.
341      */
342     public static StrMatcher spaceMatcher() {
343         return SPACE_MATCHER;
344     }
345 
346     /**
347      * Gets the matcher for the same characters as StringTokenizer,
348      * namely space, tab, newline and form-feed.
349      *
350      * @return the split matcher.
351      */
352     public static StrMatcher splitMatcher() {
353         return SPLIT_MATCHER;
354     }
355 
356     /**
357      * Creates a matcher for a string.
358      *
359      * @param str  the string to match, null or empty matches nothing.
360      * @return a new Matcher for the given String.
361      */
362     public static StrMatcher stringMatcher(final String str) {
363         if (StringUtils.isEmpty(str)) {
364             return NONE_MATCHER;
365         }
366         return new StringMatcher(str);
367     }
368 
369     /**
370      * Gets the matcher for the tab character.
371      *
372      * @return the matcher for a tab.
373      */
374     public static StrMatcher tabMatcher() {
375         return TAB_MATCHER;
376     }
377 
378     /**
379      * Gets the matcher to String trim() whitespace characters.
380      *
381      * @return the trim matcher.
382      */
383     public static StrMatcher trimMatcher() {
384         return TRIM_MATCHER;
385     }
386 
387     /**
388      * Constructs a new instance.
389      */
390     protected StrMatcher() {
391     }
392 
393     /**
394      * Tests whether the number of matching characters, zero for no match.
395      * <p>
396      * This method is called to check for a match.
397      * The parameter {@code pos} represents the current position to be
398      * checked in the string {@code buffer} (a character array which must
399      * not be changed).
400      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
401      * </p>
402      * <p>
403      * The matching code may check one character or many.
404      * It may check characters preceding {@code pos} as well as those after.
405      * </p>
406      * <p>
407      * It must return zero for no match, or a positive number if a match was found.
408      * The number indicates the number of characters that matched.
409      * </p>
410      *
411      * @param buffer  the text content to match against, do not change.
412      * @param pos  the starting position for the match, valid for buffer.
413      * @return the number of matching characters, zero for no match.
414      * @since 2.4
415      */
416     public int isMatch(final char[] buffer, final int pos) {
417         return isMatch(buffer, pos, 0, buffer.length);
418     }
419 
420     /**
421      * Tests whether the number of matching characters, zero for no match.
422      * <p>
423      * This method is called to check for a match.
424      * The parameter {@code pos} represents the current position to be
425      * checked in the string {@code buffer} (a character array which must
426      * not be changed).
427      * The API guarantees that {@code pos} is a valid index for {@code buffer}.
428      * </p>
429      * <p>
430      * The character array may be larger than the active area to be matched.
431      * Only values in the buffer between the specified indices may be accessed.
432      * </p>
433      * <p>
434      * The matching code may check one character or many.
435      * It may check characters preceding {@code pos} as well as those
436      * after, so long as no checks exceed the bounds specified.
437      * </p>
438      * <p>
439      * It must return zero for no match, or a positive number if a match was found.
440      * The number indicates the number of characters that matched.
441      * </p>
442      *
443      * @param buffer  the text content to match against, do not change.
444      * @param pos  the starting position for the match, valid for buffer.
445      * @param bufferStart  the first active index in the buffer, valid for buffer.
446      * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer.
447      * @return the number of matching characters, zero for no match.
448      */
449     public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
450 
451 }