001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.ArraySorter;
022import org.apache.commons.lang3.ArrayUtils;
023import org.apache.commons.lang3.StringUtils;
024
025/**
026 * A matcher class that can be queried to determine if a character array
027 * portion matches.
028 * <p>
029 * This class comes complete with various factory methods.
030 * If these do not suffice, you can subclass and implement your own matcher.
031 * </p>
032 *
033 * @since 2.2
034 * @deprecated As of <a href="https://commons.apache.org/proper/commons-lang/changes-report.html#a3.6">3.6</a>, use Apache Commons Text
035 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
036 * StringMatcherFactory</a>.
037 */
038@Deprecated
039public abstract class StrMatcher {
040
041    /**
042     * Class used to define a character for matching purposes.
043     */
044    static final class CharMatcher extends StrMatcher {
045
046        /** The character to match. */
047        private final char ch;
048
049        /**
050         * Constructor that creates a matcher that matches a single character.
051         *
052         * @param ch  the character to match.
053         */
054        CharMatcher(final char ch) {
055            this.ch = ch;
056        }
057
058        /**
059         * Tests whether or not the given character matches.
060         *
061         * @param buffer  the text content to match against, do not change.
062         * @param pos  the starting position for the match, valid for buffer.
063         * @param bufferStart  the first active index in the buffer, valid for buffer.
064         * @param bufferEnd  the end index of the active buffer, valid for buffer.
065         * @return the number of matching characters, zero for no match.
066         */
067        @Override
068        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
069            return ch == buffer[pos] ? 1 : 0;
070        }
071    }
072
073    /**
074     * Class used to define a set of characters for matching purposes.
075     */
076    static final class CharSetMatcher extends StrMatcher {
077
078        /** The set of characters to match. */
079        private final char[] chars;
080
081        /**
082         * Constructor that creates a matcher from a character array.
083         *
084         * @param chars  the characters to match, must not be null.
085         */
086        CharSetMatcher(final char[] chars) {
087            this.chars = ArraySorter.sort(chars.clone());
088        }
089
090        /**
091         * Returns whether or not the given character matches.
092         *
093         * @param buffer  the text content to match against, do not change.
094         * @param pos  the starting position for the match, valid for buffer.
095         * @param bufferStart  the first active index in the buffer, valid for buffer.
096         * @param bufferEnd  the end index of the active buffer, valid for buffer.
097         * @return the number of matching characters, zero for no match.
098         */
099        @Override
100        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
101            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
102        }
103    }
104
105    /**
106     * Class used to match no characters.
107     */
108    static final class NoMatcher extends StrMatcher {
109
110        /**
111         * Constructs a new instance of {@link NoMatcher}.
112         */
113        NoMatcher() {
114        }
115
116        /**
117         * Always returns {@code false}.
118         *
119         * @param buffer  the text content to match against, do not change.
120         * @param pos  the starting position for the match, valid for buffer.
121         * @param bufferStart  the first active index in the buffer, valid for buffer.
122         * @param bufferEnd  the end index of the active buffer, valid for buffer.
123         * @return the number of matching characters, zero for no match.
124         */
125        @Override
126        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
127            return 0;
128        }
129    }
130
131    /**
132     * Class used to define a set of characters for matching purposes.
133     */
134    static final class StringMatcher extends StrMatcher {
135
136        /** The string to match, as a character array. */
137        private final char[] chars;
138
139        /**
140         * Constructor that creates a matcher from a String.
141         *
142         * @param str  the string to match, must not be null
143         */
144        StringMatcher(final String str) {
145            chars = str.toCharArray();
146        }
147
148        /**
149         * Tests whether or not the given text matches the stored string.
150         *
151         * @param buffer  the text content to match against, do not change.
152         * @param pos  the starting position for the match, valid for buffer.
153         * @param bufferStart  the first active index in the buffer, valid for buffer.
154         * @param bufferEnd  the end index of the active buffer, valid for buffer.
155         * @return the number of matching characters, zero for no match.
156         */
157        @Override
158        public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
159            final int len = chars.length;
160            if (pos + len > bufferEnd) {
161                return 0;
162            }
163            for (int i = 0; i < chars.length; i++, pos++) {
164                if (chars[i] != buffer[pos]) {
165                    return 0;
166                }
167            }
168            return len;
169        }
170
171        @Override
172        public String toString() {
173            return super.toString() + ' ' + Arrays.toString(chars);
174        }
175
176    }
177
178    /**
179     * Class used to match whitespace as per trim().
180     */
181    static final class TrimMatcher extends StrMatcher {
182
183        /**
184         * Constructs a new instance of {@link TrimMatcher}.
185         */
186        TrimMatcher() {
187        }
188
189        /**
190         * Tests whether or not the given character matches.
191         *
192         * @param buffer  the text content to match against, do not change.
193         * @param pos  the starting position for the match, valid for buffer.
194         * @param bufferStart  the first active index in the buffer, valid for buffer.
195         * @param bufferEnd  the end index of the active buffer, valid for buffer.
196         * @return the number of matching characters, zero for no match.
197         */
198        @Override
199        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
200            return buffer[pos] <= 32 ? 1 : 0;
201        }
202    }
203
204    /**
205     * Matches the comma character.
206     */
207    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
208
209    /**
210     * Matches the tab character.
211     */
212    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
213
214    /**
215     * Matches the space character.
216     */
217    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
218
219    /**
220     * Matches the same characters as StringTokenizer,
221     * namely space, tab, newline, formfeed.
222     */
223    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
224
225    /**
226     * Matches the String trim() whitespace characters.
227     */
228    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
229
230    /**
231     * Matches the double quote character.
232     */
233    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
234
235    /**
236     * Matches the double quote character.
237     */
238    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
239
240    /**
241     * Matches the single or double quote character.
242     */
243    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
244
245    /**
246     * Matches no characters.
247     */
248    private static final StrMatcher NONE_MATCHER = new NoMatcher();
249
250    /**
251     * Creates a matcher from a character.
252     *
253     * @param ch  the character to match, must not be null.
254     * @return a new Matcher for the given char.
255     */
256    public static StrMatcher charMatcher(final char ch) {
257        return new CharMatcher(ch);
258    }
259
260    /**
261     * Creates a matcher from a set of characters.
262     *
263     * @param chars  the characters to match, null or empty matches nothing.
264     * @return a new matcher for the given char[].
265     */
266    public static StrMatcher charSetMatcher(final char... chars) {
267        if (ArrayUtils.isEmpty(chars)) {
268            return NONE_MATCHER;
269        }
270        if (chars.length == 1) {
271            return new CharMatcher(chars[0]);
272        }
273        return new CharSetMatcher(chars);
274    }
275
276    /**
277     * Creates a matcher from a string representing a set of characters.
278     *
279     * @param chars  the characters to match, null or empty matches nothing.
280     * @return a new Matcher for the given characters.
281     */
282    public static StrMatcher charSetMatcher(final String chars) {
283        if (StringUtils.isEmpty(chars)) {
284            return NONE_MATCHER;
285        }
286        if (chars.length() == 1) {
287            return new CharMatcher(chars.charAt(0));
288        }
289        return new CharSetMatcher(chars.toCharArray());
290    }
291
292    /**
293     * Gets the matcher for the comma character.
294     *
295     * @return the matcher for a comma.
296     */
297    public static StrMatcher commaMatcher() {
298        return COMMA_MATCHER;
299    }
300
301    /**
302     * Gets the matcher for the double quote character.
303     *
304     * @return the matcher for a double quote.
305     */
306    public static StrMatcher doubleQuoteMatcher() {
307        return DOUBLE_QUOTE_MATCHER;
308    }
309
310    /**
311     * Gets the matcher for no characters.
312     *
313     * @return the matcher that matches nothing.
314     */
315    public static StrMatcher noneMatcher() {
316        return NONE_MATCHER;
317    }
318
319    /**
320     * Gets the matcher for the single or double quote character.
321     *
322     * @return the matcher for a single or double quote.
323     */
324    public static StrMatcher quoteMatcher() {
325        return QUOTE_MATCHER;
326    }
327
328    /**
329     * Gets the matcher for the single quote character.
330     *
331     * @return the matcher for a single quote.
332     */
333    public static StrMatcher singleQuoteMatcher() {
334        return SINGLE_QUOTE_MATCHER;
335    }
336
337    /**
338     * Gets the matcher for the space character.
339     *
340     * @return the matcher for a space.
341     */
342    public static StrMatcher spaceMatcher() {
343        return SPACE_MATCHER;
344    }
345
346    /**
347     * Gets the matcher for the same characters as StringTokenizer,
348     * namely space, tab, newline and form-feed.
349     *
350     * @return the split matcher.
351     */
352    public static StrMatcher splitMatcher() {
353        return SPLIT_MATCHER;
354    }
355
356    /**
357     * Creates a matcher for a string.
358     *
359     * @param str  the string to match, null or empty matches nothing.
360     * @return a new Matcher for the given String.
361     */
362    public static StrMatcher stringMatcher(final String str) {
363        if (StringUtils.isEmpty(str)) {
364            return NONE_MATCHER;
365        }
366        return new StringMatcher(str);
367    }
368
369    /**
370     * Gets the matcher for the tab character.
371     *
372     * @return the matcher for a tab.
373     */
374    public static StrMatcher tabMatcher() {
375        return TAB_MATCHER;
376    }
377
378    /**
379     * Gets the matcher to String trim() whitespace characters.
380     *
381     * @return the trim matcher.
382     */
383    public static StrMatcher trimMatcher() {
384        return TRIM_MATCHER;
385    }
386
387    /**
388     * Constructs a new instance.
389     */
390    protected StrMatcher() {
391    }
392
393    /**
394     * Tests whether the number of matching characters, zero for no match.
395     * <p>
396     * This method is called to check for a match.
397     * The parameter {@code pos} represents the current position to be
398     * checked in the string {@code buffer} (a character array which must
399     * not be changed).
400     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
401     * </p>
402     * <p>
403     * The matching code may check one character or many.
404     * It may check characters preceding {@code pos} as well as those after.
405     * </p>
406     * <p>
407     * It must return zero for no match, or a positive number if a match was found.
408     * The number indicates the number of characters that matched.
409     * </p>
410     *
411     * @param buffer  the text content to match against, do not change.
412     * @param pos  the starting position for the match, valid for buffer.
413     * @return the number of matching characters, zero for no match.
414     * @since 2.4
415     */
416    public int isMatch(final char[] buffer, final int pos) {
417        return isMatch(buffer, pos, 0, buffer.length);
418    }
419
420    /**
421     * Tests whether the number of matching characters, zero for no match.
422     * <p>
423     * This method is called to check for a match.
424     * The parameter {@code pos} represents the current position to be
425     * checked in the string {@code buffer} (a character array which must
426     * not be changed).
427     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
428     * </p>
429     * <p>
430     * The character array may be larger than the active area to be matched.
431     * Only values in the buffer between the specified indices may be accessed.
432     * </p>
433     * <p>
434     * The matching code may check one character or many.
435     * It may check characters preceding {@code pos} as well as those
436     * after, so long as no checks exceed the bounds specified.
437     * </p>
438     * <p>
439     * It must return zero for no match, or a positive number if a match was found.
440     * The number indicates the number of characters that matched.
441     * </p>
442     *
443     * @param buffer  the text content to match against, do not change.
444     * @param pos  the starting position for the match, valid for buffer.
445     * @param bufferStart  the first active index in the buffer, valid for buffer.
446     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer.
447     * @return the number of matching characters, zero for no match.
448     */
449    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
450
451}