1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3.text;
18
19 import java.util.Arrays;
20
21 import org.apache.commons.lang3.ArraySorter;
22 import org.apache.commons.lang3.ArrayUtils;
23 import org.apache.commons.lang3.StringUtils;
24
25 /**
26 * A matcher class that can be queried to determine if a character array
27 * portion matches.
28 * <p>
29 * This class comes complete with various factory methods.
30 * If these do not suffice, you can subclass and implement your own matcher.
31 * </p>
32 *
33 * @since 2.2
34 * @deprecated As of <a href="https://commons.apache.org/proper/commons-lang/changes-report.html#a3.6">3.6</a>, use Apache Commons Text
35 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html">
36 * StringMatcherFactory</a>.
37 */
38 @Deprecated
39 public abstract class StrMatcher {
40
41 /**
42 * Class used to define a character for matching purposes.
43 */
44 static final class CharMatcher extends StrMatcher {
45
46 /** The character to match. */
47 private final char ch;
48
49 /**
50 * Constructor that creates a matcher that matches a single character.
51 *
52 * @param ch the character to match.
53 */
54 CharMatcher(final char ch) {
55 this.ch = ch;
56 }
57
58 /**
59 * Tests whether or not the given character matches.
60 *
61 * @param buffer the text content to match against, do not change.
62 * @param pos the starting position for the match, valid for buffer.
63 * @param bufferStart the first active index in the buffer, valid for buffer.
64 * @param bufferEnd the end index of the active buffer, valid for buffer.
65 * @return the number of matching characters, zero for no match.
66 */
67 @Override
68 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
69 return ch == buffer[pos] ? 1 : 0;
70 }
71 }
72
73 /**
74 * Class used to define a set of characters for matching purposes.
75 */
76 static final class CharSetMatcher extends StrMatcher {
77
78 /** The set of characters to match. */
79 private final char[] chars;
80
81 /**
82 * Constructor that creates a matcher from a character array.
83 *
84 * @param chars the characters to match, must not be null.
85 */
86 CharSetMatcher(final char[] chars) {
87 this.chars = ArraySorter.sort(chars.clone());
88 }
89
90 /**
91 * Returns whether or not the given character matches.
92 *
93 * @param buffer the text content to match against, do not change.
94 * @param pos the starting position for the match, valid for buffer.
95 * @param bufferStart the first active index in the buffer, valid for buffer.
96 * @param bufferEnd the end index of the active buffer, valid for buffer.
97 * @return the number of matching characters, zero for no match.
98 */
99 @Override
100 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
101 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
102 }
103 }
104
105 /**
106 * Class used to match no characters.
107 */
108 static final class NoMatcher extends StrMatcher {
109
110 /**
111 * Constructs a new instance of {@link NoMatcher}.
112 */
113 NoMatcher() {
114 }
115
116 /**
117 * Always returns {@code false}.
118 *
119 * @param buffer the text content to match against, do not change.
120 * @param pos the starting position for the match, valid for buffer.
121 * @param bufferStart the first active index in the buffer, valid for buffer.
122 * @param bufferEnd the end index of the active buffer, valid for buffer.
123 * @return the number of matching characters, zero for no match.
124 */
125 @Override
126 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
127 return 0;
128 }
129 }
130
131 /**
132 * Class used to define a set of characters for matching purposes.
133 */
134 static final class StringMatcher extends StrMatcher {
135
136 /** The string to match, as a character array. */
137 private final char[] chars;
138
139 /**
140 * Constructor that creates a matcher from a String.
141 *
142 * @param str the string to match, must not be null
143 */
144 StringMatcher(final String str) {
145 chars = str.toCharArray();
146 }
147
148 /**
149 * Tests whether or not the given text matches the stored string.
150 *
151 * @param buffer the text content to match against, do not change.
152 * @param pos the starting position for the match, valid for buffer.
153 * @param bufferStart the first active index in the buffer, valid for buffer.
154 * @param bufferEnd the end index of the active buffer, valid for buffer.
155 * @return the number of matching characters, zero for no match.
156 */
157 @Override
158 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
159 final int len = chars.length;
160 if (pos + len > bufferEnd) {
161 return 0;
162 }
163 for (int i = 0; i < chars.length; i++, pos++) {
164 if (chars[i] != buffer[pos]) {
165 return 0;
166 }
167 }
168 return len;
169 }
170
171 @Override
172 public String toString() {
173 return super.toString() + ' ' + Arrays.toString(chars);
174 }
175
176 }
177
178 /**
179 * Class used to match whitespace as per trim().
180 */
181 static final class TrimMatcher extends StrMatcher {
182
183 /**
184 * Constructs a new instance of {@link TrimMatcher}.
185 */
186 TrimMatcher() {
187 }
188
189 /**
190 * Tests whether or not the given character matches.
191 *
192 * @param buffer the text content to match against, do not change.
193 * @param pos the starting position for the match, valid for buffer.
194 * @param bufferStart the first active index in the buffer, valid for buffer.
195 * @param bufferEnd the end index of the active buffer, valid for buffer.
196 * @return the number of matching characters, zero for no match.
197 */
198 @Override
199 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
200 return buffer[pos] <= 32 ? 1 : 0;
201 }
202 }
203
204 /**
205 * Matches the comma character.
206 */
207 private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
208
209 /**
210 * Matches the tab character.
211 */
212 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
213
214 /**
215 * Matches the space character.
216 */
217 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
218
219 /**
220 * Matches the same characters as StringTokenizer,
221 * namely space, tab, newline, formfeed.
222 */
223 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
224
225 /**
226 * Matches the String trim() whitespace characters.
227 */
228 private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
229
230 /**
231 * Matches the double quote character.
232 */
233 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
234
235 /**
236 * Matches the double quote character.
237 */
238 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
239
240 /**
241 * Matches the single or double quote character.
242 */
243 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
244
245 /**
246 * Matches no characters.
247 */
248 private static final StrMatcher NONE_MATCHER = new NoMatcher();
249
250 /**
251 * Creates a matcher from a character.
252 *
253 * @param ch the character to match, must not be null.
254 * @return a new Matcher for the given char.
255 */
256 public static StrMatcher charMatcher(final char ch) {
257 return new CharMatcher(ch);
258 }
259
260 /**
261 * Creates a matcher from a set of characters.
262 *
263 * @param chars the characters to match, null or empty matches nothing.
264 * @return a new matcher for the given char[].
265 */
266 public static StrMatcher charSetMatcher(final char... chars) {
267 if (ArrayUtils.isEmpty(chars)) {
268 return NONE_MATCHER;
269 }
270 if (chars.length == 1) {
271 return new CharMatcher(chars[0]);
272 }
273 return new CharSetMatcher(chars);
274 }
275
276 /**
277 * Creates a matcher from a string representing a set of characters.
278 *
279 * @param chars the characters to match, null or empty matches nothing.
280 * @return a new Matcher for the given characters.
281 */
282 public static StrMatcher charSetMatcher(final String chars) {
283 if (StringUtils.isEmpty(chars)) {
284 return NONE_MATCHER;
285 }
286 if (chars.length() == 1) {
287 return new CharMatcher(chars.charAt(0));
288 }
289 return new CharSetMatcher(chars.toCharArray());
290 }
291
292 /**
293 * Gets the matcher for the comma character.
294 *
295 * @return the matcher for a comma.
296 */
297 public static StrMatcher commaMatcher() {
298 return COMMA_MATCHER;
299 }
300
301 /**
302 * Gets the matcher for the double quote character.
303 *
304 * @return the matcher for a double quote.
305 */
306 public static StrMatcher doubleQuoteMatcher() {
307 return DOUBLE_QUOTE_MATCHER;
308 }
309
310 /**
311 * Gets the matcher for no characters.
312 *
313 * @return the matcher that matches nothing.
314 */
315 public static StrMatcher noneMatcher() {
316 return NONE_MATCHER;
317 }
318
319 /**
320 * Gets the matcher for the single or double quote character.
321 *
322 * @return the matcher for a single or double quote.
323 */
324 public static StrMatcher quoteMatcher() {
325 return QUOTE_MATCHER;
326 }
327
328 /**
329 * Gets the matcher for the single quote character.
330 *
331 * @return the matcher for a single quote.
332 */
333 public static StrMatcher singleQuoteMatcher() {
334 return SINGLE_QUOTE_MATCHER;
335 }
336
337 /**
338 * Gets the matcher for the space character.
339 *
340 * @return the matcher for a space.
341 */
342 public static StrMatcher spaceMatcher() {
343 return SPACE_MATCHER;
344 }
345
346 /**
347 * Gets the matcher for the same characters as StringTokenizer,
348 * namely space, tab, newline and form-feed.
349 *
350 * @return the split matcher.
351 */
352 public static StrMatcher splitMatcher() {
353 return SPLIT_MATCHER;
354 }
355
356 /**
357 * Creates a matcher for a string.
358 *
359 * @param str the string to match, null or empty matches nothing.
360 * @return a new Matcher for the given String.
361 */
362 public static StrMatcher stringMatcher(final String str) {
363 if (StringUtils.isEmpty(str)) {
364 return NONE_MATCHER;
365 }
366 return new StringMatcher(str);
367 }
368
369 /**
370 * Gets the matcher for the tab character.
371 *
372 * @return the matcher for a tab.
373 */
374 public static StrMatcher tabMatcher() {
375 return TAB_MATCHER;
376 }
377
378 /**
379 * Gets the matcher to String trim() whitespace characters.
380 *
381 * @return the trim matcher.
382 */
383 public static StrMatcher trimMatcher() {
384 return TRIM_MATCHER;
385 }
386
/**
 * Constructs a new instance; available to subclasses that implement their own matching.
 */
protected StrMatcher() {
    // No state to initialize.
}
392
393 /**
394 * Tests whether the number of matching characters, zero for no match.
395 * <p>
396 * This method is called to check for a match.
397 * The parameter {@code pos} represents the current position to be
398 * checked in the string {@code buffer} (a character array which must
399 * not be changed).
400 * The API guarantees that {@code pos} is a valid index for {@code buffer}.
401 * </p>
402 * <p>
403 * The matching code may check one character or many.
404 * It may check characters preceding {@code pos} as well as those after.
405 * </p>
406 * <p>
407 * It must return zero for no match, or a positive number if a match was found.
408 * The number indicates the number of characters that matched.
409 * </p>
410 *
411 * @param buffer the text content to match against, do not change.
412 * @param pos the starting position for the match, valid for buffer.
413 * @return the number of matching characters, zero for no match.
414 * @since 2.4
415 */
416 public int isMatch(final char[] buffer, final int pos) {
417 return isMatch(buffer, pos, 0, buffer.length);
418 }
419
420 /**
421 * Tests whether the number of matching characters, zero for no match.
422 * <p>
423 * This method is called to check for a match.
424 * The parameter {@code pos} represents the current position to be
425 * checked in the string {@code buffer} (a character array which must
426 * not be changed).
427 * The API guarantees that {@code pos} is a valid index for {@code buffer}.
428 * </p>
429 * <p>
430 * The character array may be larger than the active area to be matched.
431 * Only values in the buffer between the specified indices may be accessed.
432 * </p>
433 * <p>
434 * The matching code may check one character or many.
435 * It may check characters preceding {@code pos} as well as those
436 * after, so long as no checks exceed the bounds specified.
437 * </p>
438 * <p>
439 * It must return zero for no match, or a positive number if a match was found.
440 * The number indicates the number of characters that matched.
441 * </p>
442 *
443 * @param buffer the text content to match against, do not change.
444 * @param pos the starting position for the match, valid for buffer.
445 * @param bufferStart the first active index in the buffer, valid for buffer.
446 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer.
447 * @return the number of matching characters, zero for no match.
448 */
449 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
450
451 }