001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import java.util.Arrays; 020 021import org.apache.commons.lang3.ArraySorter; 022import org.apache.commons.lang3.ArrayUtils; 023import org.apache.commons.lang3.StringUtils; 024 025/** 026 * A matcher class that can be queried to determine if a character array 027 * portion matches. 028 * <p> 029 * This class comes complete with various factory methods. 030 * If these do not suffice, you can subclass and implement your own matcher. 031 * </p> 032 * 033 * @since 2.2 034 * @deprecated As of <a href="https://commons.apache.org/proper/commons-lang/changes-report.html#a3.6">3.6</a>, use Apache Commons Text 035 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/matcher/StringMatcherFactory.html"> 036 * StringMatcherFactory</a>. 037 */ 038@Deprecated 039public abstract class StrMatcher { 040 041 /** 042 * Class used to define a character for matching purposes. 043 */ 044 static final class CharMatcher extends StrMatcher { 045 046 /** The character to match. */ 047 private final char ch; 048 049 /** 050 * Constructor that creates a matcher that matches a single character. 051 * 052 * @param ch the character to match. 053 */ 054 CharMatcher(final char ch) { 055 this.ch = ch; 056 } 057 058 /** 059 * Tests whether or not the given character matches. 060 * 061 * @param buffer the text content to match against, do not change. 062 * @param pos the starting position for the match, valid for buffer. 063 * @param bufferStart the first active index in the buffer, valid for buffer. 064 * @param bufferEnd the end index of the active buffer, valid for buffer. 065 * @return the number of matching characters, zero for no match. 066 */ 067 @Override 068 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 069 return ch == buffer[pos] ? 1 : 0; 070 } 071 } 072 073 /** 074 * Class used to define a set of characters for matching purposes. 075 */ 076 static final class CharSetMatcher extends StrMatcher { 077 078 /** The set of characters to match. */ 079 private final char[] chars; 080 081 /** 082 * Constructor that creates a matcher from a character array. 083 * 084 * @param chars the characters to match, must not be null. 085 */ 086 CharSetMatcher(final char[] chars) { 087 this.chars = ArraySorter.sort(chars.clone()); 088 } 089 090 /** 091 * Returns whether or not the given character matches. 092 * 093 * @param buffer the text content to match against, do not change. 094 * @param pos the starting position for the match, valid for buffer. 095 * @param bufferStart the first active index in the buffer, valid for buffer. 096 * @param bufferEnd the end index of the active buffer, valid for buffer. 097 * @return the number of matching characters, zero for no match. 098 */ 099 @Override 100 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 101 return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0; 102 } 103 } 104 105 /** 106 * Class used to match no characters. 107 */ 108 static final class NoMatcher extends StrMatcher { 109 110 /** 111 * Constructs a new instance of {@link NoMatcher}. 112 */ 113 NoMatcher() { 114 } 115 116 /** 117 * Always returns {@code false}. 118 * 119 * @param buffer the text content to match against, do not change. 120 * @param pos the starting position for the match, valid for buffer. 121 * @param bufferStart the first active index in the buffer, valid for buffer. 122 * @param bufferEnd the end index of the active buffer, valid for buffer. 123 * @return the number of matching characters, zero for no match. 124 */ 125 @Override 126 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 127 return 0; 128 } 129 } 130 131 /** 132 * Class used to define a set of characters for matching purposes. 133 */ 134 static final class StringMatcher extends StrMatcher { 135 136 /** The string to match, as a character array. */ 137 private final char[] chars; 138 139 /** 140 * Constructor that creates a matcher from a String. 141 * 142 * @param str the string to match, must not be null 143 */ 144 StringMatcher(final String str) { 145 chars = str.toCharArray(); 146 } 147 148 /** 149 * Tests whether or not the given text matches the stored string. 150 * 151 * @param buffer the text content to match against, do not change. 152 * @param pos the starting position for the match, valid for buffer. 153 * @param bufferStart the first active index in the buffer, valid for buffer. 154 * @param bufferEnd the end index of the active buffer, valid for buffer. 155 * @return the number of matching characters, zero for no match. 156 */ 157 @Override 158 public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) { 159 final int len = chars.length; 160 if (pos + len > bufferEnd) { 161 return 0; 162 } 163 for (int i = 0; i < chars.length; i++, pos++) { 164 if (chars[i] != buffer[pos]) { 165 return 0; 166 } 167 } 168 return len; 169 } 170 171 @Override 172 public String toString() { 173 return super.toString() + ' ' + Arrays.toString(chars); 174 } 175 176 } 177 178 /** 179 * Class used to match whitespace as per trim(). 180 */ 181 static final class TrimMatcher extends StrMatcher { 182 183 /** 184 * Constructs a new instance of {@link TrimMatcher}. 185 */ 186 TrimMatcher() { 187 } 188 189 /** 190 * Tests whether or not the given character matches. 191 * 192 * @param buffer the text content to match against, do not change. 193 * @param pos the starting position for the match, valid for buffer. 194 * @param bufferStart the first active index in the buffer, valid for buffer. 195 * @param bufferEnd the end index of the active buffer, valid for buffer. 196 * @return the number of matching characters, zero for no match. 197 */ 198 @Override 199 public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) { 200 return buffer[pos] <= 32 ? 1 : 0; 201 } 202 } 203 204 /** 205 * Matches the comma character. 206 */ 207 private static final StrMatcher COMMA_MATCHER = new CharMatcher(','); 208 209 /** 210 * Matches the tab character. 211 */ 212 private static final StrMatcher TAB_MATCHER = new CharMatcher('\t'); 213 214 /** 215 * Matches the space character. 216 */ 217 private static final StrMatcher SPACE_MATCHER = new CharMatcher(' '); 218 219 /** 220 * Matches the same characters as StringTokenizer, 221 * namely space, tab, newline, formfeed. 222 */ 223 private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray()); 224 225 /** 226 * Matches the String trim() whitespace characters. 227 */ 228 private static final StrMatcher TRIM_MATCHER = new TrimMatcher(); 229 230 /** 231 * Matches the double quote character. 232 */ 233 private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\''); 234 235 /** 236 * Matches the double quote character. 237 */ 238 private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"'); 239 240 /** 241 * Matches the single or double quote character. 242 */ 243 private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray()); 244 245 /** 246 * Matches no characters. 247 */ 248 private static final StrMatcher NONE_MATCHER = new NoMatcher(); 249 250 /** 251 * Creates a matcher from a character. 252 * 253 * @param ch the character to match, must not be null. 254 * @return a new Matcher for the given char. 255 */ 256 public static StrMatcher charMatcher(final char ch) { 257 return new CharMatcher(ch); 258 } 259 260 /** 261 * Creates a matcher from a set of characters. 262 * 263 * @param chars the characters to match, null or empty matches nothing. 264 * @return a new matcher for the given char[]. 265 */ 266 public static StrMatcher charSetMatcher(final char... chars) { 267 if (ArrayUtils.isEmpty(chars)) { 268 return NONE_MATCHER; 269 } 270 if (chars.length == 1) { 271 return new CharMatcher(chars[0]); 272 } 273 return new CharSetMatcher(chars); 274 } 275 276 /** 277 * Creates a matcher from a string representing a set of characters. 278 * 279 * @param chars the characters to match, null or empty matches nothing. 280 * @return a new Matcher for the given characters. 281 */ 282 public static StrMatcher charSetMatcher(final String chars) { 283 if (StringUtils.isEmpty(chars)) { 284 return NONE_MATCHER; 285 } 286 if (chars.length() == 1) { 287 return new CharMatcher(chars.charAt(0)); 288 } 289 return new CharSetMatcher(chars.toCharArray()); 290 } 291 292 /** 293 * Gets the matcher for the comma character. 294 * 295 * @return the matcher for a comma. 296 */ 297 public static StrMatcher commaMatcher() { 298 return COMMA_MATCHER; 299 } 300 301 /** 302 * Gets the matcher for the double quote character. 303 * 304 * @return the matcher for a double quote. 305 */ 306 public static StrMatcher doubleQuoteMatcher() { 307 return DOUBLE_QUOTE_MATCHER; 308 } 309 310 /** 311 * Gets the matcher for no characters. 312 * 313 * @return the matcher that matches nothing. 314 */ 315 public static StrMatcher noneMatcher() { 316 return NONE_MATCHER; 317 } 318 319 /** 320 * Gets the matcher for the single or double quote character. 321 * 322 * @return the matcher for a single or double quote. 323 */ 324 public static StrMatcher quoteMatcher() { 325 return QUOTE_MATCHER; 326 } 327 328 /** 329 * Gets the matcher for the single quote character. 330 * 331 * @return the matcher for a single quote. 332 */ 333 public static StrMatcher singleQuoteMatcher() { 334 return SINGLE_QUOTE_MATCHER; 335 } 336 337 /** 338 * Gets the matcher for the space character. 339 * 340 * @return the matcher for a space. 341 */ 342 public static StrMatcher spaceMatcher() { 343 return SPACE_MATCHER; 344 } 345 346 /** 347 * Gets the matcher for the same characters as StringTokenizer, 348 * namely space, tab, newline and form-feed. 349 * 350 * @return the split matcher. 351 */ 352 public static StrMatcher splitMatcher() { 353 return SPLIT_MATCHER; 354 } 355 356 /** 357 * Creates a matcher for a string. 358 * 359 * @param str the string to match, null or empty matches nothing. 360 * @return a new Matcher for the given String. 361 */ 362 public static StrMatcher stringMatcher(final String str) { 363 if (StringUtils.isEmpty(str)) { 364 return NONE_MATCHER; 365 } 366 return new StringMatcher(str); 367 } 368 369 /** 370 * Gets the matcher for the tab character. 371 * 372 * @return the matcher for a tab. 373 */ 374 public static StrMatcher tabMatcher() { 375 return TAB_MATCHER; 376 } 377 378 /** 379 * Gets the matcher to String trim() whitespace characters. 380 * 381 * @return the trim matcher. 382 */ 383 public static StrMatcher trimMatcher() { 384 return TRIM_MATCHER; 385 } 386 387 /** 388 * Constructs a new instance. 389 */ 390 protected StrMatcher() { 391 } 392 393 /** 394 * Tests whether the number of matching characters, zero for no match. 395 * <p> 396 * This method is called to check for a match. 397 * The parameter {@code pos} represents the current position to be 398 * checked in the string {@code buffer} (a character array which must 399 * not be changed). 400 * The API guarantees that {@code pos} is a valid index for {@code buffer}. 401 * </p> 402 * <p> 403 * The matching code may check one character or many. 404 * It may check characters preceding {@code pos} as well as those after. 405 * </p> 406 * <p> 407 * It must return zero for no match, or a positive number if a match was found. 408 * The number indicates the number of characters that matched. 409 * </p> 410 * 411 * @param buffer the text content to match against, do not change. 412 * @param pos the starting position for the match, valid for buffer. 413 * @return the number of matching characters, zero for no match. 414 * @since 2.4 415 */ 416 public int isMatch(final char[] buffer, final int pos) { 417 return isMatch(buffer, pos, 0, buffer.length); 418 } 419 420 /** 421 * Tests whether the number of matching characters, zero for no match. 422 * <p> 423 * This method is called to check for a match. 424 * The parameter {@code pos} represents the current position to be 425 * checked in the string {@code buffer} (a character array which must 426 * not be changed). 427 * The API guarantees that {@code pos} is a valid index for {@code buffer}. 428 * </p> 429 * <p> 430 * The character array may be larger than the active area to be matched. 431 * Only values in the buffer between the specified indices may be accessed. 432 * </p> 433 * <p> 434 * The matching code may check one character or many. 435 * It may check characters preceding {@code pos} as well as those 436 * after, so long as no checks exceed the bounds specified. 437 * </p> 438 * <p> 439 * It must return zero for no match, or a positive number if a match was found. 440 * The number indicates the number of characters that matched. 441 * </p> 442 * 443 * @param buffer the text content to match against, do not change. 444 * @param pos the starting position for the match, valid for buffer. 445 * @param bufferStart the first active index in the buffer, valid for buffer. 446 * @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer. 447 * @return the number of matching characters, zero for no match. 448 */ 449 public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd); 450 451}