001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.bcel.util; 020 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.List; 026import java.util.Map; 027import java.util.regex.Matcher; 028import java.util.regex.Pattern; 029 030import org.apache.bcel.Const; 031import org.apache.bcel.generic.ClassGenException; 032import org.apache.bcel.generic.InstructionHandle; 033import org.apache.bcel.generic.InstructionList; 034import org.apache.commons.lang3.StringUtils; 035 036/** 037 * InstructionFinder is a tool to search for given instructions patterns, that is, match sequences of instructions in an 038 * instruction list via regular expressions. This can be used, for example, in order to implement a peep hole optimizer that 039 * looks for code patterns and replaces them with faster equivalents. 040 * 041 * <p> 042 * This class internally uses the java.util.regex package to search for regular expressions. 043 * </p> 044 * <p> 045 * A typical application would look like this: 046 * </p> 047 * <pre> 048 * InstructionFinder f = new InstructionFinder(il); 049 * String pat = "IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)"; 050 * 051 * for (Iterator i = f.search(pat, constraint); i.hasNext(); ) { 052 * InstructionHandle[] match = (InstructionHandle[])i.next(); 053 * ... 054 * il.delete(match[1], match[5]); 055 * ... 056 * } 057 058 * </pre> 059 * 060 * @see org.apache.bcel.generic.Instruction 061 * @see InstructionList 062 */ 063public class InstructionFinder { 064 065 /** 066 * Code patterns found may be checked using an additional user-defined constraint object whether they really match the 067 * needed criterion. I.e., check constraints that cannot expressed with regular expressions. 068 */ 069 public interface CodeConstraint { 070 071 /** 072 * Checks if the matched code area is really useful. 073 * 074 * @param match array of instructions matching the requested pattern. 075 * @return true if the matched area is really useful. 076 */ 077 boolean checkCode(InstructionHandle[] match); 078 } 079 080 private static final int OFFSET = 32767; // char + OFFSET is outside of LATIN-1 081 private static final int NO_OPCODES = 256; // Potential number, some are not used 082 private static final Map<String, String> map = new HashMap<>(); 083 084 // Initialize pattern map 085 static { 086 map.put("arithmeticinstruction", 087 "(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)"); 088 map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial|invokedynamic)"); 089 map.put("arrayinstruction", 090 "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)"); 091 map.put("gotoinstruction", "(goto|goto_w)"); 092 map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)"); 093 map.put("localvariableinstruction", "(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)"); 094 map.put("loadinstruction", "(fload|dload|lload|iload|aload)"); 095 map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)"); 096 map.put("cpinstruction", 097 "(ldc2_w|invokeinterface|invokedynamic|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)"); 098 map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)"); 099 map.put("branchinstruction", 100 "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)"); 101 map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)"); 102 map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)"); 103 map.put("select", "(tableswitch|lookupswitch)"); 104 map.put("ifinstruction", 105 "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)"); 106 map.put("jsrinstruction", "(jsr|jsr_w)"); 107 map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)"); 108 map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)"); 109 map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)"); 110 map.put("typedinstruction", 111 "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)"); 112 map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)"); 113 map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)"); 114 map.put("indexedinstruction", 115 "(lload|lstore|fload|ldc2_w|invokeinterface|invokedynamic|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)"); 116 map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)"); 117 map.put("stackproducer", 118 "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|invokedynamic|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)"); 119 map.put("stackconsumer", 120 "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)"); 121 map.put("exceptionthrower", 122 "(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|invokedynamic|ldc|invokestatic|daload)"); 123 map.put("loadclass", 124 "(multianewarray|invokeinterface|invokedynamic|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)"); 125 map.put("instructiontargeter", 126 "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)"); 127 // Some aliases 128 map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)"); 129 map.put("if_acmp", "(if_acmpeq|if_acmpne)"); 130 map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)"); 131 // Precompile some aliases first 132 map.put("iconst", precompile(Const.ICONST_0, Const.ICONST_5, Const.ICONST_M1)); 133 map.put("lconst", new String(new char[] {'(', makeChar(Const.LCONST_0), '|', makeChar(Const.LCONST_1), ')'})); 134 map.put("dconst", new String(new char[] {'(', makeChar(Const.DCONST_0), '|', makeChar(Const.DCONST_1), ')'})); 135 map.put("fconst", new String(new char[] {'(', makeChar(Const.FCONST_0), '|', makeChar(Const.FCONST_1), '|', makeChar(Const.FCONST_2), ')'})); 136 map.put("lload", precompile(Const.LLOAD_0, Const.LLOAD_3, Const.LLOAD)); 137 map.put("iload", precompile(Const.ILOAD_0, Const.ILOAD_3, Const.ILOAD)); 138 map.put("dload", precompile(Const.DLOAD_0, Const.DLOAD_3, Const.DLOAD)); 139 map.put("fload", precompile(Const.FLOAD_0, Const.FLOAD_3, Const.FLOAD)); 140 map.put("aload", precompile(Const.ALOAD_0, Const.ALOAD_3, Const.ALOAD)); 141 map.put("lstore", precompile(Const.LSTORE_0, Const.LSTORE_3, Const.LSTORE)); 142 map.put("istore", precompile(Const.ISTORE_0, Const.ISTORE_3, Const.ISTORE)); 143 map.put("dstore", precompile(Const.DSTORE_0, Const.DSTORE_3, Const.DSTORE)); 144 map.put("fstore", precompile(Const.FSTORE_0, Const.FSTORE_3, Const.FSTORE)); 145 map.put("astore", precompile(Const.ASTORE_0, Const.ASTORE_3, Const.ASTORE)); 146 // Compile strings 147 map.forEach((key, value) -> { 148 final char ch = value.charAt(1); // Omit already precompiled patterns 149 if (ch < OFFSET) { 150 map.put(key, compilePattern(value)); // precompile all patterns 151 } 152 }); 153 // Add instruction alias to match anything 154 final StringBuilder buf = new StringBuilder("("); 155 for (short i = 0; i < NO_OPCODES; i++) { 156 if (Const.getNoOfOperands(i) != Const.UNDEFINED) { // Not an invalid opcode 157 buf.append(makeChar(i)); 158 if (i < NO_OPCODES - 1) { 159 buf.append('|'); 160 } 161 } 162 } 163 buf.append(')'); 164 map.put("instruction", buf.toString()); 165 } 166 167 /** 168 * Replace symbolic names of instructions with the appropriate character and remove all white space from string. Meta 169 * characters such as +, * are ignored. 170 * 171 * @param pattern The pattern to compile. 172 * @return translated regular expression string. 173 */ 174 private static String compilePattern(final String pattern) { 175 // Bug: BCEL-77 - Instructions are assumed to be english, to avoid odd Locale issues 176 final String lower = StringUtils.toRootLowerCase(pattern); 177 final StringBuilder buf = new StringBuilder(); 178 final int size = pattern.length(); 179 for (int i = 0; i < size; i++) { 180 char ch = lower.charAt(i); 181 if (Character.isLetterOrDigit(ch)) { 182 final StringBuilder name = new StringBuilder(); 183 while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) { 184 name.append(ch); 185 if (++i >= size) { 186 break; 187 } 188 ch = lower.charAt(i); 189 } 190 i--; 191 buf.append(mapName(name.toString())); 192 } else if (!Character.isWhitespace(ch)) { 193 buf.append(ch); 194 } 195 } 196 return buf.toString(); 197 } 198 199 /** 200 * Convert opcode number to char. 201 */ 202 private static char makeChar(final short opcode) { 203 return (char) (opcode + OFFSET); 204 } 205 206 /** 207 * Map symbolic instruction names like "getfield" to a single character. 208 * 209 * @param pattern instruction pattern in lower case. 210 * @return encoded string for a pattern such as "BranchInstruction". 211 */ 212 private static String mapName(final String pattern) { 213 final String result = map.get(pattern); 214 if (result != null) { 215 return result; 216 } 217 for (short i = 0; i < NO_OPCODES; i++) { 218 if (pattern.equals(Const.getOpcodeName(i))) { 219 return String.valueOf(makeChar(i)); 220 } 221 } 222 throw new IllegalArgumentException("Instruction unknown: " + pattern); 223 } 224 225 private static String precompile(final short from, final short to, final short extra) { 226 final StringBuilder buf = new StringBuilder("("); 227 for (short i = from; i <= to; i++) { 228 buf.append(makeChar(i)); 229 buf.append('|'); 230 } 231 buf.append(makeChar(extra)); 232 buf.append(")"); 233 return buf.toString(); 234 } 235 236 private final InstructionList il; 237 238 private String ilString; // instruction list as string 239 240 private InstructionHandle[] handles; // map instruction 241 242 // list to array 243 244 /** 245 * Constructs an InstructionFinder. 246 * 247 * @param il instruction list to search for given patterns. 248 */ 249 public InstructionFinder(final InstructionList il) { 250 this.il = il; 251 reread(); 252 } 253 254 /** 255 * Gets the inquired instruction list. 256 * 257 * @return the inquired instruction list. 258 */ 259 public final InstructionList getInstructionList() { 260 return il; 261 } 262 263 /** 264 * @return the matched piece of code as an array of instruction (handles). 265 */ 266 private InstructionHandle[] getMatch(final int matchedFrom, final int matchLength) { 267 return Arrays.copyOfRange(handles, matchedFrom, matchedFrom + matchLength); 268 } 269 270 /** 271 * Reread the instruction list, for example, after you've altered the list upon a match. 272 */ 273 public final void reread() { 274 final int size = il.getLength(); 275 final char[] buf = new char[size]; // Create a string with length equal to il length 276 handles = il.getInstructionHandles(); 277 // Map opcodes to characters 278 for (int i = 0; i < size; i++) { 279 buf[i] = makeChar(handles[i].getInstruction().getOpcode()); 280 } 281 ilString = new String(buf); 282 } 283 284 /** 285 * Start search beginning from the start of the given instruction list. 286 * 287 * @param pattern the instruction pattern to search for, where case is ignored. 288 * @return iterator of matches where e.nextElement() returns an array of instruction handles describing the matched area. 289 */ 290 public final Iterator<InstructionHandle[]> search(final String pattern) { 291 return search(pattern, il.getStart(), null); 292 } 293 294 /** 295 * Start search beginning from the start of the given instruction list. Check found matches with the constraint object. 296 * 297 * @param pattern the instruction pattern to search for, case is ignored. 298 * @param constraint constraints to be checked on matching code. 299 * @return instruction handle or 'null' if the match failed. 300 */ 301 public final Iterator<InstructionHandle[]> search(final String pattern, final CodeConstraint constraint) { 302 return search(pattern, il.getStart(), constraint); 303 } 304 305 /** 306 * Start search beginning from 'from'. 307 * 308 * @param pattern the instruction pattern to search for, where case is ignored. 309 * @param from where to start the search in the instruction list. 310 * @return iterator of matches where e.nextElement() returns an array of instruction handles describing the matched area. 311 */ 312 public final Iterator<InstructionHandle[]> search(final String pattern, final InstructionHandle from) { 313 return search(pattern, from, null); 314 } 315 316 /** 317 * Search for the given pattern in the instruction list. You can search for any valid opcode via its symbolic name, for example 318 * "istore". You can also use a super class or an interface name to match a whole set of instructions, for example 319 * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all "istore_x" instructions. Additional 320 * aliases are "if" for "ifxx", "if_icmp" for "if_icmpxx", "if_acmp" for "if_acmpxx". 321 * 322 * Consecutive instruction names must be separated by white space which will be removed during the compilation of the 323 * pattern. 324 * 325 * For the rest the usual pattern matching rules for regular expressions apply. 326 * <p> 327 * Example pattern: 328 * </p> 329 * <pre> 330 * search("BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*"); 331 * </pre> 332 * 333 * <p> 334 * If you alter the instruction list upon a match such that other matching areas are affected, you should call reread() 335 * to update the finder and call search() again, because the matches are cached. 336 * </p> 337 * 338 * @param pattern the instruction pattern to search for, where case is ignored. 339 * @param from where to start the search in the instruction list. 340 * @param constraint optional CodeConstraint to check the found code pattern for user-defined constraints. 341 * @return iterator of matches where e.nextElement() returns an array of instruction handles describing the matched area. 342 */ 343 public final Iterator<InstructionHandle[]> search(final String pattern, final InstructionHandle from, final CodeConstraint constraint) { 344 final String search = compilePattern(pattern); 345 int start = -1; 346 for (int i = 0; i < handles.length; i++) { 347 if (handles[i] == from) { 348 start = i; // Where to start search from (index) 349 break; 350 } 351 } 352 if (start == -1) { 353 throw new ClassGenException("Instruction handle " + from + " not found in instruction list."); 354 } 355 final Pattern regex = Pattern.compile(search); 356 final List<InstructionHandle[]> matches = new ArrayList<>(); 357 final Matcher matcher = regex.matcher(ilString); 358 while (start < ilString.length() && matcher.find(start)) { 359 final int startExpr = matcher.start(); 360 final int endExpr = matcher.end(); 361 final int lenExpr = endExpr - startExpr; 362 final InstructionHandle[] match = getMatch(startExpr, lenExpr); 363 if (constraint == null || constraint.checkCode(match)) { 364 matches.add(match); 365 } 366 start = endExpr; 367 } 368 return matches.iterator(); 369 } 370 371 /* 372 * Internal debugging routines. 373 */ 374// private static final String pattern2string( String pattern ) { 375// return pattern2string(pattern, true); 376// } 377 378// private static final String pattern2string( String pattern, boolean make_string ) { 379// StringBuilder buf = new StringBuilder(); 380// for (int i = 0; i < pattern.length(); i++) { 381// char ch = pattern.charAt(i); 382// if (ch >= OFFSET) { 383// if (make_string) { 384// buf.append(Constants.getOpcodeName(ch - OFFSET)); 385// } else { 386// buf.append((ch - OFFSET)); 387// } 388// } else { 389// buf.append(ch); 390// } 391// } 392// return buf.toString(); 393// } 394}