001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *   https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.bcel.verifier.structurals;
020
021import java.util.ArrayList;
022import java.util.HashMap;
023import java.util.HashSet;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027
028import org.apache.bcel.generic.ASTORE;
029import org.apache.bcel.generic.ATHROW;
030import org.apache.bcel.generic.BranchInstruction;
031import org.apache.bcel.generic.CodeExceptionGen;
032import org.apache.bcel.generic.GotoInstruction;
033import org.apache.bcel.generic.IndexedInstruction;
034import org.apache.bcel.generic.Instruction;
035import org.apache.bcel.generic.InstructionHandle;
036import org.apache.bcel.generic.JsrInstruction;
037import org.apache.bcel.generic.LocalVariableInstruction;
038import org.apache.bcel.generic.MethodGen;
039import org.apache.bcel.generic.RET;
040import org.apache.bcel.generic.ReturnInstruction;
041import org.apache.bcel.generic.Select;
042import org.apache.bcel.verifier.exc.AssertionViolatedException;
043import org.apache.bcel.verifier.exc.StructuralCodeConstraintException;
044
045/**
046 * Instances of this class contain information about the subroutines found in a code array of a method. This
047 * implementation considers the top-level (the instructions reachable without a JSR or JSR_W starting off from the first
048 * instruction in a code array of a method) being a special subroutine; see getTopLevel() for that. Please note that the
049 * definition of subroutines in the Java Virtual Machine Specification, Second Edition is somewhat incomplete.
050 * Therefore, JustIce uses an own, more rigid notion. Basically, a subroutine is a piece of code that starts at the
051 * target of a JSR of JSR_W instruction and ends at a corresponding RET instruction. Note also that the control flow of
052 * a subroutine may be complex and non-linear; and that subroutines may be nested. JustIce also mandates subroutines not
053 * to be protected by exception handling code (for the sake of control flow predictability). To understand JustIce's
054 * notion of subroutines, please read
055 *
056 * TODO: refer to the paper.
057 *
058 * @see #getTopLevel()
059 */
060public class Subroutines {
061    // Node coloring constants
062    private enum ColourConstants {
063        WHITE, GRAY, BLACK
064    }
065
066    /**
067     * This inner class implements the Subroutine interface.
068     */
069    private final class SubroutineImpl implements Subroutine {
070
071        /**
072         * UNSET, a symbol for an uninitialized localVariable field. This is used for the "top-level" Subroutine; for example no
073         * subroutine.
074         */
075        private static final int UNSET = -1;
076
077        private final SubroutineImpl[] EMPTY_ARRAY = {};
078
079        /**
080         * The Local Variable slot where the first instruction of this subroutine (an ASTORE) stores the JsrInstruction's
081         * ReturnAddress in and the RET of this subroutine operates on.
082         */
083        private int localVariable = UNSET;
084
085        /** The instructions that belong to this subroutine. */
086        private final Set<InstructionHandle> instructions = new HashSet<>(); // Elements: InstructionHandle
087
088        /**
089         * The JSR or JSR_W instructions that define this subroutine by targeting it.
090         */
091        private final Set<InstructionHandle> theJSRs = new HashSet<>();
092
093        /**
094         * The RET instruction that leaves this subroutine.
095         */
096        private InstructionHandle theRET;
097
098        /**
099         * Constructs a new instance.
100         */
101        SubroutineImpl() {
102            // empty
103        }
104
105        /**
106         * Adds a new JSR or JSR_W that has this subroutine as its target.
107         */
108        public void addEnteringJsrInstruction(final InstructionHandle jsrInst) {
109            if (jsrInst == null || !(jsrInst.getInstruction() instanceof JsrInstruction)) {
110                throw new AssertionViolatedException("Expecting JsrInstruction InstructionHandle.");
111            }
112            if (localVariable == UNSET) {
113                throw new AssertionViolatedException("Set the localVariable first!");
114            }
115            // Something is wrong when an ASTORE is targeted that does not operate on the same local variable than the rest of the
116            // JsrInstruction-targets and the RET.
117            // (We don't know out leader here so we cannot check if we're really targeted!)
118            if (localVariable != ((ASTORE) ((JsrInstruction) jsrInst.getInstruction()).getTarget().getInstruction()).getIndex()) {
119                throw new AssertionViolatedException("Setting a wrong JsrInstruction.");
120            }
121            theJSRs.add(jsrInst);
122        }
123
124        /*
125         * Adds an instruction to this subroutine. All instructions must have been added before invoking setLeavingRET().
126         *
127         * @see #setLeavingRET
128         */
129        void addInstruction(final InstructionHandle ih) {
130            if (theRET != null) {
131                throw new AssertionViolatedException("All instructions must have been added before invoking setLeavingRET().");
132            }
133            instructions.add(ih);
134        }
135
136        /*
137         * Refer to the Subroutine interface for documentation.
138         */
139        @Override
140        public boolean contains(final InstructionHandle inst) {
141            return instructions.contains(inst);
142        }
143
144        /*
145         * Satisfies Subroutine.getAccessedLocalIndices().
146         */
147        @Override
148        public int[] getAccessedLocalsIndices() {
149            // TODO: Implement caching.
150            final Set<Integer> acc = new HashSet<>();
151            if (theRET == null && this != getTopLevel()) {
152                throw new AssertionViolatedException("This subroutine object must be built up completely before calculating accessed locals.");
153            }
154            {
155                for (final InstructionHandle ih : instructions) {
156                    // RET is not a LocalVariableInstruction in the current version of BCEL.
157                    if (ih.getInstruction() instanceof LocalVariableInstruction || ih.getInstruction() instanceof RET) {
158                        final int idx = ((IndexedInstruction) ih.getInstruction()).getIndex();
159                        acc.add(Integer.valueOf(idx));
160                        // LONG? DOUBLE?.
161                        try {
162                            // LocalVariableInstruction instances are typed without the need to look into
163                            // the constant pool.
164                            if (ih.getInstruction() instanceof LocalVariableInstruction) {
165                                final int s = ((LocalVariableInstruction) ih.getInstruction()).getType(null).getSize();
166                                if (s == 2) {
167                                    acc.add(Integer.valueOf(idx + 1));
168                                }
169                            }
170                        } catch (final RuntimeException re) {
171                            throw new AssertionViolatedException("BCEL did not like NULL as a ConstantPoolGen object.", re);
172                        }
173                    }
174                }
175            }
176
177            {
178                final int[] ret = new int[acc.size()];
179                int j = -1;
180                for (final Integer accessedLocal : acc) {
181                    j++;
182                    ret[j] = accessedLocal.intValue();
183                }
184                return ret;
185            }
186        }
187
188        /*
189         * Refer to the Subroutine interface for documentation.
190         */
191        @Override
192        public InstructionHandle[] getEnteringJsrInstructions() {
193            if (this == getTopLevel()) {
194                throw new AssertionViolatedException("getLeavingRET() called on top level pseudo-subroutine.");
195            }
196            return theJSRs.toArray(InstructionHandle.EMPTY_ARRAY);
197        }
198
199        /*
200         * Refer to the Subroutine interface for documentation.
201         */
202        @Override
203        public InstructionHandle[] getInstructions() {
204            return instructions.toArray(InstructionHandle.EMPTY_ARRAY);
205        }
206
207        /*
208         * Refer to the Subroutine interface for documentation.
209         */
210        @Override
211        public InstructionHandle getLeavingRET() {
212            if (this == getTopLevel()) {
213                throw new AssertionViolatedException("getLeavingRET() called on top level pseudo-subroutine.");
214            }
215            return theRET;
216        }
217
218        /* Satisfies Subroutine.getRecursivelyAccessedLocalsIndices(). */
219        @Override
220        public int[] getRecursivelyAccessedLocalsIndices() {
221            final Set<Integer> s = new HashSet<>();
222            final int[] lvs = getAccessedLocalsIndices();
223            for (final int lv : lvs) {
224                s.add(Integer.valueOf(lv));
225            }
226            getRecursivelyAccessedLocalsIndicesHelper(s, subSubs());
227            final int[] ret = new int[s.size()];
228            int j = -1;
229            for (final Integer index : s) {
230                j++;
231                ret[j] = index.intValue();
232            }
233            return ret;
234        }
235
236        /**
237         * A recursive helper method for getRecursivelyAccessedLocalsIndices().
238         *
239         * @see #getRecursivelyAccessedLocalsIndices()
240         */
241        private void getRecursivelyAccessedLocalsIndicesHelper(final Set<Integer> set, final Subroutine[] subs) {
242            for (final Subroutine sub : subs) {
243                final int[] lvs = sub.getAccessedLocalsIndices();
244                for (final int lv : lvs) {
245                    set.add(Integer.valueOf(lv));
246                }
247                if (sub.subSubs().length != 0) {
248                    getRecursivelyAccessedLocalsIndicesHelper(set, sub.subSubs());
249                }
250            }
251        }
252
253        /**
254         * Sets the leaving RET instruction. Must be invoked after all instructions are added. Must not be invoked for top-level
255         * 'subroutine'.
256         */
257        void setLeavingRET() {
258            if (localVariable == UNSET) {
259                throw new AssertionViolatedException("setLeavingRET() called for top-level 'subroutine' or forgot to set local variable first.");
260            }
261            InstructionHandle ret = null;
262            for (final InstructionHandle actual : instructions) {
263                if (actual.getInstruction() instanceof RET) {
264                    if (ret != null) {
265                        throw new StructuralCodeConstraintException("Subroutine with more then one RET detected: '" + ret + "' and '" + actual + "'.");
266                    }
267                    ret = actual;
268                }
269            }
270            if (ret == null) {
271                throw new StructuralCodeConstraintException("Subroutine without a RET detected.");
272            }
273            if (((RET) ret.getInstruction()).getIndex() != localVariable) {
274                throw new StructuralCodeConstraintException(
275                    "Subroutine uses '" + ret + "' which does not match the correct local variable '" + localVariable + "'.");
276            }
277            theRET = ret;
278        }
279
280        /*
281         * Sets the local variable slot the ASTORE that is targeted by the JsrInstructions of this subroutine operates on. This
282         * subroutine's RET operates on that same local variable slot, of course.
283         */
284        void setLocalVariable(final int i) {
285            if (localVariable != UNSET) {
286                throw new AssertionViolatedException("localVariable set twice.");
287            }
288            localVariable = i;
289        }
290
291        /*
292         * Satisfies Subroutine.subSubs().
293         */
294        @Override
295        public Subroutine[] subSubs() {
296            final Set<Subroutine> h = new HashSet<>();
297
298            for (final InstructionHandle ih : instructions) {
299                final Instruction inst = ih.getInstruction();
300                if (inst instanceof JsrInstruction) {
301                    final InstructionHandle targ = ((JsrInstruction) inst).getTarget();
302                    h.add(getSubroutine(targ));
303                }
304            }
305            return h.toArray(EMPTY_ARRAY);
306        }
307
308        /**
309         * Returns a String representation of this object, merely for debugging purposes. (Internal) Warning: Verbosity on a
310         * problematic subroutine may cause stack overflow errors due to recursive subSubs() calls. Don't use this, then.
311         */
312        @Override
313        public String toString() {
314            final StringBuilder ret = new StringBuilder();
315            ret.append("Subroutine: Local variable is '").append(localVariable);
316            ret.append("', JSRs are '").append(theJSRs);
317            ret.append("', RET is '").append(theRET);
318            ret.append("', Instructions: '").append(instructions).append("'.");
319
320            ret.append(" Accessed local variable slots: '");
321            int[] alv = getAccessedLocalsIndices();
322            for (final int element : alv) {
323                ret.append(element);
324                ret.append(" ");
325            }
326            ret.append("'.");
327
328            ret.append(" Recursively (via subsub...routines) accessed local variable slots: '");
329            alv = getRecursivelyAccessedLocalsIndices();
330            for (final int element : alv) {
331                ret.append(element);
332                ret.append(" ");
333            }
334            ret.append("'.");
335
336            return ret.toString();
337        }
338
339    } // end Inner Class SubrouteImpl
340
341    /**
342     * A utility method that calculates the successors of a given InstructionHandle <strong>in the same subroutine</strong>. That
343     * means, a RET does not have any successors as defined here. A JsrInstruction has its physical successor as its
344     * successor (opposed to its target) as defined here.
345     */
346    private static InstructionHandle[] getSuccessors(final InstructionHandle instruction) {
347        final InstructionHandle[] single = new InstructionHandle[1];
348
349        final Instruction inst = instruction.getInstruction();
350
351        // Terminates method normally.
352        // Terminates method abnormally, because JustIce mandates
353        // subroutines not to be protected by exception handlers.
354        if (inst instanceof RET || inst instanceof ReturnInstruction || inst instanceof ATHROW) {
355            return InstructionHandle.EMPTY_ARRAY;
356        }
357
358        // See method comment.
359        if (inst instanceof JsrInstruction) {
360            single[0] = instruction.getNext();
361            return single;
362        }
363
364        if (inst instanceof GotoInstruction) {
365            single[0] = ((GotoInstruction) inst).getTarget();
366            return single;
367        }
368
369        if (inst instanceof BranchInstruction) {
370            if (inst instanceof Select) {
371                // BCEL's getTargets() returns only the non-default targets,
372                // thanks to Eli Tilevich for reporting.
373                final InstructionHandle[] matchTargets = ((Select) inst).getTargets();
374                final InstructionHandle[] ret = new InstructionHandle[matchTargets.length + 1];
375                ret[0] = ((Select) inst).getTarget();
376                System.arraycopy(matchTargets, 0, ret, 1, matchTargets.length);
377                return ret;
378            }
379            final InstructionHandle[] pair = new InstructionHandle[2];
380            pair[0] = instruction.getNext();
381            pair[1] = ((BranchInstruction) inst).getTarget();
382            return pair;
383        }
384
385        // default case: Fall through.
386        single[0] = instruction.getNext();
387        return single;
388    }
389
390    /**
391     * The map containing the subroutines found. Key: InstructionHandle of the leader of the subroutine. Elements:
392     * SubroutineImpl objects.
393     */
394    private final Map<InstructionHandle, Subroutine> subroutines = new HashMap<>();
395
396    /**
397     * This is referring to a special subroutine, namely the top level. This is not really a subroutine but we use it to
398     * distinguish between top level instructions and unreachable instructions.
399     */
400    // CHECKSTYLE:OFF
401    public final Subroutine TOPLEVEL; // TODO can this be made private?
402    // CHECKSTYLE:ON
403
404    /**
405     * Constructs a new instance.
406     *
407     * @param mg A MethodGen object representing method to create the Subroutine objects of. Assumes that JustIce strict
408     *        checks are needed.
409     */
410    public Subroutines(final MethodGen mg) {
411        this(mg, true);
412    }
413
414    /**
415     * Constructs a new instance.
416     *
417     * @param mg A MethodGen object representing method to create the Subroutine objects of.
418     * @param enableJustIceCheck whether to enable additional JustIce checks.
419     * @since 6.0
420     */
421    public Subroutines(final MethodGen mg, final boolean enableJustIceCheck) {
422        final InstructionHandle[] all = mg.getInstructionList().getInstructionHandles();
423        final CodeExceptionGen[] handlers = mg.getExceptionHandlers();
424
425        // Define our "Toplevel" fake subroutine.
426        TOPLEVEL = new SubroutineImpl();
427
428        // Calculate "real" subroutines.
429        final Set<InstructionHandle> subLeaders = new HashSet<>(); // Elements: InstructionHandle
430        for (final InstructionHandle element : all) {
431            final Instruction inst = element.getInstruction();
432            if (inst instanceof JsrInstruction) {
433                subLeaders.add(((JsrInstruction) inst).getTarget());
434            }
435        }
436
437        // Build up the database.
438        for (final InstructionHandle astore : subLeaders) {
439            final SubroutineImpl sr = new SubroutineImpl();
440            sr.setLocalVariable(((ASTORE) astore.getInstruction()).getIndex());
441            subroutines.put(astore, sr);
442        }
443
444        // Fake it a bit. We want a virtual "TopLevel" subroutine.
445        subroutines.put(all[0], TOPLEVEL);
446        subLeaders.add(all[0]);
447
448        // Tell the subroutines about their JsrInstructions.
449        // Note that there cannot be a JSR targeting the top-level
450        // since "Jsr 0" is disallowed in Pass 3a.
451        // Instructions shared by a subroutine and the toplevel are
452        // disallowed and checked below, after the BFS.
453        for (final InstructionHandle element : all) {
454            final Instruction inst = element.getInstruction();
455            if (inst instanceof JsrInstruction) {
456                final InstructionHandle leader = ((JsrInstruction) inst).getTarget();
457                ((SubroutineImpl) getSubroutine(leader)).addEnteringJsrInstruction(element);
458            }
459        }
460
461        // Now do a BFS from every subroutine leader to find all the
462        // instructions that belong to a subroutine.
463        // we don't want to assign an instruction to two or more Subroutine objects.
464        final Set<InstructionHandle> instructionsAssigned = new HashSet<>();
465
466        // Graph coloring. Key: InstructionHandle, Value: ColourConstants enum.
467        final Map<InstructionHandle, ColourConstants> colors = new HashMap<>();
468
469        final List<InstructionHandle> qList = new ArrayList<>();
470        for (final InstructionHandle actual : subLeaders) {
471            // Do some BFS with "actual" as the root of the graph.
472            // Init colors
473            for (final InstructionHandle element : all) {
474                colors.put(element, ColourConstants.WHITE);
475            }
476            colors.put(actual, ColourConstants.GRAY);
477            // Init Queue
478
479            qList.clear();
480            qList.add(actual); // add(Obj) adds to the end, remove(0) removes from the start.
481
482            /*
483             * BFS ALGORITHM MODIFICATION: Start out with multiple "root" nodes, as exception handlers are starting points of
484             * top-level code, too. [why top-level? TODO: Refer to the special JustIce notion of subroutines.]
485             */
486            if (actual == all[0]) {
487                for (final CodeExceptionGen handler : handlers) {
488                    colors.put(handler.getHandlerPC(), ColourConstants.GRAY);
489                    qList.add(handler.getHandlerPC());
490                }
491            }
492            /* CONTINUE NORMAL BFS ALGORITHM */
493
494            // Loop until Queue is empty
495            while (!qList.isEmpty()) {
496                final InstructionHandle u = qList.remove(0);
497                final InstructionHandle[] successors = getSuccessors(u);
498                for (final InstructionHandle successor : successors) {
499                    if (colors.get(successor) == ColourConstants.WHITE) {
500                        colors.put(successor, ColourConstants.GRAY);
501                        qList.add(successor);
502                    }
503                }
504                colors.put(u, ColourConstants.BLACK);
505            }
506            // BFS ended above.
507            for (final InstructionHandle element : all) {
508                if (colors.get(element) == ColourConstants.BLACK) {
509                    ((SubroutineImpl) (actual == all[0] ? getTopLevel() : getSubroutine(actual))).addInstruction(element);
510                    if (instructionsAssigned.contains(element)) {
511                        throw new StructuralCodeConstraintException(
512                            "Instruction '" + element + "' is part of more than one subroutine (or of the top level and a subroutine).");
513                    }
514                    instructionsAssigned.add(element);
515                }
516            }
517            if (actual != all[0]) { // If we don't deal with the top-level 'subroutine'
518                ((SubroutineImpl) getSubroutine(actual)).setLeavingRET();
519            }
520        }
521
522        if (enableJustIceCheck) {
523            // Now make sure no instruction of a Subroutine is protected by exception handling code
524            // as is mandated by JustIces notion of subroutines.
525            for (final CodeExceptionGen handler : handlers) {
526                InstructionHandle protectedIh = handler.getStartPC();
527                while (protectedIh != handler.getEndPC().getNext()) {
528                    // Note the inclusive/inclusive notation of "generic API" exception handlers!
529                    for (final Subroutine sub : subroutines.values()) {
530                        if (sub != subroutines.get(all[0]) && sub.contains(protectedIh)) {
531                            throw new StructuralCodeConstraintException("Subroutine instruction '" + protectedIh + "' is protected by an exception handler, '"
532                                + handler + "'. This is forbidden by the JustIce verifier due to its clear definition of subroutines.");
533                        }
534                    }
535                    protectedIh = protectedIh.getNext();
536                }
537            }
538        }
539
540        // Now make sure no subroutine is calling a subroutine
541        // that uses the same local variable for the RET as themselves
542        // (recursively).
543        // This includes that subroutines may not call themselves
544        // recursively, even not through intermediate calls to other
545        // subroutines.
546        noRecursiveCalls(getTopLevel(), new HashSet<>());
547
548    }
549
550    /**
551     * Returns the Subroutine object associated with the given leader (that is, the first instruction of the subroutine).
552     * You must not use this to get the top-level instructions modeled as a Subroutine object.
553     *
554     * @see #getTopLevel()
555     */
556    public Subroutine getSubroutine(final InstructionHandle leader) {
557        final Subroutine ret = subroutines.get(leader);
558
559        if (ret == null) {
560            throw new AssertionViolatedException("Subroutine requested for an InstructionHandle that is not a leader of a subroutine.");
561        }
562
563        if (ret == TOPLEVEL) {
564            throw new AssertionViolatedException("TOPLEVEL special subroutine requested; use getTopLevel().");
565        }
566
567        return ret;
568    }
569
570    /**
571     * For easy handling, the piece of code that is <strong>not</strong> a subroutine, the top-level, is also modeled as a Subroutine
572     * object. It is a special Subroutine object where <B>you must not invoke getEnteringJsrInstructions() or
573     * getLeavingRET()</B>.
574     *
575     * @see Subroutine#getEnteringJsrInstructions()
576     * @see Subroutine#getLeavingRET()
577     */
578    public Subroutine getTopLevel() {
579        return TOPLEVEL;
580    }
581
582    /**
583     * This (recursive) utility method makes sure that no subroutine is calling a subroutine that uses the same local
584     * variable for the RET as themselves (recursively). This includes that subroutines may not call themselves recursively,
585     * even not through intermediate calls to other subroutines.
586     *
587     * @throws StructuralCodeConstraintException if the above constraint is not satisfied.
588     */
589    private void noRecursiveCalls(final Subroutine sub, final Set<Integer> set) {
590        final Subroutine[] subs = sub.subSubs();
591
592        for (final Subroutine sub2 : subs) {
593            final int index = ((RET) sub2.getLeavingRET().getInstruction()).getIndex();
594
595            if (!set.add(Integer.valueOf(index))) {
596                // Don't use toString() here because of possibly infinite recursive subSubs() calls then.
597                final SubroutineImpl si = (SubroutineImpl) sub2;
598                throw new StructuralCodeConstraintException("Subroutine with local variable '" + si.localVariable + "', JSRs '" + si.theJSRs + "', RET '"
599                    + si.theRET + "' is called by a subroutine which uses the same local variable index as itself; maybe even a recursive call?"
600                    + " JustIce's clean definition of a subroutine forbids both.");
601            }
602
603            noRecursiveCalls(sub2, set);
604
605            set.remove(Integer.valueOf(index));
606        }
607    }
608
609    /**
610     * Returns the subroutine object associated with the given instruction. This is a costly operation, you should consider
611     * using getSubroutine(InstructionHandle). Returns 'null' if the given InstructionHandle lies in so-called 'dead code',
612     * for example code that can never be executed.
613     *
614     * @see #getSubroutine(InstructionHandle)
615     * @see #getTopLevel()
616     */
617    public Subroutine subroutineOf(final InstructionHandle any) {
618        for (final Subroutine s : subroutines.values()) {
619            if (s.contains(any)) {
620                return s;
621            }
622        }
623        System.err.println("DEBUG: Please verify '" + any.toString(true) + "' lies in dead code.");
624        return null;
625        // throw new AssertionViolatedException("No subroutine for InstructionHandle found (DEAD CODE?).");
626    }
627
628    /**
629     * Returns a String representation of this object; merely for debugging puposes.
630     */
631    @Override
632    public String toString() {
633        return "---\n" + subroutines + "\n---\n";
634    }
635}