001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one or more
003 *  contributor license agreements.  See the NOTICE file distributed with
004 *  this work for additional information regarding copyright ownership.
005 *  The ASF licenses this file to You under the Apache License, Version 2.0
006 *  (the "License"); you may not use this file except in compliance with
007 *  the License.  You may obtain a copy of the License at
008 *
009 *     https://www.apache.org/licenses/LICENSE-2.0
010 *
011 *  Unless required by applicable law or agreed to in writing, software
012 *  distributed under the License is distributed on an "AS IS" BASIS,
013 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 *  See the License for the specific language governing permissions and
015 *  limitations under the License.
016 */
017
018package org.apache.commons.io.input;
019
020import static org.apache.commons.io.IOUtils.CR;
021import static org.apache.commons.io.IOUtils.EOF;
022import static org.apache.commons.io.IOUtils.LF;
023
024import java.io.BufferedReader;
025import java.io.BufferedWriter;
026import java.io.IOException;
027import java.io.Reader;
028
029import org.apache.commons.io.IOUtils;
030
031/**
032 * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized,
033 * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that
034 * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits.
035 * <p>
036 * A typical application pattern for the class looks like this:
037 * </p>
038 *
039 * <pre>{@code
040 * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file"));
041 * }</pre>
042 * <p>
043 * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified.
044 * </p>
045 *
046 * @see BufferedReader
047 * @see BufferedWriter
048 * @since 2.17.0
049 */
050public class UnsynchronizedBufferedReader extends UnsynchronizedReader {
051
052    private static final char NUL = '\0';
053
054    private final Reader in;
055
056    /**
057     * The characters that can be read and refilled in bulk. We maintain three indices into this buffer:
058     *
059     * <pre>
060     *     { X X X X X X X X X X X X - - }
061     *           ^     ^             ^
062     *           |     |             |
063     *         mark   pos           end
064     * </pre>
065     * <p>
066     * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be
067     * {@link #fillBuf() filled} before characters can be read.
068     * </p>
069     * <p>
070     * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer
071     * cannot be reset.
072     * </p>
073     * <p>
074     * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an
075     * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed).
076     * </p>
077     */
078    private char[] buf;
079
080    private int pos;
081
082    private int end;
083
084    private int mark = -1;
085
086    private int markLimit = -1;
087
088    /**
089     * Constructs a new BufferedReader on the Reader {@code in}. The buffer gets the default size (8 KB).
090     *
091     * @param in the Reader that is buffered.
092     */
093    public UnsynchronizedBufferedReader(final Reader in) {
094        this(in, IOUtils.DEFAULT_BUFFER_SIZE);
095    }
096
097    /**
098     * Constructs a new BufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}.
099     *
100     * @param in   the Reader that is buffered.
101     * @param size the size of the buffer to allocate.
102     * @throws IllegalArgumentException if {@code size <= 0}.
103     */
104    public UnsynchronizedBufferedReader(final Reader in, final int size) {
105        if (size <= 0) {
106            throw new IllegalArgumentException("size <= 0");
107        }
108        this.in = in;
109        buf = new char[size];
110    }
111
112    /**
113     * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded.
114     */
115    final void chompNewline() throws IOException {
116        if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) {
117            pos++;
118        }
119    }
120
121    /**
122     * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been
123     * closed.
124     *
125     * @throws IOException if an error occurs while closing this reader.
126     */
127    @Override
128    public void close() throws IOException {
129        if (!isClosed()) {
130            in.close();
131            buf = null;
132            super.close();
133        }
134    }
135
136    /**
137     * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}.
138     *
139     * @return the number of bytes read into the buffer, or -1 if the end of the source stream has been reached.
140     */
141    private int fillBuf() throws IOException {
142        // assert(pos == end);
143
144        if (mark == EOF || pos - mark >= markLimit) {
145            /* mark isn't set or has exceeded its limit. use the whole buffer */
146            final int result = in.read(buf, 0, buf.length);
147            if (result > 0) {
148                mark = -1;
149                pos = 0;
150                end = result;
151            }
152            return result;
153        }
154
155        if (mark == 0 && markLimit > buf.length) {
156            /* the only way to make room when mark=0 is by growing the buffer */
157            int newLength = buf.length * 2;
158            if (newLength > markLimit) {
159                newLength = markLimit;
160            }
161            final char[] newbuf = new char[newLength];
162            System.arraycopy(buf, 0, newbuf, 0, buf.length);
163            buf = newbuf;
164        } else if (mark > 0) {
165            /* make room by shifting the buffered data to left mark positions */
166            System.arraycopy(buf, mark, buf, 0, buf.length - mark);
167            pos -= mark;
168            end -= mark;
169            mark = 0;
170        }
171
172        /* Set the new position and mark position */
173        final int count = in.read(buf, pos, buf.length - pos);
174        if (count != EOF) {
175            end += count;
176        }
177        return count;
178    }
179
180    /**
181     * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling
182     * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed.
183     *
184     * @param markLimit the number of characters that can be read before the mark is invalidated.
185     * @throws IllegalArgumentException if {@code markLimit < 0}.
186     * @throws IOException              if an error occurs while setting a mark in this reader.
187     * @see #markSupported()
188     * @see #reset()
189     */
190    @Override
191    public void mark(final int markLimit) throws IOException {
192        if (markLimit < 0) {
193            throw new IllegalArgumentException();
194        }
195        checkOpen();
196        this.markLimit = markLimit;
197        mark = pos;
198    }
199
200    /**
201     * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}.
202     *
203     * @return {@code true} for {@code BufferedReader}.
204     * @see #mark(int)
205     * @see #reset()
206     */
207    @Override
208    public boolean markSupported() {
209        return true;
210    }
211
212    /**
213     * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value.
214     *
215     * @return the next character
216     * @throws IOException If an I/O error occurs
217     */
218    public int peek() throws IOException {
219        mark(1);
220        final int c = read();
221        reset();
222        return c;
223    }
224
225    /**
226     * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will
227     * still return the next value.
228     *
229     * @param buf the buffer to fill for the look ahead.
230     * @return the buffer itself
231     * @throws IOException If an I/O error occurs
232     */
233    public int peek(final char[] buf) throws IOException {
234        final int n = buf.length;
235        mark(n);
236        final int c = read(buf, 0, n);
237        reset();
238        return c;
239    }
240
241    /**
242     * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, BufferedReader returns a character from
243     * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more
244     * characters in the source reader.
245     *
246     * @return the character read or -1 if the end of the source reader has been reached.
247     * @throws IOException if this reader is closed or some other I/O error occurs.
248     */
249    @Override
250    public int read() throws IOException {
251        checkOpen();
252        /* Are there buffered characters available? */
253        if (pos < end || fillBuf() != EOF) {
254            return buf[pos++];
255        }
256        return EOF;
257    }
258
259    /**
260     * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of
261     * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set
262     * and the requested number of characters is larger than this readers buffer size, BufferedReader bypasses the buffer and simply places the results directly
263     * into {@code buffer}.
264     *
265     * @param buffer the character array to store the characters read.
266     * @param offset the initial position in {@code buffer} to store the bytes read from this reader.
267     * @param length the maximum number of characters to read, must be non-negative.
268     * @return number of characters read or -1 if the end of the source reader has been reached.
269     * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}.
270     * @throws IOException               if this reader is closed or some other I/O error occurs.
271     */
272    @Override
273    public int read(final char[] buffer, int offset, final int length) throws IOException {
274        /*
275         * First throw on a closed reader, then check the parameters.
276         *
277         * This behavior is not specified in the Javadoc, but is followed by most readers in java.io.
278         */
279        checkOpen();
280        IOUtils.checkFromIndexSize(buffer, offset, length);
281        if (length == 0) {
282            return 0;
283        }
284
285        int outstanding = length;
286        while (outstanding > 0) {
287
288            /*
289             * If there are bytes in the buffer, grab those first.
290             */
291            final int available = end - pos;
292            if (available > 0) {
293                final int count = available >= outstanding ? outstanding : available;
294                System.arraycopy(buf, pos, buffer, offset, count);
295                pos += count;
296                offset += count;
297                outstanding -= count;
298            }
299
300            /*
301             * Before attempting to read from the underlying stream, make sure we really, really want to. We won't bother if we're done, or if we've already got
302             * some bytes and reading from the underlying stream would block.
303             */
304            if (outstanding == 0 || outstanding < length && !in.ready()) {
305                break;
306            }
307
308            // assert(pos == end);
309
310            /*
311             * If we're unmarked and the requested size is greater than our buffer, read the bytes directly into the caller's buffer. We don't read into smaller
312             * buffers because that could result in a many reads.
313             */
314            if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) {
315                final int count = in.read(buffer, offset, outstanding);
316                if (count > 0) {
317                    outstanding -= count;
318                    mark = -1;
319                }
320
321                break; // assume the source stream gave us all that it could
322            }
323
324            if (fillBuf() == EOF) {
325                break; // source is exhausted
326            }
327        }
328
329        final int count = length - outstanding;
330        return count > 0 || count == length ? count : EOF;
331    }
332
333    /**
334     * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR},
335     * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence.
336     *
337     * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached.
338     * @throws IOException if this reader is closed or some other I/O error occurs.
339     */
340    public String readLine() throws IOException {
341        checkOpen();
342        /* has the underlying stream been exhausted? */
343        if (pos == end && fillBuf() == EOF) {
344            return null;
345        }
346        for (int charPos = pos; charPos < end; charPos++) {
347            final char ch = buf[charPos];
348            if (ch > CR) {
349                continue;
350            }
351            if (ch == LF) {
352                final String res = new String(buf, pos, charPos - pos);
353                pos = charPos + 1;
354                return res;
355            }
356            if (ch == CR) {
357                final String res = new String(buf, pos, charPos - pos);
358                pos = charPos + 1;
359                if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) {
360                    pos++;
361                }
362                return res;
363            }
364        }
365
366        char eol = NUL;
367        final StringBuilder result = new StringBuilder(80);
368        /* Typical Line Length */
369
370        result.append(buf, pos, end - pos);
371        while (true) {
372            pos = end;
373
374            /* Are there buffered characters available? */
375            if (eol == LF) {
376                return result.toString();
377            }
378            // attempt to fill buffer
379            if (fillBuf() == EOF) {
380                // characters or null.
381                return result.length() > 0 || eol != NUL ? result.toString() : null;
382            }
383            for (int charPos = pos; charPos < end; charPos++) {
384                final char c = buf[charPos];
385                if (eol != NUL) {
386                    if (eol == CR && c == LF) {
387                        if (charPos > pos) {
388                            result.append(buf, pos, charPos - pos - 1);
389                        }
390                        pos = charPos + 1;
391                    } else {
392                        if (charPos > pos) {
393                            result.append(buf, pos, charPos - pos - 1);
394                        }
395                        pos = charPos;
396                    }
397                    return result.toString();
398                }
399                if (c == LF || c == CR) {
400                    eol = c;
401                }
402            }
403            if (eol == NUL) {
404                result.append(buf, pos, end - pos);
405            } else {
406                result.append(buf, pos, end - pos - 1);
407            }
408        }
409    }
410
411    /**
412     * Tests whether this reader is ready to be read without blocking.
413     *
414     * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur.
415     * @throws IOException if this reader is closed or some other I/O error occurs.
416     * @see #read()
417     * @see #read(char[], int, int)
418     * @see #readLine()
419     */
420    @Override
421    public boolean ready() throws IOException {
422        checkOpen();
423        return end - pos > 0 || in.ready();
424    }
425
426    /**
427     * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location.
428     *
429     * @throws IOException if this reader is closed or no mark has been set.
430     * @see #mark(int)
431     * @see #markSupported()
432     */
433    @Override
434    public void reset() throws IOException {
435        checkOpen();
436        if (mark == -1) {
437            throw new IOException("mark == -1");
438        }
439        pos = mark;
440    }
441
442    /**
443     * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping
444     * characters may invalidate a mark if {@code markLimit} is surpassed.
445     *
446     * @param amount the maximum number of characters to skip.
447     * @return the number of characters actually skipped.
448     * @throws IllegalArgumentException if {@code amount < 0}.
449     * @throws IOException              if this reader is closed or some other I/O error occurs.
450     * @see #mark(int)
451     * @see #markSupported()
452     * @see #reset()
453     */
454    @Override
455    public long skip(final long amount) throws IOException {
456        if (amount < 0) {
457            throw new IllegalArgumentException();
458        }
459        checkOpen();
460        if (amount < 1) {
461            return 0;
462        }
463        if (end - pos >= amount) {
464            pos += Math.toIntExact(amount);
465            return amount;
466        }
467
468        long read = end - pos;
469        pos = end;
470        while (read < amount) {
471            if (fillBuf() == EOF) {
472                return read;
473            }
474            if (end - pos >= amount - read) {
475                pos += Math.toIntExact(amount - read);
476                return amount;
477            }
478            // Couldn't get all the characters, skip what we read
479            read += end - pos;
480            pos = end;
481        }
482        return amount;
483    }
484
485}