001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.io.input; 019 020import static org.apache.commons.io.IOUtils.CR; 021import static org.apache.commons.io.IOUtils.EOF; 022import static org.apache.commons.io.IOUtils.LF; 023 024import java.io.BufferedReader; 025import java.io.BufferedWriter; 026import java.io.IOException; 027import java.io.Reader; 028 029import org.apache.commons.io.IOUtils; 030 031/** 032 * Wraps an existing {@link Reader} and buffers the input <em>without any synchronization</em>. Expensive interaction with the underlying reader is minimized, 033 * since most (smaller) requests can be satisfied by accessing the buffer alone. The drawback is that some extra space is required to hold the buffer and that 034 * copying takes place when filling that buffer, but this is usually outweighed by the performance benefits. 035 * <p> 036 * A typical application pattern for the class looks like this: 037 * </p> 038 * 039 * <pre>{@code 040 * UnsynchronizedBufferedReader buf = new UnsynchronizedBufferedReader(new FileReader("file")); 041 * }</pre> 042 * <p> 043 * Provenance: Apache Harmony's java.io.BufferedReader, renamed, and modified. 044 * </p> 045 * 046 * @see BufferedReader 047 * @see BufferedWriter 048 * @since 2.17.0 049 */ 050public class UnsynchronizedBufferedReader extends UnsynchronizedReader { 051 052 private static final char NUL = '\0'; 053 054 private final Reader in; 055 056 /** 057 * The characters that can be read and refilled in bulk. We maintain three indices into this buffer: 058 * 059 * <pre> 060 * { X X X X X X X X X X X X - - } 061 * ^ ^ ^ 062 * | | | 063 * mark pos end 064 * </pre> 065 * <p> 066 * Pos points to the next readable character. End is one greater than the last readable character. When {@code pos == end}, the buffer is empty and must be 067 * {@link #fillBuf() filled} before characters can be read. 068 * </p> 069 * <p> 070 * Mark is the value pos will be set to on calls to {@link #reset()}. Its value is in the range {@code [0...pos]}. If the mark is {@code -1}, the buffer 071 * cannot be reset. 072 * </p> 073 * <p> 074 * MarkLimit limits the distance between the mark and the pos. When this limit is exceeded, {@link #reset()} is permitted (but not required) to throw an 075 * exception. For shorter distances, {@link #reset()} shall not throw (unless the reader is closed). 076 * </p> 077 */ 078 private char[] buf; 079 080 private int pos; 081 082 private int end; 083 084 private int mark = -1; 085 086 private int markLimit = -1; 087 088 /** 089 * Constructs a new BufferedReader on the Reader {@code in}. The buffer gets the default size (8 KB). 090 * 091 * @param in the Reader that is buffered. 092 */ 093 public UnsynchronizedBufferedReader(final Reader in) { 094 this(in, IOUtils.DEFAULT_BUFFER_SIZE); 095 } 096 097 /** 098 * Constructs a new BufferedReader on the Reader {@code in}. The buffer size is specified by the parameter {@code size}. 099 * 100 * @param in the Reader that is buffered. 101 * @param size the size of the buffer to allocate. 102 * @throws IllegalArgumentException if {@code size <= 0}. 103 */ 104 public UnsynchronizedBufferedReader(final Reader in, final int size) { 105 if (size <= 0) { 106 throw new IllegalArgumentException("size <= 0"); 107 } 108 this.in = in; 109 buf = new char[size]; 110 } 111 112 /** 113 * Peeks at the next input character, refilling the buffer if necessary. If this character is a newline character ("\n"), it is discarded. 114 */ 115 final void chompNewline() throws IOException { 116 if ((pos != end || fillBuf() != EOF) && buf[pos] == LF) { 117 pos++; 118 } 119 } 120 121 /** 122 * Closes this reader. This implementation closes the buffered source reader and releases the buffer. Nothing is done if this reader has already been 123 * closed. 124 * 125 * @throws IOException if an error occurs while closing this reader. 126 */ 127 @Override 128 public void close() throws IOException { 129 if (!isClosed()) { 130 in.close(); 131 buf = null; 132 super.close(); 133 } 134 } 135 136 /** 137 * Populates the buffer with data. It is an error to call this method when the buffer still contains data; ie. if {@code pos < end}. 138 * 139 * @return the number of bytes read into the buffer, or -1 if the end of the source stream has been reached. 140 */ 141 private int fillBuf() throws IOException { 142 // assert(pos == end); 143 144 if (mark == EOF || pos - mark >= markLimit) { 145 /* mark isn't set or has exceeded its limit. use the whole buffer */ 146 final int result = in.read(buf, 0, buf.length); 147 if (result > 0) { 148 mark = -1; 149 pos = 0; 150 end = result; 151 } 152 return result; 153 } 154 155 if (mark == 0 && markLimit > buf.length) { 156 /* the only way to make room when mark=0 is by growing the buffer */ 157 int newLength = buf.length * 2; 158 if (newLength > markLimit) { 159 newLength = markLimit; 160 } 161 final char[] newbuf = new char[newLength]; 162 System.arraycopy(buf, 0, newbuf, 0, buf.length); 163 buf = newbuf; 164 } else if (mark > 0) { 165 /* make room by shifting the buffered data to left mark positions */ 166 System.arraycopy(buf, mark, buf, 0, buf.length - mark); 167 pos -= mark; 168 end -= mark; 169 mark = 0; 170 } 171 172 /* Set the new position and mark position */ 173 final int count = in.read(buf, pos, buf.length - pos); 174 if (count != EOF) { 175 end += count; 176 } 177 return count; 178 } 179 180 /** 181 * Sets a mark position in this reader. The parameter {@code markLimit} indicates how many characters can be read before the mark is invalidated. Calling 182 * {@link #reset()} will reposition the reader back to the marked position if {@code markLimit} has not been surpassed. 183 * 184 * @param markLimit the number of characters that can be read before the mark is invalidated. 185 * @throws IllegalArgumentException if {@code markLimit < 0}. 186 * @throws IOException if an error occurs while setting a mark in this reader. 187 * @see #markSupported() 188 * @see #reset() 189 */ 190 @Override 191 public void mark(final int markLimit) throws IOException { 192 if (markLimit < 0) { 193 throw new IllegalArgumentException(); 194 } 195 checkOpen(); 196 this.markLimit = markLimit; 197 mark = pos; 198 } 199 200 /** 201 * Tests whether this reader supports the {@link #mark(int)} and {@link #reset()} methods. This implementation returns {@code true}. 202 * 203 * @return {@code true} for {@code BufferedReader}. 204 * @see #mark(int) 205 * @see #reset() 206 */ 207 @Override 208 public boolean markSupported() { 209 return true; 210 } 211 212 /** 213 * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will still return this value. 214 * 215 * @return the next character 216 * @throws IOException If an I/O error occurs 217 */ 218 public int peek() throws IOException { 219 mark(1); 220 final int c = read(); 221 reset(); 222 return c; 223 } 224 225 /** 226 * Populates the buffer with the next {@code buf.length} characters in the current reader without consuming them. The next call to {@link #read()} will 227 * still return the next value. 228 * 229 * @param buf the buffer to fill for the look ahead. 230 * @return the buffer itself 231 * @throws IOException If an I/O error occurs 232 */ 233 public int peek(final char[] buf) throws IOException { 234 final int n = buf.length; 235 mark(n); 236 final int c = read(buf, 0, n); 237 reset(); 238 return c; 239 } 240 241 /** 242 * Reads a single character from this reader and returns it with the two higher-order bytes set to 0. If possible, BufferedReader returns a character from 243 * the buffer. If there are no characters available in the buffer, it fills the buffer and then returns a character. It returns -1 if there are no more 244 * characters in the source reader. 245 * 246 * @return the character read or -1 if the end of the source reader has been reached. 247 * @throws IOException if this reader is closed or some other I/O error occurs. 248 */ 249 @Override 250 public int read() throws IOException { 251 checkOpen(); 252 /* Are there buffered characters available? */ 253 if (pos < end || fillBuf() != EOF) { 254 return buf[pos++]; 255 } 256 return EOF; 257 } 258 259 /** 260 * Reads at most {@code length} characters from this reader and stores them at {@code offset} in the character array {@code buffer}. Returns the number of 261 * characters actually read or -1 if the end of the source reader has been reached. If all the buffered characters have been used, a mark has not been set 262 * and the requested number of characters is larger than this readers buffer size, BufferedReader bypasses the buffer and simply places the results directly 263 * into {@code buffer}. 264 * 265 * @param buffer the character array to store the characters read. 266 * @param offset the initial position in {@code buffer} to store the bytes read from this reader. 267 * @param length the maximum number of characters to read, must be non-negative. 268 * @return number of characters read or -1 if the end of the source reader has been reached. 269 * @throws IndexOutOfBoundsException if {@code offset < 0} or {@code length < 0}, or if {@code offset + length} is greater than the size of {@code buffer}. 270 * @throws IOException if this reader is closed or some other I/O error occurs. 271 */ 272 @Override 273 public int read(final char[] buffer, int offset, final int length) throws IOException { 274 /* 275 * First throw on a closed reader, then check the parameters. 276 * 277 * This behavior is not specified in the Javadoc, but is followed by most readers in java.io. 278 */ 279 checkOpen(); 280 IOUtils.checkFromIndexSize(buffer, offset, length); 281 if (length == 0) { 282 return 0; 283 } 284 285 int outstanding = length; 286 while (outstanding > 0) { 287 288 /* 289 * If there are bytes in the buffer, grab those first. 290 */ 291 final int available = end - pos; 292 if (available > 0) { 293 final int count = available >= outstanding ? outstanding : available; 294 System.arraycopy(buf, pos, buffer, offset, count); 295 pos += count; 296 offset += count; 297 outstanding -= count; 298 } 299 300 /* 301 * Before attempting to read from the underlying stream, make sure we really, really want to. We won't bother if we're done, or if we've already got 302 * some bytes and reading from the underlying stream would block. 303 */ 304 if (outstanding == 0 || outstanding < length && !in.ready()) { 305 break; 306 } 307 308 // assert(pos == end); 309 310 /* 311 * If we're unmarked and the requested size is greater than our buffer, read the bytes directly into the caller's buffer. We don't read into smaller 312 * buffers because that could result in a many reads. 313 */ 314 if ((mark == -1 || pos - mark >= markLimit) && outstanding >= buf.length) { 315 final int count = in.read(buffer, offset, outstanding); 316 if (count > 0) { 317 outstanding -= count; 318 mark = -1; 319 } 320 321 break; // assume the source stream gave us all that it could 322 } 323 324 if (fillBuf() == EOF) { 325 break; // source is exhausted 326 } 327 } 328 329 final int count = length - outstanding; 330 return count > 0 || count == length ? count : EOF; 331 } 332 333 /** 334 * Returns the next line of text available from this reader. A line is represented by zero or more characters followed by {@code LF}, {@code CR}, 335 * {@code "\r\n"} or the end of the reader. The string does not include the newline sequence. 336 * 337 * @return the contents of the line or {@code null} if no characters were read before the end of the reader has been reached. 338 * @throws IOException if this reader is closed or some other I/O error occurs. 339 */ 340 public String readLine() throws IOException { 341 checkOpen(); 342 /* has the underlying stream been exhausted? */ 343 if (pos == end && fillBuf() == EOF) { 344 return null; 345 } 346 for (int charPos = pos; charPos < end; charPos++) { 347 final char ch = buf[charPos]; 348 if (ch > CR) { 349 continue; 350 } 351 if (ch == LF) { 352 final String res = new String(buf, pos, charPos - pos); 353 pos = charPos + 1; 354 return res; 355 } 356 if (ch == CR) { 357 final String res = new String(buf, pos, charPos - pos); 358 pos = charPos + 1; 359 if ((pos < end || fillBuf() != EOF) && buf[pos] == LF) { 360 pos++; 361 } 362 return res; 363 } 364 } 365 366 char eol = NUL; 367 final StringBuilder result = new StringBuilder(80); 368 /* Typical Line Length */ 369 370 result.append(buf, pos, end - pos); 371 while (true) { 372 pos = end; 373 374 /* Are there buffered characters available? */ 375 if (eol == LF) { 376 return result.toString(); 377 } 378 // attempt to fill buffer 379 if (fillBuf() == EOF) { 380 // characters or null. 381 return result.length() > 0 || eol != NUL ? result.toString() : null; 382 } 383 for (int charPos = pos; charPos < end; charPos++) { 384 final char c = buf[charPos]; 385 if (eol != NUL) { 386 if (eol == CR && c == LF) { 387 if (charPos > pos) { 388 result.append(buf, pos, charPos - pos - 1); 389 } 390 pos = charPos + 1; 391 } else { 392 if (charPos > pos) { 393 result.append(buf, pos, charPos - pos - 1); 394 } 395 pos = charPos; 396 } 397 return result.toString(); 398 } 399 if (c == LF || c == CR) { 400 eol = c; 401 } 402 } 403 if (eol == NUL) { 404 result.append(buf, pos, end - pos); 405 } else { 406 result.append(buf, pos, end - pos - 1); 407 } 408 } 409 } 410 411 /** 412 * Tests whether this reader is ready to be read without blocking. 413 * 414 * @return {@code true} if this reader will not block when {@code read} is called, {@code false} if unknown or blocking will occur. 415 * @throws IOException if this reader is closed or some other I/O error occurs. 416 * @see #read() 417 * @see #read(char[], int, int) 418 * @see #readLine() 419 */ 420 @Override 421 public boolean ready() throws IOException { 422 checkOpen(); 423 return end - pos > 0 || in.ready(); 424 } 425 426 /** 427 * Resets this reader's position to the last {@code mark()} location. Invocations of {@code read()} and {@code skip()} will occur from this new location. 428 * 429 * @throws IOException if this reader is closed or no mark has been set. 430 * @see #mark(int) 431 * @see #markSupported() 432 */ 433 @Override 434 public void reset() throws IOException { 435 checkOpen(); 436 if (mark == -1) { 437 throw new IOException("mark == -1"); 438 } 439 pos = mark; 440 } 441 442 /** 443 * Skips {@code amount} characters in this reader. Subsequent {@code read()}s will not return these characters unless {@code reset()} is used. Skipping 444 * characters may invalidate a mark if {@code markLimit} is surpassed. 445 * 446 * @param amount the maximum number of characters to skip. 447 * @return the number of characters actually skipped. 448 * @throws IllegalArgumentException if {@code amount < 0}. 449 * @throws IOException if this reader is closed or some other I/O error occurs. 450 * @see #mark(int) 451 * @see #markSupported() 452 * @see #reset() 453 */ 454 @Override 455 public long skip(final long amount) throws IOException { 456 if (amount < 0) { 457 throw new IllegalArgumentException(); 458 } 459 checkOpen(); 460 if (amount < 1) { 461 return 0; 462 } 463 if (end - pos >= amount) { 464 pos += Math.toIntExact(amount); 465 return amount; 466 } 467 468 long read = end - pos; 469 pos = end; 470 while (read < amount) { 471 if (fillBuf() == EOF) { 472 return read; 473 } 474 if (end - pos >= amount - read) { 475 pos += Math.toIntExact(amount - read); 476 return amount; 477 } 478 // Couldn't get all the characters, skip what we read 479 read += end - pos; 480 pos = end; 481 } 482 return amount; 483 } 484 485}