001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.io.input; 018 019import java.io.Closeable; 020import java.io.File; 021import java.io.IOException; 022import java.io.UnsupportedEncodingException; 023import java.nio.ByteBuffer; 024import java.nio.channels.SeekableByteChannel; 025import java.nio.charset.Charset; 026import java.nio.charset.CharsetEncoder; 027import java.nio.charset.StandardCharsets; 028import java.nio.file.Files; 029import java.nio.file.Path; 030import java.nio.file.StandardOpenOption; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.Collections; 034import java.util.List; 035 036import org.apache.commons.io.Charsets; 037import org.apache.commons.io.FileSystem; 038import org.apache.commons.io.StandardLineSeparator; 039import org.apache.commons.io.build.AbstractOrigin; 040import org.apache.commons.io.build.AbstractStreamBuilder; 041 042/** 043 * Reads lines in a file reversely (similar to a BufferedReader, but starting at the last line). Useful for e.g. searching in log files. 044 * <p> 045 * To build an instance, see {@link Builder}. 046 * </p> 047 * 048 * @since 2.2 049 */ 050public class ReversedLinesFileReader implements Closeable { 051 052 /** 053 * Builds a new {@link ReversedLinesFileReader} instance. 054 * <p> 055 * For example: 056 * </p> 057 * <pre>{@code 058 * ReversedLinesFileReader r = ReversedLinesFileReader.builder() 059 * .setPath(path) 060 * .setBufferSize(4096) 061 * .setCharset(StandardCharsets.UTF_8) 062 * .get();} 063 * </pre> 064 * 065 * @since 2.12.0 066 */ 067 public static class Builder extends AbstractStreamBuilder<ReversedLinesFileReader, Builder> { 068 069 /** 070 * Constructs a new Builder. 071 */ 072 public Builder() { 073 setBufferSizeDefault(DEFAULT_BLOCK_SIZE); 074 setBufferSize(DEFAULT_BLOCK_SIZE); 075 } 076 077 /** 078 * Constructs a new instance. 079 * <p> 080 * This builder use the aspects Path, Charset, buffer size. 081 * </p> 082 * <p> 083 * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an 084 * {@link UnsupportedOperationException}. 085 * </p> 086 * 087 * @return a new instance. 088 * @throws UnsupportedOperationException if the origin cannot provide a Path. 089 * @see AbstractOrigin#getPath() 090 */ 091 @Override 092 public ReversedLinesFileReader get() throws IOException { 093 return new ReversedLinesFileReader(getPath(), getBufferSize(), getCharset()); 094 } 095 096 } 097 098 private final class FilePart { 099 private final long no; 100 101 private final byte[] data; 102 103 private byte[] leftOver; 104 105 private int currentLastBytePos; 106 107 /** 108 * Constructs a new instance. 109 * 110 * @param no the part number 111 * @param length its length 112 * @param leftOverOfLastFilePart remainder 113 * @throws IOException if there is a problem reading the file 114 */ 115 private FilePart(final long no, final int length, final byte[] leftOverOfLastFilePart) throws IOException { 116 this.no = no; 117 final int dataLength = length + (leftOverOfLastFilePart != null ? leftOverOfLastFilePart.length : 0); 118 this.data = new byte[dataLength]; 119 final long off = (no - 1) * blockSize; 120 121 // read data 122 if (no > 0 /* file not empty */) { 123 channel.position(off); 124 final int countRead = channel.read(ByteBuffer.wrap(data, 0, length)); 125 if (countRead != length) { 126 throw new IllegalStateException("Count of requested bytes and actually read bytes don't match"); 127 } 128 } 129 // copy left over part into data arr 130 if (leftOverOfLastFilePart != null) { 131 System.arraycopy(leftOverOfLastFilePart, 0, data, length, leftOverOfLastFilePart.length); 132 } 133 this.currentLastBytePos = data.length - 1; 134 this.leftOver = null; 135 } 136 137 /** 138 * Constructs the buffer containing any leftover bytes. 139 */ 140 private void createLeftOver() { 141 final int lineLengthBytes = currentLastBytePos + 1; 142 if (lineLengthBytes > 0) { 143 // create left over for next block 144 leftOver = Arrays.copyOf(data, lineLengthBytes); 145 } else { 146 leftOver = null; 147 } 148 currentLastBytePos = -1; 149 } 150 151 /** 152 * Finds the new-line sequence and return its length. 153 * 154 * @param data buffer to scan 155 * @param i start offset in buffer 156 * @return length of newline sequence or 0 if none found 157 */ 158 private int getNewLineMatchByteCount(final byte[] data, final int i) { 159 for (final byte[] newLineSequence : newLineSequences) { 160 boolean match = true; 161 for (int j = newLineSequence.length - 1; j >= 0; j--) { 162 final int k = i + j - (newLineSequence.length - 1); 163 match &= k >= 0 && data[k] == newLineSequence[j]; 164 } 165 if (match) { 166 return newLineSequence.length; 167 } 168 } 169 return 0; 170 } 171 172 /** 173 * Reads a line. 174 * 175 * @return the line or null 176 */ 177 private String readLine() { //NOPMD Bug in PMD 178 179 String line = null; 180 int newLineMatchByteCount; 181 182 final boolean isLastFilePart = no == 1; 183 184 int i = currentLastBytePos; 185 while (i > -1) { 186 187 if (!isLastFilePart && i < avoidNewlineSplitBufferSize) { 188 // avoidNewlineSplitBuffer: for all except the last file part we 189 // take a few bytes to the next file part to avoid splitting of newlines 190 createLeftOver(); 191 break; // skip last few bytes and leave it to the next file part 192 } 193 194 // --- check for newline --- 195 if ((newLineMatchByteCount = getNewLineMatchByteCount(data, i)) > 0 /* found newline */) { 196 final int lineStart = i + 1; 197 final int lineLengthBytes = currentLastBytePos - lineStart + 1; 198 199 if (lineLengthBytes < 0) { 200 throw new IllegalStateException("Unexpected negative line length=" + lineLengthBytes); 201 } 202 final byte[] lineData = Arrays.copyOfRange(data, lineStart, lineStart + lineLengthBytes); 203 204 line = new String(lineData, charset); 205 206 currentLastBytePos = i - newLineMatchByteCount; 207 break; // found line 208 } 209 210 // --- move cursor --- 211 i -= byteDecrement; 212 213 // --- end of file part handling --- 214 if (i < 0) { 215 createLeftOver(); 216 break; // end of file part 217 } 218 } 219 220 // --- last file part handling --- 221 if (isLastFilePart && leftOver != null) { 222 // there will be no line break anymore, this is the first line of the file 223 line = new String(leftOver, charset); 224 leftOver = null; 225 } 226 227 return line; 228 } 229 230 /** 231 * Handles block rollover 232 * 233 * @return the new FilePart or null 234 * @throws IOException if there was a problem reading the file 235 */ 236 private FilePart rollOver() throws IOException { 237 238 if (currentLastBytePos > -1) { 239 throw new IllegalStateException("Current currentLastCharPos unexpectedly positive... " 240 + "last readLine() should have returned something! currentLastCharPos=" + currentLastBytePos); 241 } 242 243 if (no > 1) { 244 return new FilePart(no - 1, blockSize, leftOver); 245 } 246 // NO 1 was the last FilePart, we're finished 247 if (leftOver != null) { 248 throw new IllegalStateException("Unexpected leftover of the last block: leftOverOfThisFilePart=" 249 + new String(leftOver, charset)); 250 } 251 return null; 252 } 253 } 254 255 private static final String EMPTY_STRING = ""; 256 257 private static final int DEFAULT_BLOCK_SIZE = FileSystem.getCurrent().getBlockSize(); 258 259 /** 260 * Constructs a new {@link Builder}. 261 * 262 * @return a new {@link Builder}. 263 * @since 2.12.0 264 */ 265 public static Builder builder() { 266 return new Builder(); 267 } 268 269 private final int blockSize; 270 private final Charset charset; 271 private final SeekableByteChannel channel; 272 private final long totalByteLength; 273 private final long totalBlockCount; 274 private final byte[][] newLineSequences; 275 private final int avoidNewlineSplitBufferSize; 276 private final int byteDecrement; 277 private FilePart currentFilePart; 278 private boolean trailingNewlineOfFileSkipped; 279 280 /** 281 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 282 * platform's default encoding. 283 * 284 * @param file the file to be read 285 * @throws IOException if an I/O error occurs. 286 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 287 */ 288 @Deprecated 289 public ReversedLinesFileReader(final File file) throws IOException { 290 this(file, DEFAULT_BLOCK_SIZE, Charset.defaultCharset()); 291 } 292 293 /** 294 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 295 * specified encoding. 296 * 297 * @param file the file to be read 298 * @param charset the charset to use, null uses the default Charset. 299 * @throws IOException if an I/O error occurs. 300 * @since 2.5 301 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 302 */ 303 @Deprecated 304 public ReversedLinesFileReader(final File file, final Charset charset) throws IOException { 305 this(file.toPath(), charset); 306 } 307 308 /** 309 * Constructs a ReversedLinesFileReader with the given block size and encoding. 310 * 311 * @param file the file to be read 312 * @param blockSize size of the internal buffer (for ideal performance this 313 * should match with the block size of the underlying file 314 * system). 315 * @param charset the encoding of the file, null uses the default Charset. 316 * @throws IOException if an I/O error occurs. 317 * @since 2.3 318 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 319 */ 320 @Deprecated 321 public ReversedLinesFileReader(final File file, final int blockSize, final Charset charset) throws IOException { 322 this(file.toPath(), blockSize, charset); 323 } 324 325 /** 326 * Constructs a ReversedLinesFileReader with the given block size and encoding. 327 * 328 * @param file the file to be read 329 * @param blockSize size of the internal buffer (for ideal performance this 330 * should match with the block size of the underlying file 331 * system). 332 * @param charsetName the encoding of the file, null uses the default Charset. 333 * @throws IOException if an I/O error occurs 334 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 335 * {@link UnsupportedEncodingException} 336 * in version 2.2 if the 337 * encoding is not 338 * supported. 339 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 340 */ 341 @Deprecated 342 public ReversedLinesFileReader(final File file, final int blockSize, final String charsetName) throws IOException { 343 this(file.toPath(), blockSize, charsetName); 344 } 345 346 /** 347 * Constructs a ReversedLinesFileReader with default block size of 4KB and the 348 * specified encoding. 349 * 350 * @param file the file to be read 351 * @param charset the charset to use, null uses the default Charset. 352 * @throws IOException if an I/O error occurs. 353 * @since 2.7 354 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 355 */ 356 @Deprecated 357 public ReversedLinesFileReader(final Path file, final Charset charset) throws IOException { 358 this(file, DEFAULT_BLOCK_SIZE, charset); 359 } 360 361 /** 362 * Constructs a ReversedLinesFileReader with the given block size and encoding. 363 * 364 * @param file the file to be read 365 * @param blockSize size of the internal buffer (for ideal performance this 366 * should match with the block size of the underlying file 367 * system). 368 * @param charset the encoding of the file, null uses the default Charset. 369 * @throws IOException if an I/O error occurs. 370 * @since 2.7 371 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 372 */ 373 @Deprecated 374 public ReversedLinesFileReader(final Path file, final int blockSize, final Charset charset) throws IOException { 375 this.blockSize = blockSize; 376 this.charset = Charsets.toCharset(charset); 377 378 // --- check & prepare encoding --- 379 final CharsetEncoder charsetEncoder = this.charset.newEncoder(); 380 final float maxBytesPerChar = charsetEncoder.maxBytesPerChar(); 381 if (maxBytesPerChar == 1f || this.charset == StandardCharsets.UTF_8) { 382 // all one byte encodings are no problem 383 byteDecrement = 1; 384 } else if (this.charset == Charset.forName("Shift_JIS") || // Same as for UTF-8 385 // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html 386 this.charset == Charset.forName("windows-31j") || // Windows code page 932 (Japanese) 387 this.charset == Charset.forName("x-windows-949") || // Windows code page 949 (Korean) 388 this.charset == Charset.forName("gbk") || // Windows code page 936 (Simplified Chinese) 389 this.charset == Charset.forName("x-windows-950")) { // Windows code page 950 (Traditional Chinese) 390 byteDecrement = 1; 391 } else if (this.charset == StandardCharsets.UTF_16BE || this.charset == StandardCharsets.UTF_16LE) { 392 // UTF-16 new line sequences are not allowed as second tuple of four byte 393 // sequences, 394 // however byte order has to be specified 395 byteDecrement = 2; 396 } else if (this.charset == StandardCharsets.UTF_16) { 397 throw new UnsupportedEncodingException( 398 "For UTF-16, you need to specify the byte order (use UTF-16BE or " + "UTF-16LE)"); 399 } else { 400 throw new UnsupportedEncodingException( 401 "Encoding " + charset + " is not supported yet (feel free to " + "submit a patch)"); 402 } 403 404 // NOTE: The new line sequences are matched in the order given, so it is 405 // important that \r\n is BEFORE \n 406 this.newLineSequences = new byte[][] { 407 StandardLineSeparator.CRLF.getBytes(this.charset), 408 StandardLineSeparator.LF.getBytes(this.charset), 409 StandardLineSeparator.CR.getBytes(this.charset) 410 }; 411 412 this.avoidNewlineSplitBufferSize = newLineSequences[0].length; 413 414 // Open file 415 this.channel = Files.newByteChannel(file, StandardOpenOption.READ); 416 this.totalByteLength = channel.size(); 417 int lastBlockLength = (int) (this.totalByteLength % blockSize); 418 if (lastBlockLength > 0) { 419 this.totalBlockCount = this.totalByteLength / blockSize + 1; 420 } else { 421 this.totalBlockCount = this.totalByteLength / blockSize; 422 if (this.totalByteLength > 0) { 423 lastBlockLength = blockSize; 424 } 425 } 426 this.currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null); 427 428 } 429 430 /** 431 * Constructs a ReversedLinesFileReader with the given block size and encoding. 432 * 433 * @param file the file to be read 434 * @param blockSize size of the internal buffer (for ideal performance this 435 * should match with the block size of the underlying file 436 * system). 437 * @param charsetName the encoding of the file, null uses the default Charset. 438 * @throws IOException if an I/O error occurs 439 * @throws java.nio.charset.UnsupportedCharsetException thrown instead of 440 * {@link UnsupportedEncodingException} 441 * in version 2.2 if the 442 * encoding is not 443 * supported. 444 * @since 2.7 445 * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()} 446 */ 447 @Deprecated 448 public ReversedLinesFileReader(final Path file, final int blockSize, final String charsetName) throws IOException { 449 this(file, blockSize, Charsets.toCharset(charsetName)); 450 } 451 452 /** 453 * Closes underlying resources. 454 * 455 * @throws IOException if an I/O error occurs. 456 */ 457 @Override 458 public void close() throws IOException { 459 channel.close(); 460 } 461 462 /** 463 * Returns the lines of the file from bottom to top. 464 * 465 * @return the next line or null if the start of the file is reached 466 * @throws IOException if an I/O error occurs. 467 */ 468 public String readLine() throws IOException { 469 470 String line = currentFilePart.readLine(); 471 while (line == null) { 472 currentFilePart = currentFilePart.rollOver(); 473 if (currentFilePart == null) { 474 // no more FileParts: we're done, leave line set to null 475 break; 476 } 477 line = currentFilePart.readLine(); 478 } 479 480 // aligned behavior with BufferedReader that doesn't return a last, empty line 481 if (EMPTY_STRING.equals(line) && !trailingNewlineOfFileSkipped) { 482 trailingNewlineOfFileSkipped = true; 483 line = readLine(); 484 } 485 486 return line; 487 } 488 489 /** 490 * Returns {@code lineCount} lines of the file from bottom to top. 491 * <p> 492 * If there are less than {@code lineCount} lines in the file, then that's what 493 * you get. 494 * </p> 495 * <p> 496 * Note: You can easily flip the result with {@link Collections#reverse(List)}. 497 * </p> 498 * 499 * @param lineCount How many lines to read. 500 * @return A new list 501 * @throws IOException if an I/O error occurs. 502 * @since 2.8.0 503 */ 504 public List<String> readLines(final int lineCount) throws IOException { 505 if (lineCount < 0) { 506 throw new IllegalArgumentException("lineCount < 0"); 507 } 508 final ArrayList<String> arrayList = new ArrayList<>(lineCount); 509 for (int i = 0; i < lineCount; i++) { 510 final String line = readLine(); 511 if (line == null) { 512 return arrayList; 513 } 514 arrayList.add(line); 515 } 516 return arrayList; 517 } 518 519 /** 520 * Returns the last {@code lineCount} lines of the file. 521 * <p> 522 * If there are less than {@code lineCount} lines in the file, then that's what 523 * you get. 524 * </p> 525 * 526 * @param lineCount How many lines to read. 527 * @return A String. 528 * @throws IOException if an I/O error occurs. 529 * @since 2.8.0 530 */ 531 public String toString(final int lineCount) throws IOException { 532 final List<String> lines = readLines(lineCount); 533 Collections.reverse(lines); 534 return lines.isEmpty() ? EMPTY_STRING : String.join(System.lineSeparator(), lines) + System.lineSeparator(); 535 } 536 537}