001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.archivers.sevenz; 018 019import static java.nio.charset.StandardCharsets.UTF_16LE; 020 021import java.io.BufferedInputStream; 022import java.io.ByteArrayInputStream; 023import java.io.Closeable; 024import java.io.DataInputStream; 025import java.io.EOFException; 026import java.io.File; 027import java.io.FilterInputStream; 028import java.io.IOException; 029import java.io.InputStream; 030import java.nio.ByteBuffer; 031import java.nio.ByteOrder; 032import java.nio.channels.Channels; 033import java.nio.channels.SeekableByteChannel; 034import java.nio.file.Files; 035import java.nio.file.StandardOpenOption; 036import java.util.ArrayList; 037import java.util.Arrays; 038import java.util.BitSet; 039import java.util.EnumSet; 040import java.util.LinkedHashMap; 041import java.util.LinkedList; 042import java.util.List; 043import java.util.Map; 044import java.util.Objects; 045import java.util.zip.CRC32; 046import java.util.zip.CheckedInputStream; 047 048import org.apache.commons.compress.MemoryLimitException; 049import org.apache.commons.compress.utils.BoundedInputStream; 050import org.apache.commons.compress.utils.ByteUtils; 051import org.apache.commons.compress.utils.CRC32VerifyingInputStream; 052import org.apache.commons.compress.utils.IOUtils; 053import org.apache.commons.compress.utils.InputStreamStatistics; 054 055/** 056 * Reads a 7z file, using SeekableByteChannel under 057 * the covers. 058 * <p> 059 * The 7z file format is a flexible container 060 * that can contain many compression and 061 * encryption types, but at the moment only 062 * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 063 * are supported. 064 * </p> 065 * <p> 066 * The format is very Windows/Intel specific, 067 * so it uses little-endian byte order, 068 * doesn't store user/group or permission bits, 069 * and represents times using NTFS timestamps 070 * (100 nanosecond units since 1 January 1601). 071 * Hence the official tools recommend against 072 * using it for backup purposes on *nix, and 073 * recommend .tar.7z or .tar.lzma or .tar.xz 074 * instead. 075 * </p> 076 * <p> 077 * Both the header and file contents may be 078 * compressed and/or encrypted. With both 079 * encrypted, neither file names nor file 080 * contents can be read, but the use of 081 * encryption isn't plausibly deniable. 082 * </p> 083 * <p>Multi volume archives can be read by concatenating the parts in 084 * correct order - either manually or by using {link 085 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} 086 * for example.</p> 087 * 088 * @NotThreadSafe 089 * @since 1.6 090 */ 091public class SevenZFile implements Closeable { 092 private static final class ArchiveStatistics { 093 private int numberOfPackedStreams; 094 private long numberOfCoders; 095 private long numberOfOutStreams; 096 private long numberOfInStreams; 097 private long numberOfUnpackSubStreams; 098 private int numberOfFolders; 099 private BitSet folderHasCrc; 100 private int numberOfEntries; 101 private int numberOfEntriesWithStream; 102 103 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 104 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 105 throw new IOException("archive with entries but no folders"); 106 } 107 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 108 throw new IOException("archive doesn't contain enough substreams for entries"); 109 } 110 111 final long memoryNeededInKb = estimateSize() / 1024; 112 if (maxMemoryLimitInKb < memoryNeededInKb) { 113 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 114 } 115 } 116 117 private long bindPairSize() { 118 return 16; 119 } 120 121 private long coderSize() { 122 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 123 + 16 124 + 4 /* properties, guess */ 125 ; 126 } 127 128 private long entrySize() { 129 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 130 } 131 132 long estimateSize() { 133 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 134 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 135 + numberOfFolders * folderSize() /* folders in Archive */ 136 + numberOfCoders * coderSize() /* coders in Folder */ 137 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 138 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 139 + 8L * numberOfOutStreams /* unpackSizes in Folder */ 140 + numberOfEntries * entrySize() /* files in Archive */ 141 + streamMapSize() 142 ; 143 return 2 * lowerBound /* conservative guess */; 144 } 145 146 private long folderSize() { 147 return 30; /* nested arrays are accounted for separately */ 148 } 149 150 private long streamMapSize() { 151 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 152 + 8 * numberOfPackedStreams /* packStreamOffsets */ 153 + 4 * numberOfEntries /* fileFolderIndex */ 154 ; 155 } 156 157 @Override 158 public String toString() { 159 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders 160 + " folders. Estimated size " + estimateSize()/ 1024L + " kB."; 161 } 162 } 163 164 static final int SIGNATURE_HEADER_SIZE = 32; 165 166 private static final String DEFAULT_FILE_NAME = "unknown archive"; 167 168 /** Shared with SevenZOutputFile and tests, neither mutates it. */ 169 static final byte[] sevenZSignature = { //NOSONAR 170 (byte) '7', (byte) 'z', (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C 171 }; 172 173 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 174 if (value > Integer.MAX_VALUE || value < 0) { 175 throw new IOException(String.format("Cannot handle % %,d", what, value)); 176 } 177 return (int) value; 178 } 179 180 private static ByteBuffer checkEndOfFile(final ByteBuffer buf, final int expectRemaining) throws EOFException { 181 final int remaining = buf.remaining(); 182 if (remaining < expectRemaining) { 183 throw new EOFException(String.format("remaining %,d < expectRemaining %,d", remaining, expectRemaining)); 184 } 185 return buf; 186 } 187 188 private static void get(final ByteBuffer buf, final byte[] to) throws EOFException { 189 checkEndOfFile(buf, to.length).get(to); 190 } 191 192 private static char getChar(final ByteBuffer buf) throws EOFException { 193 return checkEndOfFile(buf, Character.BYTES).getChar(); 194 } 195 196 private static int getInt(final ByteBuffer buf) throws EOFException { 197 return checkEndOfFile(buf, Integer.BYTES).getInt(); 198 } 199 200 private static long getLong(final ByteBuffer buf) throws EOFException { 201 return checkEndOfFile(buf, Long.BYTES).getLong(); 202 } 203 204 private static int getUnsignedByte(final ByteBuffer buf) throws EOFException { 205 if (!buf.hasRemaining()) { 206 throw new EOFException(); 207 } 208 return buf.get() & 0xff; 209 } 210 211 /** 212 * Checks if the signature matches what is expected for a 7z file. 213 * 214 * @param signature 215 * the bytes to check 216 * @param length 217 * the number of bytes to check 218 * @return true, if this is the signature of a 7z archive. 219 * @since 1.8 220 */ 221 public static boolean matches(final byte[] signature, final int length) { 222 if (length < sevenZSignature.length) { 223 return false; 224 } 225 226 for (int i = 0; i < sevenZSignature.length; i++) { 227 if (signature[i] != sevenZSignature[i]) { 228 return false; 229 } 230 } 231 return true; 232 } 233 private static long readUint64(final ByteBuffer in) throws IOException { 234 // long rather than int as it might get shifted beyond the range of an int 235 final long firstByte = getUnsignedByte(in); 236 int mask = 0x80; 237 long value = 0; 238 for (int i = 0; i < 8; i++) { 239 if ((firstByte & mask) == 0) { 240 return value | (firstByte & mask - 1) << 8 * i; 241 } 242 final long nextByte = getUnsignedByte(in); 243 value |= nextByte << 8 * i; 244 mask >>>= 1; 245 } 246 return value; 247 } 248 249 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 250 if (bytesToSkip < 1) { 251 return 0; 252 } 253 final int current = input.position(); 254 final int maxSkip = input.remaining(); 255 if (maxSkip < bytesToSkip) { 256 bytesToSkip = maxSkip; 257 } 258 input.position(current + (int) bytesToSkip); 259 return bytesToSkip; 260 } 261 262 private final String fileName; 263 264 private SeekableByteChannel channel; 265 266 private final Archive archive; 267 268 private int currentEntryIndex = -1; 269 270 private int currentFolderIndex = -1; 271 272 private InputStream currentFolderInputStream; 273 274 private byte[] password; 275 276 private final SevenZFileOptions options; 277 278 private long compressedBytesReadFromCurrentEntry; 279 280 private long uncompressedBytesReadFromCurrentEntry; 281 282 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 283 284 /** 285 * Reads a file as unencrypted 7z archive 286 * 287 * @param fileName the file to read 288 * @throws IOException if reading the archive fails 289 */ 290 public SevenZFile(final File fileName) throws IOException { 291 this(fileName, SevenZFileOptions.DEFAULT); 292 } 293 294 /** 295 * Reads a file as 7z archive 296 * 297 * @param fileName the file to read 298 * @param password optional password if the archive is encrypted - 299 * the byte array is supposed to be the UTF16-LE encoded 300 * representation of the password. 301 * @throws IOException if reading the archive fails 302 * @deprecated use the char[]-arg version for the password instead 303 */ 304 @Deprecated 305 public SevenZFile(final File fileName, final byte[] password) throws IOException { 306 this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), 307 fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT); 308 } 309 310 /** 311 * Reads a file as 7z archive 312 * 313 * @param fileName the file to read 314 * @param password optional password if the archive is encrypted 315 * @throws IOException if reading the archive fails 316 * @since 1.17 317 */ 318 public SevenZFile(final File fileName, final char[] password) throws IOException { 319 this(fileName, password, SevenZFileOptions.DEFAULT); 320 } 321 322 /** 323 * Reads a file as 7z archive with additional options. 324 * 325 * @param fileName the file to read 326 * @param password optional password if the archive is encrypted 327 * @param options the options to apply 328 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 329 * @since 1.19 330 */ 331 public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException { 332 this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR 333 fileName.getAbsolutePath(), AES256SHA256Decoder.utf16Decode(password), true, options); 334 } 335 336 /** 337 * Reads a file as unencrypted 7z archive 338 * 339 * @param fileName the file to read 340 * @param options the options to apply 341 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 342 * @since 1.19 343 */ 344 public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException { 345 this(fileName, null, options); 346 } 347 348 /** 349 * Reads a SeekableByteChannel as 7z archive 350 * 351 * <p>{@link 352 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 353 * allows you to read from an in-memory archive.</p> 354 * 355 * @param channel the channel to read 356 * @throws IOException if reading the archive fails 357 * @since 1.13 358 */ 359 public SevenZFile(final SeekableByteChannel channel) throws IOException { 360 this(channel, SevenZFileOptions.DEFAULT); 361 } 362 363 /** 364 * Reads a SeekableByteChannel as 7z archive 365 * 366 * <p>{@link 367 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 368 * allows you to read from an in-memory archive.</p> 369 * 370 * @param channel the channel to read 371 * @param password optional password if the archive is encrypted - 372 * the byte array is supposed to be the UTF16-LE encoded 373 * representation of the password. 374 * @throws IOException if reading the archive fails 375 * @since 1.13 376 * @deprecated use the char[]-arg version for the password instead 377 */ 378 @Deprecated 379 public SevenZFile(final SeekableByteChannel channel, 380 final byte[] password) throws IOException { 381 this(channel, DEFAULT_FILE_NAME, password); 382 } 383 384 /** 385 * Reads a SeekableByteChannel as 7z archive 386 * 387 * <p>{@link 388 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 389 * allows you to read from an in-memory archive.</p> 390 * 391 * @param channel the channel to read 392 * @param password optional password if the archive is encrypted 393 * @throws IOException if reading the archive fails 394 * @since 1.17 395 */ 396 public SevenZFile(final SeekableByteChannel channel, 397 final char[] password) throws IOException { 398 this(channel, password, SevenZFileOptions.DEFAULT); 399 } 400 401 /** 402 * Reads a SeekableByteChannel as 7z archive with additional options. 403 * 404 * <p>{@link 405 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 406 * allows you to read from an in-memory archive.</p> 407 * 408 * @param channel the channel to read 409 * @param password optional password if the archive is encrypted 410 * @param options the options to apply 411 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 412 * @since 1.19 413 */ 414 public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options) 415 throws IOException { 416 this(channel, DEFAULT_FILE_NAME, password, options); 417 } 418 419 /** 420 * Reads a SeekableByteChannel as 7z archive with additional options. 421 * 422 * <p>{@link 423 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 424 * allows you to read from an in-memory archive.</p> 425 * 426 * @param channel the channel to read 427 * @param options the options to apply 428 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 429 * @since 1.19 430 */ 431 public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException { 432 this(channel, DEFAULT_FILE_NAME, null, options); 433 } 434 435 /** 436 * Reads a SeekableByteChannel as 7z archive 437 * 438 * <p>{@link 439 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 440 * allows you to read from an in-memory archive.</p> 441 * 442 * @param channel the channel to read 443 * @param fileName name of the archive - only used for error reporting 444 * @throws IOException if reading the archive fails 445 * @since 1.17 446 */ 447 public SevenZFile(final SeekableByteChannel channel, final String fileName) 448 throws IOException { 449 this(channel, fileName, SevenZFileOptions.DEFAULT); 450 } 451 452 /** 453 * Reads a SeekableByteChannel as 7z archive 454 * 455 * <p>{@link 456 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 457 * allows you to read from an in-memory archive.</p> 458 * 459 * @param channel the channel to read 460 * @param fileName name of the archive - only used for error reporting 461 * @param password optional password if the archive is encrypted - 462 * the byte array is supposed to be the UTF16-LE encoded 463 * representation of the password. 464 * @throws IOException if reading the archive fails 465 * @since 1.13 466 * @deprecated use the char[]-arg version for the password instead 467 */ 468 @Deprecated 469 public SevenZFile(final SeekableByteChannel channel, final String fileName, 470 final byte[] password) throws IOException { 471 this(channel, fileName, password, false, SevenZFileOptions.DEFAULT); 472 } 473 474 private SevenZFile(final SeekableByteChannel channel, final String fileName, 475 final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException { 476 boolean succeeded = false; 477 this.channel = channel; 478 this.fileName = fileName; 479 this.options = options; 480 try { 481 archive = readHeaders(password); 482 if (password != null) { 483 this.password = Arrays.copyOf(password, password.length); 484 } else { 485 this.password = null; 486 } 487 succeeded = true; 488 } finally { 489 if (!succeeded && closeOnError) { 490 this.channel.close(); 491 } 492 } 493 } 494 495 /** 496 * Reads a SeekableByteChannel as 7z archive 497 * 498 * <p>{@link 499 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 500 * allows you to read from an in-memory archive.</p> 501 * 502 * @param channel the channel to read 503 * @param fileName name of the archive - only used for error reporting 504 * @param password optional password if the archive is encrypted 505 * @throws IOException if reading the archive fails 506 * @since 1.17 507 */ 508 public SevenZFile(final SeekableByteChannel channel, final String fileName, 509 final char[] password) throws IOException { 510 this(channel, fileName, password, SevenZFileOptions.DEFAULT); 511 } 512 513 /** 514 * Reads a SeekableByteChannel as 7z archive with additional options. 515 * 516 * <p>{@link 517 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 518 * allows you to read from an in-memory archive.</p> 519 * 520 * @param channel the channel to read 521 * @param fileName name of the archive - only used for error reporting 522 * @param password optional password if the archive is encrypted 523 * @param options the options to apply 524 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 525 * @since 1.19 526 */ 527 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, 528 final SevenZFileOptions options) throws IOException { 529 this(channel, fileName, AES256SHA256Decoder.utf16Decode(password), false, options); 530 } 531 532 /** 533 * Reads a SeekableByteChannel as 7z archive with additional options. 534 * 535 * <p>{@link 536 * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} 537 * allows you to read from an in-memory archive.</p> 538 * 539 * @param channel the channel to read 540 * @param fileName name of the archive - only used for error reporting 541 * @param options the options to apply 542 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 543 * @since 1.19 544 */ 545 public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) 546 throws IOException { 547 this(channel, fileName, null, false, options); 548 } 549 550 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, 551 final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { 552 channel.position(folderOffset); 553 InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( 554 new BoundedSeekableByteChannelInputStream(channel, 555 archive.packSizes[firstPackStreamIndex]))) { 556 private void count(final int c) { 557 compressedBytesReadFromCurrentEntry += c; 558 } 559 @Override 560 public int read() throws IOException { 561 final int r = in.read(); 562 if (r >= 0) { 563 count(1); 564 } 565 return r; 566 } 567 @Override 568 public int read(final byte[] b) throws IOException { 569 return read(b, 0, b.length); 570 } 571 @Override 572 public int read(final byte[] b, final int off, final int len) throws IOException { 573 if (len == 0) { 574 return 0; 575 } 576 final int r = in.read(b, off, len); 577 if (r >= 0) { 578 count(r); 579 } 580 return r; 581 } 582 }; 583 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 584 for (final Coder coder : folder.getOrderedCoders()) { 585 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 586 throw new IOException("Multi input/output stream coders are not yet supported"); 587 } 588 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 589 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, 590 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 591 methods.addFirst(new SevenZMethodConfiguration(method, 592 Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 593 } 594 entry.setContentMethods(methods); 595 if (folder.hasCrc) { 596 return new CRC32VerifyingInputStream(inputStreamStack, 597 folder.getUnpackSize(), folder.crc); 598 } 599 return inputStreamStack; 600 } 601 602 /** 603 * Build the decoding stream for the entry to be read. 604 * This method may be called from a random access(getInputStream) or 605 * sequential access(getNextEntry). 606 * If this method is called from a random access, some entries may 607 * need to be skipped(we put them to the deferredBlockStreams and 608 * skip them when actually needed to improve the performance) 609 * 610 * @param entryIndex the index of the entry to be read 611 * @param isRandomAccess is this called in a random access 612 * @throws IOException if there are exceptions when reading the file 613 */ 614 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 615 if (archive.streamMap == null) { 616 throw new IOException("Archive doesn't contain stream information to read entries"); 617 } 618 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 619 if (folderIndex < 0) { 620 deferredBlockStreams.clear(); 621 // TODO: previously it'd return an empty stream? 622 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 623 return; 624 } 625 final SevenZArchiveEntry file = archive.files[entryIndex]; 626 boolean isInSameFolder = false; 627 if (currentFolderIndex == folderIndex) { 628 // (COMPRESS-320). 629 // The current entry is within the same (potentially opened) folder. The 630 // previous stream has to be fully decoded before we can start reading 631 // but don't do it eagerly -- if the user skips over the entire folder nothing 632 // is effectively decompressed. 633 if (entryIndex > 0) { 634 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 635 } 636 637 // if this is called in a random access, then the content methods of previous entry may be null 638 // the content methods should be set to methods of the first entry as it must not be null, 639 // and the content methods would only be set if the content methods was not set 640 if (isRandomAccess && file.getContentMethods() == null) { 641 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 642 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 643 file.setContentMethods(folderFirstFile.getContentMethods()); 644 } 645 isInSameFolder = true; 646 } else { 647 currentFolderIndex = folderIndex; 648 // We're opening a new folder. Discard any queued streams/ folder stream. 649 reopenFolderInputStream(folderIndex, file); 650 } 651 652 boolean haveSkippedEntries = false; 653 if (isRandomAccess) { 654 // entries will only need to be skipped if it's a random access 655 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 656 } 657 658 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 659 // we don't need to add another entry to the deferredBlockStreams when : 660 // 1. If this method is called in a random access and the entry index 661 // to be read equals to the current entry index, the input stream 662 // has already been put in the deferredBlockStreams 663 // 2. If this entry has not been read(which means no entries are skipped) 664 return; 665 } 666 667 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 668 if (file.getHasCrc()) { 669 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 670 } 671 672 deferredBlockStreams.add(fileStream); 673 } 674 675 private void calculateStreamMap(final Archive archive) throws IOException { 676 final StreamMap streamMap = new StreamMap(); 677 678 int nextFolderPackStreamIndex = 0; 679 final int numFolders = archive.folders != null ? archive.folders.length : 0; 680 streamMap.folderFirstPackStreamIndex = new int[numFolders]; 681 for (int i = 0; i < numFolders; i++) { 682 streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 683 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 684 } 685 686 long nextPackStreamOffset = 0; 687 final int numPackSizes = archive.packSizes.length; 688 streamMap.packStreamOffsets = new long[numPackSizes]; 689 for (int i = 0; i < numPackSizes; i++) { 690 streamMap.packStreamOffsets[i] = nextPackStreamOffset; 691 nextPackStreamOffset += archive.packSizes[i]; 692 } 693 694 streamMap.folderFirstFileIndex = new int[numFolders]; 695 streamMap.fileFolderIndex = new int[archive.files.length]; 696 int nextFolderIndex = 0; 697 int nextFolderUnpackStreamIndex = 0; 698 for (int i = 0; i < archive.files.length; i++) { 699 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 700 streamMap.fileFolderIndex[i] = -1; 701 continue; 702 } 703 if (nextFolderUnpackStreamIndex == 0) { 704 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 705 streamMap.folderFirstFileIndex[nextFolderIndex] = i; 706 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 707 break; 708 } 709 } 710 if (nextFolderIndex >= archive.folders.length) { 711 throw new IOException("Too few folders in archive"); 712 } 713 } 714 streamMap.fileFolderIndex[i] = nextFolderIndex; 715 if (!archive.files[i].hasStream()) { 716 continue; 717 } 718 ++nextFolderUnpackStreamIndex; 719 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 720 ++nextFolderIndex; 721 nextFolderUnpackStreamIndex = 0; 722 } 723 } 724 725 archive.streamMap = streamMap; 726 } 727 728 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 729 archiveEntries.computeIfAbsent(index, i -> new SevenZArchiveEntry()); 730 } 731 732 /** 733 * Closes the archive. 734 * @throws IOException if closing the file fails 735 */ 736 @Override 737 public void close() throws IOException { 738 if (channel != null) { 739 try { 740 channel.close(); 741 } finally { 742 channel = null; 743 if (password != null) { 744 Arrays.fill(password, (byte) 0); 745 } 746 password = null; 747 } 748 } 749 } 750 751 private InputStream getCurrentStream() throws IOException { 752 if (archive.files[currentEntryIndex].getSize() == 0) { 753 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 754 } 755 if (deferredBlockStreams.isEmpty()) { 756 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 757 } 758 759 while (deferredBlockStreams.size() > 1) { 760 // In solid compression mode we need to decompress all leading folder' 761 // streams to get access to an entry. We defer this until really needed 762 // so that entire blocks can be skipped without wasting time for decompression. 763 try (final InputStream stream = deferredBlockStreams.remove(0)) { 764 IOUtils.skip(stream, Long.MAX_VALUE); 765 } 766 compressedBytesReadFromCurrentEntry = 0; 767 } 768 769 return deferredBlockStreams.get(0); 770 } 771 772 /** 773 * Gets a default file name from the archive name - if known. 774 * 775 * <p>This implements the same heuristics the 7z tools use. In 776 * 7z's case if an archive contains entries without a name - 777 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - 778 * then its command line and GUI tools will use this default name 779 * when extracting the entries.</p> 780 * 781 * @return null if the name of the archive is unknown. Otherwise, 782 * if the name of the archive has got any extension, it is 783 * stripped and the remainder returned. Finally, if the name of the 784 * archive hasn't got any extension, then a {@code ~} character is 785 * appended to the archive name. 786 * 787 * @since 1.19 788 */ 789 public String getDefaultName() { 790 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 791 return null; 792 } 793 794 final String lastSegment = new File(fileName).getName(); 795 final int dotPos = lastSegment.lastIndexOf("."); 796 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 797 return lastSegment.substring(0, dotPos); 798 } 799 return lastSegment + "~"; 800 } 801 802 /** 803 * Gets a copy of meta-data of all archive entries. 804 * 805 * <p>This method only provides meta-data, the entries can not be 806 * used to read the contents, you still need to process all 807 * entries in order using {@link #getNextEntry} for that.</p> 808 * 809 * <p>The content methods are only available for entries that have 810 * already been reached via {@link #getNextEntry}.</p> 811 * 812 * @return a copy of meta-data of all archive entries. 813 * @since 1.11 814 */ 815 public Iterable<SevenZArchiveEntry> getEntries() { 816 return new ArrayList<>(Arrays.asList(archive.files)); 817 } 818 819 /** 820 * Gets an InputStream for reading the contents of the given entry. 821 * 822 * <p>For archives using solid compression randomly accessing 823 * entries will be significantly slower than reading the archive 824 * sequentially.</p> 825 * 826 * @param entry the entry to get the stream for. 827 * @return a stream to read the entry from. 828 * @throws IOException if unable to create an input stream from the entry 829 * @since 1.20 830 */ 831 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 832 int entryIndex = -1; 833 for (int i = 0; i < this.archive.files.length;i++) { 834 if (entry == this.archive.files[i]) { 835 entryIndex = i; 836 break; 837 } 838 } 839 840 if (entryIndex < 0) { 841 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 842 } 843 844 buildDecodingStream(entryIndex, true); 845 currentEntryIndex = entryIndex; 846 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 847 return getCurrentStream(); 848 } 849 850 /** 851 * Gets the next Archive Entry in this archive. 852 * 853 * @return the next entry, 854 * or {@code null} if there are no more entries 855 * @throws IOException if the next entry could not be read 856 */ 857 public SevenZArchiveEntry getNextEntry() throws IOException { 858 if (currentEntryIndex >= archive.files.length - 1) { 859 return null; 860 } 861 ++currentEntryIndex; 862 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 863 if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { 864 entry.setName(getDefaultName()); 865 } 866 buildDecodingStream(currentEntryIndex, false); 867 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 868 return entry; 869 } 870 871 /** 872 * Gets statistics for bytes read from the current entry. 873 * 874 * @return statistics for bytes read from the current entry 875 * @since 1.17 876 */ 877 public InputStreamStatistics getStatisticsForCurrentEntry() { 878 return new InputStreamStatistics() { 879 @Override 880 public long getCompressedCount() { 881 return compressedBytesReadFromCurrentEntry; 882 } 883 @Override 884 public long getUncompressedCount() { 885 return uncompressedBytesReadFromCurrentEntry; 886 } 887 }; 888 } 889 890 /** 891 * Tests if any data of current entry has been read or not. 892 * This is achieved by comparing the bytes remaining to read 893 * and the size of the file. 894 * 895 * @return true if any data of current entry has been read 896 * @since 1.21 897 */ 898 private boolean hasCurrentEntryBeenRead() { 899 boolean hasCurrentEntryBeenRead = false; 900 if (!deferredBlockStreams.isEmpty()) { 901 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 902 // get the bytes remaining to read, and compare it with the size of 903 // the file to figure out if the file has been read 904 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 905 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 906 } 907 908 if (currentEntryInputStream instanceof BoundedInputStream) { 909 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 910 } 911 } 912 return hasCurrentEntryBeenRead; 913 } 914 915 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 916 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 917 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 918 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 919 if (verifyCrc) { 920 final long position = channel.position(); 921 final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 922 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 923 throw new IOException("Problem computing NextHeader CRC-32"); 924 } 925 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 926 throw new IOException("NextHeader CRC-32 mismatch"); 927 } 928 channel.position(position); 929 } 930 Archive archive = new Archive(); 931 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 932 readFully(buf); 933 int nid = getUnsignedByte(buf); 934 if (nid == NID.kEncodedHeader) { 935 buf = readEncodedHeader(buf, archive, password); 936 // Archive gets rebuilt with the new header 937 archive = new Archive(); 938 nid = getUnsignedByte(buf); 939 } 940 if (nid != NID.kHeader) { 941 throw new IOException("Broken or unsupported archive: no Header"); 942 } 943 readHeader(buf, archive); 944 archive.subStreamsInfo = null; 945 return archive; 946 } 947 948 /** 949 * Reads a byte of data. 950 * 951 * @return the byte read, or -1 if end of input is reached 952 * @throws IOException 953 * if an I/O error has occurred 954 */ 955 public int read() throws IOException { 956 final int b = getCurrentStream().read(); 957 if (b >= 0) { 958 uncompressedBytesReadFromCurrentEntry++; 959 } 960 return b; 961 } 962 963 /** 964 * Reads data into an array of bytes. 965 * 966 * @param b the array to write data to 967 * @return the number of bytes read, or -1 if end of input is reached 968 * @throws IOException 969 * if an I/O error has occurred 970 */ 971 public int read(final byte[] b) throws IOException { 972 return read(b, 0, b.length); 973 } 974 975 /** 976 * Reads data into an array of bytes. 977 * 978 * @param b the array to write data to 979 * @param off offset into the buffer to start filling at 980 * @param len of bytes to read 981 * @return the number of bytes read, or -1 if end of input is reached 982 * @throws IOException 983 * if an I/O error has occurred 984 */ 985 public int read(final byte[] b, final int off, final int len) throws IOException { 986 if (len == 0) { 987 return 0; 988 } 989 final int cnt = getCurrentStream().read(b, off, len); 990 if (cnt > 0) { 991 uncompressedBytesReadFromCurrentEntry += cnt; 992 } 993 return cnt; 994 } 995 996 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 997 final int areAllDefined = getUnsignedByte(header); 998 final BitSet bits; 999 if (areAllDefined != 0) { 1000 bits = new BitSet(size); 1001 for (int i = 0; i < size; i++) { 1002 bits.set(i, true); 1003 } 1004 } else { 1005 bits = readBits(header, size); 1006 } 1007 return bits; 1008 } 1009 1010 private void readArchiveProperties(final ByteBuffer input) throws IOException { 1011 // FIXME: the reference implementation just throws them away? 1012 int nid = getUnsignedByte(input); 1013 while (nid != NID.kEnd) { 1014 final long propertySize = readUint64(input); 1015 final byte[] property = new byte[(int) propertySize]; 1016 get(input, property); 1017 nid = getUnsignedByte(input); 1018 } 1019 } 1020 1021 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1022 final BitSet bits = new BitSet(size); 1023 int mask = 0; 1024 int cache = 0; 1025 for (int i = 0; i < size; i++) { 1026 if (mask == 0) { 1027 mask = 0x80; 1028 cache = getUnsignedByte(header); 1029 } 1030 bits.set(i, (cache & mask) != 0); 1031 mask >>>= 1; 1032 } 1033 return bits; 1034 } 1035 1036 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, 1037 final byte[] password) throws IOException { 1038 final int pos = header.position(); 1039 final ArchiveStatistics stats = new ArchiveStatistics(); 1040 sanityCheckStreamsInfo(header, stats); 1041 stats.assertValidity(options.getMaxMemoryLimitInKb()); 1042 header.position(pos); 1043 1044 readStreamsInfo(header, archive); 1045 1046 if (archive.folders == null || archive.folders.length == 0) { 1047 throw new IOException("no folders, can't read encoded header"); 1048 } 1049 if (archive.packSizes == null || archive.packSizes.length == 0) { 1050 throw new IOException("no packed streams, can't read encoded header"); 1051 } 1052 1053 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 1054 final Folder folder = archive.folders[0]; 1055 final int firstPackStreamIndex = 0; 1056 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1057 0; 1058 1059 channel.position(folderOffset); 1060 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, 1061 archive.packSizes[firstPackStreamIndex]); 1062 for (final Coder coder : folder.getOrderedCoders()) { 1063 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1064 throw new IOException("Multi input/output stream coders are not yet supported"); 1065 } 1066 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR 1067 folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); 1068 } 1069 if (folder.hasCrc) { 1070 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, 1071 folder.getUnpackSize(), folder.crc); 1072 } 1073 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 1074 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 1075 if (nextHeader.length < unpackSize) { 1076 throw new IOException("premature end of stream"); 1077 } 1078 inputStreamStack.close(); 1079 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 1080 } 1081 1082 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1083 final int numFilesInt = (int) readUint64(header); 1084 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1085 BitSet isEmptyStream = null; 1086 BitSet isEmptyFile = null; 1087 BitSet isAnti = null; 1088 while (true) { 1089 final int propertyType = getUnsignedByte(header); 1090 if (propertyType == 0) { 1091 break; 1092 } 1093 final long size = readUint64(header); 1094 switch (propertyType) { 1095 case NID.kEmptyStream: { 1096 isEmptyStream = readBits(header, numFilesInt); 1097 break; 1098 } 1099 case NID.kEmptyFile: { 1100 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1101 break; 1102 } 1103 case NID.kAnti: { 1104 isAnti = readBits(header, isEmptyStream.cardinality()); 1105 break; 1106 } 1107 case NID.kName: { 1108 /* final int external = */ getUnsignedByte(header); 1109 final byte[] names = new byte[(int) (size - 1)]; 1110 final int namesLength = names.length; 1111 get(header, names); 1112 int nextFile = 0; 1113 int nextName = 0; 1114 for (int i = 0; i < namesLength; i += 2) { 1115 if (names[i] == 0 && names[i + 1] == 0) { 1116 checkEntryIsInitialized(fileMap, nextFile); 1117 fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE)); 1118 nextName = i + 2; 1119 nextFile++; 1120 } 1121 } 1122 if (nextName != namesLength || nextFile != numFilesInt) { 1123 throw new IOException("Error parsing file names"); 1124 } 1125 break; 1126 } 1127 case NID.kCTime: { 1128 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1129 /* final int external = */ getUnsignedByte(header); 1130 for (int i = 0; i < numFilesInt; i++) { 1131 checkEntryIsInitialized(fileMap, i); 1132 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1133 entryAtIndex.setHasCreationDate(timesDefined.get(i)); 1134 if (entryAtIndex.getHasCreationDate()) { 1135 entryAtIndex.setCreationDate(getLong(header)); 1136 } 1137 } 1138 break; 1139 } 1140 case NID.kATime: { 1141 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1142 /* final int external = */ getUnsignedByte(header); 1143 for (int i = 0; i < numFilesInt; i++) { 1144 checkEntryIsInitialized(fileMap, i); 1145 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1146 entryAtIndex.setHasAccessDate(timesDefined.get(i)); 1147 if (entryAtIndex.getHasAccessDate()) { 1148 entryAtIndex.setAccessDate(getLong(header)); 1149 } 1150 } 1151 break; 1152 } 1153 case NID.kMTime: { 1154 final BitSet timesDefined = readAllOrBits(header, numFilesInt); 1155 /* final int external = */ getUnsignedByte(header); 1156 for (int i = 0; i < numFilesInt; i++) { 1157 checkEntryIsInitialized(fileMap, i); 1158 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1159 entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); 1160 if (entryAtIndex.getHasLastModifiedDate()) { 1161 entryAtIndex.setLastModifiedDate(getLong(header)); 1162 } 1163 } 1164 break; 1165 } 1166 case NID.kWinAttributes: { 1167 final BitSet attributesDefined = readAllOrBits(header, numFilesInt); 1168 /* final int external = */ getUnsignedByte(header); 1169 for (int i = 0; i < numFilesInt; i++) { 1170 checkEntryIsInitialized(fileMap, i); 1171 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1172 entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); 1173 if (entryAtIndex.getHasWindowsAttributes()) { 1174 entryAtIndex.setWindowsAttributes(getInt(header)); 1175 } 1176 } 1177 break; 1178 } 1179 case NID.kDummy: { 1180 // 7z 9.20 asserts the content is all zeros and ignores the property 1181 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1182 1183 skipBytesFully(header, size); 1184 break; 1185 } 1186 1187 default: { 1188 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1189 skipBytesFully(header, size); 1190 break; 1191 } 1192 } 1193 } 1194 int nonEmptyFileCounter = 0; 1195 int emptyFileCounter = 0; 1196 for (int i = 0; i < numFilesInt; i++) { 1197 final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 1198 if (entryAtIndex == null) { 1199 continue; 1200 } 1201 entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); 1202 if (entryAtIndex.hasStream()) { 1203 if (archive.subStreamsInfo == null) { 1204 throw new IOException("Archive contains file with streams but no subStreamsInfo"); 1205 } 1206 entryAtIndex.setDirectory(false); 1207 entryAtIndex.setAntiItem(false); 1208 entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 1209 entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); 1210 entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); 1211 if (entryAtIndex.getSize() < 0) { 1212 throw new IOException("broken archive, entry with negative size"); 1213 } 1214 ++nonEmptyFileCounter; 1215 } else { 1216 entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); 1217 entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); 1218 entryAtIndex.setHasCrc(false); 1219 entryAtIndex.setSize(0); 1220 ++emptyFileCounter; 1221 } 1222 } 1223 archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new); 1224 calculateStreamMap(archive); 1225 } 1226 1227 private Folder readFolder(final ByteBuffer header) throws IOException { 1228 final Folder folder = new Folder(); 1229 1230 final long numCoders = readUint64(header); 1231 final Coder[] coders = new Coder[(int) numCoders]; 1232 long totalInStreams = 0; 1233 long totalOutStreams = 0; 1234 for (int i = 0; i < coders.length; i++) { 1235 coders[i] = new Coder(); 1236 final int bits = getUnsignedByte(header); 1237 final int idSize = bits & 0xf; 1238 final boolean isSimple = (bits & 0x10) == 0; 1239 final boolean hasAttributes = (bits & 0x20) != 0; 1240 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1241 1242 coders[i].decompressionMethodId = new byte[idSize]; 1243 get(header, coders[i].decompressionMethodId); 1244 if (isSimple) { 1245 coders[i].numInStreams = 1; 1246 coders[i].numOutStreams = 1; 1247 } else { 1248 coders[i].numInStreams = readUint64(header); 1249 coders[i].numOutStreams = readUint64(header); 1250 } 1251 totalInStreams += coders[i].numInStreams; 1252 totalOutStreams += coders[i].numOutStreams; 1253 if (hasAttributes) { 1254 final long propertiesSize = readUint64(header); 1255 coders[i].properties = new byte[(int) propertiesSize]; 1256 get(header, coders[i].properties); 1257 } 1258 // would need to keep looping as above: 1259 if (moreAlternativeMethods) { 1260 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1261 "The reference implementation doesn't support them either."); 1262 } 1263 } 1264 folder.coders = coders; 1265 folder.totalInputStreams = totalInStreams; 1266 folder.totalOutputStreams = totalOutStreams; 1267 1268 final long numBindPairs = totalOutStreams - 1; 1269 final BindPair[] bindPairs = new BindPair[(int) numBindPairs]; 1270 for (int i = 0; i < bindPairs.length; i++) { 1271 bindPairs[i] = new BindPair(); 1272 bindPairs[i].inIndex = readUint64(header); 1273 bindPairs[i].outIndex = readUint64(header); 1274 } 1275 folder.bindPairs = bindPairs; 1276 1277 final long numPackedStreams = totalInStreams - numBindPairs; 1278 final long[] packedStreams = new long[(int) numPackedStreams]; 1279 if (numPackedStreams == 1) { 1280 int i; 1281 for (i = 0; i < (int) totalInStreams; i++) { 1282 if (folder.findBindPairForInStream(i) < 0) { 1283 break; 1284 } 1285 } 1286 packedStreams[0] = i; 1287 } else { 1288 for (int i = 0; i < (int) numPackedStreams; i++) { 1289 packedStreams[i] = readUint64(header); 1290 } 1291 } 1292 folder.packedStreams = packedStreams; 1293 1294 return folder; 1295 } 1296 1297 private void readFully(final ByteBuffer buf) throws IOException { 1298 buf.rewind(); 1299 IOUtils.readFully(channel, buf); 1300 buf.flip(); 1301 } 1302 1303 private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { 1304 final int pos = header.position(); 1305 final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); 1306 stats.assertValidity(options.getMaxMemoryLimitInKb()); 1307 header.position(pos); 1308 1309 int nid = getUnsignedByte(header); 1310 1311 if (nid == NID.kArchiveProperties) { 1312 readArchiveProperties(header); 1313 nid = getUnsignedByte(header); 1314 } 1315 1316 if (nid == NID.kAdditionalStreamsInfo) { 1317 throw new IOException("Additional streams unsupported"); 1318 //nid = getUnsignedByte(header); 1319 } 1320 1321 if (nid == NID.kMainStreamsInfo) { 1322 readStreamsInfo(header, archive); 1323 nid = getUnsignedByte(header); 1324 } 1325 1326 if (nid == NID.kFilesInfo) { 1327 readFilesInfo(header, archive); 1328 nid = getUnsignedByte(header); 1329 } 1330 } 1331 1332 private Archive readHeaders(final byte[] password) throws IOException { 1333 final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) 1334 .order(ByteOrder.LITTLE_ENDIAN); 1335 readFully(buf); 1336 final byte[] signature = new byte[6]; 1337 buf.get(signature); 1338 if (!Arrays.equals(signature, sevenZSignature)) { 1339 throw new IOException("Bad 7z signature"); 1340 } 1341 // 7zFormat.txt has it wrong - it's first major then minor 1342 final byte archiveVersionMajor = buf.get(); 1343 final byte archiveVersionMinor = buf.get(); 1344 if (archiveVersionMajor != 0) { 1345 throw new IOException(String.format("Unsupported 7z version (%d,%d)", 1346 archiveVersionMajor, archiveVersionMinor)); 1347 } 1348 1349 boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" 1350 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 1351 if (startHeaderCrc == 0) { 1352 // This is an indication of a corrupt header - peek the next 20 bytes 1353 final long currentPosition = channel.position(); 1354 final ByteBuffer peekBuf = ByteBuffer.allocate(20); 1355 readFully(peekBuf); 1356 channel.position(currentPosition); 1357 // Header invalid if all data is 0 1358 while (peekBuf.hasRemaining()) { 1359 if (peekBuf.get()!=0) { 1360 headerLooksValid = true; 1361 break; 1362 } 1363 } 1364 } else { 1365 headerLooksValid = true; 1366 } 1367 1368 if (headerLooksValid) { 1369 return initializeArchive(readStartHeader(startHeaderCrc), password, true); 1370 } 1371 // No valid header found - probably first file of multipart archive was removed too early. Scan for end header. 1372 if (options.getTryToRecoverBrokenArchives()) { 1373 return tryToLocateEndHeader(password); 1374 } 1375 throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" 1376 + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" 1377 + " prematurely."); 1378 } 1379 1380 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1381 archive.packPos = readUint64(header); 1382 final int numPackStreamsInt = (int) readUint64(header); 1383 int nid = getUnsignedByte(header); 1384 if (nid == NID.kSize) { 1385 archive.packSizes = new long[numPackStreamsInt]; 1386 for (int i = 0; i < archive.packSizes.length; i++) { 1387 archive.packSizes[i] = readUint64(header); 1388 } 1389 nid = getUnsignedByte(header); 1390 } 1391 1392 if (nid == NID.kCRC) { 1393 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 1394 archive.packCrcs = new long[numPackStreamsInt]; 1395 for (int i = 0; i < numPackStreamsInt; i++) { 1396 if (archive.packCrcsDefined.get(i)) { 1397 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 1398 } 1399 } 1400 1401 nid = getUnsignedByte(header); 1402 } 1403 } 1404 1405 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 1406 final StartHeader startHeader = new StartHeader(); 1407 // using Stream rather than ByteBuffer for the benefit of the 1408 // built-in CRC check 1409 try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( 1410 new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 1411 startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 1412 if (startHeader.nextHeaderOffset < 0 1413 || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { 1414 throw new IOException("nextHeaderOffset is out of bounds"); 1415 } 1416 1417 startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 1418 final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize; 1419 if (nextHeaderEnd < startHeader.nextHeaderOffset 1420 || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { 1421 throw new IOException("nextHeaderSize is out of bounds"); 1422 } 1423 1424 startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 1425 1426 return startHeader; 1427 } 1428 } 1429 1430 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1431 int nid = getUnsignedByte(header); 1432 1433 if (nid == NID.kPackInfo) { 1434 readPackInfo(header, archive); 1435 nid = getUnsignedByte(header); 1436 } 1437 1438 if (nid == NID.kUnpackInfo) { 1439 readUnpackInfo(header, archive); 1440 nid = getUnsignedByte(header); 1441 } else { 1442 // archive without unpack/coders info 1443 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 1444 } 1445 1446 if (nid == NID.kSubStreamsInfo) { 1447 readSubStreamsInfo(header, archive); 1448 nid = getUnsignedByte(header); 1449 } 1450 } 1451 1452 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1453 for (final Folder folder : archive.folders) { 1454 folder.numUnpackSubStreams = 1; 1455 } 1456 long unpackStreamsCount = archive.folders.length; 1457 1458 int nid = getUnsignedByte(header); 1459 if (nid == NID.kNumUnpackStream) { 1460 unpackStreamsCount = 0; 1461 for (final Folder folder : archive.folders) { 1462 final long numStreams = readUint64(header); 1463 folder.numUnpackSubStreams = (int) numStreams; 1464 unpackStreamsCount += numStreams; 1465 } 1466 nid = getUnsignedByte(header); 1467 } 1468 1469 final int totalUnpackStreams = (int) unpackStreamsCount; 1470 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); 1471 subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; 1472 subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); 1473 subStreamsInfo.crcs = new long[totalUnpackStreams]; 1474 1475 int nextUnpackStream = 0; 1476 for (final Folder folder : archive.folders) { 1477 if (folder.numUnpackSubStreams == 0) { 1478 continue; 1479 } 1480 long sum = 0; 1481 if (nid == NID.kSize) { 1482 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 1483 final long size = readUint64(header); 1484 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1485 sum += size; 1486 } 1487 } 1488 if (sum > folder.getUnpackSize()) { 1489 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1490 } 1491 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1492 } 1493 if (nid == NID.kSize) { 1494 nid = getUnsignedByte(header); 1495 } 1496 1497 int numDigests = 0; 1498 for (final Folder folder : archive.folders) { 1499 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1500 numDigests += folder.numUnpackSubStreams; 1501 } 1502 } 1503 1504 if (nid == NID.kCRC) { 1505 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1506 final long[] missingCrcs = new long[numDigests]; 1507 for (int i = 0; i < numDigests; i++) { 1508 if (hasMissingCrc.get(i)) { 1509 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1510 } 1511 } 1512 int nextCrc = 0; 1513 int nextMissingCrc = 0; 1514 for (final Folder folder: archive.folders) { 1515 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1516 subStreamsInfo.hasCrc.set(nextCrc, true); 1517 subStreamsInfo.crcs[nextCrc] = folder.crc; 1518 ++nextCrc; 1519 } else { 1520 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1521 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1522 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1523 ++nextCrc; 1524 ++nextMissingCrc; 1525 } 1526 } 1527 } 1528 1529 nid = getUnsignedByte(header); 1530 } 1531 1532 archive.subStreamsInfo = subStreamsInfo; 1533 } 1534 1535 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1536 int nid = getUnsignedByte(header); 1537 final int numFoldersInt = (int) readUint64(header); 1538 final Folder[] folders = new Folder[numFoldersInt]; 1539 archive.folders = folders; 1540 /* final int external = */ getUnsignedByte(header); 1541 for (int i = 0; i < numFoldersInt; i++) { 1542 folders[i] = readFolder(header); 1543 } 1544 1545 nid = getUnsignedByte(header); 1546 for (final Folder folder : folders) { 1547 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 1548 folder.unpackSizes = new long[(int) folder.totalOutputStreams]; 1549 for (int i = 0; i < folder.totalOutputStreams; i++) { 1550 folder.unpackSizes[i] = readUint64(header); 1551 } 1552 } 1553 1554 nid = getUnsignedByte(header); 1555 if (nid == NID.kCRC) { 1556 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 1557 for (int i = 0; i < numFoldersInt; i++) { 1558 if (crcsDefined.get(i)) { 1559 folders[i].hasCrc = true; 1560 folders[i].crc = 0xffffFFFFL & getInt(header); 1561 } else { 1562 folders[i].hasCrc = false; 1563 } 1564 } 1565 1566 nid = getUnsignedByte(header); 1567 } 1568 } 1569 1570 /** 1571 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1572 * 1573 * @param folderIndex the index of the folder to reopen 1574 * @param file the 7z entry to read 1575 * @throws IOException if exceptions occur when reading the 7z file 1576 */ 1577 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1578 deferredBlockStreams.clear(); 1579 if (currentFolderInputStream != null) { 1580 currentFolderInputStream.close(); 1581 currentFolderInputStream = null; 1582 } 1583 final Folder folder = archive.folders[folderIndex]; 1584 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1585 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 1586 archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1587 1588 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1589 } 1590 1591 private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) 1592 throws IOException { 1593 final ArchiveStatistics stats = new ArchiveStatistics(); 1594 1595 int nid = getUnsignedByte(header); 1596 1597 if (nid == NID.kArchiveProperties) { 1598 sanityCheckArchiveProperties(header); 1599 nid = getUnsignedByte(header); 1600 } 1601 1602 if (nid == NID.kAdditionalStreamsInfo) { 1603 throw new IOException("Additional streams unsupported"); 1604 //nid = getUnsignedByte(header); 1605 } 1606 1607 if (nid == NID.kMainStreamsInfo) { 1608 sanityCheckStreamsInfo(header, stats); 1609 nid = getUnsignedByte(header); 1610 } 1611 1612 if (nid == NID.kFilesInfo) { 1613 sanityCheckFilesInfo(header, stats); 1614 nid = getUnsignedByte(header); 1615 } 1616 1617 if (nid != NID.kEnd) { 1618 throw new IOException("Badly terminated header, found " + nid); 1619 } 1620 1621 return stats; 1622 } 1623 1624 private void sanityCheckArchiveProperties(final ByteBuffer header) 1625 throws IOException { 1626 int nid = getUnsignedByte(header); 1627 while (nid != NID.kEnd) { 1628 final int propertySize = 1629 assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); 1630 if (skipBytesFully(header, propertySize) < propertySize) { 1631 throw new IOException("invalid property size"); 1632 } 1633 nid = getUnsignedByte(header); 1634 } 1635 } 1636 1637 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1638 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1639 1640 int emptyStreams = -1; 1641 while (true) { 1642 final int propertyType = getUnsignedByte(header); 1643 if (propertyType == 0) { 1644 break; 1645 } 1646 final long size = readUint64(header); 1647 switch (propertyType) { 1648 case NID.kEmptyStream: { 1649 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1650 break; 1651 } 1652 case NID.kEmptyFile: { 1653 if (emptyStreams == -1) { 1654 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1655 } 1656 readBits(header, emptyStreams); 1657 break; 1658 } 1659 case NID.kAnti: { 1660 if (emptyStreams == -1) { 1661 throw new IOException("Header format error: kEmptyStream must appear before kAnti"); 1662 } 1663 readBits(header, emptyStreams); 1664 break; 1665 } 1666 case NID.kName: { 1667 final int external = getUnsignedByte(header); 1668 if (external != 0) { 1669 throw new IOException("Not implemented"); 1670 } 1671 final int namesLength = 1672 assertFitsIntoNonNegativeInt("file names length", size - 1); 1673 if ((namesLength & 1) != 0) { 1674 throw new IOException("File names length invalid"); 1675 } 1676 1677 int filesSeen = 0; 1678 for (int i = 0; i < namesLength; i += 2) { 1679 final char c = getChar(header); 1680 if (c == 0) { 1681 filesSeen++; 1682 } 1683 } 1684 if (filesSeen != stats.numberOfEntries) { 1685 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " 1686 + stats.numberOfEntries + ")"); 1687 } 1688 break; 1689 } 1690 case NID.kCTime: { 1691 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1692 .cardinality(); 1693 final int external = getUnsignedByte(header); 1694 if (external != 0) { 1695 throw new IOException("Not implemented"); 1696 } 1697 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1698 throw new IOException("invalid creation dates size"); 1699 } 1700 break; 1701 } 1702 case NID.kATime: { 1703 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1704 .cardinality(); 1705 final int external = getUnsignedByte(header); 1706 if (external != 0) { 1707 throw new IOException("Not implemented"); 1708 } 1709 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1710 throw new IOException("invalid access dates size"); 1711 } 1712 break; 1713 } 1714 case NID.kMTime: { 1715 final int timesDefined = readAllOrBits(header, stats.numberOfEntries) 1716 .cardinality(); 1717 final int external = getUnsignedByte(header); 1718 if (external != 0) { 1719 throw new IOException("Not implemented"); 1720 } 1721 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1722 throw new IOException("invalid modification dates size"); 1723 } 1724 break; 1725 } 1726 case NID.kWinAttributes: { 1727 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) 1728 .cardinality(); 1729 final int external = getUnsignedByte(header); 1730 if (external != 0) { 1731 throw new IOException("Not implemented"); 1732 } 1733 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1734 throw new IOException("invalid windows attributes size"); 1735 } 1736 break; 1737 } 1738 case NID.kStartPos: { 1739 throw new IOException("kStartPos is unsupported, please report"); 1740 } 1741 case NID.kDummy: { 1742 // 7z 9.20 asserts the content is all zeros and ignores the property 1743 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1744 1745 if (skipBytesFully(header, size) < size) { 1746 throw new IOException("Incomplete kDummy property"); 1747 } 1748 break; 1749 } 1750 1751 default: { 1752 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1753 if (skipBytesFully(header, size) < size) { 1754 throw new IOException("Incomplete property of type " + propertyType); 1755 } 1756 break; 1757 } 1758 } 1759 } 1760 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1761 } 1762 1763 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) 1764 throws IOException { 1765 1766 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1767 if (numCoders == 0) { 1768 throw new IOException("Folder without coders"); 1769 } 1770 stats.numberOfCoders += numCoders; 1771 1772 long totalOutStreams = 0; 1773 long totalInStreams = 0; 1774 for (int i = 0; i < numCoders; i++) { 1775 final int bits = getUnsignedByte(header); 1776 final int idSize = bits & 0xf; 1777 get(header, new byte[idSize]); 1778 1779 final boolean isSimple = (bits & 0x10) == 0; 1780 final boolean hasAttributes = (bits & 0x20) != 0; 1781 final boolean moreAlternativeMethods = (bits & 0x80) != 0; 1782 if (moreAlternativeMethods) { 1783 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1784 "The reference implementation doesn't support them either."); 1785 } 1786 1787 if (isSimple) { 1788 totalInStreams++; 1789 totalOutStreams++; 1790 } else { 1791 totalInStreams += 1792 assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1793 totalOutStreams += 1794 assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1795 } 1796 1797 if (hasAttributes) { 1798 final int propertiesSize = 1799 assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1800 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1801 throw new IOException("invalid propertiesSize in folder"); 1802 } 1803 } 1804 } 1805 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1806 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1807 stats.numberOfOutStreams += totalOutStreams; 1808 stats.numberOfInStreams += totalInStreams; 1809 1810 if (totalOutStreams == 0) { 1811 throw new IOException("Total output streams can't be 0"); 1812 } 1813 1814 final int numBindPairs = 1815 assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1816 if (totalInStreams < numBindPairs) { 1817 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1818 } 1819 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1820 for (int i = 0; i < numBindPairs; i++) { 1821 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1822 if (totalInStreams <= inIndex) { 1823 throw new IOException("inIndex is bigger than number of inStreams"); 1824 } 1825 inStreamsBound.set(inIndex); 1826 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1827 if (totalOutStreams <= outIndex) { 1828 throw new IOException("outIndex is bigger than number of outStreams"); 1829 } 1830 } 1831 1832 final int numPackedStreams = 1833 assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1834 1835 if (numPackedStreams == 1) { 1836 if (inStreamsBound.nextClearBit(0) == -1) { 1837 throw new IOException("Couldn't find stream's bind pair index"); 1838 } 1839 } else { 1840 for (int i = 0; i < numPackedStreams; i++) { 1841 final int packedStreamIndex = 1842 assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1843 if (packedStreamIndex >= totalInStreams) { 1844 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1845 } 1846 } 1847 } 1848 1849 return (int) totalOutStreams; 1850 } 1851 1852 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1853 final long packPos = readUint64(header); 1854 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() 1855 || SIGNATURE_HEADER_SIZE + packPos < 0) { 1856 throw new IOException("packPos (" + packPos + ") is out of range"); 1857 } 1858 final long numPackStreams = readUint64(header); 1859 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 1860 int nid = getUnsignedByte(header); 1861 if (nid == NID.kSize) { 1862 long totalPackSizes = 0; 1863 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 1864 final long packSize = readUint64(header); 1865 totalPackSizes += packSize; 1866 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 1867 if (packSize < 0 1868 || endOfPackStreams > channel.size() 1869 || endOfPackStreams < packPos) { 1870 throw new IOException("packSize (" + packSize + ") is out of range"); 1871 } 1872 } 1873 nid = getUnsignedByte(header); 1874 } 1875 1876 if (nid == NID.kCRC) { 1877 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams) 1878 .cardinality(); 1879 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 1880 throw new IOException("invalid number of CRCs in PackInfo"); 1881 } 1882 nid = getUnsignedByte(header); 1883 } 1884 1885 if (nid != NID.kEnd) { 1886 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 1887 } 1888 } 1889 1890 private void sanityCheckStreamsInfo(final ByteBuffer header, 1891 final ArchiveStatistics stats) throws IOException { 1892 int nid = getUnsignedByte(header); 1893 1894 if (nid == NID.kPackInfo) { 1895 sanityCheckPackInfo(header, stats); 1896 nid = getUnsignedByte(header); 1897 } 1898 1899 if (nid == NID.kUnpackInfo) { 1900 sanityCheckUnpackInfo(header, stats); 1901 nid = getUnsignedByte(header); 1902 } 1903 1904 if (nid == NID.kSubStreamsInfo) { 1905 sanityCheckSubStreamsInfo(header, stats); 1906 nid = getUnsignedByte(header); 1907 } 1908 1909 if (nid != NID.kEnd) { 1910 throw new IOException("Badly terminated StreamsInfo"); 1911 } 1912 } 1913 1914 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1915 1916 int nid = getUnsignedByte(header); 1917 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 1918 if (nid == NID.kNumUnpackStream) { 1919 for (int i = 0; i < stats.numberOfFolders; i++) { 1920 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 1921 } 1922 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 1923 nid = getUnsignedByte(header); 1924 } else { 1925 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 1926 } 1927 1928 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 1929 1930 if (nid == NID.kSize) { 1931 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 1932 if (numUnpackSubStreams == 0) { 1933 continue; 1934 } 1935 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 1936 final long size = readUint64(header); 1937 if (size < 0) { 1938 throw new IOException("negative unpackSize"); 1939 } 1940 } 1941 } 1942 nid = getUnsignedByte(header); 1943 } 1944 1945 int numDigests = 0; 1946 if (numUnpackSubStreamsPerFolder.isEmpty()) { 1947 numDigests = stats.folderHasCrc == null ? stats.numberOfFolders 1948 : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 1949 } else { 1950 int folderIdx = 0; 1951 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 1952 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null 1953 || !stats.folderHasCrc.get(folderIdx++)) { 1954 numDigests += numUnpackSubStreams; 1955 } 1956 } 1957 } 1958 1959 if (nid == NID.kCRC) { 1960 assertFitsIntoNonNegativeInt("numDigests", numDigests); 1961 final int missingCrcs = readAllOrBits(header, numDigests) 1962 .cardinality(); 1963 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 1964 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 1965 } 1966 nid = getUnsignedByte(header); 1967 } 1968 1969 if (nid != NID.kEnd) { 1970 throw new IOException("Badly terminated SubStreamsInfo"); 1971 } 1972 } 1973 1974 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) 1975 throws IOException { 1976 int nid = getUnsignedByte(header); 1977 if (nid != NID.kFolder) { 1978 throw new IOException("Expected kFolder, got " + nid); 1979 } 1980 final long numFolders = readUint64(header); 1981 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 1982 final int external = getUnsignedByte(header); 1983 if (external != 0) { 1984 throw new IOException("External unsupported"); 1985 } 1986 1987 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 1988 for (int i = 0; i < stats.numberOfFolders; i++) { 1989 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 1990 } 1991 1992 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 1993 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 1994 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 1995 throw new IOException("archive doesn't contain enough packed streams"); 1996 } 1997 1998 nid = getUnsignedByte(header); 1999 if (nid != NID.kCodersUnpackSize) { 2000 throw new IOException("Expected kCodersUnpackSize, got " + nid); 2001 } 2002 2003 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 2004 for (int i = 0; i < numberOfOutputStreams; i++) { 2005 final long unpackSize = readUint64(header); 2006 if (unpackSize < 0) { 2007 throw new IllegalArgumentException("negative unpackSize"); 2008 } 2009 } 2010 } 2011 2012 nid = getUnsignedByte(header); 2013 if (nid == NID.kCRC) { 2014 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 2015 final int crcsDefined = stats.folderHasCrc.cardinality(); 2016 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 2017 throw new IOException("invalid number of CRCs in UnpackInfo"); 2018 } 2019 nid = getUnsignedByte(header); 2020 } 2021 2022 if (nid != NID.kEnd) { 2023 throw new IOException("Badly terminated UnpackInfo"); 2024 } 2025 } 2026 2027 /** 2028 * Skip all the entries if needed. 2029 * Entries need to be skipped when: 2030 * <p> 2031 * 1. it's a random access 2032 * 2. one of these 2 condition is meet : 2033 * <p> 2034 * 2.1 currentEntryIndex != entryIndex : this means there are some entries 2035 * to be skipped(currentEntryIndex < entryIndex) or the entry has already 2036 * been read(currentEntryIndex > entryIndex) 2037 * <p> 2038 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: 2039 * if the entry to be read is the current entry, but some data of it has 2040 * been read before, then we need to reopen the stream of the folder and 2041 * skip all the entries before the current entries 2042 * 2043 * @param entryIndex the entry to be read 2044 * @param isInSameFolder are the entry to be read and the current entry in the same folder 2045 * @param folderIndex the index of the folder which contains the entry 2046 * @return true if there are entries actually skipped 2047 * @throws IOException there are exceptions when skipping entries 2048 * @since 1.21 2049 */ 2050 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 2051 final SevenZArchiveEntry file = archive.files[entryIndex]; 2052 // if the entry to be read is the current entry, and the entry has not 2053 // been read yet, then there's nothing we need to do 2054 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 2055 return false; 2056 } 2057 2058 // 1. if currentEntryIndex < entryIndex : 2059 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 2060 // 2. if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 2061 // this means the entry has already been read before, and we need to reopen the 2062 // stream of the folder and skip all the entries before the current entries 2063 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 2064 if (isInSameFolder) { 2065 if (currentEntryIndex < entryIndex) { 2066 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 2067 filesToSkipStartIndex = currentEntryIndex + 1; 2068 } else { 2069 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 2070 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 2071 // from the start entry of the folder again 2072 reopenFolderInputStream(folderIndex, file); 2073 } 2074 } 2075 2076 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 2077 final SevenZArchiveEntry fileToSkip = archive.files[i]; 2078 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 2079 if (fileToSkip.getHasCrc()) { 2080 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 2081 } 2082 deferredBlockStreams.add(fileStreamToSkip); 2083 2084 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 2085 fileToSkip.setContentMethods(file.getContentMethods()); 2086 } 2087 return true; 2088 } 2089 2090 @Override 2091 public String toString() { 2092 return archive.toString(); 2093 } 2094 2095 private Archive tryToLocateEndHeader(final byte[] password) throws IOException { 2096 final ByteBuffer nidBuf = ByteBuffer.allocate(1); 2097 final long searchLimit = 1024L * 1024 * 1; 2098 // Main header, plus bytes that readStartHeader would read 2099 final long previousDataSize = channel.position() + 20; 2100 final long minPos; 2101 // Determine minimal position - can't start before current position 2102 if (channel.position() + searchLimit > channel.size()) { 2103 minPos = channel.position(); 2104 } else { 2105 minPos = channel.size() - searchLimit; 2106 } 2107 long pos = channel.size() - 1; 2108 // Loop: Try from end of archive 2109 while (pos > minPos) { 2110 pos--; 2111 channel.position(pos); 2112 nidBuf.rewind(); 2113 if (channel.read(nidBuf) < 1) { 2114 throw new EOFException(); 2115 } 2116 final int nid = nidBuf.array()[0]; 2117 // First indicator: Byte equals one of these header identifiers 2118 if (nid == NID.kEncodedHeader || nid == NID.kHeader) { 2119 try { 2120 // Try to initialize Archive structure from here 2121 final StartHeader startHeader = new StartHeader(); 2122 startHeader.nextHeaderOffset = pos - previousDataSize; 2123 startHeader.nextHeaderSize = channel.size() - pos; 2124 final Archive result = initializeArchive(startHeader, password, false); 2125 // Sanity check: There must be some data... 2126 if (result.packSizes.length > 0 && result.files.length > 0) { 2127 return result; 2128 } 2129 } catch (final Exception ignore) { 2130 // Wrong guess... 2131 } 2132 } 2133 } 2134 throw new IOException("Start header corrupt and unable to guess end header"); 2135 } 2136}