/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.archivers.sevenz;

import static java.nio.charset.StandardCharsets.UTF_16LE;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.zip.CRC32;
import java.util.zip.CheckedInputStream;

import org.apache.commons.compress.MemoryLimitException;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * Reads a 7z file, using SeekableByteChannel under the covers.
 * <p>
 * The 7z file format is a flexible container that can contain many compression and encryption types, but at the moment only Copy, LZMA, LZMA2, BZIP2,
 * Deflate and AES-256 + SHA-256 are supported.
 * </p>
 * <p>
 * The format is very Windows/Intel specific, so it uses little-endian byte order, doesn't store user/group or permission bits, and represents times using NTFS
 * timestamps (100 nanosecond units since 1 January 1601). Hence the official tools recommend against using it for backup purposes on *nix, and recommend
 * .tar.7z or .tar.lzma or .tar.xz instead.
 * </p>
 * <p>
 * Both the header and file contents may be compressed and/or encrypted. With both encrypted, neither file names nor file contents can be read, but the use of
 * encryption isn't plausibly deniable.
 * </p>
 * <p>
 * Multi volume archives can be read by concatenating the parts in correct order - either manually or by using {@link
 * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} for example.
078 * </p> 079 * 080 * @NotThreadSafe 081 * @since 1.6 082 */ 083public class SevenZFile implements Closeable { 084 085 private static final class ArchiveStatistics { 086 private int numberOfPackedStreams; 087 private long numberOfCoders; 088 private long numberOfOutStreams; 089 private long numberOfInStreams; 090 private long numberOfUnpackSubStreams; 091 private int numberOfFolders; 092 private BitSet folderHasCrc; 093 private int numberOfEntries; 094 private int numberOfEntriesWithStream; 095 096 void assertValidity(final int maxMemoryLimitInKb) throws IOException { 097 if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { 098 throw new IOException("archive with entries but no folders"); 099 } 100 if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { 101 throw new IOException("archive doesn't contain enough substreams for entries"); 102 } 103 104 final long memoryNeededInKb = estimateSize() / 1024; 105 if (maxMemoryLimitInKb < memoryNeededInKb) { 106 throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); 107 } 108 } 109 110 private long bindPairSize() { 111 return 16; 112 } 113 114 private long coderSize() { 115 return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ 116 + 16 + 4 /* properties, guess */ 117 ; 118 } 119 120 private long entrySize() { 121 return 100; /* real size depends on name length, everything without name is about 70 bytes */ 122 } 123 124 long estimateSize() { 125 final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */ 126 + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ 127 + numberOfFolders * folderSize() /* folders in Archive */ 128 + numberOfCoders * coderSize() /* coders in Folder */ 129 + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ 130 + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ 131 + 8L * numberOfOutStreams /* unpackSizes in 
Folder */ 132 + numberOfEntries * entrySize() /* files in Archive */ 133 + streamMapSize(); 134 return 2 * lowerBound /* conservative guess */; 135 } 136 137 private long folderSize() { 138 return 30; /* nested arrays are accounted for separately */ 139 } 140 141 private long streamMapSize() { 142 return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ 143 + 8 * numberOfPackedStreams /* packStreamOffsets */ 144 + 4 * numberOfEntries /* fileFolderIndex */ 145 ; 146 } 147 148 @Override 149 public String toString() { 150 return "Archive with " + numberOfEntries + " entries in " + numberOfFolders + " folders. Estimated size " + estimateSize() / 1024L + " kB."; 151 } 152 } 153 154 /** 155 * Builds new instances of {@link SevenZFile}. 156 * 157 * @since 1.26.0 158 */ 159 public static class Builder extends AbstractStreamBuilder<SevenZFile, Builder> { 160 161 static final int MEMORY_LIMIT_IN_KB = Integer.MAX_VALUE; 162 static final boolean USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES = false; 163 static final boolean TRY_TO_RECOVER_BROKEN_ARCHIVES = false; 164 165 private SeekableByteChannel seekableByteChannel; 166 private String defaultName = DEFAULT_FILE_NAME; 167 private byte[] password; 168 private int maxMemoryLimitKb = MEMORY_LIMIT_IN_KB; 169 private boolean useDefaultNameForUnnamedEntries = USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES; 170 private boolean tryToRecoverBrokenArchives = TRY_TO_RECOVER_BROKEN_ARCHIVES; 171 172 @SuppressWarnings("resource") // Caller closes 173 @Override 174 public SevenZFile get() throws IOException { 175 final SeekableByteChannel actualChannel; 176 final String actualDescription; 177 if (seekableByteChannel != null) { 178 actualChannel = seekableByteChannel; 179 actualDescription = defaultName; 180 } else if (checkOrigin() instanceof ByteArrayOrigin) { 181 actualChannel = new SeekableInMemoryByteChannel(checkOrigin().getByteArray()); 182 actualDescription = defaultName; 183 } else { 184 OpenOption[] openOptions = 
getOpenOptions(); 185 if (openOptions.length == 0) { 186 openOptions = new OpenOption[] { StandardOpenOption.READ }; 187 } 188 final Path path = getPath(); 189 actualChannel = Files.newByteChannel(path, openOptions); 190 actualDescription = path.toAbsolutePath().toString(); 191 } 192 final boolean closeOnError = seekableByteChannel != null; 193 return new SevenZFile(actualChannel, actualDescription, password, closeOnError, maxMemoryLimitKb, useDefaultNameForUnnamedEntries, 194 tryToRecoverBrokenArchives); 195 } 196 197 /** 198 * Sets the default name. 199 * 200 * @param defaultName the default name. 201 * @return this. 202 */ 203 public Builder setDefaultName(final String defaultName) { 204 this.defaultName = defaultName; 205 return this; 206 } 207 208 /** 209 * Sets the maximum amount of memory in kilobytes to use for parsing the archive and during extraction. 210 * <p> 211 * Not all codecs honor this setting. Currently only LZMA and LZMA2 are supported. 212 * </p> 213 * 214 * @param maxMemoryLimitKb the max memory limit in kilobytes. 215 * @return this. 216 */ 217 public Builder setMaxMemoryLimitKb(final int maxMemoryLimitKb) { 218 this.maxMemoryLimitKb = maxMemoryLimitKb; 219 return this; 220 } 221 222 /** 223 * Sets the password. 224 * 225 * @param password the password. 226 * @return this. 227 */ 228 public Builder setPassword(final byte[] password) { 229 this.password = password != null ? password.clone() : null; 230 return this; 231 } 232 233 /** 234 * Sets the password. 235 * 236 * @param password the password. 237 * @return this. 238 */ 239 public Builder setPassword(final char[] password) { 240 this.password = password != null ? AES256SHA256Decoder.utf16Decode(password.clone()) : null; 241 return this; 242 } 243 244 /** 245 * Sets the password. 246 * 247 * @param password the password. 248 * @return this. 249 */ 250 public Builder setPassword(final String password) { 251 this.password = password != null ? 
AES256SHA256Decoder.utf16Decode(password.toCharArray()) : null; 252 return this; 253 } 254 255 /** 256 * Sets the input channel. 257 * 258 * @param seekableByteChannel the input channel. 259 * @return this. 260 */ 261 public Builder setSeekableByteChannel(final SeekableByteChannel seekableByteChannel) { 262 this.seekableByteChannel = seekableByteChannel; 263 return this; 264 } 265 266 /** 267 * Sets whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 268 * <p> 269 * This special kind of broken archive is encountered when mutli volume archives are closed prematurely. If you enable this option SevenZFile will trust 270 * data that looks as if it could contain metadata of an archive and allocate big amounts of memory. It is strongly recommended to not enable this 271 * option without setting {@link #setMaxMemoryLimitKb(int)} at the same time. 272 * </p> 273 * 274 * @param tryToRecoverBrokenArchives whether {@link SevenZFile} will try to recover broken archives where the CRC of the file's metadata is 0. 275 * @return this. 276 */ 277 public Builder setTryToRecoverBrokenArchives(final boolean tryToRecoverBrokenArchives) { 278 this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; 279 return this; 280 } 281 282 /** 283 * Sets whether entries without a name should get their names set to the archive's default file name. 284 * 285 * @param useDefaultNameForUnnamedEntries whether entries without a name should get their names set to the archive's default file name. 286 * @return this. 287 */ 288 public Builder setUseDefaultNameForUnnamedEntries(final boolean useDefaultNameForUnnamedEntries) { 289 this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; 290 return this; 291 } 292 293 } 294 295 static final int SIGNATURE_HEADER_SIZE = 32; 296 297 private static final String DEFAULT_FILE_NAME = "unknown archive"; 298 299 /** Shared with SevenZOutputFile and tests, neither mutates it. 
*/ 300 static final byte[] sevenZSignature = { // NOSONAR 301 (byte) '7', (byte) 'z', (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C }; 302 303 private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { 304 if (value > Integer.MAX_VALUE || value < 0) { 305 throw new IOException(String.format("Cannot handle % %,d", what, value)); 306 } 307 return (int) value; 308 } 309 310 /** 311 * Creates a new Builder. 312 * 313 * @return a new Builder. 314 * @since 1.26.0 315 */ 316 public static Builder builder() { 317 return new Builder(); 318 } 319 320 private static ByteBuffer checkEndOfFile(final ByteBuffer buf, final int expectRemaining) throws EOFException { 321 final int remaining = buf.remaining(); 322 if (remaining < expectRemaining) { 323 throw new EOFException(String.format("remaining %,d < expectRemaining %,d", remaining, expectRemaining)); 324 } 325 return buf; 326 } 327 328 private static void get(final ByteBuffer buf, final byte[] to) throws EOFException { 329 checkEndOfFile(buf, to.length).get(to); 330 } 331 332 private static char getChar(final ByteBuffer buf) throws EOFException { 333 return checkEndOfFile(buf, Character.BYTES).getChar(); 334 } 335 336 private static int getInt(final ByteBuffer buf) throws EOFException { 337 return checkEndOfFile(buf, Integer.BYTES).getInt(); 338 } 339 340 private static long getLong(final ByteBuffer buf) throws EOFException { 341 return checkEndOfFile(buf, Long.BYTES).getLong(); 342 } 343 344 private static int getUnsignedByte(final ByteBuffer buf) throws EOFException { 345 if (!buf.hasRemaining()) { 346 throw new EOFException(); 347 } 348 return buf.get() & 0xff; 349 } 350 351 /** 352 * Checks if the signature matches what is expected for a 7z file. 353 * 354 * @param signature the bytes to check 355 * @param length the number of bytes to check 356 * @return true, if this is the signature of a 7z archive. 
357 * @since 1.8 358 */ 359 public static boolean matches(final byte[] signature, final int length) { 360 if (length < sevenZSignature.length) { 361 return false; 362 } 363 for (int i = 0; i < sevenZSignature.length; i++) { 364 if (signature[i] != sevenZSignature[i]) { 365 return false; 366 } 367 } 368 return true; 369 } 370 371 private static SeekableByteChannel newByteChannel(final File file) throws IOException { 372 return Files.newByteChannel(file.toPath(), EnumSet.of(StandardOpenOption.READ)); 373 } 374 375 private static long readUint64(final ByteBuffer in) throws IOException { 376 // long rather than int as it might get shifted beyond the range of an int 377 final long firstByte = getUnsignedByte(in); 378 int mask = 0x80; 379 long value = 0; 380 for (int i = 0; i < 8; i++) { 381 if ((firstByte & mask) == 0) { 382 return value | (firstByte & mask - 1) << 8 * i; 383 } 384 final long nextByte = getUnsignedByte(in); 385 value |= nextByte << 8 * i; 386 mask >>>= 1; 387 } 388 return value; 389 } 390 391 private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) { 392 if (bytesToSkip < 1) { 393 return 0; 394 } 395 final int current = input.position(); 396 final int maxSkip = input.remaining(); 397 if (maxSkip < bytesToSkip) { 398 bytesToSkip = maxSkip; 399 } 400 input.position(current + (int) bytesToSkip); 401 return bytesToSkip; 402 } 403 404 private final String fileName; 405 private SeekableByteChannel channel; 406 private final Archive archive; 407 private int currentEntryIndex = -1; 408 private int currentFolderIndex = -1; 409 private InputStream currentFolderInputStream; 410 private byte[] password; 411 private long compressedBytesReadFromCurrentEntry; 412 private long uncompressedBytesReadFromCurrentEntry; 413 private final ArrayList<InputStream> deferredBlockStreams = new ArrayList<>(); 414 private final int maxMemoryLimitKb; 415 private final boolean useDefaultNameForUnnamedEntries; 416 417 private final boolean 
tryToRecoverBrokenArchives; 418 419 /** 420 * Reads a file as unencrypted 7z archive. 421 * 422 * @param fileName the file to read. 423 * @throws IOException if reading the archive fails. 424 * @deprecated Use {@link Builder#get()}. 425 */ 426 @Deprecated 427 public SevenZFile(final File fileName) throws IOException { 428 this(fileName, SevenZFileOptions.DEFAULT); 429 } 430 431 /** 432 * Reads a file as 7z archive 433 * 434 * @param file the file to read 435 * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password. 436 * @throws IOException if reading the archive fails 437 * @deprecated Use {@link Builder#get()}. 438 */ 439 @SuppressWarnings("resource") // caller closes 440 @Deprecated 441 public SevenZFile(final File file, final byte[] password) throws IOException { 442 this(newByteChannel(file), file.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT); 443 } 444 445 /** 446 * Reads a file as 7z archive 447 * 448 * @param file the file to read 449 * @param password optional password if the archive is encrypted 450 * @throws IOException if reading the archive fails 451 * @since 1.17 452 * @deprecated Use {@link Builder#get()}. 453 */ 454 @Deprecated 455 public SevenZFile(final File file, final char[] password) throws IOException { 456 this(file, password, SevenZFileOptions.DEFAULT); 457 } 458 459 /** 460 * Reads a file as 7z archive with additional options. 461 * 462 * @param file the file to read 463 * @param password optional password if the archive is encrypted 464 * @param options the options to apply 465 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 466 * @since 1.19 467 * @deprecated Use {@link Builder#get()}. 
*/
    @SuppressWarnings("resource") // caller closes
    @Deprecated
    public SevenZFile(final File file, final char[] password, final SevenZFileOptions options) throws IOException {
        this(newByteChannel(file), // NOSONAR
                file.getAbsolutePath(), AES256SHA256Decoder.utf16Decode(password), true, options);
    }

    /**
     * Opens an unencrypted 7z archive stored in a file, applying the given options.
     *
     * @param file    the file to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final File file, final SevenZFileOptions options) throws IOException {
        this(file, null, options);
    }

    /**
     * Opens a 7z archive backed by a SeekableByteChannel.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel the channel to read
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel) throws IOException {
        this(channel, SevenZFileOptions.DEFAULT);
    }

    /**
     * Opens a possibly encrypted 7z archive backed by a SeekableByteChannel.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final byte[] password) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password);
    }

    /**
     * Opens a possibly encrypted 7z archive backed by a SeekableByteChannel.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final char[] password) throws IOException {
        this(channel, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Opens a possibly encrypted 7z archive backed by a SeekableByteChannel, applying the given options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param password optional password if the archive is encrypted
     * @param options  the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, password, options);
    }

    /**
     * Opens an unencrypted 7z archive backed by a SeekableByteChannel, applying the given options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel the channel to read
     * @param options the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException {
        this(channel, DEFAULT_FILE_NAME, null, options);
    }

    /**
     * Opens an unencrypted 7z archive backed by a SeekableByteChannel.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @throws IOException if reading the archive fails
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName) throws IOException {
        this(channel, fileName, SevenZFileOptions.DEFAULT);
    }

    /**
     * Opens a possibly encrypted 7z archive backed by a SeekableByteChannel.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted - the byte array is supposed to be the UTF16-LE encoded representation of the password.
     * @throws IOException if reading the archive fails
     * @since 1.13
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password) throws IOException {
        this(channel, fileName, password, false, SevenZFileOptions.DEFAULT);
    }

    /**
     * Stores the configuration, reads the archive headers and keeps a private copy of the password.
     * If reading the headers fails and {@code closeOnError} is set, the channel is closed before the failure propagates.
     */
    private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError, final int maxMemoryLimitKb,
            final boolean useDefaultNameForUnnamedEntries, final boolean tryToRecoverBrokenArchives) throws IOException {
        this.channel = channel;
        this.fileName = fileName;
        this.maxMemoryLimitKb = maxMemoryLimitKb;
        this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries;
        this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives;
        boolean headersRead = false;
        try {
            archive = readHeaders(password);
            this.password = password == null ? null : Arrays.copyOf(password, password.length);
            headersRead = true;
        } finally {
            if (closeOnError && !headersRead) {
                this.channel.close();
            }
        }
    }

    /**
     * Constructs a new instance, taking the individual settings from an options object.
     *
     * @param channel      the channel to read.
     * @param fileName     name of the archive - only used for error reporting.
     * @param password     optional password if the archive is encrypted.
     * @param closeOnError closes the channel on error.
     * @param options      options.
     * @throws IOException if reading the archive fails
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    private SevenZFile(final SeekableByteChannel channel, final String fileName, final byte[] password, final boolean closeOnError,
            final SevenZFileOptions options) throws IOException {
        this(channel, fileName, password, closeOnError, options.getMaxMemoryLimitInKb(), options.getUseDefaultNameForUnnamedEntries(),
                options.getTryToRecoverBrokenArchives());
    }

    /**
     * Opens a possibly encrypted 7z archive backed by a SeekableByteChannel.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @throws IOException if reading the archive fails
     * @since 1.17
     * @deprecated Use {@link Builder#get()}.
     */
    @Deprecated
    public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password) throws IOException {
        this(channel, fileName, password, SevenZFileOptions.DEFAULT);
    }

    /**
     * Reads a SeekableByteChannel as 7z archive with additional options.
     * <p>
     * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive.
     * </p>
     *
     * @param channel  the channel to read
     * @param fileName name of the archive - only used for error reporting
     * @param password optional password if the archive is encrypted
     * @param options  the options to apply
     * @throws IOException if reading the archive fails or the memory limit (if set) is too small
     * @since 1.19
     * @deprecated Use {@link Builder#get()}.
682 */ 683 @Deprecated 684 public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, final SevenZFileOptions options) throws IOException { 685 this(channel, fileName, AES256SHA256Decoder.utf16Decode(password), false, options); 686 } 687 688 /** 689 * Reads a SeekableByteChannel as 7z archive with additional options. 690 * <p> 691 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 692 * </p> 693 * 694 * @param channel the channel to read 695 * @param fileName name of the archive - only used for error reporting 696 * @param options the options to apply 697 * @throws IOException if reading the archive fails or the memory limit (if set) is too small 698 * @since 1.19 699 * @deprecated Use {@link Builder#get()}. 700 */ 701 @Deprecated 702 public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) throws IOException { 703 this(channel, fileName, null, false, options); 704 } 705 706 private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry) 707 throws IOException { 708 channel.position(folderOffset); 709 InputStream inputStreamStack = new FilterInputStream( 710 new BufferedInputStream(new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]))) { 711 private void count(final int c) { 712 compressedBytesReadFromCurrentEntry += c; 713 } 714 715 @Override 716 public int read() throws IOException { 717 final int r = in.read(); 718 if (r >= 0) { 719 count(1); 720 } 721 return r; 722 } 723 724 @Override 725 public int read(final byte[] b) throws IOException { 726 return read(b, 0, b.length); 727 } 728 729 @Override 730 public int read(final byte[] b, final int off, final int len) throws IOException { 731 if (len == 0) { 732 return 0; 733 } 734 final int r = in.read(b, off, len); 735 if (r >= 0) { 736 
count(r); 737 } 738 return r; 739 } 740 }; 741 final LinkedList<SevenZMethodConfiguration> methods = new LinkedList<>(); 742 for (final Coder coder : folder.getOrderedCoders()) { 743 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 744 throw new IOException("Multi input/output stream coders are not yet supported"); 745 } 746 final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); 747 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKb); 748 methods.addFirst(new SevenZMethodConfiguration(method, Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); 749 } 750 entry.setContentMethods(methods); 751 if (folder.hasCrc) { 752 return new CRC32VerifyingInputStream(inputStreamStack, folder.getUnpackSize(), folder.crc); 753 } 754 return inputStreamStack; 755 } 756 757 /** 758 * Builds the decoding stream for the entry to be read. This method may be called from a random access(getInputStream) or sequential access(getNextEntry). 759 * If this method is called from a random access, some entries may need to be skipped(we put them to the deferredBlockStreams and skip them when actually 760 * needed to improve the performance) 761 * 762 * @param entryIndex the index of the entry to be read 763 * @param isRandomAccess is this called in a random access 764 * @throws IOException if there are exceptions when reading the file 765 */ 766 private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException { 767 if (archive.streamMap == null) { 768 throw new IOException("Archive doesn't contain stream information to read entries"); 769 } 770 final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 771 if (folderIndex < 0) { 772 deferredBlockStreams.clear(); 773 // TODO: previously it'd return an empty stream? 
774 // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0); 775 return; 776 } 777 final SevenZArchiveEntry file = archive.files[entryIndex]; 778 boolean isInSameFolder = false; 779 if (currentFolderIndex == folderIndex) { 780 // (COMPRESS-320). 781 // The current entry is within the same (potentially opened) folder. The 782 // previous stream has to be fully decoded before we can start reading 783 // but don't do it eagerly -- if the user skips over the entire folder nothing 784 // is effectively decompressed. 785 if (entryIndex > 0) { 786 file.setContentMethods(archive.files[entryIndex - 1].getContentMethods()); 787 } 788 789 // if this is called in a random access, then the content methods of previous entry may be null 790 // the content methods should be set to methods of the first entry as it must not be null, 791 // and the content methods would only be set if the content methods was not set 792 if (isRandomAccess && file.getContentMethods() == null) { 793 final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex]; 794 final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex]; 795 file.setContentMethods(folderFirstFile.getContentMethods()); 796 } 797 isInSameFolder = true; 798 } else { 799 currentFolderIndex = folderIndex; 800 // We're opening a new folder. Discard any queued streams/ folder stream. 801 reopenFolderInputStream(folderIndex, file); 802 } 803 804 boolean haveSkippedEntries = false; 805 if (isRandomAccess) { 806 // entries will only need to be skipped if it's a random access 807 haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex); 808 } 809 810 if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) { 811 // we don't need to add another entry to the deferredBlockStreams when : 812 // 1. 
If this method is called in a random access and the entry index 813 // to be read equals to the current entry index, the input stream 814 // has already been put in the deferredBlockStreams 815 // 2. If this entry has not been read(which means no entries are skipped) 816 return; 817 } 818 819 InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize()); 820 if (file.getHasCrc()) { 821 fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue()); 822 } 823 824 deferredBlockStreams.add(fileStream); 825 } 826 827 private void calculateStreamMap(final Archive archive) throws IOException { 828 int nextFolderPackStreamIndex = 0; 829 final int numFolders = archive.folders != null ? archive.folders.length : 0; 830 final int[] folderFirstPackStreamIndex = new int[numFolders]; 831 for (int i = 0; i < numFolders; i++) { 832 folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex; 833 nextFolderPackStreamIndex += archive.folders[i].packedStreams.length; 834 } 835 long nextPackStreamOffset = 0; 836 final int numPackSizes = archive.packSizes.length; 837 final long[] packStreamOffsets = new long[numPackSizes]; 838 for (int i = 0; i < numPackSizes; i++) { 839 packStreamOffsets[i] = nextPackStreamOffset; 840 nextPackStreamOffset += archive.packSizes[i]; 841 } 842 final int[] folderFirstFileIndex = new int[numFolders]; 843 final int[] fileFolderIndex = new int[archive.files.length]; 844 int nextFolderIndex = 0; 845 int nextFolderUnpackStreamIndex = 0; 846 for (int i = 0; i < archive.files.length; i++) { 847 if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) { 848 fileFolderIndex[i] = -1; 849 continue; 850 } 851 if (nextFolderUnpackStreamIndex == 0) { 852 for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) { 853 folderFirstFileIndex[nextFolderIndex] = i; 854 if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) { 855 break; 856 } 857 } 858 if (nextFolderIndex >= 
archive.folders.length) { 859 throw new IOException("Too few folders in archive"); 860 } 861 } 862 fileFolderIndex[i] = nextFolderIndex; 863 if (!archive.files[i].hasStream()) { 864 continue; 865 } 866 ++nextFolderUnpackStreamIndex; 867 if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) { 868 ++nextFolderIndex; 869 nextFolderUnpackStreamIndex = 0; 870 } 871 } 872 archive.streamMap = new StreamMap(folderFirstPackStreamIndex, packStreamOffsets, folderFirstFileIndex, fileFolderIndex); 873 } 874 875 private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) { 876 archiveEntries.computeIfAbsent(index, i -> new SevenZArchiveEntry()); 877 } 878 879 /** 880 * Closes the archive. 881 * 882 * @throws IOException if closing the file fails 883 */ 884 @Override 885 public void close() throws IOException { 886 if (channel != null) { 887 try { 888 channel.close(); 889 } finally { 890 channel = null; 891 if (password != null) { 892 Arrays.fill(password, (byte) 0); 893 } 894 password = null; 895 } 896 } 897 } 898 899 private InputStream getCurrentStream() throws IOException { 900 if (archive.files[currentEntryIndex].getSize() == 0) { 901 return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); 902 } 903 if (deferredBlockStreams.isEmpty()) { 904 throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); 905 } 906 while (deferredBlockStreams.size() > 1) { 907 // In solid compression mode we need to decompress all leading folder' 908 // streams to get access to an entry. We defer this until really needed 909 // so that entire blocks can be skipped without wasting time for decompression. 
910 try (InputStream stream = deferredBlockStreams.remove(0)) { 911 org.apache.commons.io.IOUtils.skip(stream, Long.MAX_VALUE, org.apache.commons.io.IOUtils::byteArray); 912 } 913 compressedBytesReadFromCurrentEntry = 0; 914 } 915 return deferredBlockStreams.get(0); 916 } 917 918 /** 919 * Gets a default file name from the archive name - if known. 920 * <p> 921 * This implements the same heuristics the 7z tools use. In 7z's case if an archive contains entries without a name - i.e. 922 * {@link SevenZArchiveEntry#getName} returns {@code null} - then its command line and GUI tools will use this default name when extracting the entries. 923 * </p> 924 * 925 * @return null if the name of the archive is unknown. Otherwise, if the name of the archive has got any extension, it is stripped and the remainder 926 * returned. Finally, if the name of the archive hasn't got any extension, then a {@code ~} character is appended to the archive name. 927 * @since 1.19 928 */ 929 public String getDefaultName() { 930 if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { 931 return null; 932 } 933 934 final String lastSegment = new File(fileName).getName(); 935 final int dotPos = lastSegment.lastIndexOf("."); 936 if (dotPos > 0) { // if the file starts with a dot then this is not an extension 937 return lastSegment.substring(0, dotPos); 938 } 939 return lastSegment + "~"; 940 } 941 942 /** 943 * Gets a copy of meta-data of all archive entries. 944 * <p> 945 * This method only provides meta-data, the entries can not be used to read the contents, you still need to process all entries in order using 946 * {@link #getNextEntry} for that. 947 * </p> 948 * <p> 949 * The content methods are only available for entries that have already been reached via {@link #getNextEntry}. 950 * </p> 951 * 952 * @return a copy of meta-data of all archive entries. 
953 * @since 1.11 954 */ 955 public Iterable<SevenZArchiveEntry> getEntries() { 956 return new ArrayList<>(Arrays.asList(archive.files)); 957 } 958 959 /** 960 * Gets an InputStream for reading the contents of the given entry. 961 * <p> 962 * For archives using solid compression randomly accessing entries will be significantly slower than reading the archive sequentially. 963 * </p> 964 * 965 * @param entry the entry to get the stream for. 966 * @return a stream to read the entry from. 967 * @throws IOException if unable to create an input stream from the entry 968 * @since 1.20 969 */ 970 public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { 971 int entryIndex = -1; 972 for (int i = 0; i < this.archive.files.length; i++) { 973 if (entry == this.archive.files[i]) { 974 entryIndex = i; 975 break; 976 } 977 } 978 979 if (entryIndex < 0) { 980 throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); 981 } 982 983 buildDecodingStream(entryIndex, true); 984 currentEntryIndex = entryIndex; 985 currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; 986 return getCurrentStream(); 987 } 988 989 /** 990 * Gets the next Archive Entry in this archive. 991 * 992 * @return the next entry, or {@code null} if there are no more entries 993 * @throws IOException if the next entry could not be read 994 */ 995 public SevenZArchiveEntry getNextEntry() throws IOException { 996 if (currentEntryIndex >= archive.files.length - 1) { 997 return null; 998 } 999 ++currentEntryIndex; 1000 final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; 1001 if (entry.getName() == null && useDefaultNameForUnnamedEntries) { 1002 entry.setName(getDefaultName()); 1003 } 1004 buildDecodingStream(currentEntryIndex, false); 1005 uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; 1006 return entry; 1007 } 1008 1009 /** 1010 * Gets statistics for bytes read from the current entry. 
1011 * 1012 * @return statistics for bytes read from the current entry 1013 * @since 1.17 1014 */ 1015 public InputStreamStatistics getStatisticsForCurrentEntry() { 1016 return new InputStreamStatistics() { 1017 @Override 1018 public long getCompressedCount() { 1019 return compressedBytesReadFromCurrentEntry; 1020 } 1021 1022 @Override 1023 public long getUncompressedCount() { 1024 return uncompressedBytesReadFromCurrentEntry; 1025 } 1026 }; 1027 } 1028 1029 /** 1030 * Tests if any data of current entry has been read or not. This is achieved by comparing the bytes remaining to read and the size of the file. 1031 * 1032 * @return true if any data of current entry has been read 1033 * @since 1.21 1034 */ 1035 private boolean hasCurrentEntryBeenRead() { 1036 boolean hasCurrentEntryBeenRead = false; 1037 if (!deferredBlockStreams.isEmpty()) { 1038 final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); 1039 // get the bytes remaining to read, and compare it with the size of 1040 // the file to figure out if the file has been read 1041 if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { 1042 hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex] 1043 .getSize(); 1044 } 1045 1046 if (currentEntryInputStream instanceof BoundedInputStream) { 1047 hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); 1048 } 1049 } 1050 return hasCurrentEntryBeenRead; 1051 } 1052 1053 private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { 1054 assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); 1055 final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; 1056 channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); 1057 if (verifyCrc) { 1058 
final long position = channel.position(); 1059 final CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); 1060 if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { 1061 throw new IOException("Problem computing NextHeader CRC-32"); 1062 } 1063 if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { 1064 throw new IOException("NextHeader CRC-32 mismatch"); 1065 } 1066 channel.position(position); 1067 } 1068 Archive archive = new Archive(); 1069 ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); 1070 readFully(buf); 1071 int nid = getUnsignedByte(buf); 1072 if (nid == NID.kEncodedHeader) { 1073 buf = readEncodedHeader(buf, archive, password); 1074 // Archive gets rebuilt with the new header 1075 archive = new Archive(); 1076 nid = getUnsignedByte(buf); 1077 } 1078 if (nid != NID.kHeader) { 1079 throw new IOException("Broken or unsupported archive: no Header"); 1080 } 1081 readHeader(buf, archive); 1082 archive.subStreamsInfo = null; 1083 return archive; 1084 } 1085 1086 /** 1087 * Reads a byte of data. 1088 * 1089 * @return the byte read, or -1 if end of input is reached 1090 * @throws IOException if an I/O error has occurred 1091 */ 1092 public int read() throws IOException { 1093 final int b = getCurrentStream().read(); 1094 if (b >= 0) { 1095 uncompressedBytesReadFromCurrentEntry++; 1096 } 1097 return b; 1098 } 1099 1100 /** 1101 * Reads data into an array of bytes. 1102 * 1103 * @param b the array to write data to 1104 * @return the number of bytes read, or -1 if end of input is reached 1105 * @throws IOException if an I/O error has occurred 1106 */ 1107 public int read(final byte[] b) throws IOException { 1108 return read(b, 0, b.length); 1109 } 1110 1111 /** 1112 * Reads data into an array of bytes. 
1113 * 1114 * @param b the array to write data to 1115 * @param off offset into the buffer to start filling at 1116 * @param len of bytes to read 1117 * @return the number of bytes read, or -1 if end of input is reached 1118 * @throws IOException if an I/O error has occurred 1119 */ 1120 public int read(final byte[] b, final int off, final int len) throws IOException { 1121 if (len == 0) { 1122 return 0; 1123 } 1124 final int cnt = getCurrentStream().read(b, off, len); 1125 if (cnt > 0) { 1126 uncompressedBytesReadFromCurrentEntry += cnt; 1127 } 1128 return cnt; 1129 } 1130 1131 private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { 1132 final int areAllDefined = getUnsignedByte(header); 1133 final BitSet bits; 1134 if (areAllDefined != 0) { 1135 bits = new BitSet(size); 1136 for (int i = 0; i < size; i++) { 1137 bits.set(i, true); 1138 } 1139 } else { 1140 bits = readBits(header, size); 1141 } 1142 return bits; 1143 } 1144 1145 private void readArchiveProperties(final ByteBuffer input) throws IOException { 1146 // FIXME: the reference implementation just throws them away? 
1147 int nid = getUnsignedByte(input); 1148 while (nid != NID.kEnd) { 1149 final long propertySize = readUint64(input); 1150 final byte[] property = new byte[(int) propertySize]; 1151 get(input, property); 1152 nid = getUnsignedByte(input); 1153 } 1154 } 1155 1156 private BitSet readBits(final ByteBuffer header, final int size) throws IOException { 1157 final BitSet bits = new BitSet(size); 1158 int mask = 0; 1159 int cache = 0; 1160 for (int i = 0; i < size; i++) { 1161 if (mask == 0) { 1162 mask = 0x80; 1163 cache = getUnsignedByte(header); 1164 } 1165 bits.set(i, (cache & mask) != 0); 1166 mask >>>= 1; 1167 } 1168 return bits; 1169 } 1170 1171 private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, final byte[] password) throws IOException { 1172 final int pos = header.position(); 1173 final ArchiveStatistics stats = new ArchiveStatistics(); 1174 sanityCheckStreamsInfo(header, stats); 1175 stats.assertValidity(maxMemoryLimitKb); 1176 header.position(pos); 1177 1178 readStreamsInfo(header, archive); 1179 1180 if (archive.folders == null || archive.folders.length == 0) { 1181 throw new IOException("no folders, can't read encoded header"); 1182 } 1183 if (archive.packSizes == null || archive.packSizes.length == 0) { 1184 throw new IOException("no packed streams, can't read encoded header"); 1185 } 1186 1187 // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
1188 final Folder folder = archive.folders[0]; 1189 final int firstPackStreamIndex = 0; 1190 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0; 1191 1192 channel.position(folderOffset); 1193 InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]); 1194 for (final Coder coder : folder.getOrderedCoders()) { 1195 if (coder.numInStreams != 1 || coder.numOutStreams != 1) { 1196 throw new IOException("Multi input/output stream coders are not yet supported"); 1197 } 1198 inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, // NOSONAR 1199 folder.getUnpackSizeForCoder(coder), coder, password, maxMemoryLimitKb); 1200 } 1201 if (folder.hasCrc) { 1202 inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, folder.getUnpackSize(), folder.crc); 1203 } 1204 final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); 1205 final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); 1206 if (nextHeader.length < unpackSize) { 1207 throw new IOException("premature end of stream"); 1208 } 1209 inputStreamStack.close(); 1210 return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); 1211 } 1212 1213 private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { 1214 final int numFilesInt = (int) readUint64(header); 1215 final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>(); 1216 BitSet isEmptyStream = null; 1217 BitSet isEmptyFile = null; 1218 BitSet isAnti = null; 1219 while (true) { 1220 final int propertyType = getUnsignedByte(header); 1221 if (propertyType == 0) { 1222 break; 1223 } 1224 final long size = readUint64(header); 1225 switch (propertyType) { 1226 case NID.kEmptyStream: { 1227 isEmptyStream = readBits(header, numFilesInt); 1228 break; 1229 } 1230 case NID.kEmptyFile: { 1231 isEmptyFile = readBits(header, isEmptyStream.cardinality()); 1232 break; 1233 } 1234 
case NID.kAnti: {
                // one bit per entry flagged as empty stream
                isAnti = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kName: {
                /* final int external = */ getUnsignedByte(header);
                // the property size includes the "external" byte just consumed
                final byte[] names = new byte[(int) (size - 1)];
                final int namesLength = names.length;
                get(header, names);
                int nextFile = 0;
                int nextName = 0;
                // names are UTF-16LE strings, each terminated by a 16-bit zero
                for (int i = 0; i < namesLength; i += 2) {
                    if (names[i] == 0 && names[i + 1] == 0) {
                        checkEntryIsInitialized(fileMap, nextFile);
                        fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE));
                        nextName = i + 2;
                        nextFile++;
                    }
                }
                // the last name must be terminated and there must be exactly one name per file
                if (nextName != namesLength || nextFile != numFilesInt) {
                    throw new IOException("Error parsing file names");
                }
                break;
            }
            case NID.kCTime: {
                final BitSet timesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasCreationDate(timesDefined.get(i));
                    // a value is only stored for entries whose bit is set
                    if (entryAtIndex.getHasCreationDate()) {
                        entryAtIndex.setCreationDate(getLong(header));
                    }
                }
                break;
            }
            case NID.kATime: {
                final BitSet timesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasAccessDate(timesDefined.get(i));
                    if (entryAtIndex.getHasAccessDate()) {
                        entryAtIndex.setAccessDate(getLong(header));
                    }
                }
                break;
            }
            case NID.kMTime: {
                final BitSet timesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasLastModifiedDate(timesDefined.get(i));
                    if (entryAtIndex.getHasLastModifiedDate()) {
                        entryAtIndex.setLastModifiedDate(getLong(header));
                    }
                }
                break;
            }
            case NID.kWinAttributes: {
                final BitSet attributesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i));
                    if (entryAtIndex.getHasWindowsAttributes()) {
                        entryAtIndex.setWindowsAttributes(getInt(header));
                    }
                }
                break;
            }
            case NID.kDummy: {
                // 7z 9.20 asserts the content is all zeros and ignores the property
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)

                skipBytesFully(header, size);
                break;
            }

            default: {
                // Unknown property: skip its payload.
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287)
                skipBytesFully(header, size);
                break;
            }
            }
        }
        // Second pass: combine the per-file properties with the sub-stream data.
        // Entries with a stream consume sub-stream slots in order, entries without one consume
        // slots of the isEmptyFile / isAnti vectors in order.
        int nonEmptyFileCounter = 0;
        int emptyFileCounter = 0;
        for (int i = 0; i < numFilesInt; i++) {
            final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
            if (entryAtIndex == null) {
                // no property created an entry for this index
                continue;
            }
            entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i));
            if (entryAtIndex.hasStream()) {
                if (archive.subStreamsInfo == null) {
                    throw new IOException("Archive contains file with streams but no subStreamsInfo");
                }
                entryAtIndex.setDirectory(false);
                entryAtIndex.setAntiItem(false);
                entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
                entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
                entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
                if (entryAtIndex.getSize() < 0) {
                    throw new IOException("broken archive, entry with negative size");
                }
                ++nonEmptyFileCounter;
            } else {
                // without a stream the entry is a directory unless it is flagged as an empty file
                entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter));
                entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter));
                entryAtIndex.setHasCrc(false);
                entryAtIndex.setSize(0);
                ++emptyFileCounter;
            }
        }
        archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new);
        calculateStreamMap(archive);
    }

    /**
     * Reads the description of one folder: its coders, the bind pairs connecting them and the indices of its packed streams.
     *
     * @param header the buffer to read from
     * @return the folder that has been read
     * @throws IOException if the buffer ends prematurely or a coder announces alternative methods
     */
    private Folder readFolder(final ByteBuffer header) throws IOException {
        final Folder folder = new Folder();

        final long numCoders = readUint64(header);
        final Coder[] coders = new Coder[(int) numCoders];
        long totalInStreams = 0;
        long totalOutStreams = 0;
        for (int i = 0; i < coders.length; i++) {
            // flag byte: low nibble = length of the method id, 0x10 = complex coder,
            // 0x20 = properties follow, 0x80 = alternative methods follow
            final int bits = getUnsignedByte(header);
            final int idSize = bits & 0xf;
            final boolean isSimple = (bits & 0x10) == 0;
            final boolean hasAttributes = (bits & 0x20) != 0;
            final boolean moreAlternativeMethods = (bits & 0x80) != 0;

            final byte[] decompressionMethodId = new byte[idSize];
            get(header, decompressionMethodId);
            final long numInStreams;
            final long numOutStreams;
            if (isSimple) {
                // a simple coder has exactly one input and one output stream
                numInStreams = 1;
                numOutStreams = 1;
            } else {
                numInStreams = readUint64(header);
                numOutStreams = readUint64(header);
            }
            totalInStreams += numInStreams;
            totalOutStreams += numOutStreams;
            byte[] properties = null;
            if (hasAttributes) {
                final long propertiesSize = readUint64(header);
                properties = new byte[(int) propertiesSize];
                get(header, properties);
            }
            // would need to keep looping as above:
            if (moreAlternativeMethods) {
                throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR
                        "The reference implementation doesn't support them either.");
            }
            coders[i] = new Coder(decompressionMethodId, numInStreams, numOutStreams, properties);
        }
        folder.coders = coders;
        folder.totalInputStreams = totalInStreams;
        folder.totalOutputStreams = totalOutStreams;

        // all output streams but one are bound to an input stream
        final long numBindPairs = totalOutStreams - 1;
        final BindPair[] bindPairs = new BindPair[(int) numBindPairs];
        for (int i = 0; i < bindPairs.length; i++) {
            bindPairs[i] = new BindPair(readUint64(header), readUint64(header));
        }
        folder.bindPairs = bindPairs;

        // input streams that are not fed by a bind pair are read from packed streams
        final long numPackedStreams = totalInStreams - numBindPairs;
        final long[] packedStreams = new long[(int) numPackedStreams];
        if (numPackedStreams == 1) {
            // a single packed stream is not stored explicitly: it is the first input stream without a bind pair
            int i;
            for (i = 0; i < (int) totalInStreams; i++) {
                if (folder.findBindPairForInStream(i) < 0) {
                    break;
                }
            }
            packedStreams[0] = i;
        } else {
            for (int i = 0; i < (int) numPackedStreams; i++) {
                packedStreams[i] = readUint64(header);
            }
        }
        folder.packedStreams = packedStreams;

        return folder;
    }

    /**
     * Fills the given buffer completely from the channel, starting at the buffer's beginning, and flips it so it is ready to be read.
     *
     * @param buf the buffer to fill
     * @throws IOException if reading from the channel fails
     */
    private void readFully(final ByteBuffer buf) throws IOException {
        buf.rewind();
        IOUtils.readFully(channel, buf);
        buf.flip();
    }

    private void readHeader(final ByteBuffer header, final Archive archive) throws IOException {
        // dry run that validates the header and collects statistics, then rewind for the real parse
        final int pos = header.position();
        final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header);
        stats.assertValidity(maxMemoryLimitKb);
        header.position(pos);

        int nid = getUnsignedByte(header);

        if (nid == NID.kArchiveProperties) {
            readArchiveProperties(header);
            nid = getUnsignedByte(header);
        }

        if (nid == NID.kAdditionalStreamsInfo) {
            throw new IOException("Additional streams unsupported");
            // nid = getUnsignedByte(header);
        }

        if (nid == NID.kMainStreamsInfo) {
1454 readStreamsInfo(header, archive); 1455 nid = getUnsignedByte(header); 1456 } 1457 1458 if (nid == NID.kFilesInfo) { 1459 readFilesInfo(header, archive); 1460 nid = getUnsignedByte(header); 1461 } 1462 } 1463 1464 private Archive readHeaders(final byte[] password) throws IOException { 1465 final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */).order(ByteOrder.LITTLE_ENDIAN); 1466 readFully(buf); 1467 final byte[] signature = new byte[6]; 1468 buf.get(signature); 1469 if (!Arrays.equals(signature, sevenZSignature)) { 1470 throw new IOException("Bad 7z signature"); 1471 } 1472 // 7zFormat.txt has it wrong - it's first major then minor 1473 final byte archiveVersionMajor = buf.get(); 1474 final byte archiveVersionMinor = buf.get(); 1475 if (archiveVersionMajor != 0) { 1476 throw new IOException(String.format("Unsupported 7z version (%d,%d)", archiveVersionMajor, archiveVersionMinor)); 1477 } 1478 1479 boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" 1480 final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); 1481 if (startHeaderCrc == 0) { 1482 // This is an indication of a corrupt header - peek the next 20 bytes 1483 final long currentPosition = channel.position(); 1484 final ByteBuffer peekBuf = ByteBuffer.allocate(20); 1485 readFully(peekBuf); 1486 channel.position(currentPosition); 1487 // Header invalid if all data is 0 1488 while (peekBuf.hasRemaining()) { 1489 if (peekBuf.get() != 0) { 1490 headerLooksValid = true; 1491 break; 1492 } 1493 } 1494 } else { 1495 headerLooksValid = true; 1496 } 1497 1498 if (headerLooksValid) { 1499 return initializeArchive(readStartHeader(startHeaderCrc), password, true); 1500 } 1501 // No valid header found - probably first file of multipart archive was removed too early. Scan for end header. 
1502 if (tryToRecoverBrokenArchives) { 1503 return tryToLocateEndHeader(password); 1504 } 1505 throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" 1506 + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" + " prematurely."); 1507 } 1508 1509 private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1510 archive.packPos = readUint64(header); 1511 final int numPackStreamsInt = (int) readUint64(header); 1512 int nid = getUnsignedByte(header); 1513 if (nid == NID.kSize) { 1514 archive.packSizes = new long[numPackStreamsInt]; 1515 for (int i = 0; i < archive.packSizes.length; i++) { 1516 archive.packSizes[i] = readUint64(header); 1517 } 1518 nid = getUnsignedByte(header); 1519 } 1520 1521 if (nid == NID.kCRC) { 1522 archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); 1523 archive.packCrcs = new long[numPackStreamsInt]; 1524 for (int i = 0; i < numPackStreamsInt; i++) { 1525 if (archive.packCrcsDefined.get(i)) { 1526 archive.packCrcs[i] = 0xffffFFFFL & getInt(header); 1527 } 1528 } 1529 1530 nid = getUnsignedByte(header); 1531 } 1532 } 1533 1534 private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { 1535 // using Stream rather than ByteBuffer for the benefit of the 1536 // built-in CRC check 1537 try (DataInputStream dataInputStream = new DataInputStream( 1538 new CRC32VerifyingInputStream(new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { 1539 final long nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); 1540 if (nextHeaderOffset < 0 || nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { 1541 throw new IOException("nextHeaderOffset is out of bounds"); 1542 } 1543 final long nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); 1544 final long nextHeaderEnd = nextHeaderOffset + nextHeaderSize; 1545 if (nextHeaderEnd < nextHeaderOffset || 
nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { 1546 throw new IOException("nextHeaderSize is out of bounds"); 1547 } 1548 final long nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); 1549 return new StartHeader(nextHeaderOffset, nextHeaderSize, nextHeaderCrc); 1550 } 1551 } 1552 1553 private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1554 int nid = getUnsignedByte(header); 1555 1556 if (nid == NID.kPackInfo) { 1557 readPackInfo(header, archive); 1558 nid = getUnsignedByte(header); 1559 } 1560 1561 if (nid == NID.kUnpackInfo) { 1562 readUnpackInfo(header, archive); 1563 nid = getUnsignedByte(header); 1564 } else { 1565 // archive without unpack/coders info 1566 archive.folders = Folder.EMPTY_FOLDER_ARRAY; 1567 } 1568 1569 if (nid == NID.kSubStreamsInfo) { 1570 readSubStreamsInfo(header, archive); 1571 nid = getUnsignedByte(header); 1572 } 1573 } 1574 1575 private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { 1576 for (final Folder folder : archive.folders) { 1577 folder.numUnpackSubStreams = 1; 1578 } 1579 long unpackStreamsCount = archive.folders.length; 1580 1581 int nid = getUnsignedByte(header); 1582 if (nid == NID.kNumUnpackStream) { 1583 unpackStreamsCount = 0; 1584 for (final Folder folder : archive.folders) { 1585 final long numStreams = readUint64(header); 1586 folder.numUnpackSubStreams = (int) numStreams; 1587 unpackStreamsCount += numStreams; 1588 } 1589 nid = getUnsignedByte(header); 1590 } 1591 1592 final int totalUnpackStreams = (int) unpackStreamsCount; 1593 final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(totalUnpackStreams); 1594 int nextUnpackStream = 0; 1595 for (final Folder folder : archive.folders) { 1596 if (folder.numUnpackSubStreams == 0) { 1597 continue; 1598 } 1599 long sum = 0; 1600 if (nid == NID.kSize) { 1601 for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { 1602 final long size = 
readUint64(header); 1603 subStreamsInfo.unpackSizes[nextUnpackStream++] = size; 1604 sum += size; 1605 } 1606 } 1607 if (sum > folder.getUnpackSize()) { 1608 throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); 1609 } 1610 subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; 1611 } 1612 if (nid == NID.kSize) { 1613 nid = getUnsignedByte(header); 1614 } 1615 1616 int numDigests = 0; 1617 for (final Folder folder : archive.folders) { 1618 if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { 1619 numDigests += folder.numUnpackSubStreams; 1620 } 1621 } 1622 1623 if (nid == NID.kCRC) { 1624 final BitSet hasMissingCrc = readAllOrBits(header, numDigests); 1625 final long[] missingCrcs = new long[numDigests]; 1626 for (int i = 0; i < numDigests; i++) { 1627 if (hasMissingCrc.get(i)) { 1628 missingCrcs[i] = 0xffffFFFFL & getInt(header); 1629 } 1630 } 1631 int nextCrc = 0; 1632 int nextMissingCrc = 0; 1633 for (final Folder folder : archive.folders) { 1634 if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { 1635 subStreamsInfo.hasCrc.set(nextCrc, true); 1636 subStreamsInfo.crcs[nextCrc] = folder.crc; 1637 ++nextCrc; 1638 } else { 1639 for (int i = 0; i < folder.numUnpackSubStreams; i++) { 1640 subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); 1641 subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; 1642 ++nextCrc; 1643 ++nextMissingCrc; 1644 } 1645 } 1646 } 1647 1648 nid = getUnsignedByte(header); 1649 } 1650 1651 archive.subStreamsInfo = subStreamsInfo; 1652 } 1653 1654 private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { 1655 int nid = getUnsignedByte(header); 1656 final int numFoldersInt = (int) readUint64(header); 1657 final Folder[] folders = new Folder[numFoldersInt]; 1658 archive.folders = folders; 1659 /* final int external = */ getUnsignedByte(header); 1660 for (int i = 0; i < numFoldersInt; i++) { 1661 folders[i] = 
readFolder(header); 1662 } 1663 1664 nid = getUnsignedByte(header); 1665 for (final Folder folder : folders) { 1666 assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); 1667 folder.unpackSizes = new long[(int) folder.totalOutputStreams]; 1668 for (int i = 0; i < folder.totalOutputStreams; i++) { 1669 folder.unpackSizes[i] = readUint64(header); 1670 } 1671 } 1672 1673 nid = getUnsignedByte(header); 1674 if (nid == NID.kCRC) { 1675 final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); 1676 for (int i = 0; i < numFoldersInt; i++) { 1677 if (crcsDefined.get(i)) { 1678 folders[i].hasCrc = true; 1679 folders[i].crc = 0xffffFFFFL & getInt(header); 1680 } else { 1681 folders[i].hasCrc = false; 1682 } 1683 } 1684 1685 nid = getUnsignedByte(header); 1686 } 1687 } 1688 1689 /** 1690 * Discard any queued streams/ folder stream, and reopen the current folder input stream. 1691 * 1692 * @param folderIndex the index of the folder to reopen 1693 * @param file the 7z entry to read 1694 * @throws IOException if exceptions occur when reading the 7z file 1695 */ 1696 private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException { 1697 deferredBlockStreams.clear(); 1698 if (currentFolderInputStream != null) { 1699 currentFolderInputStream.close(); 1700 currentFolderInputStream = null; 1701 } 1702 final Folder folder = archive.folders[folderIndex]; 1703 final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex]; 1704 final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + archive.streamMap.packStreamOffsets[firstPackStreamIndex]; 1705 1706 currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file); 1707 } 1708 1709 private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) throws IOException { 1710 final ArchiveStatistics stats = new ArchiveStatistics(); 1711 1712 int nid = getUnsignedByte(header); 
1713 1714 if (nid == NID.kArchiveProperties) { 1715 sanityCheckArchiveProperties(header); 1716 nid = getUnsignedByte(header); 1717 } 1718 1719 if (nid == NID.kAdditionalStreamsInfo) { 1720 throw new IOException("Additional streams unsupported"); 1721 // nid = getUnsignedByte(header); 1722 } 1723 1724 if (nid == NID.kMainStreamsInfo) { 1725 sanityCheckStreamsInfo(header, stats); 1726 nid = getUnsignedByte(header); 1727 } 1728 1729 if (nid == NID.kFilesInfo) { 1730 sanityCheckFilesInfo(header, stats); 1731 nid = getUnsignedByte(header); 1732 } 1733 1734 if (nid != NID.kEnd) { 1735 throw new IOException("Badly terminated header, found " + nid); 1736 } 1737 1738 return stats; 1739 } 1740 1741 private void sanityCheckArchiveProperties(final ByteBuffer header) throws IOException { 1742 int nid = getUnsignedByte(header); 1743 while (nid != NID.kEnd) { 1744 final int propertySize = assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); 1745 if (skipBytesFully(header, propertySize) < propertySize) { 1746 throw new IOException("invalid property size"); 1747 } 1748 nid = getUnsignedByte(header); 1749 } 1750 } 1751 1752 private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1753 stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); 1754 1755 int emptyStreams = -1; 1756 while (true) { 1757 final int propertyType = getUnsignedByte(header); 1758 if (propertyType == 0) { 1759 break; 1760 } 1761 final long size = readUint64(header); 1762 switch (propertyType) { 1763 case NID.kEmptyStream: { 1764 emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); 1765 break; 1766 } 1767 case NID.kEmptyFile: { 1768 if (emptyStreams == -1) { 1769 throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); 1770 } 1771 readBits(header, emptyStreams); 1772 break; 1773 } 1774 case NID.kAnti: { 1775 if (emptyStreams == -1) { 1776 throw new IOException("Header 
format error: kEmptyStream must appear before kAnti"); 1777 } 1778 readBits(header, emptyStreams); 1779 break; 1780 } 1781 case NID.kName: { 1782 final int external = getUnsignedByte(header); 1783 if (external != 0) { 1784 throw new IOException("Not implemented"); 1785 } 1786 final int namesLength = assertFitsIntoNonNegativeInt("file names length", size - 1); 1787 if ((namesLength & 1) != 0) { 1788 throw new IOException("File names length invalid"); 1789 } 1790 1791 int filesSeen = 0; 1792 for (int i = 0; i < namesLength; i += 2) { 1793 final char c = getChar(header); 1794 if (c == 0) { 1795 filesSeen++; 1796 } 1797 } 1798 if (filesSeen != stats.numberOfEntries) { 1799 throw new IOException("Invalid number of file names (" + filesSeen + " instead of " + stats.numberOfEntries + ")"); 1800 } 1801 break; 1802 } 1803 case NID.kCTime: { 1804 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1805 final int external = getUnsignedByte(header); 1806 if (external != 0) { 1807 throw new IOException("Not implemented"); 1808 } 1809 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1810 throw new IOException("invalid creation dates size"); 1811 } 1812 break; 1813 } 1814 case NID.kATime: { 1815 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1816 final int external = getUnsignedByte(header); 1817 if (external != 0) { 1818 throw new IOException("Not implemented"); 1819 } 1820 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1821 throw new IOException("invalid access dates size"); 1822 } 1823 break; 1824 } 1825 case NID.kMTime: { 1826 final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1827 final int external = getUnsignedByte(header); 1828 if (external != 0) { 1829 throw new IOException("Not implemented"); 1830 } 1831 if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { 1832 throw new IOException("invalid modification dates 
size"); 1833 } 1834 break; 1835 } 1836 case NID.kWinAttributes: { 1837 final int attributesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality(); 1838 final int external = getUnsignedByte(header); 1839 if (external != 0) { 1840 throw new IOException("Not implemented"); 1841 } 1842 if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { 1843 throw new IOException("invalid windows attributes size"); 1844 } 1845 break; 1846 } 1847 case NID.kStartPos: { 1848 throw new IOException("kStartPos is unsupported, please report"); 1849 } 1850 case NID.kDummy: { 1851 // 7z 9.20 asserts the content is all zeros and ignores the property 1852 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1853 1854 if (skipBytesFully(header, size) < size) { 1855 throw new IOException("Incomplete kDummy property"); 1856 } 1857 break; 1858 } 1859 1860 default: { 1861 // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 1862 if (skipBytesFully(header, size) < size) { 1863 throw new IOException("Incomplete property of type " + propertyType); 1864 } 1865 break; 1866 } 1867 } 1868 } 1869 stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); 1870 } 1871 1872 private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1873 1874 final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); 1875 if (numCoders == 0) { 1876 throw new IOException("Folder without coders"); 1877 } 1878 stats.numberOfCoders += numCoders; 1879 1880 long totalOutStreams = 0; 1881 long totalInStreams = 0; 1882 for (int i = 0; i < numCoders; i++) { 1883 final int bits = getUnsignedByte(header); 1884 final int idSize = bits & 0xf; 1885 get(header, new byte[idSize]); 1886 1887 final boolean isSimple = (bits & 0x10) == 0; 1888 final boolean hasAttributes = (bits & 0x20) != 0; 1889 final boolean moreAlternativeMethods = (bits & 0x80) 
!= 0; 1890 if (moreAlternativeMethods) { 1891 throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR 1892 "The reference implementation doesn't support them either."); 1893 } 1894 1895 if (isSimple) { 1896 totalInStreams++; 1897 totalOutStreams++; 1898 } else { 1899 totalInStreams += assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); 1900 totalOutStreams += assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); 1901 } 1902 1903 if (hasAttributes) { 1904 final int propertiesSize = assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); 1905 if (skipBytesFully(header, propertiesSize) < propertiesSize) { 1906 throw new IOException("invalid propertiesSize in folder"); 1907 } 1908 } 1909 } 1910 assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); 1911 assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); 1912 stats.numberOfOutStreams += totalOutStreams; 1913 stats.numberOfInStreams += totalInStreams; 1914 1915 if (totalOutStreams == 0) { 1916 throw new IOException("Total output streams can't be 0"); 1917 } 1918 1919 final int numBindPairs = assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); 1920 if (totalInStreams < numBindPairs) { 1921 throw new IOException("Total input streams can't be less than the number of bind pairs"); 1922 } 1923 final BitSet inStreamsBound = new BitSet((int) totalInStreams); 1924 for (int i = 0; i < numBindPairs; i++) { 1925 final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); 1926 if (totalInStreams <= inIndex) { 1927 throw new IOException("inIndex is bigger than number of inStreams"); 1928 } 1929 inStreamsBound.set(inIndex); 1930 final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); 1931 if (totalOutStreams <= outIndex) { 1932 throw new IOException("outIndex is bigger than number of outStreams"); 1933 } 1934 } 1935 1936 final int numPackedStreams = 
assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); 1937 1938 if (numPackedStreams == 1) { 1939 if (inStreamsBound.nextClearBit(0) == -1) { 1940 throw new IOException("Couldn't find stream's bind pair index"); 1941 } 1942 } else { 1943 for (int i = 0; i < numPackedStreams; i++) { 1944 final int packedStreamIndex = assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); 1945 if (packedStreamIndex >= totalInStreams) { 1946 throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); 1947 } 1948 } 1949 } 1950 1951 return (int) totalOutStreams; 1952 } 1953 1954 private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1955 final long packPos = readUint64(header); 1956 if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() || SIGNATURE_HEADER_SIZE + packPos < 0) { 1957 throw new IOException("packPos (" + packPos + ") is out of range"); 1958 } 1959 final long numPackStreams = readUint64(header); 1960 stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); 1961 int nid = getUnsignedByte(header); 1962 if (nid == NID.kSize) { 1963 long totalPackSizes = 0; 1964 for (int i = 0; i < stats.numberOfPackedStreams; i++) { 1965 final long packSize = readUint64(header); 1966 totalPackSizes += packSize; 1967 final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; 1968 if (packSize < 0 || endOfPackStreams > channel.size() || endOfPackStreams < packPos) { 1969 throw new IOException("packSize (" + packSize + ") is out of range"); 1970 } 1971 } 1972 nid = getUnsignedByte(header); 1973 } 1974 1975 if (nid == NID.kCRC) { 1976 final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams).cardinality(); 1977 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 1978 throw new IOException("invalid number of CRCs in PackInfo"); 1979 } 1980 nid = getUnsignedByte(header); 1981 } 
1982 1983 if (nid != NID.kEnd) { 1984 throw new IOException("Badly terminated PackInfo (" + nid + ")"); 1985 } 1986 } 1987 1988 private void sanityCheckStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 1989 int nid = getUnsignedByte(header); 1990 1991 if (nid == NID.kPackInfo) { 1992 sanityCheckPackInfo(header, stats); 1993 nid = getUnsignedByte(header); 1994 } 1995 1996 if (nid == NID.kUnpackInfo) { 1997 sanityCheckUnpackInfo(header, stats); 1998 nid = getUnsignedByte(header); 1999 } 2000 2001 if (nid == NID.kSubStreamsInfo) { 2002 sanityCheckSubStreamsInfo(header, stats); 2003 nid = getUnsignedByte(header); 2004 } 2005 2006 if (nid != NID.kEnd) { 2007 throw new IOException("Badly terminated StreamsInfo"); 2008 } 2009 } 2010 2011 private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2012 2013 int nid = getUnsignedByte(header); 2014 final List<Integer> numUnpackSubStreamsPerFolder = new LinkedList<>(); 2015 if (nid == NID.kNumUnpackStream) { 2016 for (int i = 0; i < stats.numberOfFolders; i++) { 2017 numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); 2018 } 2019 stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); 2020 nid = getUnsignedByte(header); 2021 } else { 2022 stats.numberOfUnpackSubStreams = stats.numberOfFolders; 2023 } 2024 2025 assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); 2026 2027 if (nid == NID.kSize) { 2028 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 2029 if (numUnpackSubStreams == 0) { 2030 continue; 2031 } 2032 for (int i = 0; i < numUnpackSubStreams - 1; i++) { 2033 final long size = readUint64(header); 2034 if (size < 0) { 2035 throw new IOException("negative unpackSize"); 2036 } 2037 } 2038 } 2039 nid = getUnsignedByte(header); 2040 } 2041 2042 int numDigests = 0; 2043 if 
(numUnpackSubStreamsPerFolder.isEmpty()) { 2044 numDigests = stats.folderHasCrc == null ? stats.numberOfFolders : stats.numberOfFolders - stats.folderHasCrc.cardinality(); 2045 } else { 2046 int folderIdx = 0; 2047 for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { 2048 if (numUnpackSubStreams != 1 || stats.folderHasCrc == null || !stats.folderHasCrc.get(folderIdx++)) { 2049 numDigests += numUnpackSubStreams; 2050 } 2051 } 2052 } 2053 2054 if (nid == NID.kCRC) { 2055 assertFitsIntoNonNegativeInt("numDigests", numDigests); 2056 final int missingCrcs = readAllOrBits(header, numDigests).cardinality(); 2057 if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { 2058 throw new IOException("invalid number of missing CRCs in SubStreamInfo"); 2059 } 2060 nid = getUnsignedByte(header); 2061 } 2062 2063 if (nid != NID.kEnd) { 2064 throw new IOException("Badly terminated SubStreamsInfo"); 2065 } 2066 } 2067 2068 private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { 2069 int nid = getUnsignedByte(header); 2070 if (nid != NID.kFolder) { 2071 throw new IOException("Expected kFolder, got " + nid); 2072 } 2073 final long numFolders = readUint64(header); 2074 stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); 2075 final int external = getUnsignedByte(header); 2076 if (external != 0) { 2077 throw new IOException("External unsupported"); 2078 } 2079 2080 final List<Integer> numberOfOutputStreamsPerFolder = new LinkedList<>(); 2081 for (int i = 0; i < stats.numberOfFolders; i++) { 2082 numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); 2083 } 2084 2085 final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; 2086 final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; 2087 if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { 2088 throw new IOException("archive doesn't 
contain enough packed streams"); 2089 } 2090 2091 nid = getUnsignedByte(header); 2092 if (nid != NID.kCodersUnpackSize) { 2093 throw new IOException("Expected kCodersUnpackSize, got " + nid); 2094 } 2095 2096 for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { 2097 for (int i = 0; i < numberOfOutputStreams; i++) { 2098 final long unpackSize = readUint64(header); 2099 if (unpackSize < 0) { 2100 throw new IllegalArgumentException("negative unpackSize"); 2101 } 2102 } 2103 } 2104 2105 nid = getUnsignedByte(header); 2106 if (nid == NID.kCRC) { 2107 stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); 2108 final int crcsDefined = stats.folderHasCrc.cardinality(); 2109 if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { 2110 throw new IOException("invalid number of CRCs in UnpackInfo"); 2111 } 2112 nid = getUnsignedByte(header); 2113 } 2114 2115 if (nid != NID.kEnd) { 2116 throw new IOException("Badly terminated UnpackInfo"); 2117 } 2118 } 2119 2120 /** 2121 * Skips all the entries if needed. Entries need to be skipped when: 2122 * <p> 2123 * 1. it's a random access 2. 
one of these 2 condition is meet : 2124 * </p> 2125 * <p> 2126 * 2.1 currentEntryIndex != entryIndex : this means there are some entries to be skipped(currentEntryIndex < entryIndex) or the entry has already been 2127 * read(currentEntryIndex > entryIndex) 2128 * </p> 2129 * <p> 2130 * 2.2 currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead: if the entry to be read is the current entry, but some data of it has been read before, 2131 * then we need to reopen the stream of the folder and skip all the entries before the current entries 2132 * </p> 2133 * 2134 * @param entryIndex the entry to be read 2135 * @param isInSameFolder are the entry to be read and the current entry in the same folder 2136 * @param folderIndex the index of the folder which contains the entry 2137 * @return true if there are entries actually skipped 2138 * @throws IOException there are exceptions when skipping entries 2139 * @since 1.21 2140 */ 2141 private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException { 2142 final SevenZArchiveEntry file = archive.files[entryIndex]; 2143 // if the entry to be read is the current entry, and the entry has not 2144 // been read yet, then there's nothing we need to do 2145 if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) { 2146 return false; 2147 } 2148 2149 // 1. if currentEntryIndex < entryIndex : 2150 // this means there are some entries to be skipped(currentEntryIndex < entryIndex) 2151 // 2. 
if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) : 2152 // this means the entry has already been read before, and we need to reopen the 2153 // stream of the folder and skip all the entries before the current entries 2154 int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex]; 2155 if (isInSameFolder) { 2156 if (currentEntryIndex < entryIndex) { 2157 // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped 2158 filesToSkipStartIndex = currentEntryIndex + 1; 2159 } else { 2160 // the entry is in the same folder of current entry, but it has already been read before, we need to reset 2161 // the position of the currentFolderInputStream to the beginning of folder, and then skip the files 2162 // from the start entry of the folder again 2163 reopenFolderInputStream(folderIndex, file); 2164 } 2165 } 2166 2167 for (int i = filesToSkipStartIndex; i < entryIndex; i++) { 2168 final SevenZArchiveEntry fileToSkip = archive.files[i]; 2169 InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize()); 2170 if (fileToSkip.getHasCrc()) { 2171 fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue()); 2172 } 2173 deferredBlockStreams.add(fileStreamToSkip); 2174 2175 // set the content methods as well, it equals to file.getContentMethods() because they are in same folder 2176 fileToSkip.setContentMethods(file.getContentMethods()); 2177 } 2178 return true; 2179 } 2180 2181 @Override 2182 public String toString() { 2183 return archive.toString(); 2184 } 2185 2186 private Archive tryToLocateEndHeader(final byte[] password) throws IOException { 2187 final ByteBuffer nidBuf = ByteBuffer.allocate(1); 2188 final long searchLimit = 1024L * 1024 * 1; 2189 // Main header, plus bytes that readStartHeader would read 2190 final long previousDataSize = channel.position() + 20; 2191 final 
long minPos; 2192 // Determine minimal position - can't start before current position 2193 if (channel.position() + searchLimit > channel.size()) { 2194 minPos = channel.position(); 2195 } else { 2196 minPos = channel.size() - searchLimit; 2197 } 2198 long pos = channel.size() - 1; 2199 // Loop: Try from end of archive 2200 while (pos > minPos) { 2201 pos--; 2202 channel.position(pos); 2203 nidBuf.rewind(); 2204 if (channel.read(nidBuf) < 1) { 2205 throw new EOFException(); 2206 } 2207 final int nid = nidBuf.array()[0]; 2208 // First indicator: Byte equals one of these header identifiers 2209 if (nid == NID.kEncodedHeader || nid == NID.kHeader) { 2210 try { 2211 // Try to initialize Archive structure from here 2212 final long nextHeaderOffset = pos - previousDataSize; 2213 final long nextHeaderSize = channel.size() - pos; 2214 final StartHeader startHeader = new StartHeader(nextHeaderOffset, nextHeaderSize, 0); 2215 final Archive result = initializeArchive(startHeader, password, false); 2216 // Sanity check: There must be some data... 2217 if (result.packSizes.length > 0 && result.files.length > 0) { 2218 return result; 2219 } 2220 } catch (final Exception ignore) { 2221 // Wrong guess... 2222 } 2223 } 2224 } 2225 throw new IOException("Start header corrupt and unable to guess end header"); 2226 } 2227}