001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.compress.archivers.zip; 018 019import java.io.BufferedInputStream; 020import java.io.ByteArrayInputStream; 021import java.io.Closeable; 022import java.io.EOFException; 023import java.io.File; 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.SequenceInputStream; 027import java.nio.ByteBuffer; 028import java.nio.channels.FileChannel; 029import java.nio.channels.SeekableByteChannel; 030import java.nio.file.Files; 031import java.nio.file.Path; 032import java.nio.file.StandardOpenOption; 033import java.util.Arrays; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.EnumSet; 037import java.util.Enumeration; 038import java.util.HashMap; 039import java.util.LinkedList; 040import java.util.List; 041import java.util.Map; 042import java.util.zip.Inflater; 043import java.util.zip.ZipException; 044 045import org.apache.commons.compress.archivers.EntryStreamOffsets; 046import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; 047import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; 048import 
org.apache.commons.compress.utils.BoundedArchiveInputStream; 049import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; 050import org.apache.commons.compress.utils.CharsetNames; 051import org.apache.commons.compress.utils.CountingInputStream; 052import org.apache.commons.compress.utils.IOUtils; 053import org.apache.commons.compress.utils.InputStreamStatistics; 054 055/** 056 * Replacement for {@code java.util.zip.ZipFile}. 057 * 058 * <p> 059 * This class adds support for file name encodings other than UTF-8 (which is required to work on ZIP files created by native ZIP tools) and is able to skip a 060 * preamble like the one found in self-extracting archives. Furthermore it returns instances of 061 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instead of {@code java.util.zip.ZipEntry}. 062 * </p> 063 * 064 * <p> 065 * It doesn't extend {@code java.util.zip.ZipFile} as it would have to reimplement all methods anyway. Like {@code java.util.zip.ZipFile}, it uses 066 * SeekableByteChannel under the covers and supports compressed and uncompressed entries. As of Apache Commons Compress 1.3 it also transparently supports Zip64 067 * extensions and thus individual entries and archives larger than 4 GB or with more than 65536 entries. 068 * </p> 069 * 070 * <p> 071 * The method signatures mimic the ones of {@code java.util.zip.ZipFile}, with a couple of exceptions: 072 * 073 * <ul> 074 * <li>There is no getName method.</li> 075 * <li>entries has been renamed to getEntries.</li> 076 * <li>getEntries and getEntry return {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li> 077 * <li>close is allowed to throw IOException.</li> 078 * </ul> 079 */ 080public class ZipFile implements Closeable { 081 082 /** 083 * Lock-free implementation of BoundedInputStream. The implementation uses positioned reads on the underlying archive file channel and therefore performs 084 * significantly faster in concurrent environment. 
085 */ 086 private static class BoundedFileChannelInputStream extends BoundedArchiveInputStream { 087 private final FileChannel archive; 088 089 BoundedFileChannelInputStream(final long start, final long remaining, final FileChannel archive) { 090 super(start, remaining); 091 this.archive = archive; 092 } 093 094 @Override 095 protected int read(final long pos, final ByteBuffer buf) throws IOException { 096 final int read = archive.read(buf, pos); 097 buf.flip(); 098 return read; 099 } 100 } 101 102 /** 103 * Extends ZipArchiveEntry to store the offset within the archive. 104 */ 105 private static final class Entry extends ZipArchiveEntry { 106 107 @Override 108 public boolean equals(final Object other) { 109 if (super.equals(other)) { 110 // super.equals would return false if other were not an Entry 111 final Entry otherEntry = (Entry) other; 112 return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset() 113 && super.getDiskNumberStart() == otherEntry.getDiskNumberStart(); 114 } 115 return false; 116 } 117 118 @Override 119 public int hashCode() { 120 return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32); 121 } 122 } 123 124 private static final class NameAndComment { 125 private final byte[] name; 126 private final byte[] comment; 127 128 private NameAndComment(final byte[] name, final byte[] comment) { 129 this.name = name; 130 this.comment = comment; 131 } 132 } 133 134 private static final class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics { 135 StoredStatisticsStream(final InputStream in) { 136 super(in); 137 } 138 139 @Override 140 public long getCompressedCount() { 141 return super.getBytesRead(); 142 } 143 144 @Override 145 public long getUncompressedCount() { 146 return getCompressedCount(); 147 } 148 } 149 150 private static final int HASH_SIZE = 509; 151 static final int NIBLET_MASK = 0x0f; 152 static final 
int BYTE_SHIFT = 8; 153 private static final int POS_0 = 0; 154 private static final int POS_1 = 1; 155 private static final int POS_2 = 2; 156 private static final int POS_3 = 3; 157 private static final byte[] ONE_ZERO_BYTE = new byte[1]; 158 159 /** 160 * Length of a "central directory" entry structure without file name, extra fields or comment. 161 */ 162 private static final int CFH_LEN = 163 // @formatter:off 164 /* version made by */ ZipConstants.SHORT 165 /* version needed to extract */ + ZipConstants.SHORT 166 /* general purpose bit flag */ + ZipConstants.SHORT 167 /* compression method */ + ZipConstants.SHORT 168 /* last mod file time */ + ZipConstants.SHORT 169 /* last mod file date */ + ZipConstants.SHORT 170 /* crc-32 */ + ZipConstants.WORD 171 /* compressed size */ + ZipConstants.WORD 172 /* uncompressed size */ + ZipConstants.WORD 173 /* file name length */ + ZipConstants. SHORT 174 /* extra field length */ + ZipConstants.SHORT 175 /* file comment length */ + ZipConstants.SHORT 176 /* disk number start */ + ZipConstants.SHORT 177 /* internal file attributes */ + ZipConstants.SHORT 178 /* external file attributes */ + ZipConstants.WORD 179 /* relative offset of local header */ + ZipConstants.WORD; 180 // @formatter:on 181 182 private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG); 183 184 /** 185 * Length of the "End of central directory record" - which is supposed to be the last structure of the archive - without file comment. 
186 */ 187 static final int MIN_EOCD_SIZE = 188 // @formatter:off 189 /* end of central dir signature */ ZipConstants.WORD 190 /* number of this disk */ + ZipConstants.SHORT 191 /* number of the disk with the */ 192 /* start of the central directory */ + ZipConstants.SHORT 193 /* total number of entries in */ 194 /* the central dir on this disk */ + ZipConstants.SHORT 195 /* total number of entries in */ 196 /* the central dir */ + ZipConstants.SHORT 197 /* size of the central directory */ + ZipConstants.WORD 198 /* offset of start of central */ 199 /* directory with respect to */ 200 /* the starting disk number */ + ZipConstants.WORD 201 /* ZIP file comment length */ + ZipConstants.SHORT; 202 // @formatter:on 203 204 /** 205 * Maximum length of the "End of central directory record" with a file comment. 206 */ 207 private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE 208 // @formatter:off 209 /* maximum length of ZIP file comment */ + ZipConstants.ZIP64_MAGIC_SHORT; 210 // @formatter:on 211 212 /** 213 * Offset of the field that holds the location of the length of the central directory inside the "End of central directory record" relative to the start of 214 * the "End of central directory record". 215 */ 216 private static final int CFD_LENGTH_OFFSET = 217 // @formatter:off 218 /* end of central dir signature */ ZipConstants.WORD 219 /* number of this disk */ + ZipConstants.SHORT 220 /* number of the disk with the */ 221 /* start of the central directory */ + ZipConstants.SHORT 222 /* total number of entries in */ 223 /* the central dir on this disk */ + ZipConstants.SHORT 224 /* total number of entries in */ 225 /* the central dir */ + ZipConstants.SHORT; 226 // @formatter:on 227 228 /** 229 * Offset of the field that holds the disk number of the first central directory entry inside the "End of central directory record" relative to the start of 230 * the "End of central directory record". 
231 */ 232 private static final int CFD_DISK_OFFSET = 233 // @formatter:off 234 /* end of central dir signature */ ZipConstants.WORD 235 /* number of this disk */ + ZipConstants.SHORT; 236 // @formatter:on 237 238 /** 239 * Offset of the field that holds the location of the first central directory entry inside the "End of central directory record" relative to the "number of 240 * the disk with the start of the central directory". 241 */ 242 private static final int CFD_LOCATOR_RELATIVE_OFFSET = 243 // @formatter:off 244 /* total number of entries in */ 245 /* the central dir on this disk */ + ZipConstants.SHORT 246 /* total number of entries in */ 247 /* the central dir */ + ZipConstants.SHORT 248 /* size of the central directory */ + ZipConstants.WORD; 249 // @formatter:on 250 251 /** 252 * Length of the "Zip64 end of central directory locator" - which should be right in front of the "end of central directory record" if one is present at 253 * all. 254 */ 255 private static final int ZIP64_EOCDL_LENGTH = 256 // @formatter:off 257 /* zip64 end of central dir locator sig */ ZipConstants.WORD 258 /* number of the disk with the start */ 259 /* start of the zip64 end of */ 260 /* central directory */ + ZipConstants.WORD 261 /* relative offset of the zip64 */ 262 /* end of central directory record */ + ZipConstants.DWORD 263 /* total number of disks */ + ZipConstants.WORD; 264 // @formatter:on 265 266 /** 267 * Offset of the field that holds the location of the "Zip64 end of central directory record" inside the "Zip64 end of central directory locator" relative 268 * to the start of the "Zip64 end of central directory locator". 
269 */ 270 private static final int ZIP64_EOCDL_LOCATOR_OFFSET = 271 // @formatter:off 272 /* zip64 end of central dir locator sig */ ZipConstants.WORD 273 /* number of the disk with the start */ 274 /* start of the zip64 end of */ 275 /* central directory */ + ZipConstants.WORD; 276 // @formatter:on 277 278 /** 279 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the start 280 * of the "Zip64 end of central directory record". 281 */ 282 private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET = 283 // @formatter:off 284 /* zip64 end of central dir */ 285 /* signature */ ZipConstants.WORD 286 /* size of zip64 end of central */ 287 /* directory record */ + ZipConstants.DWORD 288 /* version made by */ + ZipConstants.SHORT 289 /* version needed to extract */ + ZipConstants.SHORT 290 /* number of this disk */ + ZipConstants.WORD 291 /* number of the disk with the */ 292 /* start of the central directory */ + ZipConstants.WORD 293 /* total number of entries in the */ 294 /* central directory on this disk */ + ZipConstants.DWORD 295 /* total number of entries in the */ 296 /* central directory */ + ZipConstants.DWORD 297 /* size of the central directory */ + ZipConstants.DWORD; 298 // @formatter:on 299 300 /** 301 * Offset of the field that holds the disk number of the first central directory entry inside the "Zip64 end of central directory record" relative to the 302 * start of the "Zip64 end of central directory record". 
303 */ 304 private static final int ZIP64_EOCD_CFD_DISK_OFFSET = 305 // @formatter:off 306 /* zip64 end of central dir */ 307 /* signature */ ZipConstants.WORD 308 /* size of zip64 end of central */ 309 /* directory record */ + ZipConstants.DWORD 310 /* version made by */ + ZipConstants.SHORT 311 /* version needed to extract */ + ZipConstants.SHORT 312 /* number of this disk */ + ZipConstants.WORD; 313 // @formatter:on 314 315 /** 316 * Offset of the field that holds the location of the first central directory entry inside the "Zip64 end of central directory record" relative to the 317 * "number of the disk with the start of the central directory". 318 */ 319 private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET = 320 // @formatter:off 321 /* total number of entries in the */ 322 /* central directory on this disk */ ZipConstants.DWORD 323 /* total number of entries in the */ 324 /* central directory */ + ZipConstants.DWORD 325 /* size of the central directory */ + ZipConstants.DWORD; 326 // @formatter:on 327 328 /** 329 * Number of bytes in local file header up to the "length of file name" entry. 330 */ 331 private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = 332 // @formatter:off 333 /* local file header signature */ ZipConstants.WORD 334 /* version needed to extract */ + ZipConstants.SHORT 335 /* general purpose bit flag */ + ZipConstants.SHORT 336 /* compression method */ + ZipConstants.SHORT 337 /* last mod file time */ + ZipConstants.SHORT 338 /* last mod file date */ + ZipConstants.SHORT 339 /* crc-32 */ + ZipConstants.WORD 340 /* compressed size */ + ZipConstants.WORD 341 /* uncompressed size */ + (long) ZipConstants.WORD; 342 // @formatter:on 343 344 /** 345 * Compares two ZipArchiveEntries based on their offset within the archive. 346 * 347 * <p> 348 * Won't return any meaningful results if one of the entries isn't part of the archive at all. 
349 * </p> 350 * 351 * @since 1.1 352 */ 353 private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart) 354 .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset); 355 356 /** 357 * Closes a ZIP file quietly; throwing no IOException, does nothing on null input. 358 * 359 * @param zipFile file to close, can be null 360 */ 361 public static void closeQuietly(final ZipFile zipFile) { 362 IOUtils.closeQuietly(zipFile); 363 } 364 365 /** 366 * List of entries in the order they appear inside the central directory. 367 */ 368 private final List<ZipArchiveEntry> entries = new LinkedList<>(); 369 370 /** 371 * Maps String to list of ZipArchiveEntrys, name -> actual entries. 372 */ 373 private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE); 374 375 /** 376 * The encoding to use for file names and the file comment. 377 * 378 * <p> 379 * For a list of possible values see <a href="Supported Encodings">https://docs.oracle.com/javase/8/docs/technotes/guides/intl/encoding.doc.html</a>. 380 * Defaults to UTF-8. 381 * </p> 382 */ 383 private final String encoding; 384 385 /** 386 * The ZIP encoding to use for file names and the file comment. 387 */ 388 private final ZipEncoding zipEncoding; 389 390 /** 391 * File name of actual source. 392 */ 393 private final String archiveName; 394 395 /** 396 * The actual data source. 397 */ 398 private final SeekableByteChannel archive; 399 400 /** 401 * Whether to look for and use Unicode extra fields. 402 */ 403 private final boolean useUnicodeExtraFields; 404 405 /** 406 * Whether the file is closed. 
407 */ 408 private volatile boolean closed = true; 409 410 /** 411 * Whether the ZIP archive is a split ZIP archive 412 */ 413 private final boolean isSplitZipArchive; 414 415 // cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection) 416 private final byte[] dwordBuf = new byte[ZipConstants.DWORD]; 417 418 private final byte[] wordBuf = new byte[ZipConstants.WORD]; 419 420 private final byte[] cfhBuf = new byte[CFH_LEN]; 421 422 private final byte[] shortBuf = new byte[ZipConstants.SHORT]; 423 424 private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf); 425 426 private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf); 427 428 private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf); 429 430 private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf); 431 432 private long centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset; 433 434 private long centralDirectoryStartOffset; 435 436 private long firstLocalFileHeaderOffset; 437 438 /** 439 * Opens the given file for reading, assuming "UTF8" for file names. 440 * 441 * @param f the archive. 442 * 443 * @throws IOException if an error occurs while reading the file. 444 */ 445 public ZipFile(final File f) throws IOException { 446 this(f, CharsetNames.UTF_8); 447 } 448 449 /** 450 * Opens the given file for reading, assuming the specified encoding for file names and scanning for Unicode extra fields. 451 * 452 * @param f the archive. 453 * @param encoding the encoding to use for file names, use null for the platform's default encoding 454 * 455 * @throws IOException if an error occurs while reading the file. 456 */ 457 public ZipFile(final File f, final String encoding) throws IOException { 458 this(f.toPath(), encoding, true); 459 } 460 461 /** 462 * Opens the given file for reading, assuming the specified encoding for file names. 463 * 464 * @param f the archive. 
465 * @param encoding the encoding to use for file names, use null for the platform's default encoding 466 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 467 * 468 * @throws IOException if an error occurs while reading the file. 469 */ 470 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 471 this(f.toPath(), encoding, useUnicodeExtraFields, false); 472 } 473 474 /** 475 * Opens the given file for reading, assuming the specified encoding for file names. 476 * 477 * <p> 478 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 479 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 480 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 481 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 482 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 483 * </p> 484 * 485 * @param f the archive. 486 * @param encoding the encoding to use for file names, use null for the platform's default encoding 487 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 488 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's javadoc) 489 * 490 * @throws IOException if an error occurs while reading the file. 
491 * @since 1.19 492 */ 493 public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException { 494 this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), f.getAbsolutePath(), encoding, useUnicodeExtraFields, true, 495 ignoreLocalFileHeader); 496 } 497 498 /** 499 * Opens the given path for reading, assuming "UTF8" for file names. 500 * 501 * @param path path to the archive. 502 * @throws IOException if an error occurs while reading the file. 503 * @since 1.22 504 */ 505 public ZipFile(final Path path) throws IOException { 506 this(path, CharsetNames.UTF_8); 507 } 508 509 /** 510 * Opens the given path for reading, assuming the specified encoding for file names and scanning for Unicode extra fields. 511 * 512 * @param path path to the archive. 513 * @param encoding the encoding to use for file names, use null for the platform's default encoding 514 * @throws IOException if an error occurs while reading the file. 515 * @since 1.22 516 */ 517 public ZipFile(final Path path, final String encoding) throws IOException { 518 this(path, encoding, true); 519 } 520 521 /** 522 * Opens the given path for reading, assuming the specified encoding for file names. 523 * 524 * @param path path to the archive. 525 * @param encoding the encoding to use for file names, use null for the platform's default encoding 526 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 527 * @throws IOException if an error occurs while reading the file. 528 * @since 1.22 529 */ 530 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 531 this(path, encoding, useUnicodeExtraFields, false); 532 } 533 534 /** 535 * Opens the given path for reading, assuming the specified encoding for file names. 
536 * <p> 537 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 538 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 539 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 540 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 541 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 542 * </p> 543 * 544 * @param path path to the archive. 545 * @param encoding the encoding to use for file names, use null for the platform's default encoding 546 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 547 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's javadoc) 548 * @throws IOException if an error occurs while reading the file. 549 * @since 1.22 550 */ 551 public ZipFile(final Path path, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader) throws IOException { 552 this(Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)), path.toAbsolutePath().toString(), encoding, useUnicodeExtraFields, true, 553 ignoreLocalFileHeader); 554 } 555 556 /** 557 * Opens the given channel for reading, assuming "UTF8" for file names. 558 * 559 * <p> 560 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 561 * </p> 562 * 563 * @param channel the archive. 564 * 565 * @throws IOException if an error occurs while reading the file. 
566 * @since 1.13 567 */ 568 public ZipFile(final SeekableByteChannel channel) throws IOException { 569 this(channel, "unknown archive", CharsetNames.UTF_8, true); 570 } 571 572 /** 573 * Opens the given channel for reading, assuming the specified encoding for file names. 574 * 575 * <p> 576 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 577 * </p> 578 * 579 * @param channel the archive. 580 * @param encoding the encoding to use for file names, use null for the platform's default encoding 581 * 582 * @throws IOException if an error occurs while reading the file. 583 * @since 1.13 584 */ 585 public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException { 586 this(channel, "unknown archive", encoding, true); 587 } 588 589 /** 590 * Opens the given channel for reading, assuming the specified encoding for file names. 591 * 592 * <p> 593 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 594 * </p> 595 * 596 * @param channel the archive. 597 * @param archiveName name of the archive, used for error messages only. 598 * @param encoding the encoding to use for file names, use null for the platform's default encoding 599 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 600 * 601 * @throws IOException if an error occurs while reading the file. 602 * @since 1.13 603 */ 604 public ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields) throws IOException { 605 this(channel, archiveName, encoding, useUnicodeExtraFields, false, false); 606 } 607 608 /** 609 * Opens the given channel for reading, assuming the specified encoding for file names. 610 * 611 * <p> 612 * {@link org.apache.commons.compress.utils.SeekableInMemoryByteChannel} allows you to read from an in-memory archive. 
613 * </p> 614 * 615 * <p> 616 * By default the central directory record and all local file headers of the archive will be read immediately which may take a considerable amount of time 617 * when the archive is big. The {@code ignoreLocalFileHeader} parameter can be set to {@code true} which restricts parsing to the central directory. 618 * Unfortunately the local file header may contain information not present inside of the central directory which will not be available when the argument is 619 * set to {@code true}. This includes the content of the Unicode extra field, so setting {@code 620 * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will be ignored effectively. 621 * </p> 622 * 623 * @param channel the archive. 624 * @param archiveName name of the archive, used for error messages only. 625 * @param encoding the encoding to use for file names, use null for the platform's default encoding 626 * @param useUnicodeExtraFields whether to use InfoZIP Unicode Extra Fields (if present) to set the file names. 627 * @param ignoreLocalFileHeader whether to ignore information stored inside the local file header (see the notes in this method's javadoc) 628 * 629 * @throws IOException if an error occurs while reading the file. 
630 * @since 1.19 631 */ 632 public ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields, 633 final boolean ignoreLocalFileHeader) throws IOException { 634 this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader); 635 } 636 637 private ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields, 638 final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException { 639 isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel; 640 641 this.archiveName = archiveName; 642 this.encoding = encoding; 643 this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding); 644 this.useUnicodeExtraFields = useUnicodeExtraFields; 645 archive = channel; 646 boolean success = false; 647 try { 648 final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory(); 649 if (!ignoreLocalFileHeader) { 650 resolveLocalFileHeaderData(entriesWithoutUTF8Flag); 651 } 652 fillNameMap(); 653 success = true; 654 } catch (final IOException e) { 655 throw new IOException("Error on ZipFile " + archiveName, e); 656 } finally { 657 closed = !success; 658 if (!success && closeOnError) { 659 IOUtils.closeQuietly(archive); 660 } 661 } 662 } 663 664 /** 665 * Opens the given file for reading, assuming "UTF8". 666 * 667 * @param name name of the archive. 668 * 669 * @throws IOException if an error occurs while reading the file. 670 */ 671 public ZipFile(final String name) throws IOException { 672 this(new File(name).toPath(), CharsetNames.UTF_8); 673 } 674 675 /** 676 * Opens the given file for reading, assuming the specified encoding for file names, scanning unicode extra fields. 677 * 678 * @param name name of the archive. 
679 * @param encoding the encoding to use for file names, use null for the platform's default encoding 680 * 681 * @throws IOException if an error occurs while reading the file. 682 */ 683 public ZipFile(final String name, final String encoding) throws IOException { 684 this(new File(name).toPath(), encoding, true); 685 } 686 687 /** 688 * Whether this class is able to read the given entry. 689 * 690 * <p> 691 * May return false if it is set up to use encryption or a compression method that hasn't been implemented yet. 692 * </p> 693 * 694 * @since 1.1 695 * @param entry the entry 696 * @return whether this class is able to read the given entry. 697 */ 698 public boolean canReadEntryData(final ZipArchiveEntry entry) { 699 return ZipUtil.canHandleEntryData(entry); 700 } 701 702 /** 703 * Closes the archive. 704 * 705 * @throws IOException if an error occurs closing the archive. 706 */ 707 @Override 708 public void close() throws IOException { 709 // this flag is only written here and read in finalize() which 710 // can never be run in parallel. 711 // no synchronization needed. 712 closed = true; 713 archive.close(); 714 } 715 716 /** 717 * Transfer selected entries from this ZIP file to a given #ZipArchiveOutputStream. Compression and all other attributes will be as in this file. 718 * <p> 719 * This method transfers entries based on the central directory of the ZIP file. 
720 * </p> 721 * 722 * @param target The zipArchiveOutputStream to write the entries to 723 * @param predicate A predicate that selects which entries to write 724 * @throws IOException on error 725 */ 726 public void copyRawEntries(final ZipArchiveOutputStream target, final ZipArchiveEntryPredicate predicate) throws IOException { 727 final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder(); 728 while (src.hasMoreElements()) { 729 final ZipArchiveEntry entry = src.nextElement(); 730 if (predicate.test(entry)) { 731 target.addRawArchiveEntry(entry, getRawInputStream(entry)); 732 } 733 } 734 } 735 736 /** 737 * Creates new BoundedInputStream, according to implementation of underlying archive channel. 738 */ 739 private BoundedArchiveInputStream createBoundedInputStream(final long start, final long remaining) { 740 if (start < 0 || remaining < 0 || start + remaining < start) { 741 throw new IllegalArgumentException("Corrupted archive, stream boundaries" + " are out of range"); 742 } 743 return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining, (FileChannel) archive) 744 : new BoundedSeekableByteChannelInputStream(start, remaining, archive); 745 } 746 747 private void fillNameMap() { 748 entries.forEach(ze -> { 749 // entries are filled in populateFromCentralDirectory and 750 // never modified 751 final String name = ze.getName(); 752 final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.computeIfAbsent(name, k -> new LinkedList<>()); 753 entriesOfThatName.addLast(ze); 754 }); 755 } 756 757 /** 758 * Ensures that the close method of this ZIP file is called when there are no more references to it. 759 * 760 * @see #close() 761 */ 762 @Override 763 protected void finalize() throws Throwable { 764 try { 765 if (!closed) { 766 close(); 767 } 768 } finally { 769 super.finalize(); 770 } 771 } 772 773 /** 774 * Gets an InputStream for reading the content before the first local file header. 
775 * 776 * @return null if there is no content before the first local file header. Otherwise, returns a stream to read the content before the first local file 777 * header. 778 * @since 1.23 779 */ 780 public InputStream getContentBeforeFirstLocalFileHeader() { 781 return firstLocalFileHeaderOffset == 0 ? null : createBoundedInputStream(0, firstLocalFileHeaderOffset); 782 } 783 784 private long getDataOffset(final ZipArchiveEntry ze) throws IOException { 785 final long s = ze.getDataOffset(); 786 if (s == EntryStreamOffsets.OFFSET_UNKNOWN) { 787 setDataOffset(ze); 788 return ze.getDataOffset(); 789 } 790 return s; 791 } 792 793 /** 794 * Gets the encoding to use for file names and the file comment. 795 * 796 * @return null if using the platform's default character encoding. 797 */ 798 public String getEncoding() { 799 return encoding; 800 } 801 802 /** 803 * Gets all entries. 804 * 805 * <p> 806 * Entries will be returned in the same order they appear within the archive's central directory. 807 * </p> 808 * 809 * @return all entries as {@link ZipArchiveEntry} instances 810 */ 811 public Enumeration<ZipArchiveEntry> getEntries() { 812 return Collections.enumeration(entries); 813 } 814 815 /** 816 * Gets all named entries in the same order they appear within the archive's central directory. 817 * 818 * @param name name of the entry. 819 * @return the Iterable<ZipArchiveEntry> corresponding to the given name 820 * @since 1.6 821 */ 822 public Iterable<ZipArchiveEntry> getEntries(final String name) { 823 return nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 824 } 825 826 /** 827 * Gets all entries in physical order. 828 * 829 * <p> 830 * Entries will be returned in the same order their contents appear within the archive. 
831 * </p> 832 * 833 * @return all entries as {@link ZipArchiveEntry} instances 834 * 835 * @since 1.1 836 */ 837 public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder() { 838 final ZipArchiveEntry[] allEntries = entries.toArray(ZipArchiveEntry.EMPTY_ARRAY); 839 return Collections.enumeration(Arrays.asList(sortByOffset(allEntries))); 840 } 841 842 /** 843 * Gets all named entries in the same order their contents appear within the archive. 844 * 845 * @param name name of the entry. 846 * @return the Iterable<ZipArchiveEntry> corresponding to the given name 847 * @since 1.6 848 */ 849 public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name) { 850 final LinkedList<ZipArchiveEntry> linkedList = nameMap.getOrDefault(name, ZipArchiveEntry.EMPTY_LINKED_LIST); 851 return Arrays.asList(sortByOffset(linkedList.toArray(ZipArchiveEntry.EMPTY_ARRAY))); 852 } 853 854 /** 855 * Gets a named entry or {@code null} if no entry by that name exists. 856 * 857 * <p> 858 * If multiple entries with the same name exist the first entry in the archive's central directory by that name is returned. 859 * </p> 860 * 861 * @param name name of the entry. 862 * @return the ZipArchiveEntry corresponding to the given name - or {@code null} if not present. 863 */ 864 public ZipArchiveEntry getEntry(final String name) { 865 final LinkedList<ZipArchiveEntry> entries = nameMap.get(name); 866 return entries != null ? entries.getFirst() : null; 867 } 868 869 /** 870 * Gets the offset of the first local file header in the file. 871 * 872 * @return the length of the content before the first local file header 873 * @since 1.23 874 */ 875 public long getFirstLocalFileHeaderOffset() { 876 return firstLocalFileHeaderOffset; 877 } 878 879 /** 880 * Gets an InputStream for reading the contents of the given entry. 881 * 882 * @param entry the entry to get the stream for. 883 * @return a stream to read the entry from. The returned stream implements {@link InputStreamStatistics}. 
884 * @throws IOException if unable to create an input stream from the zipEntry. 885 */ 886 public InputStream getInputStream(final ZipArchiveEntry entry) throws IOException { 887 if (!(entry instanceof Entry)) { 888 return null; 889 } 890 // cast validity is checked just above 891 ZipUtil.checkRequestedFeatures(entry); 892 893 // doesn't get closed if the method is not supported - which 894 // should never happen because of the checkRequestedFeatures 895 // call above 896 final InputStream is = new BufferedInputStream(getRawInputStream(entry)); // NOSONAR 897 switch (ZipMethod.getMethodByCode(entry.getMethod())) { 898 case STORED: 899 return new StoredStatisticsStream(is); 900 case UNSHRINKING: 901 return new UnshrinkingInputStream(is); 902 case IMPLODING: 903 try { 904 return new ExplodingInputStream(entry.getGeneralPurposeBit().getSlidingDictionarySize(), 905 entry.getGeneralPurposeBit().getNumberOfShannonFanoTrees(), is); 906 } catch (final IllegalArgumentException ex) { 907 throw new IOException("bad IMPLODE data", ex); 908 } 909 case DEFLATED: 910 final Inflater inflater = new Inflater(true); 911 // Inflater with nowrap=true has this odd contract for a zero padding 912 // byte following the data stream; this used to be zlib's requirement 913 // and has been fixed a long time ago, but the contract persists so 914 // we comply. 
915 // https://docs.oracle.com/javase/8/docs/api/java/util/zip/Inflater.html#Inflater(boolean) 916 return new InflaterInputStreamWithStatistics(new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater) { 917 @Override 918 public void close() throws IOException { 919 try { 920 super.close(); 921 } finally { 922 inflater.end(); 923 } 924 } 925 }; 926 case BZIP2: 927 return new BZip2CompressorInputStream(is); 928 case ENHANCED_DEFLATED: 929 return new Deflate64CompressorInputStream(is); 930 case AES_ENCRYPTED: 931 case EXPANDING_LEVEL_1: 932 case EXPANDING_LEVEL_2: 933 case EXPANDING_LEVEL_3: 934 case EXPANDING_LEVEL_4: 935 case JPEG: 936 case LZMA: 937 case PKWARE_IMPLODING: 938 case PPMD: 939 case TOKENIZATION: 940 case UNKNOWN: 941 case WAVPACK: 942 case XZ: 943 default: 944 throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(entry.getMethod()), entry); 945 } 946 } 947 948 /** 949 * Gets the raw stream of the archive entry (compressed form). 950 * 951 * <p> 952 * This method does not relate to how/if we understand the payload in the stream, since we really only intend to move it on to somewhere else. 953 * </p> 954 * 955 * <p> 956 * Since version 1.22, this method will make an attempt to read the entry's data stream offset, even if the {@code ignoreLocalFileHeader} parameter was 957 * {@code true} in the constructor. An IOException can also be thrown from the body of the method if this lookup fails for some reason. 958 * </p> 959 * 960 * @param entry The entry to get the stream for 961 * @return The raw input stream containing (possibly) compressed data. 962 * @since 1.11 963 * @throws IOException if there is a problem reading data offset (added in version 1.22). 
964 */ 965 public InputStream getRawInputStream(final ZipArchiveEntry entry) throws IOException { 966 if (!(entry instanceof Entry)) { 967 return null; 968 } 969 final long start = getDataOffset(entry); 970 if (start == EntryStreamOffsets.OFFSET_UNKNOWN) { 971 return null; 972 } 973 return createBoundedInputStream(start, entry.getCompressedSize()); 974 } 975 976 /** 977 * Gets the entry's content as a String if isUnixSymlink() returns true for it, otherwise returns null. 978 * <p> 979 * This method assumes the symbolic link's file name uses the same encoding that as been specified for this ZipFile. 980 * </p> 981 * 982 * @param entry ZipArchiveEntry object that represents the symbolic link 983 * @return entry's content as a String 984 * @throws IOException problem with content's input stream 985 * @since 1.5 986 */ 987 public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException { 988 if (entry != null && entry.isUnixSymlink()) { 989 try (InputStream in = getInputStream(entry)) { 990 return zipEncoding.decode(IOUtils.toByteArray(in)); 991 } 992 } 993 return null; 994 } 995 996 /** 997 * Reads the central directory of the given archive and populates the internal tables with ZipArchiveEntry instances. 998 * 999 * <p> 1000 * The ZipArchiveEntrys will know all data that can be obtained from the central directory alone, but not the data that requires the local file header or 1001 * additional data to be read. 1002 * </p> 1003 * 1004 * @return a map of zip entries that didn't have the language encoding flag set when read. 
1005 */ 1006 private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException { 1007 final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>(); 1008 1009 positionAtCentralDirectory(); 1010 centralDirectoryStartOffset = archive.position(); 1011 1012 wordBbuf.rewind(); 1013 IOUtils.readFully(archive, wordBbuf); 1014 long sig = ZipLong.getValue(wordBuf); 1015 1016 if (sig != CFH_SIG && startsWithLocalFileHeader()) { 1017 throw new IOException("Central directory is empty, can't expand" + " corrupt archive."); 1018 } 1019 1020 while (sig == CFH_SIG) { 1021 readCentralDirectoryEntry(noUTF8Flag); 1022 wordBbuf.rewind(); 1023 IOUtils.readFully(archive, wordBbuf); 1024 sig = ZipLong.getValue(wordBuf); 1025 } 1026 return noUTF8Flag; 1027 } 1028 1029 /** 1030 * Searches for either the "Zip64 end of central directory locator" or the "End of central dir record", parses it and positions the 1031 * stream at the first central directory record. 1032 */ 1033 private void positionAtCentralDirectory() throws IOException { 1034 positionAtEndOfCentralDirectoryRecord(); 1035 boolean found = false; 1036 final boolean searchedForZip64EOCD = archive.position() > ZIP64_EOCDL_LENGTH; 1037 if (searchedForZip64EOCD) { 1038 archive.position(archive.position() - ZIP64_EOCDL_LENGTH); 1039 wordBbuf.rewind(); 1040 IOUtils.readFully(archive, wordBbuf); 1041 found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, wordBuf); 1042 } 1043 if (!found) { 1044 // not a ZIP64 archive 1045 if (searchedForZip64EOCD) { 1046 skipBytes(ZIP64_EOCDL_LENGTH - ZipConstants.WORD); 1047 } 1048 positionAtCentralDirectory32(); 1049 } else { 1050 positionAtCentralDirectory64(); 1051 } 1052 } 1053 1054 /** 1055 * Parses the "End of central dir record" and positions the stream at the first central directory record. 1056 * 1057 * Expects stream to be positioned at the beginning of the "End of central dir record". 
1058 */ 1059 private void positionAtCentralDirectory32() throws IOException { 1060 final long endOfCentralDirectoryRecordOffset = archive.position(); 1061 if (isSplitZipArchive) { 1062 skipBytes(CFD_DISK_OFFSET); 1063 shortBbuf.rewind(); 1064 IOUtils.readFully(archive, shortBbuf); 1065 centralDirectoryStartDiskNumber = ZipShort.getValue(shortBuf); 1066 1067 skipBytes(CFD_LOCATOR_RELATIVE_OFFSET); 1068 1069 wordBbuf.rewind(); 1070 IOUtils.readFully(archive, wordBbuf); 1071 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1072 ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset); 1073 } else { 1074 skipBytes(CFD_LENGTH_OFFSET); 1075 wordBbuf.rewind(); 1076 IOUtils.readFully(archive, wordBbuf); 1077 final long centralDirectoryLength = ZipLong.getValue(wordBuf); 1078 1079 wordBbuf.rewind(); 1080 IOUtils.readFully(archive, wordBbuf); 1081 centralDirectoryStartDiskNumber = 0; 1082 centralDirectoryStartRelativeOffset = ZipLong.getValue(wordBuf); 1083 1084 firstLocalFileHeaderOffset = Long.max(endOfCentralDirectoryRecordOffset - centralDirectoryLength - centralDirectoryStartRelativeOffset, 0L); 1085 archive.position(centralDirectoryStartRelativeOffset + firstLocalFileHeaderOffset); 1086 } 1087 } 1088 1089 /** 1090 * Parses the "Zip64 end of central directory locator", finds the "Zip64 end of central directory record" using the parsed information, 1091 * parses that and positions the stream at the first central directory record. 1092 * 1093 * Expects stream to be positioned right behind the "Zip64 end of central directory locator"'s signature. 
*/
    private void positionAtCentralDirectory64() throws IOException {
        // Step 1: use the locator to jump to the "Zip64 end of central directory record".
        if (isSplitZipArchive) {
            // number of the disk holding the record ...
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);

            // ... and the record's offset relative to that disk
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
        } else {
            skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
        }

        // Step 2: the position the locator pointed to must carry the record's signature.
        wordBbuf.rewind();
        IOUtils.readFully(archive, wordBbuf);
        if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)) {
            throw new ZipException("Archive's ZIP64 end of central directory locator is corrupt.");
        }

        // Step 3: read where the central directory starts from the record and go there.
        if (isSplitZipArchive) {
            skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - ZipConstants.WORD /* signature has already been read */);
            wordBbuf.rewind();
            IOUtils.readFully(archive, wordBbuf);
            centralDirectoryStartDiskNumber = ZipLong.getValue(wordBuf);

            skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);

            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            ((ZipSplitReadOnlySeekableByteChannel) archive).position(centralDirectoryStartDiskNumber, centralDirectoryStartRelativeOffset);
        } else {
            skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - ZipConstants.WORD /* signature has already been read */);
            dwordBbuf.rewind();
            IOUtils.readFully(archive, dwordBbuf);
            centralDirectoryStartDiskNumber = 0;
            centralDirectoryStartRelativeOffset = ZipEightByteInteger.getLongValue(dwordBuf);
            // NOTE(review): unlike positionAtCentralDirectory32, firstLocalFileHeaderOffset is neither computed nor added here - confirm this is intended
            archive.position(centralDirectoryStartRelativeOffset);
        }
    }

    /**
     * Searches for the "End of central dir record" and positions the stream at its start.
     *
     * @throws ZipException if the record's signature cannot be found
     * @throws IOException  if reading the archive fails
     */
    private void positionAtEndOfCentralDirectoryRecord() throws IOException {
        final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
        if (!found) {
            throw new ZipException("Archive is not a ZIP archive");
        }
    }

    /**
     * Reads an individual entry of the central directory, creates an ZipArchiveEntry from it and adds it to the global maps.
     *
     * <p>
     * The fixed size part of the header is read into {@code cfhBbuf} in one go and decoded field by field while {@code off} tracks the position inside
     * {@code cfhBuf}. File name, extra data and comment are read from the archive afterwards, in that order.
     * </p>
     *
     * @param noUTF8Flag map used to collect entries that don't have their UTF-8 flag set and whose name will be set by data read from the local file header
     *                   later. The current entry may be added to this map.
     * @throws EOFException if the archive ends before the file name, extra data or comment has been read completely
     * @throws ZipException if the extra data cannot be parsed
     * @throws IOException  if reading fails or a sanity check on the parsed values does not hold
     */
    private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException {
        // presumably cfhBuf is the array backing cfhBbuf - confirm against the field declarations
        cfhBbuf.rewind();
        IOUtils.readFully(archive, cfhBbuf);
        int off = 0;
        final Entry ze = new Entry();

        // version made by
        final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        ze.setVersionMadeBy(versionMadeBy);
        // >> binds tighter than &, i.e. this is (versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK
        ze.setPlatform(versionMadeBy >> BYTE_SHIFT & NIBLET_MASK);

        ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT; // version required

        // general purpose bit flag, it also decides which encoding is used for name and comment
        final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
        final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
        final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.ZIP_ENCODING_UTF_8 : zipEncoding;
        if (hasUTF8Flag) {
            ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
        }
        ze.setGeneralPurposeBit(gpFlag);
        // the same two bytes once more, this time stored unparsed
        ze.setRawFlag(ZipShort.getValue(cfhBuf, off));

        off += ZipConstants.SHORT;

        // compression method
        // noinspection MagicConstant
        ze.setMethod(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT;

        // last modification time, converted from DOS format
        final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
        ze.setTime(time);
        off += ZipConstants.WORD;

        ze.setCrc(ZipLong.getValue(cfhBuf, off));
        off += ZipConstants.WORD;

        // NOTE(review): the "< 0" guards below only trigger if ZipLong.getValue / ZipShort.getValue can return negative values - confirm
        // compressed size
        long size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative compressed size");
        }
        ze.setCompressedSize(size);
        off += ZipConstants.WORD;

        // uncompressed size
        size = ZipLong.getValue(cfhBuf, off);
        if (size < 0) {
            throw new IOException("broken archive, entry with negative size");
        }
        ze.setSize(size);
        off += ZipConstants.WORD;

        // lengths of the three variable sized parts that follow the fixed size header
        final int fileNameLen = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        if (fileNameLen < 0) {
            throw new IOException("broken archive, entry with negative fileNameLen");
        }

        final int extraLen = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        if (extraLen < 0) {
            throw new IOException("broken archive, entry with negative extraLen");
        }

        final int commentLen = ZipShort.getValue(cfhBuf, off);
        off += ZipConstants.SHORT;
        if (commentLen < 0) {
            throw new IOException("broken archive, entry with negative commentLen");
        }

        ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT;

        ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
        off += ZipConstants.SHORT;

        ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
        off += ZipConstants.WORD;

        // variable part 1: the file name; a short read means the archive is truncated
        final byte[] fileName = IOUtils.readRange(archive, fileNameLen);
        if (fileName.length < fileNameLen) {
            throw new EOFException();
        }
        ze.setName(entryEncoding.decode(fileName), fileName);

        // LFH offset,
        // shifted by the length of any content in front of the first local file header
        ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off) + firstLocalFileHeaderOffset);
        // data offset will be filled later
        entries.add(ze);

        // variable part 2: the central directory extra data
        final byte[] cdExtraData = IOUtils.readRange(archive, extraLen);
        if (cdExtraData.length < extraLen) {
            throw new EOFException();
        }
        try {
            ze.setCentralDirectoryExtra(cdExtraData);
        } catch (final RuntimeException ex) {
            // report unparseable extra data as a broken archive, keeping the original cause
            final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName());
            z.initCause(ex);
            throw z;
        }

        // the extra data may override sizes, offset and disk number, so validate only afterwards
        setSizesAndOffsetFromZip64Extra(ze);
        sanityCheckLFHOffset(ze);

        // variable part 3: the entry comment
        final byte[] comment = IOUtils.readRange(archive, commentLen);
        if (comment.length < commentLen) {
            throw new EOFException();
        }
        ze.setComment(entryEncoding.decode(comment));

        // keep the raw bytes so that name and comment can be replaced from the extra fields later on
        if (!hasUTF8Flag && useUnicodeExtraFields) {
            noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
        }

        ze.setStreamContiguous(true);
    }

    /**
     * Walks through all recorded entries and adds the data available from the local file header.
     *
     * <p>
     * Also records the offsets for the data to read from the entries.
1277 * </p> 1278 */ 1279 private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException { 1280 for (final ZipArchiveEntry zipArchiveEntry : entries) { 1281 // entries are filled in populateFromCentralDirectory and never modified 1282 final Entry ze = (Entry) zipArchiveEntry; 1283 final int[] lens = setDataOffset(ze); 1284 final int fileNameLen = lens[0]; 1285 final int extraFieldLen = lens[1]; 1286 skipBytes(fileNameLen); 1287 final byte[] localExtraData = IOUtils.readRange(archive, extraFieldLen); 1288 if (localExtraData.length < extraFieldLen) { 1289 throw new EOFException(); 1290 } 1291 try { 1292 ze.setExtra(localExtraData); 1293 } catch (final RuntimeException ex) { 1294 final ZipException z = new ZipException("Invalid extra data in entry " + ze.getName()); 1295 z.initCause(ex); 1296 throw z; 1297 } 1298 1299 if (entriesWithoutUTF8Flag.containsKey(ze)) { 1300 final NameAndComment nc = entriesWithoutUTF8Flag.get(ze); 1301 ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment); 1302 } 1303 } 1304 } 1305 1306 private void sanityCheckLFHOffset(final ZipArchiveEntry entry) throws IOException { 1307 if (entry.getDiskNumberStart() < 0) { 1308 throw new IOException("broken archive, entry with negative disk number"); 1309 } 1310 if (entry.getLocalHeaderOffset() < 0) { 1311 throw new IOException("broken archive, entry with negative local file header offset"); 1312 } 1313 if (isSplitZipArchive) { 1314 if (entry.getDiskNumberStart() > centralDirectoryStartDiskNumber) { 1315 throw new IOException("local file header for " + entry.getName() + " starts on a later disk than central directory"); 1316 } 1317 if (entry.getDiskNumberStart() == centralDirectoryStartDiskNumber && entry.getLocalHeaderOffset() > centralDirectoryStartRelativeOffset) { 1318 throw new IOException("local file header for " + entry.getName() + " starts after central directory"); 1319 } 1320 } else if (entry.getLocalHeaderOffset() > 
centralDirectoryStartOffset) { 1321 throw new IOException("local file header for " + entry.getName() + " starts after central directory"); 1322 } 1323 } 1324 1325 private int[] setDataOffset(final ZipArchiveEntry entry) throws IOException { 1326 long offset = entry.getLocalHeaderOffset(); 1327 if (isSplitZipArchive) { 1328 ((ZipSplitReadOnlySeekableByteChannel) archive).position(entry.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1329 // the offset should be updated to the global offset 1330 offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH; 1331 } else { 1332 archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH); 1333 } 1334 wordBbuf.rewind(); 1335 IOUtils.readFully(archive, wordBbuf); 1336 wordBbuf.flip(); 1337 wordBbuf.get(shortBuf); 1338 final int fileNameLen = ZipShort.getValue(shortBuf); 1339 wordBbuf.get(shortBuf); 1340 final int extraFieldLen = ZipShort.getValue(shortBuf); 1341 entry.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + ZipConstants.SHORT + ZipConstants.SHORT + fileNameLen + extraFieldLen); 1342 if (entry.getDataOffset() + entry.getCompressedSize() > centralDirectoryStartOffset) { 1343 throw new IOException("data for " + entry.getName() + " overlaps with central directory."); 1344 } 1345 return new int[] { fileNameLen, extraFieldLen }; 1346 } 1347 1348 /** 1349 * If the entry holds a Zip64 extended information extra field, read sizes from there if the entry's sizes are set to 0xFFFFFFFFF, do the same for the 1350 * offset of the local file header. 1351 * 1352 * <p> 1353 * Ensures the Zip64 extra either knows both compressed and uncompressed size or neither of both as the internal logic in ExtraFieldUtils forces the field 1354 * to create local header data even if they are never used - and here a field with only one size would be invalid. 
1355 * </p> 1356 */ 1357 private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry entry) throws IOException { 1358 final ZipExtraField extra = entry.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID); 1359 if (extra != null && !(extra instanceof Zip64ExtendedInformationExtraField)) { 1360 throw new ZipException("archive contains unparseable zip64 extra field"); 1361 } 1362 final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) extra; 1363 if (z64 != null) { 1364 final boolean hasUncompressedSize = entry.getSize() == ZipConstants.ZIP64_MAGIC; 1365 final boolean hasCompressedSize = entry.getCompressedSize() == ZipConstants.ZIP64_MAGIC; 1366 final boolean hasRelativeHeaderOffset = entry.getLocalHeaderOffset() == ZipConstants.ZIP64_MAGIC; 1367 final boolean hasDiskStart = entry.getDiskNumberStart() == ZipConstants.ZIP64_MAGIC_SHORT; 1368 z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart); 1369 1370 if (hasUncompressedSize) { 1371 final long size = z64.getSize().getLongValue(); 1372 if (size < 0) { 1373 throw new IOException("broken archive, entry with negative size"); 1374 } 1375 entry.setSize(size); 1376 } else if (hasCompressedSize) { 1377 z64.setSize(new ZipEightByteInteger(entry.getSize())); 1378 } 1379 1380 if (hasCompressedSize) { 1381 final long size = z64.getCompressedSize().getLongValue(); 1382 if (size < 0) { 1383 throw new IOException("broken archive, entry with negative compressed size"); 1384 } 1385 entry.setCompressedSize(size); 1386 } else if (hasUncompressedSize) { 1387 z64.setCompressedSize(new ZipEightByteInteger(entry.getCompressedSize())); 1388 } 1389 1390 if (hasRelativeHeaderOffset) { 1391 entry.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue()); 1392 } 1393 1394 if (hasDiskStart) { 1395 entry.setDiskNumberStart(z64.getDiskStartNumber().getValue()); 1396 } 1397 } 1398 } 1399 1400 /** 1401 * Skips the given number of bytes or 
throws an EOFException if skipping failed. 1402 */ 1403 private void skipBytes(final int count) throws IOException { 1404 final long currentPosition = archive.position(); 1405 final long newPosition = currentPosition + count; 1406 if (newPosition > archive.size()) { 1407 throw new EOFException(); 1408 } 1409 archive.position(newPosition); 1410 } 1411 1412 /** 1413 * Sorts entries in place by offset. 1414 * 1415 * @param allEntries entries to sort 1416 * @return the given entries, sorted. 1417 */ 1418 private ZipArchiveEntry[] sortByOffset(final ZipArchiveEntry[] allEntries) { 1419 Arrays.sort(allEntries, offsetComparator); 1420 return allEntries; 1421 } 1422 1423 /** 1424 * Checks whether the archive starts with an LFH. If it doesn't, it may be an empty archive. 1425 */ 1426 private boolean startsWithLocalFileHeader() throws IOException { 1427 archive.position(firstLocalFileHeaderOffset); 1428 wordBbuf.rewind(); 1429 IOUtils.readFully(archive, wordBbuf); 1430 return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG); 1431 } 1432 1433 /** 1434 * Searches the archive backwards from minDistance to maxDistance for the given signature, positions the RandomaccessFile right at the signature if it has 1435 * been found. 
*/
    private boolean tryToLocateSignature(final long minDistanceFromEnd, final long maxDistanceFromEnd, final byte[] sig) throws IOException {
        final long archiveSize = archive.size();
        // never search in front of the start of the archive
        final long lowestCandidate = Math.max(0L, archiveSize - maxDistanceFromEnd);
        // a negative first candidate is below lowestCandidate, so the loop is not entered at all
        for (long candidate = archiveSize - minDistanceFromEnd; candidate >= lowestCandidate; candidate--) {
            archive.position(candidate);
            try {
                wordBbuf.rewind();
                IOUtils.readFully(archive, wordBbuf);
                wordBbuf.flip();
            } catch (final EOFException ex) { // NOSONAR
                // fewer than four bytes left at this position: give up
                return false;
            }
            // compare byte by byte, stopping at the first mismatch
            if (wordBbuf.get() == sig[POS_0]
                    && wordBbuf.get() == sig[POS_1]
                    && wordBbuf.get() == sig[POS_2]
                    && wordBbuf.get() == sig[POS_3]) {
                // go back to the first byte of the signature
                archive.position(candidate);
                return true;
            }
        }
        return false;
    }
}