/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.dump;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;

import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.IOUtils;

/**
 * The DumpArchiveInputStream reads a UNIX dump archive as an InputStream.
 * Methods are provided to position at each successive entry in
 * the archive, and then read each entry as a normal input stream
 * using read().
 *
 * There doesn't seem to be any documentation hinting at the encoding of
 * string values. Given that the main purpose of dump/restore is backing
 * up a system, it seems very likely that the format uses the current
 * default encoding of the system.
 *
 * @NotThreadSafe
 */
public class DumpArchiveInputStream extends ArchiveInputStream<DumpArchiveEntry> {

    /**
     * Looks at the first few bytes of the file to decide if it's a dump
     * archive. With 32 bytes we can look at the magic value; with a full
     * 1k we can verify the checksum.
     *
     * @param buffer data to match
     * @param length length of data
     * @return whether the buffer seems to contain dump data
     */
    public static boolean matches(final byte[] buffer, final int length) {
        // do we have enough of the header?
        if (length < 32) {
            return false;
        }

        // this is the best test
        if (length >= DumpArchiveConstants.TP_SIZE) {
            return DumpArchiveUtil.verify(buffer);
        }

        // this will work in a pinch.
        return DumpArchiveConstants.NFS_MAGIC == DumpArchiveUtil.convert32(buffer, 24);
    }

    private final DumpArchiveSummary summary;
    private DumpArchiveEntry active;
    private boolean isClosed;
    private boolean hasHitEOF;
    private long entrySize;
    private long entryOffset;
    private int readIdx;
    private final byte[] readBuf = new byte[DumpArchiveConstants.TP_SIZE];
    private byte[] blockBuffer;
    private int recordOffset;
    private long filepos;

    protected TapeInputStream raw;
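    /*
     * Path reconstruction bookkeeping. A dump archive stores directory
     * contents as raw dirent records (see readDirectoryEntry), while a
     * file's segment header identifies it only by inode number, so full
     * paths have to be rebuilt incrementally as directory data is seen.
     * The three structures below cooperate to do that: "names" resolves
     * inodes to dirents, "pending" parks entries whose paths still have
     * gaps, and "queue" releases entries once their paths are complete
     * (see getPath).
     */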
    // map of ino -> dirent entry. We can use this to reconstruct full paths.
    private final Map<Integer, Dirent> names = new HashMap<>();

    // map of ino -> (directory) entry when we're missing one or more elements in the path.
    private final Map<Integer, DumpArchiveEntry> pending = new HashMap<>();

    // queue of (directory) entries where we now have the full path.
    private final Queue<DumpArchiveEntry> queue;

    /**
     * The encoding to use for file names and labels.
     */
    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    /**
     * Constructor using the platform's default encoding for file names.
     *
     * @param is stream to read from
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is) throws ArchiveException {
        this(is, null);
    }

    /**
     * Constructor.
     *
     * @param is stream to read from
     * @param encoding the encoding to use for file names, or null
     * for the platform's default encoding
     * @since 1.6
     * @throws ArchiveException on error
     */
    public DumpArchiveInputStream(final InputStream is, final String encoding)
        throws ArchiveException {
        this.raw = new TapeInputStream(is);
        this.hasHitEOF = false;
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);

        try {
            // read the header and verify that it's a dump archive.
            final byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new UnrecognizedFormatException();
            }

            // get summary information
            summary = new DumpArchiveSummary(headerBytes, this.zipEncoding);

            // reset the buffer with the actual block size.
            raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());

            // allocate our read buffer.
            blockBuffer = new byte[4 * DumpArchiveConstants.TP_SIZE];

            // skip past the CLRI and BITS segments since we don't handle them yet.
            readCLRI();
            readBITS();
        } catch (final IOException ex) {
            throw new ArchiveException(ex.getMessage(), ex);
        }

        // put in a dummy record for the root node.
        final Dirent root = new Dirent(2, 2, 4, ".");
        names.put(2, root);

        // use a priority queue to ensure that parent directories are
        // released first.
        queue = new PriorityQueue<>(10,
            (p, q) -> {
                if (p.getOriginalName() == null || q.getOriginalName() == null) {
                    return Integer.MAX_VALUE;
                }

                return p.getOriginalName().compareTo(q.getOriginalName());
            });
    }

    /**
     * Closes the stream for this entry.
     */
    @Override
    public void close() throws IOException {
        if (!isClosed) {
            isClosed = true;
            raw.close();
        }
    }

    @Override
    public long getBytesRead() {
        return raw.getBytesRead();
    }

    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesRead();
    }

    /**
     * Reads the next entry.
     *
     * @return the next entry
     * @throws IOException on error
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public DumpArchiveEntry getNextDumpEntry() throws IOException {
        return getNextEntry();
    }
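    /**
     * Returns the next entry in the archive, or null once the end-of-volume
     * marker is reached. Any records and ADDR segments left over from the
     * prior entry are skipped first. Directory entries are read eagerly so
     * that full paths can be reconstructed; an entry whose path still has
     * gaps is parked in "pending" and returned later, via "queue", once the
     * missing directory data has been seen.
     */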
    @Override
    public DumpArchiveEntry getNextEntry() throws IOException {
        DumpArchiveEntry entry = null;
        String path = null;

        // is there anything in the queue?
        if (!queue.isEmpty()) {
            return queue.remove();
        }

        while (entry == null) {
            if (hasHitEOF) {
                return null;
            }

            // skip any remaining records in this segment for the prior file.
            // we might still have holes... it's easiest to do it
            // block by block. We may want to revisit this if
            // the unnecessary decompression time adds up.
            while (readIdx < active.getHeaderCount()) {
                if (!active.isSparseRecord(readIdx++)
                    && raw.skip(DumpArchiveConstants.TP_SIZE) == -1) {
                    throw new EOFException();
                }
            }

            readIdx = 0;
            filepos = raw.getBytesRead();

            byte[] headerBytes = raw.readRecord();

            if (!DumpArchiveUtil.verify(headerBytes)) {
                throw new InvalidFormatException();
            }

            active = DumpArchiveEntry.parse(headerBytes);

            // skip any remaining segments for the prior file.
            while (DumpArchiveConstants.SEGMENT_TYPE.ADDR == active.getHeaderType()) {
                if (raw.skip((long) DumpArchiveConstants.TP_SIZE
                    * (active.getHeaderCount()
                    - active.getHeaderHoles())) == -1) {
                    throw new EOFException();
                }

                filepos = raw.getBytesRead();
                headerBytes = raw.readRecord();

                if (!DumpArchiveUtil.verify(headerBytes)) {
                    throw new InvalidFormatException();
                }

                active = DumpArchiveEntry.parse(headerBytes);
            }

            // check if this is an end-of-volume marker.
            if (DumpArchiveConstants.SEGMENT_TYPE.END == active.getHeaderType()) {
                hasHitEOF = true;

                return null;
            }

            entry = active;

            if (entry.isDirectory()) {
                readDirectoryEntry(active);

                // now we create an empty InputStream.
                entryOffset = 0;
                entrySize = 0;
                readIdx = active.getHeaderCount();
            } else {
                entryOffset = 0;
                entrySize = active.getEntrySize();
                readIdx = 0;
            }

            recordOffset = readBuf.length;

            path = getPath(entry);

            if (path == null) {
                entry = null;
            }
        }

        entry.setName(path);
        entry.setSimpleName(names.get(entry.getIno()).getName());
        entry.setOffset(filepos);

        return entry;
    }

    /**
     * Gets the full path for the specified archive entry, or null if there's
     * a gap. For example, the dirents 2 -> (".", parent 2),
     * 5 -> ("b", parent 2) and 12 -> ("c", parent 5) resolve ino 12 to the
     * path "./b/c".
     *
     * @param entry the archive entry to resolve
     * @return full path for the specified archive entry, or null if there's a gap.
     */
    private String getPath(final DumpArchiveEntry entry) {
        // build the stack of elements. It's possible that we're still
        // missing an intermediate value; if so we defer this entry and
        // move on to the next one.
        final Stack<String> elements = new Stack<>();
        Dirent dirent = null;

        for (int i = entry.getIno();; i = dirent.getParentIno()) {
            if (!names.containsKey(i)) {
                elements.clear();
                break;
            }

            dirent = names.get(i);
            elements.push(dirent.getName());

            if (dirent.getIno() == dirent.getParentIno()) {
                break;
            }
        }

        // if an element is missing, defer the work and read the next entry.
        if (elements.isEmpty()) {
            pending.put(entry.getIno(), entry);

            return null;
        }

        // generate the full path from the stack of elements.
        final StringBuilder sb = new StringBuilder(elements.pop());

        while (!elements.isEmpty()) {
            sb.append('/');
            sb.append(elements.pop());
        }

        return sb.toString();
    }

    /**
     * Returns the archive summary information.
     *
     * @return the summary
     */
    public DumpArchiveSummary getSummary() {
        return summary;
    }
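    /*
     * Implementation note: file data is stored as a sequence of TP_SIZE-byte
     * tape records, and each segment header carries a per-record sparse map.
     * read() consults that map via active.isSparseRecord() and synthesizes
     * zero-filled records for holes instead of pulling them from the stream.
     */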
    /**
     * Reads bytes from the current dump archive entry.
     *
     * This method is aware of the boundaries of the current
     * entry in the archive and will deal with them as if they
     * were this stream's start and EOF.
     *
     * @param buf The buffer into which to place bytes read.
     * @param off The offset at which to place bytes read.
     * @param len The number of bytes to read.
     * @return The number of bytes read, or -1 at EOF.
     * @throws IOException on error
     */
    @Override
    public int read(final byte[] buf, int off, int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        int totalRead = 0;

        if (hasHitEOF || isClosed || entryOffset >= entrySize) {
            return -1;
        }

        if (active == null) {
            throw new IllegalStateException("No current dump entry");
        }

        if (len + entryOffset > entrySize) {
            len = (int) (entrySize - entryOffset);
        }

        while (len > 0) {
            final int sz = Math.min(len, readBuf.length - recordOffset);

            // copy any data we have
            if (recordOffset + sz <= readBuf.length) {
                System.arraycopy(readBuf, recordOffset, buf, off, sz);
                totalRead += sz;
                recordOffset += sz;
                len -= sz;
                off += sz;
            }

            // load the next block if necessary.
            if (len > 0) {
                // a header's sparse map covers at most 512 records, so
                // fetch the next header once we've exhausted it.
                if (readIdx >= 512) {
                    final byte[] headerBytes = raw.readRecord();

                    if (!DumpArchiveUtil.verify(headerBytes)) {
                        throw new InvalidFormatException();
                    }

                    active = DumpArchiveEntry.parse(headerBytes);
                    readIdx = 0;
                }

                if (!active.isSparseRecord(readIdx++)) {
                    final int r = raw.read(readBuf, 0, readBuf.length);
                    if (r != readBuf.length) {
                        throw new EOFException();
                    }
                } else {
                    // sparse records are holes: synthesize a zero-filled record.
                    Arrays.fill(readBuf, (byte) 0);
                }

                recordOffset = 0;
            }
        }

        entryOffset += totalRead;

        return totalRead;
    }

    /**
     * Reads the BITS segment.
     */
    private void readBITS() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.BITS != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }

    /**
     * Reads the CLRI (deleted inode) segment.
     */
    private void readCLRI() throws IOException {
        final byte[] buffer = raw.readRecord();

        if (!DumpArchiveUtil.verify(buffer)) {
            throw new InvalidFormatException();
        }

        active = DumpArchiveEntry.parse(buffer);

        if (DumpArchiveConstants.SEGMENT_TYPE.CLRI != active.getHeaderType()) {
            throw new InvalidFormatException();
        }

        // we don't do anything with this yet.
        if (raw.skip((long) DumpArchiveConstants.TP_SIZE * active.getHeaderCount())
            == -1) {
            throw new EOFException();
        }
        readIdx = active.getHeaderCount();
    }
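    /*
     * Directory data layout, as parsed below: each dirent record is a
     * 4-byte inode number, a 2-byte record length, a 1-byte file type,
     * a 1-byte name length, and then the name bytes themselves.
     */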
    /**
     * Reads a directory entry.
     */
    private void readDirectoryEntry(DumpArchiveEntry entry)
        throws IOException {
        long size = entry.getEntrySize();
        boolean first = true;

        while (first ||
            DumpArchiveConstants.SEGMENT_TYPE.ADDR == entry.getHeaderType()) {
            // read the header that we just peeked at.
            if (!first) {
                raw.readRecord();
            }

            if (!names.containsKey(entry.getIno()) &&
                DumpArchiveConstants.SEGMENT_TYPE.INODE == entry.getHeaderType()) {
                pending.put(entry.getIno(), entry);
            }

            final int datalen = DumpArchiveConstants.TP_SIZE * entry.getHeaderCount();

            if (blockBuffer.length < datalen) {
                blockBuffer = IOUtils.readRange(raw, datalen);
                if (blockBuffer.length != datalen) {
                    throw new EOFException();
                }
            } else if (raw.read(blockBuffer, 0, datalen) != datalen) {
                throw new EOFException();
            }

            int reclen = 0;

            for (int i = 0; i < datalen - 8 && i < size - 8;
                i += reclen) {
                final int ino = DumpArchiveUtil.convert32(blockBuffer, i);
                reclen = DumpArchiveUtil.convert16(blockBuffer, i + 4);

                final byte type = blockBuffer[i + 6];

                final String name = DumpArchiveUtil.decode(zipEncoding, blockBuffer, i + 8, blockBuffer[i + 7]);

                if (".".equals(name) || "..".equals(name)) {
                    // do nothing...
                    continue;
                }

                final Dirent d = new Dirent(ino, entry.getIno(), type, name);

                names.put(ino, d);

                // check whether this allows us to fill in anything on the pending list.
                pending.forEach((k, v) -> {
                    final String path = getPath(v);

                    if (path != null) {
                        v.setName(path);
                        v.setSimpleName(names.get(k).getName());
                        queue.add(v);
                    }
                });

                // remove anything that we found. (We can't do it earlier
                // because of concurrent modification exceptions.)
                queue.forEach(e -> pending.remove(e.getIno()));
            }

            final byte[] peekBytes = raw.peek();

            if (!DumpArchiveUtil.verify(peekBytes)) {
                throw new InvalidFormatException();
            }

            entry = DumpArchiveEntry.parse(peekBytes);
            first = false;
            size -= DumpArchiveConstants.TP_SIZE;
        }
    }
}
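/*
 * Usage sketch, assuming a dump archive on disk (the file name below is
 * hypothetical; the loop pattern mirrors the other ArchiveInputStream
 * implementations in Commons Compress, and IOUtils.toByteArray reads the
 * remainder of the current entry):
 *
 *   try (InputStream in = Files.newInputStream(Paths.get("backup.dump"));
 *        DumpArchiveInputStream dump = new DumpArchiveInputStream(in)) {
 *       DumpArchiveEntry entry;
 *       while ((entry = dump.getNextEntry()) != null) {
 *           if (!entry.isDirectory()) {
 *               final byte[] content = IOUtils.toByteArray(dump);
 *               // process entry.getName() and content...
 *           }
 *       }
 *   }
 */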