/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.ar;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.regex.Pattern;

import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;

/**
 * Implements the "ar" archive format as an input stream.
 *
 * @NotThreadSafe
 */
public class ArArchiveInputStream extends ArchiveInputStream<ArArchiveEntry> {

    // offsets and length of meta data parts
    private static final int NAME_OFFSET = 0;
    private static final int NAME_LEN = 16;
    private static final int LAST_MODIFIED_OFFSET = NAME_LEN;

    private static final int LAST_MODIFIED_LEN = 12;

    private static final int USER_ID_OFFSET = LAST_MODIFIED_OFFSET + LAST_MODIFIED_LEN;

    private static final int USER_ID_LEN = 6;

    private static final int GROUP_ID_OFFSET = USER_ID_OFFSET + USER_ID_LEN;
    private static final int GROUP_ID_LEN = 6;
    private static final int FILE_MODE_OFFSET = GROUP_ID_OFFSET + GROUP_ID_LEN;
    private static final int FILE_MODE_LEN = 8;
    private static final int LENGTH_OFFSET = FILE_MODE_OFFSET + FILE_MODE_LEN;
    private static final int LENGTH_LEN = 10;
    static final String BSD_LONGNAME_PREFIX = "#1/";
    private static final int BSD_LONGNAME_PREFIX_LEN =
        BSD_LONGNAME_PREFIX.length();
    private static final Pattern BSD_LONGNAME_PATTERN = Pattern.compile("^" + BSD_LONGNAME_PREFIX + "\\d+");
    private static final String GNU_STRING_TABLE_NAME = "//";
    private static final Pattern GNU_LONGNAME_PATTERN = Pattern.compile("^/\\d+");

    /**
     * Does the name look like it is a long name (or a name containing
     * spaces) as encoded by BSD ar?
     *
     * <p>From the FreeBSD ar(5) man page:</p>
     * <pre>
     * BSD   In the BSD variant, names that are shorter than 16
     *       characters and without embedded spaces are stored
     *       directly in this field.  If a name has an embedded
     *       space, or if it is longer than 16 characters, then
     *       the string "#1/" followed by the decimal represen-
     *       tation of the length of the file name is placed in
     *       this field. The actual file name is stored immedi-
     *       ately after the archive header.  The content of the
     *       archive member follows the file name.  The ar_size
     *       field of the header (see below) will then hold the
     *       sum of the size of the file name and the size of
     *       the member.
     * </pre>
     *
     * @since 1.3
     */
    private static boolean isBSDLongName(final String name) {
        return name != null && BSD_LONGNAME_PATTERN.matcher(name).matches();
    }

    /**
     * Does the name look like it is a long name (or a name containing
     * spaces) as encoded by SVR4/GNU ar?
     *
     * @see #isGNUStringTable
     */
    private static boolean isGNULongName(final String name) {
        return name != null && GNU_LONGNAME_PATTERN.matcher(name).matches();
    }

    /**
     * Is this the name of the "Archive String Table" as used by
     * SVR4/GNU to store long file names?
     *
     * <p>GNU ar stores multiple extended file names in the data section
     * of a file with the name "//", this record is referred to by
     * future headers.</p>
     *
     * <p>A header references an extended file name by storing a "/"
     * followed by a decimal offset to the start of the file name in
     * the extended file name data section.</p>
     *
     * <p>The format of the "//" file itself is simply a list of the
     * long file names, each separated by one or more LF
     * characters. Note that the decimal offsets are number of
     * characters, not line or string number within the "//" file.</p>
     */
    private static boolean isGNUStringTable(final String name) {
        return GNU_STRING_TABLE_NAME.equals(name);
    }

    /**
     * Checks if the signature matches ASCII "!&lt;arch&gt;" followed by a single LF
     * control character
     *
     * @param signature
     *            the bytes to check
     * @param length
     *            the number of bytes to check
     * @return true, if this stream is an Ar archive stream, false otherwise
     */
    public static boolean matches(final byte[] signature, final int length) {
        // '!' '<' 'a' 'r' 'c' 'h' '>' LF
        return length >= 8 && signature[0] == 0x21 &&
            signature[1] == 0x3c && signature[2] == 0x61 &&
            signature[3] == 0x72 && signature[4] == 0x63 &&
            signature[5] == 0x68 && signature[6] == 0x3e &&
            signature[7] == 0x0a;
    }

    /**
     * Parses a decimal or other-radix int from an already trimmed header field,
     * reporting malformed content as an {@link IOException} rather than an
     * unchecked {@link NumberFormatException}.
     */
    private static int parseInt(final String value, final int base) throws IOException {
        try {
            return Integer.parseInt(value, base);
        } catch (final NumberFormatException e) {
            throw new IOException("Unable to parse int from string value: " + value, e);
        }
    }

    /**
     * Parses a decimal long from an already trimmed header field, reporting
     * malformed content as an {@link IOException} rather than an unchecked
     * {@link NumberFormatException}.
     */
    private static long parseLong(final String value) throws IOException {
        try {
            return Long.parseLong(value);
        } catch (final NumberFormatException e) {
            throw new IOException("Unable to parse long from string value: " + value, e);
        }
    }

    private final InputStream input;

    // number of bytes consumed from the underlying stream so far, see trackReadBytes
    private long offset;

    private boolean closed;

    /*
     * If getNextEntry has been called, the entry metadata is stored in
     * currentEntry.
     */
    private ArArchiveEntry currentEntry;

    // Storage area for extra long names (GNU ar)
    private byte[] namebuffer;

    /*
     * The offset where the current entry started. -1 if no entry has been
     * called
     */
    private long entryOffset = -1;

    // cached buffer for meta data - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
    private final byte[] metaData =
        new byte[NAME_LEN + LAST_MODIFIED_LEN + USER_ID_LEN + GROUP_ID_LEN + FILE_MODE_LEN + LENGTH_LEN];

    /**
     * Constructs an Ar input stream with the referenced stream
     *
     * @param inputStream
     *            the ar input stream
     */
    public ArArchiveInputStream(final InputStream inputStream) {
        this.input = inputStream;
    }

    /** Reads a decimal int field; a blank field is an error. */
    private int asInt(final byte[] byteArray, final int offset, final int len) throws IOException {
        return asInt(byteArray, offset, len, 10, false);
    }

    /** Reads a decimal int field, optionally treating a blank field as zero. */
    private int asInt(final byte[] byteArray, final int offset, final int len, final boolean treatBlankAsZero)
        throws IOException {
        return asInt(byteArray, offset, len, 10, treatBlankAsZero);
    }

    /** Reads an int field in the given radix; a blank field is an error. */
    private int asInt(final byte[] byteArray, final int offset, final int len, final int base) throws IOException {
        return asInt(byteArray, offset, len, base, false);
    }

    /**
     * Reads an int from an ASCII header field.
     *
     * @param byteArray the header bytes
     * @param offset start of the field
     * @param len length of the field
     * @param base radix of the textual representation
     * @param treatBlankAsZero whether a field that is empty after trimming yields 0
     * @return the parsed value
     * @throws IOException if the field does not hold a valid number
     */
    private int asInt(final byte[] byteArray, final int offset, final int len, final int base,
        final boolean treatBlankAsZero) throws IOException {
        final String string = ArchiveUtils.toAsciiString(byteArray, offset, len).trim();
        if (string.isEmpty() && treatBlankAsZero) {
            return 0;
        }
        return parseInt(string, base);
    }

    /**
     * Reads a decimal long from an ASCII header field.
     *
     * @throws IOException if the field does not hold a valid number
     */
    private long asLong(final byte[] byteArray, final int offset, final int len) throws IOException {
        return parseLong(ArchiveUtils.toAsciiString(byteArray, offset, len).trim());
    }

    /**
     * Closes the wrapped stream the first time this method is called and
     * forgets the current entry.
     *
     * @throws IOException if closing the wrapped stream fails
     * @see java.io.InputStream#close()
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            closed = true;
            input.close();
        }
        currentEntry = null;
    }

    /**
     * Reads the real name from the current stream assuming the very
     * first bytes to be read are the real file name.
     *
     * @param bsdLongName the header name field, "#1/" followed by the name length
     * @return the file name read from the stream
     * @throws IOException if the length is not a valid number or the stream ends early
     * @see #isBSDLongName
     *
     * @since 1.3
     */
    private String getBSDLongName(final String bsdLongName) throws IOException {
        final int nameLen = parseInt(bsdLongName.substring(BSD_LONGNAME_PREFIX_LEN), 10);
        final byte[] name = IOUtils.readRange(input, nameLen);
        final int read = name.length;
        trackReadBytes(read);
        if (read != nameLen) {
            throw new EOFException();
        }
        return ArchiveUtils.toAsciiString(name);
    }

    /**
     * Gets an extended name from the GNU extended name buffer.
     *
     * @param offset pointer to entry within the buffer
     * @return the extended file name; without trailing "/" if present.
     * @throws IOException if name not found or buffer not set up
     */
    private String getExtendedName(final int offset) throws IOException {
        if (namebuffer == null) {
            throw new IOException("Cannot process GNU long file name as no // record was found");
        }
        if (offset < 0) {
            throw new IOException("Failed to read entry: " + offset);
        }
        for (int i = offset; i < namebuffer.length; i++) {
            // names are terminated by LF ('\012') or NUL
            if (namebuffer[i] == '\012' || namebuffer[i] == 0) {
                int end = i;
                // Only strip a '/' that is part of this name. Looking at end - 1 when
                // end == offset would either read before the array (offset 0) or pick up
                // the previous name's terminator and produce a negative length.
                if (end > offset && namebuffer[end - 1] == '/') {
                    end--; // drop trailing /
                }
                return ArchiveUtils.toAsciiString(namebuffer, offset, end - offset);
            }
        }
        throw new IOException("Failed to read entry: " + offset);
    }

    /**
     * Returns the next AR entry in this stream.
     *
     * @return the next AR entry, or {@code null} if the end of the archive has been reached.
     * @throws IOException
     *             if the entry could not be read
     * @deprecated Use {@link #getNextEntry()}.
     */
    @Deprecated
    public ArArchiveEntry getNextArEntry() throws IOException {
        if (currentEntry != null) {
            // skip whatever has not been read of the previous entry
            final long entryEnd = entryOffset + currentEntry.getLength();
            final long skipped = IOUtils.skip(input, entryEnd - offset);
            trackReadBytes(skipped);
            currentEntry = null;
        }

        if (offset == 0) {
            // nothing consumed yet: the global archive header comes first
            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.HEADER);
            final byte[] realized = IOUtils.readRange(input, expected.length);
            final int read = realized.length;
            trackReadBytes(read);
            if (read != expected.length) {
                throw new IOException("Failed to read header. Occurred at byte: " + getBytesRead());
            }
            if (!Arrays.equals(expected, realized)) {
                throw new IOException("Invalid header " + ArchiveUtils.toAsciiString(realized));
            }
        }

        if (offset % 2 != 0) {
            // entry headers start on even offsets, consume the padding byte
            if (input.read() < 0) {
                // hit eof
                return null;
            }
            trackReadBytes(1);
        }

        {
            final int read = IOUtils.readFully(input, metaData);
            trackReadBytes(read);
            if (read == 0) {
                return null;
            }
            if (read < metaData.length) {
                throw new IOException("Truncated ar archive");
            }
        }

        {
            final byte[] expected = ArchiveUtils.toAsciiBytes(ArArchiveEntry.TRAILER);
            final byte[] realized = IOUtils.readRange(input, expected.length);
            final int read = realized.length;
            trackReadBytes(read);
            if (read != expected.length) {
                throw new IOException("Failed to read entry trailer. Occurred at byte: " + getBytesRead());
            }
            if (!Arrays.equals(expected, realized)) {
                throw new IOException("Invalid entry trailer. not read the content? Occurred at byte: " + getBytesRead());
            }
        }

        entryOffset = offset;

        // GNU ar uses a '/' to mark the end of the file name; this allows for the use of spaces without the use of
        // an extended file name.

        // entry name is stored as ASCII string
        String temp = ArchiveUtils.toAsciiString(metaData, NAME_OFFSET, NAME_LEN).trim();
        if (isGNUStringTable(temp)) { // GNU extended file names entry
            currentEntry = readGNUStringTable(metaData, LENGTH_OFFSET, LENGTH_LEN);
            // the string table is not exposed as an entry, move on to the next header
            return getNextArEntry();
        }

        long len = asLong(metaData, LENGTH_OFFSET, LENGTH_LEN);
        if (temp.endsWith("/")) { // GNU terminator
            temp = temp.substring(0, temp.length() - 1);
        } else if (isGNULongName(temp)) {
            final int off = parseInt(temp.substring(1), 10); // get the offset
            temp = getExtendedName(off); // convert to the long name
        } else if (isBSDLongName(temp)) {
            temp = getBSDLongName(temp);
            // entry length contained the length of the file name in
            // addition to the real length of the entry.
            // assume file name was ASCII, there is no "standard" otherwise
            final int nameLen = temp.length();
            len -= nameLen;
            entryOffset += nameLen;
        }

        if (len < 0) {
            throw new IOException("broken archive, entry with negative size");
        }

        currentEntry = new ArArchiveEntry(temp, len,
            asInt(metaData, USER_ID_OFFSET, USER_ID_LEN, true),
            asInt(metaData, GROUP_ID_OFFSET, GROUP_ID_LEN, true),
            asInt(metaData, FILE_MODE_OFFSET, FILE_MODE_LEN, 8),
            asLong(metaData, LAST_MODIFIED_OFFSET, LAST_MODIFIED_LEN));
        return currentEntry;
    }

    /**
     * Returns the next AR entry in this stream by delegating to
     * {@link #getNextArEntry()}.
     *
     * @return the next AR entry, or {@code null} if the end of the archive has been reached.
     * @throws IOException if the entry could not be read
     * @see org.apache.commons.compress.archivers.ArchiveInputStream#getNextEntry()
     */
    @Override
    public ArArchiveEntry getNextEntry() throws IOException {
        return getNextArEntry();
    }

    /**
     * Reads bytes of the current entry, never reading past the entry's end.
     *
     * @param b the buffer to read into
     * @param off start offset in {@code b}
     * @param len maximum number of bytes to read
     * @return the number of bytes read, 0 if {@code len} is 0, or -1 if
     *         {@code len} is negative or the entry has been read completely
     * @throws IOException if reading from the wrapped stream fails
     * @throws IllegalStateException if there is no current entry
     * @see java.io.InputStream#read(byte[], int, int)
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        if (len == 0) {
            return 0;
        }
        if (currentEntry == null) {
            throw new IllegalStateException("No current ar entry");
        }
        final long entryEnd = entryOffset + currentEntry.getLength();
        if (len < 0 || offset >= entryEnd) {
            return -1;
        }
        final int toRead = (int) Math.min(len, entryEnd - offset);
        final int ret = this.input.read(b, off, toRead);
        trackReadBytes(ret);
        return ret;
    }

    /**
     * Reads the GNU archive String Table.
     *
     * @param length the header bytes holding the table's length field
     * @param offset start of the length field
     * @param len length of the length field
     * @return an entry named "//" with the table's length
     * @throws IOException if the length is invalid or the table cannot be read completely
     * @see #isGNUStringTable
     */
    private ArArchiveEntry readGNUStringTable(final byte[] length, final int offset, final int len) throws IOException {
        final int bufflen = asInt(length, offset, len); // Assume length will fit in an int
        if (bufflen < 0) {
            // a negative size can only come from a corrupt header
            throw new IOException("broken archive, // record with negative size: " + bufflen);
        }
        namebuffer = IOUtils.readRange(input, bufflen);
        final int read = namebuffer.length;
        trackReadBytes(read);
        if (read != bufflen) {
            throw new IOException("Failed to read complete // record: expected="
                                  + bufflen + " read=" + read);
        }
        return new ArArchiveEntry(GNU_STRING_TABLE_NAME, bufflen);
    }

    /**
     * Passes the number of bytes read to {@code count} and, if it is
     * positive, advances this stream's own offset by the same amount.
     */
    private void trackReadBytes(final long read) {
        count(read);
        if (read > 0) {
            offset += read;
        }
    }
}