001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.z; 020 021import java.io.IOException; 022import java.io.InputStream; 023import java.nio.ByteOrder; 024 025import org.apache.commons.compress.compressors.lzw.LZWInputStream; 026 027/** 028 * Input stream that decompresses .Z files. 029 * @NotThreadSafe 030 * @since 1.7 031 */ 032public class ZCompressorInputStream extends LZWInputStream { 033 private static final int MAGIC_1 = 0x1f; 034 private static final int MAGIC_2 = 0x9d; 035 private static final int BLOCK_MODE_MASK = 0x80; 036 private static final int MAX_CODE_SIZE_MASK = 0x1f; 037 /** 038 * Checks if the signature matches what is expected for a Unix compress file. 039 * 040 * @param signature 041 * the bytes to check 042 * @param length 043 * the number of bytes to check 044 * @return true, if this stream is a Unix compress compressed 045 * stream, false otherwise 046 * 047 * @since 1.9 048 */ 049 public static boolean matches(final byte[] signature, final int length) { 050 return length > 3 && signature[0] == MAGIC_1 && signature[1] == (byte) MAGIC_2; 051 } 052 private final boolean blockMode; 053 private final int maxCodeSize; 054 055 private long totalCodesRead; 056 057 public ZCompressorInputStream(final InputStream inputStream) throws IOException { 058 this(inputStream, -1); 059 } 060 061 public ZCompressorInputStream(final InputStream inputStream, final int memoryLimitInKb) 062 throws IOException { 063 super(inputStream, ByteOrder.LITTLE_ENDIAN); 064 final int firstByte = (int) in.readBits(8); 065 final int secondByte = (int) in.readBits(8); 066 final int thirdByte = (int) in.readBits(8); 067 if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) { 068 throw new IOException("Input is not in .Z format"); 069 } 070 blockMode = (thirdByte & BLOCK_MODE_MASK) != 0; 071 maxCodeSize = thirdByte & MAX_CODE_SIZE_MASK; 072 if (blockMode) { 073 setClearCode(DEFAULT_CODE_SIZE); 074 } 075 initializeTables(maxCodeSize, memoryLimitInKb); 076 clearEntries(); 077 } 078 079 /** 080 * {@inheritDoc} 081 * <p><strong>This method is only protected for technical reasons 082 * and is not part of Commons Compress' published API. It may 083 * change or disappear without warning.</strong></p> 084 */ 085 @Override 086 protected int addEntry(final int previousCode, final byte character) throws IOException { 087 final int maxTableSize = 1 << getCodeSize(); 088 final int r = addEntry(previousCode, character, maxTableSize); 089 if (getTableSize() == maxTableSize && getCodeSize() < maxCodeSize) { 090 reAlignReading(); 091 incrementCodeSize(); 092 } 093 return r; 094 } 095 096 private void clearEntries() { 097 setTableSize((1 << 8) + (blockMode ? 1 : 0)); 098 } 099 100 /** 101 * {@inheritDoc} 102 * <p><strong>This method is only protected for technical reasons 103 * and is not part of Commons Compress' published API. It may 104 * change or disappear without warning.</strong></p> 105 */ 106 @Override 107 protected int decompressNextSymbol() throws IOException { 108 // 109 // table entry table entry 110 // _____________ _____ 111 // table entry / \ / \ 112 // ____________/ \ \ 113 // / / \ / \ \ 114 // +---+---+---+---+---+---+---+---+---+---+ 115 // | . | . | . | . | . | . | . | . | . | . | 116 // +---+---+---+---+---+---+---+---+---+---+ 117 // |<--------->|<------------->|<----->|<->| 118 // symbol symbol symbol symbol 119 // 120 final int code = readNextCode(); 121 if (code < 0) { 122 return -1; 123 } 124 if (blockMode && code == getClearCode()) { 125 clearEntries(); 126 reAlignReading(); 127 resetCodeSize(); 128 resetPreviousCode(); 129 return 0; 130 } 131 boolean addedUnfinishedEntry = false; 132 if (code == getTableSize()) { 133 addRepeatOfPreviousCode(); 134 addedUnfinishedEntry = true; 135 } else if (code > getTableSize()) { 136 throw new IOException(String.format("Invalid %d bit code 0x%x", getCodeSize(), code)); 137 } 138 return expandCodeToOutputStack(code, addedUnfinishedEntry); 139 } 140 141 /** 142 * {@inheritDoc} 143 * <p><strong>This method is only protected for technical reasons 144 * and is not part of Commons Compress' published API. It may 145 * change or disappear without warning.</strong></p> 146 */ 147 @Override 148 protected int readNextCode() throws IOException { 149 final int code = super.readNextCode(); 150 if (code >= 0) { 151 ++totalCodesRead; 152 } 153 return code; 154 } 155 156 private void reAlignReading() throws IOException { 157 // "compress" works in multiples of 8 symbols, each codeBits bits long. 158 // When codeBits changes, the remaining unused symbols in the current 159 // group of 8 are still written out, in the old codeSize, 160 // as garbage values (usually zeroes) that need to be skipped. 161 long codeReadsToThrowAway = 8 - totalCodesRead % 8; 162 if (codeReadsToThrowAway == 8) { 163 codeReadsToThrowAway = 0; 164 } 165 for (long i = 0; i < codeReadsToThrowAway; i++) { 166 readNextCode(); 167 } 168 in.clearBitCache(); 169 } 170 171}