001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.Arrays;
024
025import org.apache.commons.compress.compressors.CompressorInputStream;
026import org.apache.commons.compress.utils.BoundedInputStream;
027import org.apache.commons.compress.utils.ByteUtils;
028import org.apache.commons.compress.utils.ChecksumCalculatingInputStream;
029import org.apache.commons.compress.utils.CountingInputStream;
030import org.apache.commons.compress.utils.IOUtils;
031import org.apache.commons.compress.utils.InputStreamStatistics;
032
033/**
034 * CompressorInputStream for the LZ4 frame format.
035 *
036 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
037 *
038 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
039 * @since 1.14
040 * @NotThreadSafe
041 */
042public class FramedLZ4CompressorInputStream extends CompressorInputStream
043    implements InputStreamStatistics {
044
045    // used by FramedLZ4CompressorOutputStream as well
046    static final byte[] LZ4_SIGNATURE = { //NOSONAR
047        4, 0x22, 0x4d, 0x18
048    };
049    private static final byte[] SKIPPABLE_FRAME_TRAILER = {
050        0x2a, 0x4d, 0x18
051    };
052    private static final byte SKIPPABLE_FRAME_PREFIX_BYTE_MASK = 0x50;
053
054    static final int VERSION_MASK = 0xC0;
055    static final int SUPPORTED_VERSION = 0x40;
056    static final int BLOCK_INDEPENDENCE_MASK = 0x20;
057    static final int BLOCK_CHECKSUM_MASK = 0x10;
058    static final int CONTENT_SIZE_MASK = 0x08;
059    static final int CONTENT_CHECKSUM_MASK = 0x04;
060    static final int BLOCK_MAX_SIZE_MASK = 0x70;
061    static final int UNCOMPRESSED_FLAG_MASK = 0x80000000;
062
063    private static boolean isSkippableFrameSignature(final byte[] b) {
064        if ((b[0] & SKIPPABLE_FRAME_PREFIX_BYTE_MASK) != SKIPPABLE_FRAME_PREFIX_BYTE_MASK) {
065            return false;
066        }
067        for (int i = 1; i < 4; i++) {
068            if (b[i] != SKIPPABLE_FRAME_TRAILER[i - 1]) {
069                return false;
070            }
071        }
072        return true;
073    }
074
075    /**
076     * Checks if the signature matches what is expected for a .lz4 file.
077     *
078     * <p>.lz4 files start with a four byte signature.</p>
079     *
080     * @param signature the bytes to check
081     * @param length    the number of bytes to check
082     * @return          true if this is a .sz stream, false otherwise
083     */
084    public static boolean matches(final byte[] signature, final int length) {
085
086        if (length < LZ4_SIGNATURE.length) {
087            return false;
088        }
089
090        byte[] shortenedSig = signature;
091        if (signature.length > LZ4_SIGNATURE.length) {
092            shortenedSig = Arrays.copyOf(signature, LZ4_SIGNATURE.length);
093        }
094
095        return Arrays.equals(shortenedSig, LZ4_SIGNATURE);
096    }
097
098    // used in no-arg read method
099    private final byte[] oneByte = new byte[1];
100    private final ByteUtils.ByteSupplier supplier = this::readOneByte;
101
102    private final CountingInputStream inputStream;
103    private final boolean decompressConcatenated;
104    private boolean expectBlockChecksum;
105    private boolean expectBlockDependency;
106
107    private boolean expectContentSize;
108    private boolean expectContentChecksum;
109
110    private InputStream currentBlock;
111
112    private boolean endReached, inUncompressed;
113
114    // used for frame header checksum and content checksum, if present
115    private final XXHash32 contentHash = new XXHash32();
116
117    // used for block checksum, if present
118    private final XXHash32 blockHash = new XXHash32();
119
120    // only created if the frame doesn't set the block independence flag
121    private byte[] blockDependencyBuffer;
122
123    /**
124     * Creates a new input stream that decompresses streams compressed
125     * using the LZ4 frame format and stops after decompressing the
126     * first frame.
127     * @param in  the InputStream from which to read the compressed data
128     * @throws IOException if reading fails
129     */
130    public FramedLZ4CompressorInputStream(final InputStream in) throws IOException {
131        this(in, false);
132    }
133
134    /**
135     * Creates a new input stream that decompresses streams compressed
136     * using the LZ4 frame format.
137     * @param in  the InputStream from which to read the compressed data
138     * @param decompressConcatenated if true, decompress until the end
139     *          of the input; if false, stop after the first LZ4 frame
140     *          and leave the input position to point to the next byte
141     *          after the frame stream
142     * @throws IOException if reading fails
143     */
144    public FramedLZ4CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException {
145        this.inputStream = new CountingInputStream(in);
146        this.decompressConcatenated = decompressConcatenated;
147        init(true);
148    }
149
150    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
151        len = Math.min(len, blockDependencyBuffer.length);
152        if (len > 0) {
153            final int keep = blockDependencyBuffer.length - len;
154            if (keep > 0) {
155                // move last keep bytes towards the start of the buffer
156                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
157            }
158            // append new data
159            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
160        }
161    }
162
163    /** {@inheritDoc} */
164    @Override
165    public void close() throws IOException {
166        try {
167            if (currentBlock != null) {
168                currentBlock.close();
169                currentBlock = null;
170            }
171        } finally {
172            inputStream.close();
173        }
174    }
175
176    /**
177     * @since 1.17
178     */
179    @Override
180    public long getCompressedCount() {
181        return inputStream.getBytesRead();
182    }
183
184    private void init(final boolean firstFrame) throws IOException {
185        if (readSignature(firstFrame)) {
186            readFrameDescriptor();
187            nextBlock();
188        }
189    }
190
191    private void maybeFinishCurrentBlock() throws IOException {
192        if (currentBlock != null) {
193            currentBlock.close();
194            currentBlock = null;
195            if (expectBlockChecksum) {
196                verifyChecksum(blockHash, "block");
197                blockHash.reset();
198            }
199        }
200    }
201
202    private void nextBlock() throws IOException {
203        maybeFinishCurrentBlock();
204        final long len = ByteUtils.fromLittleEndian(supplier, 4);
205        final boolean uncompressed = (len & UNCOMPRESSED_FLAG_MASK) != 0;
206        final int realLen = (int) (len & ~UNCOMPRESSED_FLAG_MASK);
207        if (realLen == 0) {
208            verifyContentChecksum();
209            if (!decompressConcatenated) {
210                endReached = true;
211            } else {
212                init(false);
213            }
214            return;
215        }
216        InputStream capped = new BoundedInputStream(inputStream, realLen);
217        if (expectBlockChecksum) {
218            capped = new ChecksumCalculatingInputStream(blockHash, capped);
219        }
220        if (uncompressed) {
221            inUncompressed = true;
222            currentBlock = capped;
223        } else {
224            inUncompressed = false;
225            final BlockLZ4CompressorInputStream s = new BlockLZ4CompressorInputStream(capped);
226            if (expectBlockDependency) {
227                s.prefill(blockDependencyBuffer);
228            }
229            currentBlock = s;
230        }
231    }
232
233    /** {@inheritDoc} */
234    @Override
235    public int read() throws IOException {
236        return read(oneByte, 0, 1) == -1 ? -1 : oneByte[0] & 0xFF;
237    }
238
239    /** {@inheritDoc} */
240    @Override
241    public int read(final byte[] b, final int off, final int len) throws IOException {
242        if (len == 0) {
243            return 0;
244        }
245        if (endReached) {
246            return -1;
247        }
248        int r = readOnce(b, off, len);
249        if (r == -1) {
250            nextBlock();
251            if (!endReached) {
252                r = readOnce(b, off, len);
253            }
254        }
255        if (r != -1) {
256            if (expectBlockDependency) {
257                appendToBlockDependencyBuffer(b, off, r);
258            }
259            if (expectContentChecksum) {
260                contentHash.update(b, off, r);
261            }
262        }
263        return r;
264    }
265
266    private void readFrameDescriptor() throws IOException {
267        final int flags = readOneByte();
268        if (flags == -1) {
269            throw new IOException("Premature end of stream while reading frame flags");
270        }
271        contentHash.update(flags);
272        if ((flags & VERSION_MASK) != SUPPORTED_VERSION) {
273            throw new IOException("Unsupported version " + (flags >> 6));
274        }
275        expectBlockDependency = (flags & BLOCK_INDEPENDENCE_MASK) == 0;
276        if (expectBlockDependency) {
277            if (blockDependencyBuffer == null) {
278                blockDependencyBuffer = new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE];
279            }
280        } else {
281            blockDependencyBuffer = null;
282        }
283        expectBlockChecksum = (flags & BLOCK_CHECKSUM_MASK) != 0;
284        expectContentSize = (flags & CONTENT_SIZE_MASK) != 0;
285        expectContentChecksum = (flags & CONTENT_CHECKSUM_MASK) != 0;
286        final int bdByte = readOneByte();
287        if (bdByte == -1) { // max size is irrelevant for this implementation
288            throw new IOException("Premature end of stream while reading frame BD byte");
289        }
290        contentHash.update(bdByte);
291        if (expectContentSize) { // for now, we don't care, contains the uncompressed size
292            final byte[] contentSize = new byte[8];
293            final int skipped = IOUtils.readFully(inputStream, contentSize);
294            count(skipped);
295            if (8 != skipped) {
296                throw new IOException("Premature end of stream while reading content size");
297            }
298            contentHash.update(contentSize, 0, contentSize.length);
299        }
300        final int headerHash = readOneByte();
301        if (headerHash == -1) { // partial hash of header.
302            throw new IOException("Premature end of stream while reading frame header checksum");
303        }
304        final int expectedHash = (int) (contentHash.getValue() >> 8 & 0xff);
305        contentHash.reset();
306        if (headerHash != expectedHash) {
307            throw new IOException("Frame header checksum mismatch");
308        }
309    }
310
311    private int readOnce(final byte[] b, final int off, final int len) throws IOException {
312        if (inUncompressed) {
313            final int cnt = currentBlock.read(b, off, len);
314            count(cnt);
315            return cnt;
316        }
317        final BlockLZ4CompressorInputStream l = (BlockLZ4CompressorInputStream) currentBlock;
318        final long before = l.getBytesRead();
319        final int cnt = currentBlock.read(b, off, len);
320        count(l.getBytesRead() - before);
321        return cnt;
322    }
323
324    private int readOneByte() throws IOException {
325        final int b = inputStream.read();
326        if (b != -1) {
327            count(1);
328            return b & 0xFF;
329        }
330        return -1;
331    }
332
333    private boolean readSignature(final boolean firstFrame) throws IOException {
334        final String garbageMessage = firstFrame ? "Not a LZ4 frame stream" : "LZ4 frame stream followed by garbage";
335        final byte[] b = new byte[4];
336        int read = IOUtils.readFully(inputStream, b);
337        count(read);
338        if (0 == read && !firstFrame) {
339            // good LZ4 frame and nothing after it
340            endReached = true;
341            return false;
342        }
343        if (4 != read) {
344            throw new IOException(garbageMessage);
345        }
346
347        read = skipSkippableFrame(b);
348        if (0 == read && !firstFrame) {
349            // good LZ4 frame with only some skippable frames after it
350            endReached = true;
351            return false;
352        }
353        if (4 != read || !matches(b, 4)) {
354            throw new IOException(garbageMessage);
355        }
356        return true;
357    }
358
359    /**
360     * Skips over the contents of a skippable frame as well as
361     * skippable frames following it.
362     *
363     * <p>It then tries to read four more bytes which are supposed to
364     * hold an LZ4 signature and returns the number of bytes read
365     * while storing the bytes in the given array.</p>
366     */
367    private int skipSkippableFrame(final byte[] b) throws IOException {
368        int read = 4;
369        while (read == 4 && isSkippableFrameSignature(b)) {
370            final long len = ByteUtils.fromLittleEndian(supplier, 4);
371            if (len < 0) {
372                throw new IOException("Found illegal skippable frame with negative size");
373            }
374            final long skipped = IOUtils.skip(inputStream, len);
375            count(skipped);
376            if (len != skipped) {
377                throw new IOException("Premature end of stream while skipping frame");
378            }
379            read = IOUtils.readFully(inputStream, b);
380            count(read);
381        }
382        return read;
383    }
384
385    private void verifyChecksum(final XXHash32 hash, final String kind) throws IOException {
386        final byte[] checksum = new byte[4];
387        final int read = IOUtils.readFully(inputStream, checksum);
388        count(read);
389        if (4 != read) {
390            throw new IOException("Premature end of stream while reading " + kind + " checksum");
391        }
392        final long expectedHash = hash.getValue();
393        if (expectedHash != ByteUtils.fromLittleEndian(checksum)) {
394            throw new IOException(kind + " checksum mismatch.");
395        }
396    }
397
398    private void verifyContentChecksum() throws IOException {
399        if (expectContentChecksum) {
400            verifyChecksum(contentHash, "content");
401        }
402        contentHash.reset();
403    }
404}