/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.lz4;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the LZ4 frame format.
 *
 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
 *
 * <p>Incoming data is collected in a buffer of one block size. Each
 * time the buffer is full (and once more when the stream is finished)
 * its content is compressed as a single LZ4 block and written to the
 * wrapped stream.</p>
 *
 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
 * @since 1.14
 * @NotThreadSafe
 */
public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {

    /**
     * The block sizes supported by the format.
     */
    public enum BlockSize {
        /** Block size of 64K */
        K64(64 * 1024, 4),
        /** Block size of 256K */
        K256(256 * 1024, 5),
        /** Block size of 1M */
        M1(1024 * 1024, 6),
        /** Block size of 4M */
        M4(4096 * 1024, 7);

        /** Size of a block in bytes. */
        private final int size;
        /** Value written (shifted) into the BD byte of the frame descriptor. */
        private final int index;

        BlockSize(final int size, final int index) {
            this.size = size;
            this.index = index;
        }

        int getIndex() {
            return index;
        }

        int getSize() {
            return size;
        }
    }

    /**
     * Parameters of the LZ4 frame format.
     */
    public static class Parameters {
        /**
         * The default parameters of 4M block size, enabled content
         * checksum, disabled block checksums and independent blocks.
         *
         * <p>This matches the defaults of the lz4 command line utility.</p>
         */
        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);

        private final BlockSize blockSize;
        private final boolean withContentChecksum;
        private final boolean withBlockChecksum;
        private final boolean withBlockDependency;

        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         */
        public Parameters(final BlockSize blockSize) {
            this(blockSize, true, false, false);
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
            final boolean withBlockDependency) {
            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency,
                 BlockLZ4CompressorOutputStream.createParameterBuilder().build());
        }

        /**
         * Sets up custom parameters for the LZ4 stream.
         * @param blockSize the size of a single block.
         * @param withContentChecksum whether to write a content checksum
         * @param withBlockChecksum whether to write a block checksum.
         * Note that block checksums are not supported by the lz4
         * command line utility
         * @param withBlockDependency whether a block may depend on
         * the content of a previous block. Enabling this may improve
         * compression ratio but makes it impossible to decompress the
         * output in parallel.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
            final boolean withBlockDependency,
            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this.blockSize = blockSize;
            this.withContentChecksum = withContentChecksum;
            this.withBlockChecksum = withBlockChecksum;
            this.withBlockDependency = withBlockDependency;
            this.lz77params = lz77params;
        }

        /**
         * Sets up a custom block size for the LZ4 stream but
         * otherwise uses the defaults of enabled content checksum,
         * disabled block checksums and independent blocks.
         * @param blockSize the size of a single block.
         * @param lz77params parameters used to fine-tune compression,
         * in particular to balance compression ratio vs compression
         * speed.
         */
        public Parameters(final BlockSize blockSize,
            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
            this(blockSize, true, false, false, lz77params);
        }

        @Override
        public String toString() {
            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
        }
    }

    /** Four zero bytes written after the last block. */
    private static final byte[] END_MARK = new byte[4];

    // used in one-arg write method
    private final byte[] oneByte = new byte[1];
    // collects uncompressed data until a full block is available
    private final byte[] blockData;
    private final OutputStream out;
    private final Parameters params;

    private boolean finished;

    // used for frame header checksum and content checksum, if requested
    private final XXHash32 contentHash = new XXHash32();
    // used for block checksum, if requested
    private final XXHash32 blockHash;

    // only created if the config requires block dependency
    private final byte[] blockDependencyBuffer;

    // number of valid bytes at the END of blockDependencyBuffer
    private int collectedBlockDependencyBytes;
    // number of bytes currently collected in blockData
    private int currentIndex;

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the default block size of 4MB.
     * @param out the OutputStream to which to write the compressed data
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
        this(out, Parameters.DEFAULT);
    }

    /**
     * Constructs a new output stream that compresses data using the
     * LZ4 frame format using the given parameters.
     *
     * <p>The frame signature and the frame descriptor are written to
     * {@code out} immediately.</p>
     *
     * @param out the OutputStream to which to write the compressed data
     * @param params the parameters to use
     * @throws IOException if writing the signature fails
     */
    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
        this.params = params;
        blockData = new byte[params.blockSize.getSize()];
        this.out = out;
        blockHash = params.withBlockChecksum ? new XXHash32() : null;
        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
        writeFrameDescriptor();
        blockDependencyBuffer = params.withBlockDependency
            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
            : null;
    }

    /**
     * Appends the most recent uncompressed data to the block dependency
     * buffer, discarding the oldest bytes so that the buffer always ends
     * with the newest data.
     *
     * @param b array holding the uncompressed data
     * @param off offset of the first byte of the data inside {@code b}
     * @param len number of bytes of data
     */
    private void appendToBlockDependencyBuffer(final byte[] b, int off, int len) {
        final int capacity = blockDependencyBuffer.length;
        if (len > capacity) {
            // Only the trailing bytes immediately precede the next block, so
            // when more data than fits is offered keep its tail, not its head.
            off += len - capacity;
            len = capacity;
        }
        if (len > 0) {
            final int keep = capacity - len;
            if (keep > 0) {
                // move last keep bytes towards the start of the buffer
                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
            }
            // append new data
            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len, capacity);
        }
    }

    /**
     * Finishes the frame and closes the underlying stream. The underlying
     * stream is closed even if finishing the frame fails.
     *
     * @throws IOException if finishing the frame or closing the
     * underlying stream fails
     */
    @Override
    public void close() throws IOException {
        try {
            finish();
        } finally {
            out.close();
        }
    }

    /**
     * Compresses all remaining data and writes it to the stream,
     * doesn't close the underlying stream.
     *
     * <p>Calling this method more than once has no further effect.</p>
     *
     * @throws IOException if an error occurs
     */
    public void finish() throws IOException {
        if (!finished) {
            flushBlock();
            writeTrailer();
            finished = true;
        }
    }

    /**
     * Compresses the data collected so far and writes it as a single
     * block, does nothing if no data has been collected.
     *
     * @throws IOException if compressing or writing fails
     */
    private void flushBlock() throws IOException {
        if (currentIndex == 0) {
            return;
        }
        final boolean withBlockDependency = params.withBlockDependency;
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
            if (withBlockDependency) {
                // the valid bytes live at the end of the dependency buffer
                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
                    collectedBlockDependencyBytes);
            }
            o.write(blockData, 0, currentIndex);
        }
        if (withBlockDependency) {
            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
        }
        final byte[] b = baos.toByteArray();
        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
            // store the block uncompressed and mark it as such in the size field
            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
                4);
            out.write(blockData, 0, currentIndex);
            if (params.withBlockChecksum) {
                blockHash.update(blockData, 0, currentIndex);
            }
        } else {
            ByteUtils.toLittleEndian(out, b.length, 4);
            out.write(b);
            if (params.withBlockChecksum) {
                blockHash.update(b, 0, b.length);
            }
        }
        if (params.withBlockChecksum) {
            // the checksum covers the bytes of the block as they have been written
            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
            blockHash.reset();
        }
        currentIndex = 0;
    }

    /**
     * Adds the given data to the current block, flushing a block to the
     * underlying stream each time the block buffer becomes full.
     *
     * @param data array holding the data to write
     * @param off offset of the first byte to write
     * @param len number of bytes to write
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final byte[] data, int off, int len) throws IOException {
        if (params.withContentChecksum) {
            contentHash.update(data, off, len);
        }
        int blockDataRemaining = blockData.length - currentIndex;
        while (len > 0) {
            final int copyLen = Math.min(len, blockDataRemaining);
            System.arraycopy(data, off, blockData, currentIndex, copyLen);
            off += copyLen;
            blockDataRemaining -= copyLen;
            len -= copyLen;
            currentIndex += copyLen;
            if (blockDataRemaining == 0) {
                flushBlock();
                blockDataRemaining = blockData.length;
            }
        }
    }

    /**
     * Writes a single byte, the lowest eight bits of {@code b}.
     *
     * @param b the byte to write
     * @throws IOException if writing a block fails
     */
    @Override
    public void write(final int b) throws IOException {
        oneByte[0] = (byte) (b & 0xff);
        write(oneByte);
    }

    /**
     * Writes the frame descriptor: the flag byte, the BD byte holding the
     * block size index and a one byte header checksum.
     *
     * @throws IOException if writing fails
     */
    private void writeFrameDescriptor() throws IOException {
        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
        if (!params.withBlockDependency) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
        }
        if (params.withContentChecksum) {
            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
        }
        if (params.withBlockChecksum) {
            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
        }
        out.write(flags);
        contentHash.update(flags);
        final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
        out.write(bd);
        contentHash.update(bd);
        // header checksum is the second byte of the hash of the descriptor bytes
        out.write((int) (contentHash.getValue() >> 8 & 0xff));
        // the same hash instance is reused for the content checksum
        contentHash.reset();
    }

    /**
     * Writes the end mark and, if requested, the content checksum.
     *
     * @throws IOException if writing fails
     */
    private void writeTrailer() throws IOException {
        out.write(END_MARK);
        if (params.withContentChecksum) {
            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
        }
    }

}