001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019 020/* 021 * This package is based on the work done by Keiron Liddle, Aftex Software 022 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his 023 * great code. 024 */ 025package org.apache.commons.compress.compressors.bzip2; 026 027import java.io.IOException; 028import java.io.InputStream; 029import java.nio.ByteOrder; 030import java.util.Arrays; 031 032import org.apache.commons.compress.compressors.CompressorInputStream; 033import org.apache.commons.compress.utils.BitInputStream; 034import org.apache.commons.compress.utils.CloseShieldFilterInputStream; 035import org.apache.commons.compress.utils.InputStreamStatistics; 036 037/** 038 * An input stream that decompresses from the BZip2 format to be read as any other stream. 039 * 040 * @NotThreadSafe 041 */ 042public class BZip2CompressorInputStream extends CompressorInputStream 043 implements BZip2Constants, InputStreamStatistics { 044 045 private static final class Data { 046 047 // (with blockSize 900k) 048 final boolean[] inUse = new boolean[256]; // 256 byte 049 050 final byte[] seqToUnseq = new byte[256]; // 256 byte 051 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 052 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 053 054 /** 055 * Freq table collected to save a pass over the data during 056 * decompression. 057 */ 058 final int[] unzftab = new int[256]; // 1024 byte 059 060 final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 061 final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 062 final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte 063 final int[] minLens = new int[N_GROUPS]; // 24 byte 064 065 final int[] cftab = new int[257]; // 1028 byte 066 final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte 067 final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096 068 // byte 069 final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte 070 // --------------- 071 // 60798 byte 072 073 int[] tt; // 3600000 byte 074 final byte[] ll8; // 900000 byte 075 076 // --------------- 077 // 4560782 byte 078 // =============== 079 080 Data(final int blockSize100k) { 081 this.ll8 = new byte[blockSize100k * BZip2Constants.BASEBLOCKSIZE]; 082 } 083 084 /** 085 * Initializes the {@link #tt} array. 086 * 087 * This method is called when the required length of the array is known. 088 * I don't initialize it at construction time to avoid unnecessary 089 * memory allocation when compressing small files. 090 */ 091 int[] initTT(final int length) { 092 int[] ttShadow = this.tt; 093 094 // tt.length should always be >= length, but theoretically 095 // it can happen, if the compressor mixed small and large 096 // blocks. Normally only the last block will be smaller 097 // than others. 098 if (ttShadow == null || ttShadow.length < length) { 099 this.tt = ttShadow = new int[length]; 100 } 101 102 return ttShadow; 103 } 104 105 } 106 107 private static final int EOF = 0; 108 109 private static final int START_BLOCK_STATE = 1; 110 111 private static final int RAND_PART_A_STATE = 2; 112 113 private static final int RAND_PART_B_STATE = 3; 114 115 private static final int RAND_PART_C_STATE = 4; 116 117 private static final int NO_RAND_PART_A_STATE = 5; 118 private static final int NO_RAND_PART_B_STATE = 6; 119 120 private static final int NO_RAND_PART_C_STATE = 7; 121 private static boolean bsGetBit(final BitInputStream bin) throws IOException { 122 return bsR(bin, 1) != 0; 123 } 124 private static int bsGetInt(final BitInputStream bin) throws IOException { 125 return bsR(bin, 32); 126 } 127 private static char bsGetUByte(final BitInputStream bin) throws IOException { 128 return (char) bsR(bin, 8); 129 } 130 /** 131 * read bits from the input stream 132 * @param n the number of bits to read, must not exceed 32? 133 * @return the requested bits combined into an int 134 * @throws IOException 135 */ 136 private static int bsR(final BitInputStream bin, final int n) throws IOException { 137 final long thech = bin.readBits(n); 138 if (thech < 0) { 139 throw new IOException("Unexpected end of stream"); 140 } 141 return (int) thech; 142 } 143 private static void checkBounds(final int checkVal, final int limitExclusive, final String name) 144 throws IOException { 145 if (checkVal < 0) { 146 throw new IOException("Corrupted input, " + name + " value negative"); 147 } 148 if (checkVal >= limitExclusive) { 149 throw new IOException("Corrupted input, " + name + " value too big"); 150 } 151 } 152 /** 153 * Called by createHuffmanDecodingTables() exclusively. 154 */ 155 private static void hbCreateDecodeTables(final int[] limit, 156 final int[] base, final int[] perm, final char[] length, 157 final int minLen, final int maxLen, final int alphaSize) 158 throws IOException { 159 for (int i = minLen, pp = 0; i <= maxLen; i++) { 160 for (int j = 0; j < alphaSize; j++) { 161 if (length[j] == i) { 162 perm[pp++] = j; 163 } 164 } 165 } 166 167 for (int i = MAX_CODE_LEN; --i > 0;) { 168 base[i] = 0; 169 limit[i] = 0; 170 } 171 172 for (int i = 0; i < alphaSize; i++) { 173 final int l = length[i]; 174 checkBounds(l, MAX_ALPHA_SIZE, "length"); 175 base[l + 1]++; 176 } 177 178 for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) { 179 b += base[i]; 180 base[i] = b; 181 } 182 183 for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) { 184 final int nb = base[i + 1]; 185 vec += nb - b; 186 b = nb; 187 limit[i] = vec - 1; 188 vec <<= 1; 189 } 190 191 for (int i = minLen + 1; i <= maxLen; i++) { 192 base[i] = (limit[i - 1] + 1 << 1) - base[i]; 193 } 194 } 195 /** 196 * Checks if the signature matches what is expected for a bzip2 file. 197 * 198 * @param signature 199 * the bytes to check 200 * @param length 201 * the number of bytes to check 202 * @return true, if this stream is a bzip2 compressed stream, false otherwise 203 * 204 * @since 1.1 205 */ 206 public static boolean matches(final byte[] signature, final int length) { 207 return length >= 3 && signature[0] == 'B' && 208 signature[1] == 'Z' && signature[2] == 'h'; 209 } 210 211 /** 212 * Index of the last char in the block, so the block size == last + 1. 213 */ 214 private int last; 215 216 /** 217 * Index in zptr[] of original string after sorting. 218 */ 219 private int origPtr; 220 /** 221 * always: in the range 0 .. 9. The current block size is 100000 * this 222 * number. 223 */ 224 private int blockSize100k; 225 226 // Variables used by setup* methods exclusively 227 228 private boolean blockRandomised; 229 private final CRC crc = new CRC(); 230 private int nInUse; 231 private BitInputStream bin; 232 private final boolean decompressConcatenated; 233 private int currentState = START_BLOCK_STATE; 234 private int storedBlockCRC, storedCombinedCRC; 235 private int computedBlockCRC, computedCombinedCRC; 236 private int su_count; 237 238 private int su_ch2; 239 240 private int su_chPrev; 241 242 private int su_i2; 243 244 private int su_j2; 245 246 private int su_rNToGo; 247 248 private int su_rTPos; 249 250 private int su_tPos; 251 252 private char su_z; 253 254 /** 255 * All memory intensive stuff. This field is initialized by initBlock(). 256 */ 257 private BZip2CompressorInputStream.Data data; 258 259 /** 260 * Constructs a new BZip2CompressorInputStream which decompresses bytes 261 * read from the specified stream. This doesn't support decompressing 262 * concatenated .bz2 files. 263 * 264 * @param in the InputStream from which this object should be created 265 * @throws IOException 266 * if the stream content is malformed or an I/O error occurs. 267 * @throws NullPointerException 268 * if {@code in == null} 269 */ 270 public BZip2CompressorInputStream(final InputStream in) throws IOException { 271 this(in, false); 272 } 273 274 /** 275 * Constructs a new BZip2CompressorInputStream which decompresses bytes 276 * read from the specified stream. 277 * 278 * @param in the InputStream from which this object should be created 279 * @param decompressConcatenated 280 * if true, decompress until the end of the input; 281 * if false, stop after the first .bz2 stream and 282 * leave the input position to point to the next 283 * byte after the .bz2 stream 284 * 285 * @throws IOException 286 * if {@code in == null}, the stream content is malformed, or an I/O error occurs. 287 */ 288 public BZip2CompressorInputStream(final InputStream in, final boolean decompressConcatenated) throws IOException { 289 this.bin = new BitInputStream(in == System.in ? new CloseShieldFilterInputStream(in) : in, 290 ByteOrder.BIG_ENDIAN); 291 this.decompressConcatenated = decompressConcatenated; 292 293 init(true); 294 initBlock(); 295 } 296 297 @Override 298 public void close() throws IOException { 299 final BitInputStream inShadow = this.bin; 300 if (inShadow != null) { 301 try { 302 inShadow.close(); 303 } finally { 304 this.data = null; 305 this.bin = null; 306 } 307 } 308 } 309 310 private boolean complete() throws IOException { 311 this.storedCombinedCRC = bsGetInt(bin); 312 this.currentState = EOF; 313 this.data = null; 314 315 if (this.storedCombinedCRC != this.computedCombinedCRC) { 316 throw new IOException("BZip2 CRC error"); 317 } 318 319 // Look for the next .bz2 stream if decompressing 320 // concatenated files. 321 return !decompressConcatenated || !init(false); 322 } 323 324 /** 325 * Called by recvDecodingTables() exclusively. 326 */ 327 private void createHuffmanDecodingTables(final int alphaSize, 328 final int nGroups) throws IOException { 329 final Data dataShadow = this.data; 330 final char[][] len = dataShadow.temp_charArray2d; 331 final int[] minLens = dataShadow.minLens; 332 final int[][] limit = dataShadow.limit; 333 final int[][] base = dataShadow.base; 334 final int[][] perm = dataShadow.perm; 335 336 for (int t = 0; t < nGroups; t++) { 337 int minLen = 32; 338 int maxLen = 0; 339 final char[] len_t = len[t]; 340 for (int i = alphaSize; --i >= 0;) { 341 final char lent = len_t[i]; 342 if (lent > maxLen) { 343 maxLen = lent; 344 } 345 if (lent < minLen) { 346 minLen = lent; 347 } 348 } 349 hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, 350 maxLen, alphaSize); 351 minLens[t] = minLen; 352 } 353 } 354 355 private void endBlock() throws IOException { 356 this.computedBlockCRC = this.crc.getFinalCRC(); 357 358 // A bad CRC is considered a fatal error. 359 if (this.storedBlockCRC != this.computedBlockCRC) { 360 // make next blocks readable without error 361 // (repair feature, not yet documented, not tested) 362 this.computedCombinedCRC = this.storedCombinedCRC << 1 363 | this.storedCombinedCRC >>> 31; 364 this.computedCombinedCRC ^= this.storedBlockCRC; 365 366 throw new IOException("BZip2 CRC error"); 367 } 368 369 this.computedCombinedCRC = this.computedCombinedCRC << 1 370 | this.computedCombinedCRC >>> 31; 371 this.computedCombinedCRC ^= this.computedBlockCRC; 372 } 373 374 private void getAndMoveToFrontDecode() throws IOException { 375 final BitInputStream bin = this.bin; 376 this.origPtr = bsR(bin, 24); 377 recvDecodingTables(); 378 379 final Data dataShadow = this.data; 380 final byte[] ll8 = dataShadow.ll8; 381 final int[] unzftab = dataShadow.unzftab; 382 final byte[] selector = dataShadow.selector; 383 final byte[] seqToUnseq = dataShadow.seqToUnseq; 384 final char[] yy = dataShadow.getAndMoveToFrontDecode_yy; 385 final int[] minLens = dataShadow.minLens; 386 final int[][] limit = dataShadow.limit; 387 final int[][] base = dataShadow.base; 388 final int[][] perm = dataShadow.perm; 389 final int limitLast = this.blockSize100k * 100000; 390 391 /* 392 * Setting up the unzftab entries here is not strictly necessary, but it 393 * does save having to do it later in a separate pass, and so saves a 394 * block's worth of cache misses. 395 */ 396 for (int i = 256; --i >= 0;) { 397 yy[i] = (char) i; 398 unzftab[i] = 0; 399 } 400 401 int groupNo = 0; 402 int groupPos = G_SIZE - 1; 403 final int eob = this.nInUse + 1; 404 int nextSym = getAndMoveToFrontDecode0(); 405 int lastShadow = -1; 406 int zt = selector[groupNo] & 0xff; 407 checkBounds(zt, N_GROUPS, "zt"); 408 int[] base_zt = base[zt]; 409 int[] limit_zt = limit[zt]; 410 int[] perm_zt = perm[zt]; 411 int minLens_zt = minLens[zt]; 412 413 while (nextSym != eob) { 414 if (nextSym == RUNA || nextSym == RUNB) { 415 int s = -1; 416 417 for (int n = 1; true; n <<= 1) { 418 if (nextSym == RUNA) { 419 s += n; 420 } else if (nextSym == RUNB) { 421 s += n << 1; 422 } else { 423 break; 424 } 425 426 if (groupPos == 0) { 427 groupPos = G_SIZE - 1; 428 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 429 zt = selector[groupNo] & 0xff; 430 checkBounds(zt, N_GROUPS, "zt"); 431 base_zt = base[zt]; 432 limit_zt = limit[zt]; 433 perm_zt = perm[zt]; 434 minLens_zt = minLens[zt]; 435 } else { 436 groupPos--; 437 } 438 439 int zn = minLens_zt; 440 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 441 int zvec = bsR(bin, zn); 442 while(zvec > limit_zt[zn]) { 443 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 444 zvec = zvec << 1 | bsR(bin, 1); 445 } 446 final int tmp = zvec - base_zt[zn]; 447 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 448 nextSym = perm_zt[tmp]; 449 } 450 checkBounds(s, this.data.ll8.length, "s"); 451 452 final int yy0 = yy[0]; 453 checkBounds(yy0, 256, "yy"); 454 final byte ch = seqToUnseq[yy0]; 455 unzftab[ch & 0xff] += s + 1; 456 457 final int from = ++lastShadow; 458 lastShadow += s; 459 checkBounds(lastShadow, this.data.ll8.length, "lastShadow"); 460 Arrays.fill(ll8, from, lastShadow + 1, ch); 461 462 if (lastShadow >= limitLast) { 463 throw new IOException("Block overrun while expanding RLE in MTF, " 464 + lastShadow + " exceeds " + limitLast); 465 } 466 } else { 467 if (++lastShadow >= limitLast) { 468 throw new IOException("Block overrun in MTF, " 469 + lastShadow + " exceeds " + limitLast); 470 } 471 checkBounds(nextSym, 256 + 1, "nextSym"); 472 473 final char tmp = yy[nextSym - 1]; 474 checkBounds(tmp, 256, "yy"); 475 unzftab[seqToUnseq[tmp] & 0xff]++; 476 ll8[lastShadow] = seqToUnseq[tmp]; 477 478 /* 479 * This loop is hammered during decompression, hence avoid 480 * native method call overhead of System.arraycopy for very 481 * small ranges to copy. 482 */ 483 if (nextSym <= 16) { 484 for (int j = nextSym - 1; j > 0;) { 485 yy[j] = yy[--j]; 486 } 487 } else { 488 System.arraycopy(yy, 0, yy, 1, nextSym - 1); 489 } 490 491 yy[0] = tmp; 492 493 if (groupPos == 0) { 494 groupPos = G_SIZE - 1; 495 checkBounds(++groupNo, MAX_SELECTORS, "groupNo"); 496 zt = selector[groupNo] & 0xff; 497 checkBounds(zt, N_GROUPS, "zt"); 498 base_zt = base[zt]; 499 limit_zt = limit[zt]; 500 perm_zt = perm[zt]; 501 minLens_zt = minLens[zt]; 502 } else { 503 groupPos--; 504 } 505 506 int zn = minLens_zt; 507 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 508 int zvec = bsR(bin, zn); 509 while(zvec > limit_zt[zn]) { 510 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 511 zvec = zvec << 1 | bsR(bin, 1); 512 } 513 final int idx = zvec - base_zt[zn]; 514 checkBounds(idx, MAX_ALPHA_SIZE, "zvec"); 515 nextSym = perm_zt[idx]; 516 } 517 } 518 519 this.last = lastShadow; 520 } 521 522 private int getAndMoveToFrontDecode0() throws IOException { 523 final Data dataShadow = this.data; 524 final int zt = dataShadow.selector[0] & 0xff; 525 checkBounds(zt, N_GROUPS, "zt"); 526 final int[] limit_zt = dataShadow.limit[zt]; 527 int zn = dataShadow.minLens[zt]; 528 checkBounds(zn, MAX_ALPHA_SIZE, "zn"); 529 int zvec = bsR(bin, zn); 530 while (zvec > limit_zt[zn]) { 531 checkBounds(++zn, MAX_ALPHA_SIZE, "zn"); 532 zvec = zvec << 1 | bsR(bin, 1); 533 } 534 final int tmp = zvec - dataShadow.base[zt][zn]; 535 checkBounds(tmp, MAX_ALPHA_SIZE, "zvec"); 536 537 return dataShadow.perm[zt][tmp]; 538 } 539 540 /** 541 * @since 1.17 542 */ 543 @Override 544 public long getCompressedCount() { 545 return bin.getBytesRead(); 546 } 547 548 private boolean init(final boolean isFirstStream) throws IOException { 549 if (null == bin) { 550 throw new IOException("No InputStream"); 551 } 552 553 if (!isFirstStream) { 554 bin.clearBitCache(); 555 } 556 557 final int magic0 = readNextByte(this.bin); 558 if (magic0 == -1 && !isFirstStream) { 559 return false; 560 } 561 final int magic1 = readNextByte(this.bin); 562 final int magic2 = readNextByte(this.bin); 563 564 if (magic0 != 'B' || magic1 != 'Z' || magic2 != 'h') { 565 throw new IOException(isFirstStream 566 ? "Stream is not in the BZip2 format" 567 : "Garbage after a valid BZip2 stream"); 568 } 569 570 final int blockSize = readNextByte(this.bin); 571 if (blockSize < '1' || blockSize > '9') { 572 throw new IOException("BZip2 block size is invalid"); 573 } 574 575 this.blockSize100k = blockSize - '0'; 576 577 this.computedCombinedCRC = 0; 578 579 return true; 580 } 581 582 private void initBlock() throws IOException { 583 final BitInputStream bin = this.bin; 584 char magic0; 585 char magic1; 586 char magic2; 587 char magic3; 588 char magic4; 589 char magic5; 590 591 while (true) { 592 // Get the block magic bytes. 593 magic0 = bsGetUByte(bin); 594 magic1 = bsGetUByte(bin); 595 magic2 = bsGetUByte(bin); 596 magic3 = bsGetUByte(bin); 597 magic4 = bsGetUByte(bin); 598 magic5 = bsGetUByte(bin); 599 600 // If isn't end of stream magic, break out of the loop. 601 if (magic0 != 0x17 || magic1 != 0x72 || magic2 != 0x45 602 || magic3 != 0x38 || magic4 != 0x50 || magic5 != 0x90) { 603 break; 604 } 605 606 // End of stream was reached. Check the combined CRC and 607 // advance to the next .bz2 stream if decoding concatenated 608 // streams. 609 if (complete()) { 610 return; 611 } 612 } 613 614 if (magic0 != 0x31 || // '1' 615 magic1 != 0x41 || // ')' 616 magic2 != 0x59 || // 'Y' 617 magic3 != 0x26 || // '&' 618 magic4 != 0x53 || // 'S' 619 magic5 != 0x59 // 'Y' 620 ) { 621 this.currentState = EOF; 622 throw new IOException("Bad block header"); 623 } 624 this.storedBlockCRC = bsGetInt(bin); 625 this.blockRandomised = bsR(bin, 1) == 1; 626 627 /* 628 * Allocate data here instead in constructor, so we do not allocate 629 * it if the input file is empty. 630 */ 631 if (this.data == null) { 632 this.data = new Data(this.blockSize100k); 633 } 634 635 // currBlockNo++; 636 getAndMoveToFrontDecode(); 637 638 this.crc.initializeCRC(); 639 this.currentState = START_BLOCK_STATE; 640 } 641 642 private void makeMaps() { 643 final boolean[] inUse = this.data.inUse; 644 final byte[] seqToUnseq = this.data.seqToUnseq; 645 646 int nInUseShadow = 0; 647 648 for (int i = 0; i < 256; i++) { 649 if (inUse[i]) { 650 seqToUnseq[nInUseShadow++] = (byte) i; 651 } 652 } 653 654 this.nInUse = nInUseShadow; 655 } 656 657 @Override 658 public int read() throws IOException { 659 if (this.bin != null) { 660 final int r = read0(); 661 count(r < 0 ? -1 : 1); 662 return r; 663 } 664 throw new IOException("Stream closed"); 665 } 666 667 /* 668 * (non-Javadoc) 669 * 670 * @see java.io.InputStream#read(byte[], int, int) 671 */ 672 @Override 673 public int read(final byte[] dest, final int offs, final int len) 674 throws IOException { 675 if (offs < 0) { 676 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 677 } 678 if (len < 0) { 679 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 680 } 681 if (offs + len > dest.length) { 682 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" 683 + len + ") > dest.length(" + dest.length + ")."); 684 } 685 if (this.bin == null) { 686 throw new IOException("Stream closed"); 687 } 688 if (len == 0) { 689 return 0; 690 } 691 692 final int hi = offs + len; 693 int destOffs = offs; 694 int b; 695 while (destOffs < hi && (b = read0()) >= 0) { 696 dest[destOffs++] = (byte) b; 697 count(1); 698 } 699 700 return destOffs == offs ? -1 : destOffs - offs; 701 } 702 703 private int read0() throws IOException { 704 switch (currentState) { 705 case EOF: 706 return -1; 707 708 case START_BLOCK_STATE: 709 return setupBlock(); 710 711 case RAND_PART_A_STATE: 712 throw new IllegalStateException(); 713 714 case RAND_PART_B_STATE: 715 return setupRandPartB(); 716 717 case RAND_PART_C_STATE: 718 return setupRandPartC(); 719 720 case NO_RAND_PART_A_STATE: 721 throw new IllegalStateException(); 722 723 case NO_RAND_PART_B_STATE: 724 return setupNoRandPartB(); 725 726 case NO_RAND_PART_C_STATE: 727 return setupNoRandPartC(); 728 729 default: 730 throw new IllegalStateException(); 731 } 732 } 733 734 private int readNextByte(final BitInputStream in) throws IOException { 735 final long b = in.readBits(8); 736 return (int) b; 737 } 738 739 private void recvDecodingTables() throws IOException { 740 final BitInputStream bin = this.bin; 741 final Data dataShadow = this.data; 742 final boolean[] inUse = dataShadow.inUse; 743 final byte[] pos = dataShadow.recvDecodingTables_pos; 744 final byte[] selector = dataShadow.selector; 745 final byte[] selectorMtf = dataShadow.selectorMtf; 746 747 int inUse16 = 0; 748 749 /* Receive the mapping table */ 750 for (int i = 0; i < 16; i++) { 751 if (bsGetBit(bin)) { 752 inUse16 |= 1 << i; 753 } 754 } 755 756 Arrays.fill(inUse, false); 757 for (int i = 0; i < 16; i++) { 758 if ((inUse16 & 1 << i) != 0) { 759 final int i16 = i << 4; 760 for (int j = 0; j < 16; j++) { 761 if (bsGetBit(bin)) { 762 inUse[i16 + j] = true; 763 } 764 } 765 } 766 } 767 768 makeMaps(); 769 final int alphaSize = this.nInUse + 2; 770 /* Now the selectors */ 771 final int nGroups = bsR(bin, 3); 772 final int selectors = bsR(bin, 15); 773 if (selectors < 0) { 774 throw new IOException("Corrupted input, nSelectors value negative"); 775 } 776 checkBounds(alphaSize, MAX_ALPHA_SIZE + 1, "alphaSize"); 777 checkBounds(nGroups, N_GROUPS + 1, "nGroups"); 778 779 // Don't fail on nSelectors overflowing boundaries but discard the values in overflow 780 // See https://gnu.wildebeest.org/blog/mjw/2019/08/02/bzip2-and-the-cve-that-wasnt/ 781 // and https://sourceware.org/ml/bzip2-devel/2019-q3/msg00007.html 782 783 for (int i = 0; i < selectors; i++) { 784 int j = 0; 785 while (bsGetBit(bin)) { 786 j++; 787 } 788 if (i < MAX_SELECTORS) { 789 selectorMtf[i] = (byte) j; 790 } 791 } 792 final int nSelectors = Math.min(selectors, MAX_SELECTORS); 793 794 /* Undo the MTF values for the selectors. */ 795 for (int v = nGroups; --v >= 0;) { 796 pos[v] = (byte) v; 797 } 798 799 for (int i = 0; i < nSelectors; i++) { 800 int v = selectorMtf[i] & 0xff; 801 checkBounds(v, N_GROUPS, "selectorMtf"); 802 final byte tmp = pos[v]; 803 while (v > 0) { 804 // nearly all times v is zero, 4 in most other cases 805 pos[v] = pos[v - 1]; 806 v--; 807 } 808 pos[0] = tmp; 809 selector[i] = tmp; 810 } 811 812 final char[][] len = dataShadow.temp_charArray2d; 813 814 /* Now the coding tables */ 815 for (int t = 0; t < nGroups; t++) { 816 int curr = bsR(bin, 5); 817 final char[] len_t = len[t]; 818 for (int i = 0; i < alphaSize; i++) { 819 while (bsGetBit(bin)) { 820 curr += bsGetBit(bin) ? -1 : 1; 821 } 822 len_t[i] = (char) curr; 823 } 824 } 825 826 // finally create the Huffman tables 827 createHuffmanDecodingTables(alphaSize, nGroups); 828 } 829 830 private int setupBlock() throws IOException { 831 if (currentState == EOF || this.data == null) { 832 return -1; 833 } 834 835 final int[] cftab = this.data.cftab; 836 final int ttLen = this.last + 1; 837 final int[] tt = this.data.initTT(ttLen); 838 final byte[] ll8 = this.data.ll8; 839 cftab[0] = 0; 840 System.arraycopy(this.data.unzftab, 0, cftab, 1, 256); 841 842 for (int i = 1, c = cftab[0]; i <= 256; i++) { 843 c += cftab[i]; 844 cftab[i] = c; 845 } 846 847 for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) { 848 final int tmp = cftab[ll8[i] & 0xff]++; 849 checkBounds(tmp, ttLen, "tt index"); 850 tt[tmp] = i; 851 } 852 853 if (this.origPtr < 0 || this.origPtr >= tt.length) { 854 throw new IOException("Stream corrupted"); 855 } 856 857 this.su_tPos = tt[this.origPtr]; 858 this.su_count = 0; 859 this.su_i2 = 0; 860 this.su_ch2 = 256; /* not a char and not EOF */ 861 862 if (this.blockRandomised) { 863 this.su_rNToGo = 0; 864 this.su_rTPos = 0; 865 return setupRandPartA(); 866 } 867 return setupNoRandPartA(); 868 } 869 870 private int setupNoRandPartA() throws IOException { 871 if (this.su_i2 <= this.last) { 872 this.su_chPrev = this.su_ch2; 873 final int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 874 this.su_ch2 = su_ch2Shadow; 875 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 876 this.su_tPos = this.data.tt[this.su_tPos]; 877 this.su_i2++; 878 this.currentState = NO_RAND_PART_B_STATE; 879 this.crc.updateCRC(su_ch2Shadow); 880 return su_ch2Shadow; 881 } 882 this.currentState = NO_RAND_PART_A_STATE; 883 endBlock(); 884 initBlock(); 885 return setupBlock(); 886 } 887 888 private int setupNoRandPartB() throws IOException { 889 if (this.su_ch2 != this.su_chPrev) { 890 this.su_count = 1; 891 return setupNoRandPartA(); 892 } 893 if (++this.su_count >= 4) { 894 checkBounds(this.su_tPos, this.data.ll8.length, "su_tPos"); 895 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 896 this.su_tPos = this.data.tt[this.su_tPos]; 897 this.su_j2 = 0; 898 return setupNoRandPartC(); 899 } 900 return setupNoRandPartA(); 901 } 902 903 private int setupNoRandPartC() throws IOException { 904 if (this.su_j2 < this.su_z) { 905 final int su_ch2Shadow = this.su_ch2; 906 this.crc.updateCRC(su_ch2Shadow); 907 this.su_j2++; 908 this.currentState = NO_RAND_PART_C_STATE; 909 return su_ch2Shadow; 910 } 911 this.su_i2++; 912 this.su_count = 0; 913 return setupNoRandPartA(); 914 } 915 916 private int setupRandPartA() throws IOException { 917 if (this.su_i2 <= this.last) { 918 this.su_chPrev = this.su_ch2; 919 int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff; 920 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 921 this.su_tPos = this.data.tt[this.su_tPos]; 922 if (this.su_rNToGo == 0) { 923 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 924 if (++this.su_rTPos == 512) { 925 this.su_rTPos = 0; 926 } 927 } else { 928 this.su_rNToGo--; 929 } 930 this.su_ch2 = su_ch2Shadow ^= this.su_rNToGo == 1 ? 1 : 0; 931 this.su_i2++; 932 this.currentState = RAND_PART_B_STATE; 933 this.crc.updateCRC(su_ch2Shadow); 934 return su_ch2Shadow; 935 } 936 endBlock(); 937 initBlock(); 938 return setupBlock(); 939 } 940 941 private int setupRandPartB() throws IOException { 942 if (this.su_ch2 != this.su_chPrev) { 943 this.currentState = RAND_PART_A_STATE; 944 this.su_count = 1; 945 return setupRandPartA(); 946 } 947 if (++this.su_count < 4) { 948 this.currentState = RAND_PART_A_STATE; 949 return setupRandPartA(); 950 } 951 this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff); 952 checkBounds(this.su_tPos, this.data.tt.length, "su_tPos"); 953 this.su_tPos = this.data.tt[this.su_tPos]; 954 if (this.su_rNToGo == 0) { 955 this.su_rNToGo = Rand.rNums(this.su_rTPos) - 1; 956 if (++this.su_rTPos == 512) { 957 this.su_rTPos = 0; 958 } 959 } else { 960 this.su_rNToGo--; 961 } 962 this.su_j2 = 0; 963 this.currentState = RAND_PART_C_STATE; 964 if (this.su_rNToGo == 1) { 965 this.su_z ^= 1; 966 } 967 return setupRandPartC(); 968 } 969 970 private int setupRandPartC() throws IOException { 971 if (this.su_j2 < this.su_z) { 972 this.crc.updateCRC(this.su_ch2); 973 this.su_j2++; 974 return this.su_ch2; 975 } 976 this.currentState = RAND_PART_A_STATE; 977 this.su_i2++; 978 this.su_count = 0; 979 return setupRandPartA(); 980 } 981}